Repository: pbnjay/grate Branch: main Commit: 3f8e65d74a14 Files: 40 Total size: 174.9 KB Directory structure: gitextract_81jad5jf/ ├── .github/ │ └── workflows/ │ └── go.yml ├── .gitignore ├── LICENSE ├── README.md ├── cmd/ │ ├── grate2tsv/ │ │ └── main.go │ └── grater/ │ └── main.go ├── commonxl/ │ ├── cell.go │ ├── dates.go │ ├── fmt.go │ ├── fmt_test.go │ ├── formats.go │ ├── frac_test.go │ ├── numbers.go │ └── sheet.go ├── errs.go ├── go.mod ├── grate.go ├── simple/ │ ├── csv.go │ ├── simple.go │ └── tsv.go ├── xls/ │ ├── cfb/ │ │ ├── cfb.go │ │ ├── interface.go │ │ ├── simple_test.go │ │ └── slicereader.go │ ├── comp_test.go │ ├── crypto/ │ │ ├── crypto.go │ │ └── rc4.go │ ├── hyperlinks.go │ ├── records.go │ ├── sheets.go │ ├── simple_test.go │ ├── strings.go │ ├── structs.go │ └── xls.go └── xlsx/ ├── comp_test.go ├── sheets.go ├── simple_test.go ├── types.go ├── workbook.go └── xlsx.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/go.yml ================================================ name: Go on: push: branches: [ main ] pull_request: branches: [ main ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Go uses: actions/setup-go@v2 with: go-version: 1.17 - name: Build run: go build -v ./... 
- name: Test XLS run: go test -v ./xls - name: Test XLSX run: go test -v ./xlsx - name: Test CommonXL run: go test -v ./commonxl ================================================ FILE: .gitignore ================================================ cmd/grate2tsv/results testdata *.pprof *.pdf ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2021 Jeremy Jay Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # grate A Go native tabular data extraction package. Currently supports `.xls`, `.xlsx`, `.csv`, `.tsv` formats. # Why? Grate focuses on speed and stability first, and makes no attempt to parse charts, figures, or other content types that may be present embedded within the input files. It tries to perform as few allocations as possible and errs on the side of caution. 
There are certainly still some bugs and edge cases, but we have run it successfully on a set of 400k `.xls` and `.xlsx` files to catch many bugs and error conditions. Please file an issue with any feedback and additional problem files. # Usage Grate provides a simple standard interface for all supported filetypes, allowing access to both named worksheets in spreadsheets and single tables in plaintext formats. ```go package main import ( "fmt" "os" "strings" "github.com/pbnjay/grate" _ "github.com/pbnjay/grate/simple" // tsv and csv support _ "github.com/pbnjay/grate/xls" _ "github.com/pbnjay/grate/xlsx" ) func main() { wb, _ := grate.Open(os.Args[1]) // open the file sheets, _ := wb.List() // list available sheets for _, s := range sheets { // enumerate each sheet name sheet, _ := wb.Get(s) // open the sheet for sheet.Next() { // enumerate each row of data row := sheet.Strings() // get the row's content as []string fmt.Println(strings.Join(row, "\t")) } } wb.Close() } ``` # License All source code is licensed under the [MIT License](https://raw.github.com/pbnjay/grate/master/LICENSE). ================================================ FILE: cmd/grate2tsv/main.go ================================================ // Command grate2tsv is a highly parallel tabular data extraction tool. It's // probably not necessary in your situation, but is included here since it // is a good stress test of the codebase. // // Files on the command line will be parsed and extracted to the "results" // subdirectory under a hierarchical arrangement (to make our filesystems // more responsive), and a "results.txt" file will be created logging basic // information and errors for each file.
package main

import (
	"bufio"
	"crypto/md5"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"runtime/pprof"
	"strings"
	"sync"
	"time"

	"github.com/pbnjay/grate"
	_ "github.com/pbnjay/grate/simple"
	_ "github.com/pbnjay/grate/xls"
	_ "github.com/pbnjay/grate/xlsx"
)

var (
	logfile        = flag.String("l", "", "save processing logs to `filename.txt`")
	pretend        = flag.Bool("p", false, "pretend to output .tsv")
	infoFile       = flag.String("i", "results.txt", "`filename` to record stats about the process")
	removeNewlines = flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents")
	trimSpaces     = flag.Bool("w", true, "trim whitespace from cell contents")
	skipBlanks     = flag.Bool("b", true, "discard blank rows from the output")
	cpuprofile     = flag.String("cpuprofile", "", "write cpu profile to file")
	memprofile     = flag.String("memprofile", "", "write memory profile to file")

	timeFormat = "2006-01-02 15:04:05"

	// fstats is the shared stats log; writes to it are serialized by the
	// mutex handed to every runProcessor.
	fstats *os.File
	// procWG tracks the running file processors.
	procWG sync.WaitGroup
	// cleanup carries finished outputs to the goroutine that flushes and
	// closes them, keeping that I/O off the processing goroutines.
	cleanup = make(chan *output, 100)
	outpool = sync.Pool{New: func() interface{} { return &output{} }}
)

// output pairs a .tsv file with the buffered writer wrapping it.
type output struct {
	f *os.File
	b *bufio.Writer
}

func main() {
	flag.Parse()
	if *memprofile != "" {
		f, err := os.Create(*memprofile)
		if err != nil {
			log.Fatal(err)
		}
		defer func() {
			runtime.GC()
			pprof.WriteHeapProfile(f)
			f.Close()
		}()
	}
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}
	if *logfile != "" {
		fo, err := os.Create(*logfile)
		if err != nil {
			log.Fatal(err)
		}
		defer fo.Close()
		log.SetOutput(fo)
	}

	// Flush and close finished outputs in the background until the
	// cleanup channel is closed.
	done := make(chan int)
	go func() {
		for x := range cleanup {
			if err := x.b.Flush(); err != nil {
				log.Printf("flushing %s: %v", x.f.Name(), err)
			}
			if err := x.f.Close(); err != nil {
				log.Printf("closing %s: %v", x.f.Name(), err)
			}
			// drop references so pooled objects do not pin closed files
			x.f, x.b = nil, nil
			outpool.Put(x)
		}
		done <- 1
	}()

	var err error
	fstats, err = os.OpenFile(*infoFile, os.O_CREATE|os.O_RDWR, 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer fstats.Close()
	pos, err := fstats.Seek(0, io.SeekEnd)
	if err != nil {
		log.Fatal(err)
	}
	if pos == 0 {
		// brand new stats file: start with a header row
		fmt.Fprintf(fstats, "time\tfilename\tsheet\trows\tcolumns\terrors\n")
	}

	filenameChan := make(chan string)

	// fan out to 1/2 of CPU cores
	// (e.g. each file-processor can use 2 cpus)
	outMu := &sync.Mutex{}
	nparallel := runtime.NumCPU() / 2
	if nparallel < 1 {
		// On a single-core machine NumCPU()/2 is zero; with no workers the
		// unbuffered send below would block forever.
		nparallel = 1
	}
	procWG.Add(nparallel)
	for i := 0; i < nparallel; i++ {
		go runProcessor(filenameChan, outMu)
	}
	for _, fn := range flag.Args() {
		filenameChan <- fn
	}

	close(filenameChan)
	procWG.Wait()
	close(cleanup)
	<-done
}

// runProcessor consumes filenames from the channel until it is closed,
// processes each file, and appends one stats line per sheet (or one error
// line per failed file) to fstats while holding mu.
func runProcessor(from chan string, mu *sync.Mutex) {
	defer procWG.Done()
	for fn := range from {
		nowFmt := time.Now().Format(timeFormat)
		results, err := processFile(fn)
		mu.Lock()
		if err != nil {
			// returned errors are fatal
			fmt.Fprintf(fstats, "%s\t%s\t-\t-\t-\t%s\n", nowFmt, fn, err.Error())
			mu.Unlock()
			continue
		}

		for _, res := range results {
			e := "-"
			if res.Err != nil {
				e = res.Err.Error()
			}
			fmt.Fprintf(fstats, "%s\t%s\t%s\t%d\t%d\t%s\n", nowFmt, res.Filename, res.SheetName,
				res.NumRows, res.NumCols, e)
		}
		mu.Unlock()
	}
}

var (
	sanitize = regexp.MustCompile("[^a-zA-Z0-9]+")
	newlines = regexp.MustCompile("[ \n\r\t]+")
)

// stats summarizes the extraction of a single sheet.
type stats struct {
	Filename  string
	Hash      string
	SheetName string
	NumRows   int // rows written to the output
	NumCols   int // number of columns up to the last non-blank cell seen
	Err       error
}

// processFile opens fn, writes every non-empty sheet to its own .tsv file
// under results/<h[0:2]>/<h[2:4]>/ (h being the hex MD5 of the base filename
// without extension), and returns per-sheet statistics. With -p nothing is
// written to disk. A returned error means the whole file failed; per-sheet
// failures are reported through stats.Err instead.
func processFile(fn string) ([]stats, error) {
	//log.Printf("Opening file '%s' ...", fn)
	wb, err := grate.Open(fn)
	if err != nil {
		return nil, err
	}
	defer wb.Close()

	results := []stats{}

	ext := filepath.Ext(fn)
	fn2 := filepath.Base(strings.TrimSuffix(fn, ext))
	subparts := fmt.Sprintf("%x", md5.Sum([]byte(fn2)))
	hash := subparts[:8]
	subdir := filepath.Join("results", subparts[:2], subparts[2:4])
	if !*pretend {
		if err := os.MkdirAll(subdir, 0755); err != nil {
			return nil, err
		}
	}
	log.Printf("%s Processing file '%s'", hash, fn2)

	sheets, err := wb.List()
	if err != nil {
		return nil, err
	}
	for _, s := range sheets {
		ps := stats{
			Filename:  fn,
			Hash:      hash,
			SheetName: s,
		}
		log.Printf("%s Opening Sheet '%s'...", hash, s)
		sheet, err := wb.Get(s)
		if err != nil {
			ps.Err = err
			results = append(results, ps)
			continue
		}
		if sheet.IsEmpty() {
			log.Println(hash + " Empty sheet. Skipping.")
			results = append(results, ps)
			continue
		}
		s2 := sanitize.ReplaceAllString(s, "_")
		if s == fn {
			s2 = "main"
		}
		var ox *output
		var w io.Writer = ioutil.Discard
		if !*pretend {
			f, err := os.Create(filepath.Join(subdir, fn2+"."+s2+".tsv"))
			if err != nil {
				return nil, err
			}
			ox = outpool.Get().(*output)
			ox.f = f
			ox.b = bufio.NewWriter(f)
			w = ox.b
		}

		for sheet.Next() {
			row := sheet.Strings()
			nonblank := false
			for i, x := range row {
				if *removeNewlines {
					x = newlines.ReplaceAllString(x, " ")
				}
				if *trimSpaces {
					x = strings.TrimSpace(x)
				}
				// Store the cleaned value whichever option produced it, so
				// that -r still takes effect when -w is switched off.
				row[i] = x
				if x != "" {
					nonblank = true
					// i is a zero-based index; the column count is i+1
					if ps.NumCols < i+1 {
						ps.NumCols = i + 1
					}
				}
			}
			if nonblank || !*skipBlanks {
				// Write errors are sticky inside bufio.Writer and are
				// reported when the output is flushed in the cleanup
				// goroutine.
				for i, v := range row {
					if i != 0 {
						w.Write([]byte{'\t'})
					}
					io.WriteString(w, v)
				}
				w.Write([]byte{'\n'})
				ps.NumRows++
			}
		}
		results = append(results, ps)
		if ox != nil {
			cleanup <- ox
		}
	}
	return results, nil
}

================================================
FILE: cmd/grater/main.go
================================================
// Command grater extracts contents of the tabular files to stdout.
package main

import (
	"flag"
	"fmt"
	"os"
	"strings"

	"github.com/pbnjay/grate"
	_ "github.com/pbnjay/grate/simple" // tsv and csv support
	_ "github.com/pbnjay/grate/xls"
	_ "github.com/pbnjay/grate/xlsx"
)

func main() {
	verbose := flag.Bool("v", false, "debug log")
	flag.Parse()
	if flag.NArg() < 1 {
		fmt.Fprintf(os.Stderr, "USAGE: %s [file1.xls file2.xlsx file3.tsv ...]\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "  Extracts contents of the tabular files to stdout\n")
		os.Exit(1)
	}
	grate.Debug = *verbose
	for _, path := range flag.Args() {
		dumpFile(path, *verbose)
	}
}

// dumpFile prints every row of every sheet in the named file to stdout as
// tab-separated text. With verbose set, each row is preceded by a line
// listing the cell types. Errors are reported on stderr; a failing sheet is
// skipped, a failing file is abandoned.
func dumpFile(path string, verbose bool) {
	wb, err := grate.Open(path)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	defer wb.Close()

	names, err := wb.List()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	for _, name := range names {
		sheet, err := wb.Get(name)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue
		}
		for sheet.Next() {
			if verbose {
				fmt.Println(strings.Join(sheet.Types(), "\t"))
			}
			fmt.Println(strings.Join(sheet.Strings(), "\t"))
		}
	}
}

================================================
FILE: commonxl/cell.go
================================================
package commonxl

import (
	"fmt"
	"math"
	"net/url"
	"strconv"
	"time"
	"unicode/utf16"
)

// CellType annotates the type of data extracted in the cell.
type CellType uint16

// CellType annotations for various cell value types.
const (
	BlankCell CellType = iota
	IntegerCell
	FloatCell
	StringCell
	BooleanCell
	DateCell

	HyperlinkStringCell // internal type to separate URLs
	StaticCell          // placeholder, internal use only
)

// String returns a string description of the cell data type. Values outside
// the declared constants are described as "string".
func (c CellType) String() string {
	names := [...]string{
		BlankCell:           "blank",
		IntegerCell:         "integer",
		FloatCell:           "float",
		StringCell:          "string",
		BooleanCell:         "boolean",
		DateCell:            "date",
		HyperlinkStringCell: "hyperlink",
		StaticCell:          "static",
	}
	if int(c) < len(names) {
		return names[c]
	}
	return "string"
}

// Cell represents a single cell value.
type Cell []interface{} // internally, it is a slice sized 2 or 3 // [Value, CellType] or [Value, CellType, FormatNumber] // where FormatNumber is a uint16 if not 0 // Value returns the contents as a generic interface{}. func (c Cell) Value() interface{} { if len(c) == 0 { return "" } return c[0] } // SetURL adds a URL hyperlink to the cell. func (c *Cell) SetURL(link string) { (*c)[1] = HyperlinkStringCell if len(*c) == 2 { *c = append(*c, uint16(0), link) } else { // len = 3 already *c = append(*c, link) } } // URL returns the parsed URL when a cell contains a hyperlink. func (c Cell) URL() (*url.URL, bool) { if c.Type() == HyperlinkStringCell && len(c) >= 4 { u, err := url.Parse(c[3].(string)) return u, err == nil } return nil, false } // Type returns the CellType of the value. func (c Cell) Type() CellType { if len(c) < 2 { return BlankCell } return c[1].(CellType) } // FormatNo returns the NumberFormat used for display. func (c Cell) FormatNo() uint16 { if len(c) == 3 { return c[2].(uint16) } return 0 } // Clone returns the new copy of this Cell. func (c Cell) Clone() Cell { c2 := make([]interface{}, len(c)) for i, x := range c { c2[i] = x } return c2 } /////// var boolStrings = map[string]bool{ "yes": true, "true": true, "t": true, "y": true, "1": true, "on": true, "no": false, "false": false, "f": false, "n": false, "0": false, "off": false, "YES": true, "TRUE": true, "T": true, "Y": true, "1.0": true, "ON": true, "NO": false, "FALSE": false, "F": false, "N": false, "0.0": false, "OFF": false, } // NewCellWithType creates a new cell value with the given type, coercing as necessary. 
func NewCellWithType(value interface{}, t CellType, f *Formatter) Cell { c := NewCell(value) if c[1] == t { // fast path if it was already typed correctly return c } if c[1] == BooleanCell { if t == IntegerCell { if c[0].(bool) { c[0] = int64(1) } else { c[0] = int64(0) } c[1] = IntegerCell } else if t == FloatCell { if c[0].(bool) { c[0] = float64(1.0) } else { c[0] = float64(0.0) } c[1] = FloatCell } else if t == StringCell { if c[0].(bool) { c[0] = "TRUE" } else { c[0] = "FALSE" } c[1] = FloatCell } } if c[1] == FloatCell { if t == IntegerCell { c[0] = int64(c[0].(float64)) c[1] = IntegerCell } else if t == BooleanCell { c[0] = c[0].(float64) != 0.0 c[1] = BooleanCell } } if c[1] == IntegerCell { if t == FloatCell { c[0] = float64(c[0].(int64)) c[1] = FloatCell } else if t == BooleanCell { c[0] = c[0].(int64) != 0 c[1] = BooleanCell } } if c[1] == StringCell { if t == IntegerCell { x, _ := strconv.ParseInt(c[0].(string), 10, 64) c[0] = x c[1] = IntegerCell } else if t == FloatCell { x, _ := strconv.ParseFloat(c[0].(string), 64) c[0] = x c[1] = FloatCell } else if t == BooleanCell { c[0] = boolStrings[c[0].(string)] c[1] = BooleanCell } } if t == StringCell { c[0] = fmt.Sprint(c[0]) c[1] = StringCell } if t == DateCell { if c[1] == FloatCell { c[0] = f.ConvertToDate(c[0].(float64)) } else if c[1] == IntegerCell { c[0] = f.ConvertToDate(float64(c[0].(int64))) } c[1] = DateCell } return c } // NewCell creates a new cell value from any builtin type. 
func NewCell(value interface{}) Cell { c := make([]interface{}, 2) switch v := value.(type) { case bool: c[0] = v c[1] = BooleanCell case int: c[0] = int64(v) c[1] = IntegerCell case int8: c[0] = int64(v) c[1] = IntegerCell case int16: c[0] = int64(v) c[1] = IntegerCell case int32: c[0] = int64(v) c[1] = IntegerCell case int64: c[0] = int64(v) c[1] = IntegerCell case uint8: c[0] = int64(v) c[1] = IntegerCell case uint16: c[0] = int64(v) c[1] = IntegerCell case uint32: c[0] = int64(v) c[1] = IntegerCell case uint: if int64(v) > int64(math.MaxInt64) { c[0] = float64(v) c[1] = FloatCell } else { c[0] = int64(v) c[1] = IntegerCell } case uint64: if v > math.MaxInt64 { c[0] = float64(v) c[1] = FloatCell } else { c[0] = int64(v) c[1] = IntegerCell } case float32: c[0] = float64(v) c[1] = FloatCell case float64: c[0] = float64(v) c[1] = FloatCell case string: if len(v) == 0 { c[0] = nil c[1] = BlankCell } else { c[0] = v c[1] = StringCell } case []byte: if len(v) == 0 { c[0] = nil c[1] = BlankCell } else { c[0] = string(v) c[1] = StringCell } case []uint16: if len(v) == 0 { c[0] = nil c[1] = BlankCell } else { c[0] = string(utf16.Decode(v)) c[1] = StringCell } case []rune: if len(v) == 0 { c[0] = nil c[1] = BlankCell } else { c[0] = string(v) c[1] = StringCell } case time.Time: c[0] = v c[1] = DateCell case fmt.Stringer: s := v.String() if len(s) == 0 { c[0] = nil c[1] = BlankCell } else { c[0] = s c[1] = StringCell } default: panic("grate: data type not handled") } return Cell(c) } // SetFormatNumber changes the number format stored with the cell. 
func (c *Cell) SetFormatNumber(f uint16) { if f == 0 { *c = (*c)[:2] return } if len(*c) == 2 { *c = append(*c, f) } else { (*c)[2] = f } } func (c Cell) Equal(other Cell) bool { if c.Type() == FloatCell || other.Type() == FloatCell || c.Type() == IntegerCell || other.Type() == IntegerCell { v1, ok := c[0].(float64) v1x, okx := c[0].(int64) if okx { v1 = float64(v1x) ok = true } if !ok { fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v1) } v2, ok := other[0].(float64) v2x, okx := other[0].(int64) if okx { v2 = float64(v2x) ok = true } if !ok { fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v2) } return v1 == v2 } return c.Less(other) == other.Less(c) } func (c Cell) Less(other Cell) bool { if len(c) == 0 { return false } switch v1 := c[0].(type) { case nil: return false case bool: // F < T = T // F < F = F // T < T = F // T < F = F if v1 { return false } // if v2 is truthy, return true switch v2 := other[0].(type) { case nil: return false case bool: return v2 case int64: return v2 != 0 case float64: return v2 != 0.0 case string: return boolStrings[v2] } case int64: // v1 < v2 switch v2 := other[0].(type) { case nil: return false case bool: x := int64(0) if v2 { x = 1 } return v1 < x case int64: return v1 < v2 case float64: if v2 < math.MinInt64 { return false } if v2 > math.MaxInt64 { return true } return float64(v1) < v2 case string: var x int64 _, err := fmt.Sscanf(v2, "%d", &x) if err == nil { return v1 < x } return fmt.Sprint(v1) < v2 } case float64: switch v2 := other[0].(type) { case nil: return false case bool: x := float64(0.0) if v2 { x = 1.0 } return v1 < x case int64: if v1 < math.MinInt64 { return true } if v1 > math.MaxInt64 { return false } return v1 < float64(v2) case float64: return v1 < v2 case string: var x float64 _, err := fmt.Sscanf(v2, "%g", &x) if err == nil { return v1 < x } return fmt.Sprint(v1) < v2 } case string: //return v1 < fmt.Sprint(other[0]) switch v2 := other[0].(type) { case nil: return false case bool: return v2 && !boolStrings[v1] case int64: var x 
int64 _, err := fmt.Sscanf(v1, "%d", &x) if err == nil { return x < v2 } return v1 < fmt.Sprint(v2) case float64: var x float64 _, err := fmt.Sscanf(v1, "%g", &x) if err == nil { return x < v2 } return v1 < fmt.Sprint(v2) case string: return v1 < v2 } } panic("unable to compare cells (invalid internal type)") } ================================================ FILE: commonxl/dates.go ================================================ package commonxl import ( "strings" "time" ) // ConvertToDate converts a floating-point value using the // Excel date serialization conventions. func (x *Formatter) ConvertToDate(val float64) time.Time { // http://web.archive.org/web/20190808062235/http://aa.usno.navy.mil/faq/docs/JD_Formula.php v := int(val) if v < 61 { jdate := val + 0.5 if (x.flags & fMode1904) != 0 { jdate += 2416480.5 } else { jdate += 2415018.5 } JD := int(jdate) frac := jdate - float64(JD) L := JD + 68569 N := 4 * L / 146097 L = L - (146097*N+3)/4 I := 4000 * (L + 1) / 1461001 L = L - 1461*I/4 + 31 J := 80 * L / 2447 day := L - 2447*J/80 L = J / 11 month := time.Month(J + 2 - 12*L) year := 100*(N-49) + I + L t := time.Duration(float64(time.Hour*24) * frac) return time.Date(year, month, day, 0, 0, 0, 0, time.UTC).Add(t) } frac := val - float64(v) date := time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC) if (x.flags & fMode1904) == 0 { date = time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC) } t := time.Duration(float64(time.Hour*24) * frac) return date.AddDate(0, 0, v).Add(t) } func timeFmtFunc(f string) FmtFunc { return func(x *Formatter, v interface{}) string { t, ok := v.(time.Time) if !ok { fval, ok := convertToFloat64(v) if !ok { return "MUST BE time.Time OR numeric TO FORMAT CORRECTLY" } t = x.ConvertToDate(fval) } //log.Println("formatting date", t, "with", f, "=", t.Format(f)) return t.Format(f) } } // same as above but replaces "AM" and "PM" with chinese translations. 
// TODO: implement others func cnTimeFmtFunc(f string) FmtFunc { return func(x *Formatter, v interface{}) string { t, ok := v.(time.Time) if !ok { fval, ok := convertToFloat64(v) if !ok { return "MUST BE time.Time OR numeric TO FORMAT CORRECTLY" } t = x.ConvertToDate(fval) } s := t.Format(f) s = strings.Replace(s, `AM`, `上午`, 1) return strings.Replace(s, `PM`, `下午`, 1) } } ================================================ FILE: commonxl/fmt.go ================================================ package commonxl import ( "fmt" "strconv" "strings" ) // FmtFunc will format a value according to the designated style. type FmtFunc func(*Formatter, interface{}) string func staticFmtFunc(s string) FmtFunc { return func(x *Formatter, v interface{}) string { return s } } func surround(pre string, ff FmtFunc, post string) FmtFunc { return func(x *Formatter, v interface{}) string { return pre + ff(x, v) + post } } func addNegParens(ff FmtFunc) FmtFunc { return func(x *Formatter, v interface{}) string { s1 := ff(x, v) if s1[0] == '-' { return "(" + s1[1:] + ")" } return s1 } } func addCommas(ff FmtFunc) FmtFunc { return func(x *Formatter, v interface{}) string { s1 := ff(x, v) isNeg := false if s1[0] == '-' { isNeg = true s1 = s1[1:] } endIndex := strings.IndexAny(s1, ".eE") if endIndex < 0 { endIndex = len(s1) } for endIndex > 3 { endIndex -= 3 s1 = s1[:endIndex] + "," + s1[endIndex:] } if isNeg { return "-" + s1 } return s1 } } func identFunc(x *Formatter, v interface{}) string { switch x := v.(type) { case bool: if x { return "TRUE" } return "FALSE" case int64: s := strconv.FormatInt(x, 10) if len(s) <= 11 { return s } case float64: s := strconv.FormatFloat(x, 'f', -1, 64) if len(s) <= 11 || (len(s) == 12 && x < 0) { return s } s = strconv.FormatFloat(x, 'g', 6, 64) if len(s) <= 11 { return s } case string: return x case fmt.Stringer: return x.String() } return fmt.Sprint(v) } func sprintfFunc(fs string, mul int) FmtFunc { wantInt64 := strings.Contains(fs, "%d") return func(x 
*Formatter, v interface{}) string { switch val := v.(type) { case int, uint, int64, uint64, int32, uint32, uint16, int16: return fmt.Sprintf(fs, v) case float64: val *= float64(mul) if wantInt64 { v2 := int64(val) return fmt.Sprintf(fs, v2) } return fmt.Sprintf(fs, val) } return fmt.Sprint(v) } } func convertToInt64(v interface{}) (int64, bool) { x, ok := convertToFloat64(v) return int64(x), ok } func convertToFloat64(v interface{}) (float64, bool) { switch val := v.(type) { case float64: return val, true case bool: if val { return 1.0, true } return 0.0, true case int: return float64(val), true case int8: return float64(val), true case int16: return float64(val), true case int32: return float64(val), true case int64: return float64(val), true case uint: return float64(val), true case uint8: return float64(val), true case uint16: return float64(val), true case uint32: return float64(val), true case uint64: return float64(val), true case float32: return float64(val), true case string: nf, err := strconv.ParseFloat(val, 64) return nf, err == nil default: return 0.0, false } } // replaces a zero with a dash func zeroDashFunc(ff FmtFunc) FmtFunc { return func(x *Formatter, v interface{}) string { fval, ok := convertToFloat64(v) if !ok { // strings etc returned as-is return fmt.Sprint(v) } if fval == 0.0 { return "-" } return ff(x, v) } } func fracFmtFunc(n int) FmtFunc { return func(x *Formatter, v interface{}) string { f, ok := convertToFloat64(v) if !ok { return "MUST BE numeric TO FORMAT CORRECTLY" } w, n, d := DecimalToWholeFraction(f, n, n) if n == 0 { return fmt.Sprintf("%d", w) } if w == 0 { if f < 0 && n > 0 { n = -n } return fmt.Sprintf("%d/%d", n, d) } return fmt.Sprintf("%d %d/%d", w, n, d) } } // handle (up to) all four format cases: // positive;negative;zero;other func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc { stringFF := identFunc zeroFF := pos negFF := pos if len(others) > 0 { negFF = others[0] if len(others) > 1 { zeroFF = others[1] if 
len(others) > 2 { stringFF = others[2] } } } return func(x *Formatter, v interface{}) string { val, ok := convertToFloat64(v) if !ok { return stringFF(x, v) } if val == 0.0 { return zeroFF(x, v) } if val < 0.0 { return negFF(x, v) } return pos(x, v) } } // mapping of standard built-ins to Go date format funcs. var goFormatters = map[uint16]FmtFunc{ 0: identFunc, // FIXME: better "general" formatter 49: identFunc, 14: timeFmtFunc(`01-02-06`), 15: timeFmtFunc(`2-Jan-06`), 16: timeFmtFunc(`2-Jan`), 17: timeFmtFunc(`Jan-06`), 20: timeFmtFunc(`15:04`), 21: timeFmtFunc(`15:04:05`), 22: timeFmtFunc(`1/2/06 15:04`), 45: timeFmtFunc(`04:05`), 46: timeFmtFunc(`3:04:05`), 47: timeFmtFunc(`0405.9`), 27: timeFmtFunc(`2006"年"1"月"`), 28: timeFmtFunc(`1"月"2"日"`), 29: timeFmtFunc(`1"月"2"日"`), 30: timeFmtFunc(`1-2-06`), 31: timeFmtFunc(`2006"年"1"月"2"日"`), 32: timeFmtFunc(`15"时"04"分"`), 33: timeFmtFunc(`15"时"04"分"05"秒"`), 36: timeFmtFunc(`2006"年"2"月"`), 50: timeFmtFunc(`2006"年"2"月"`), 51: timeFmtFunc(`1"月"2"日"`), 52: timeFmtFunc(`2006"年"1"月"`), 53: timeFmtFunc(`1"月"2"日"`), 54: timeFmtFunc(`1"月"2"日"`), 57: timeFmtFunc(`2006"年"1"月"`), 58: timeFmtFunc(`1"月"2"日"`), 71: timeFmtFunc(`2/1/2006`), 72: timeFmtFunc(`2-Jan-06`), 73: timeFmtFunc(`2-Jan`), 74: timeFmtFunc(`Jan-06`), 75: timeFmtFunc(`15:04`), 76: timeFmtFunc(`15:04:05`), 77: timeFmtFunc(`2/1/2006 15:04`), 78: timeFmtFunc(`04:05`), 79: timeFmtFunc(`15:04:05`), 80: timeFmtFunc(`04:05.9`), 81: timeFmtFunc(`2/1/06`), 18: timeFmtFunc(`3:04 PM`), 19: timeFmtFunc(`3:04:05 PM`), 34: cnTimeFmtFunc(`PM 3"时"04"分"`), 35: cnTimeFmtFunc(`PM 3"时"04"分"05"秒"`), 55: cnTimeFmtFunc(`PM 3"时"04"分"`), 56: cnTimeFmtFunc(`PM 3"时"04"分"05"秒`), 12: fracFmtFunc(1), 13: fracFmtFunc(2), 69: fracFmtFunc(1), 70: fracFmtFunc(2), 1: sprintfFunc(`%d`, 1), 2: sprintfFunc(`%4.2f`, 1), 59: sprintfFunc(`%d`, 1), 60: sprintfFunc(`%4.2f`, 1), 9: sprintfFunc(`%d%%`, 100), 10: sprintfFunc(`%4.2f%%`, 100), 67: sprintfFunc(`%d%%`, 100), 68: sprintfFunc(`%4.2f%%`, 100), 3: 
addCommas(sprintfFunc("%d", 1)), 61: addCommas(sprintfFunc("%d", 1)), 37: addNegParens(addCommas(sprintfFunc("%d", 1))), 38: addNegParens(addCommas(sprintfFunc("%d", 1))), 4: addCommas(sprintfFunc("%4.2f", 1)), 62: addCommas(sprintfFunc("%4.2f", 1)), 39: addNegParens(addCommas(sprintfFunc("%4.2f", 1))), 40: addNegParens(addCommas(sprintfFunc("%4.2f", 1))), 11: sprintfFunc(`%4.2E`, 1), 48: sprintfFunc(`%3.1E`, 1), 41: zeroDashFunc(addCommas(sprintfFunc("%d", 1))), 43: zeroDashFunc(addCommas(sprintfFunc("%4.2f", 1))), 42: switchFmtFunc( surround("$", addCommas(sprintfFunc("%d", 1)), ""), surround("$(", addCommas(sprintfFunc("%d", 1)), ")"), staticFmtFunc("$-")), 44: switchFmtFunc( surround("$", addCommas(sprintfFunc("%4.2f", 1)), ""), surround("$(", addCommas(sprintfFunc("%4.2f", 1)), ")"), staticFmtFunc("$-")), } ================================================ FILE: commonxl/fmt_test.go ================================================ package commonxl import ( "log" "testing" "time" ) type testcaseNums struct { v interface{} s string } var commas = []testcaseNums{ {10, "10"}, {float64(10), "10"}, {float64(10) + 0.12345, "10.12345"}, {-10, "-10"}, {float64(-10), "-10"}, {float64(-10) + 0.12345, "-9.87655"}, {uint16(10), "10"}, {100, "100"}, {float64(100), "100"}, {float64(100) + 0.12345, "100.12345"}, {-100, "-100"}, {float64(-100), "-100"}, {float64(-100) + 0.12345, "-99.87655"}, {uint16(100), "100"}, {1000, "1,000"}, {float64(1000), "1,000"}, {float64(1000) + 0.12345, "1,000.12345"}, {-1000, "-1,000"}, {float64(-1000), "-1,000"}, {float64(-1000) + 0.12345, "-999.87655"}, {uint16(1000), "1,000"}, {10000, "10,000"}, {float64(10000), "10,000"}, {float64(10000) + 0.12345, "10,000.12345"}, {-10000, "-10,000"}, {float64(-10000), "-10,000"}, {float64(-10000) + 0.12345, "-9,999.87655"}, {uint16(10000), "10,000"}, {100000, "100,000"}, {float64(100000), "100,000"}, {float64(100000) + 0.12345, "100,000.12345"}, {-100000, "-100,000"}, {float64(-100000), "-100,000"}, 
{float64(-100000) + 0.12345, "-99,999.87655"}, {uint64(100000), "100,000"}, {1000000, "1,000,000"}, {float64(1000000), "1e+06"}, {float64(1000000) + 0.12345, "1.00000012345e+06"}, {-1000000, "-1,000,000"}, {float64(-1000000), "-1e+06"}, {float64(-1000000) + 0.12345, "-999,999.87655"}, {uint64(1000000), "1,000,000"}, {10000000, "10,000,000"}, {float64(10000000), "1e+07"}, {float64(10000000) + 0.12345, "1.000000012345e+07"}, {-10000000, "-10,000,000"}, {float64(-10000000), "-1e+07"}, {float64(-10000000) + 0.12345, "-9.99999987655e+06"}, {uint64(10000000), "10,000,000"}, {100000000, "100,000,000"}, {float64(100000000), "1e+08"}, {float64(100000000) + 0.12345, "1.0000000012345e+08"}, {-100000000, "-100,000,000"}, {float64(-100000000), "-1e+08"}, {float64(-100000000) + 0.12345, "-9.999999987655e+07"}, {uint64(100000000), "100,000,000"}, } func TestCommas(t *testing.T) { cf := addCommas(identFunc) for _, c := range commas { fs := cf(nil, c.v) if c.s != fs { t.Fatalf("commas failed: get '%s' but expected '%s' for %T(%v)", fs, c.s, c.v, c.v) } } } func TestDateFormats(t *testing.T) { var testDates = []time.Time{ time.Date(1901, 7, 11, 1, 5, 0, 0, time.UTC), time.Date(1905, 7, 11, 4, 10, 0, 0, time.UTC), time.Date(1904, 7, 11, 8, 15, 0, 0, time.UTC), time.Date(1993, 7, 11, 12, 20, 0, 0, time.UTC), time.Date(1983, 7, 11, 16, 30, 0, 0, time.UTC), time.Date(1983, 7, 11, 20, 45, 0, 0, time.UTC), time.Date(2000, 12, 31, 23, 59, 0, 0, time.UTC), time.Date(2002, 12, 31, 23, 59, 0, 0, time.UTC), time.Date(2012, 3, 10, 9, 30, 0, 0, time.UTC), time.Date(2014, 3, 27, 9, 37, 0, 0, time.UTC), } fx := &Formatter{} for _, t := range testDates { for fid, ctype := range builtInFormatTypes { if ctype != DateCell { continue } ff, _ := goFormatters[fid] // mainly testing these don't crash... 
log.Println(ff(fx, t)) } } } func TestBoolFormats(t *testing.T) { ff, _ := makeFormatter(`"yes";"yes";"no"`) if "no" != ff(nil, false) { t.Fatal(`false should be "no"`) } if "no" != ff(nil, 0) { t.Fatal(`0 should be "no"`) } if "no" != ff(nil, 0.0) { t.Fatal(`0.0 should be "no"`) } ///// if "yes" != ff(nil, true) { t.Fatal(`true should be "yes"`) } if "yes" != ff(nil, 99) { t.Fatal(`99 should be "yes"`) } if "yes" != ff(nil, -4) { t.Fatal(`-4 should be "yes"`) } if "yes" != ff(nil, 4.0) { t.Fatal(`4.0 should be "yes"`) } if "yes" != ff(nil, -99.0) { t.Fatal(`-99.0 should be "yes"`) } } ================================================ FILE: commonxl/formats.go ================================================ package commonxl import ( "errors" "fmt" "regexp" "strings" ) // Formatter contains formatting methods common to Excel spreadsheets. type Formatter struct { flags uint64 customCodes map[uint16]FmtFunc customCodeTypes map[uint16]CellType } const ( fMode1904 uint64 = 1 ) // Mode1904 indicates that dates start on Jan 1, 1904 // this setting was used in early MacOS Excel applications. func (x *Formatter) Mode1904(enabled bool) { if enabled { x.flags |= fMode1904 } else { x.flags = x.flags &^ fMode1904 } } // Add a custom number format to the formatter. 
func (x *Formatter) Add(fmtID uint16, formatCode string) error { if x.customCodes == nil { x.customCodes = make(map[uint16]FmtFunc) x.customCodeTypes = make(map[uint16]CellType) } if strings.ToLower(formatCode) == "general" { x.customCodes[fmtID] = goFormatters[0] return nil } _, ok := goFormatters[fmtID] if ok { return errors.New("grate/commonxl: cannot replace default number formats") } _, ok2 := x.customCodes[fmtID] if ok2 { return errors.New("grate/commonxl: cannot replace existing number formats") } x.customCodes[fmtID], x.customCodeTypes[fmtID] = makeFormatter(formatCode) return nil } func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) { if ct, ok := builtInFormatTypes[fmtID]; ok { return ct, true } if x.customCodeTypes != nil { ct, ok := x.customCodeTypes[fmtID] return ct, ok } return 0, false } var ( minsMatch = regexp.MustCompile("h.*m.*s") nonEsc = regexp.MustCompile(`([^"]|^)"`) squash = regexp.MustCompile(`[*_].`) fixEsc = regexp.MustCompile(`\\(.)`) formatMatchBrackets = regexp.MustCompile(`\[[^\]]*\]`) formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`) ) func makeFormatter(s string) (FmtFunc, CellType) { //log.Printf("makeFormatter('%s')", s) // remove any coloring marks s = formatMatchBrackets.ReplaceAllString(s, "") if strings.Contains(s, ";") { parts := strings.Split(s, ";") posFF, ctypePos := makeFormatter(parts[0]) rem := make([]FmtFunc, len(parts)-1) for i, ps := range parts[1:] { rem[i], _ = makeFormatter(ps) } return switchFmtFunc(posFF, rem...), ctypePos } // escaped characters, and quoted text s2 := fixEsc.ReplaceAllString(s, "") s2 = formatMatchTextLiteral.ReplaceAllString(s, "") if strings.ContainsAny(s2, "ymdhs") { // it's a date/time format if loc := minsMatch.FindStringIndex(s); loc != nil { // m or mm in loc[0]:loc[1] is a minute format inner := s[loc[0]:loc[1]] inner = strings.Replace(inner, "mm", "04", 1) inner = strings.Replace(inner, "m", "4", 1) s = s[:loc[0]] + inner + s[loc[1]:] } dfreps := [][]string{ {"hh", 
"15"}, {"h", "15"}, {"ss", "05"}, {"s", "5"}, {"mmmmm", "Jan"}, // super ambiguous, replace with 3-letter month {"mmmm", "January"}, {"mmm", "Jan"}, {"mm", "01"}, {"m", "1"}, {"dddd", "Monday"}, {"ddd", "Mon"}, {"dd", "02"}, {"d", "2"}, {"yyyy", "2006"}, {"yy", "06"}, } if strings.Contains(s, "AM") || strings.Contains(s, "PM") { dfreps[0][1] = "03" dfreps[1][1] = "3" } for _, dfr := range dfreps { s = strings.Replace(s, dfr[0], dfr[1], 1) } s = nonEsc.ReplaceAllString(s, `$1`) s = squash.ReplaceAllString(s, ``) s = fixEsc.ReplaceAllString(s, `$1`) //log.Printf(" made time formatter '%s'", s) return timeFmtFunc(s), DateCell } var ff FmtFunc var ctype CellType if strings.ContainsAny(s, ".Ee") { verb := "f" if strings.ContainsAny(s, "Ee") { verb = "E" } s = regexp.MustCompile("[eE]+[+-]0+").ReplaceAllString(s, "") s2 := strings.ReplaceAll(s, ",", "") i1 := strings.IndexAny(s2, "0") i2 := strings.IndexByte(s2, '.') i3 := strings.LastIndexAny(s2, "0.") mul := 1 if strings.Contains(s2, "%") { mul = 100 } sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb) //log.Printf(" made float formatter '%s'", sf) ff = sprintfFunc(sf, mul) ctype = FloatCell } else { s2 := strings.ReplaceAll(s, ",", "") i1 := strings.IndexAny(s2, "0") i2 := strings.LastIndexAny(s2, "0.") mul := 1 if strings.Contains(s2, "%") { mul = 100 } sf := fmt.Sprintf("%%%dd", i2-i1) if (i2 - i1) == 0 { sf = "%d" } //log.Printf(" made int formatter '%s'", sf) ff = sprintfFunc(sf, mul) ctype = IntegerCell } if strings.Contains(s, ",") { ff = addCommas(ff) //log.Printf(" added commas") } surReg := regexp.MustCompile(`[0#?,.]+`) prepost := surReg.Split(s, 2) if len(prepost) > 0 && len(prepost[0]) > 0 { prepost[0] = nonEsc.ReplaceAllString(prepost[0], `$1`) prepost[0] = squash.ReplaceAllString(prepost[0], ``) prepost[0] = fixEsc.ReplaceAllString(prepost[0], `$1`) } if len(prepost) == 1 { if prepost[0] == "@" { return identFunc, StringCell } //log.Printf(" added static ('%s')", prepost[0]) return 
staticFmtFunc(prepost[0]), StringCell } if len(prepost[0]) > 0 || len(prepost[1]) > 0 { prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`) prepost[1] = squash.ReplaceAllString(prepost[1], ``) prepost[1] = fixEsc.ReplaceAllString(prepost[1], `$1`) ff = surround(prepost[0], ff, prepost[1]) //log.Printf(" added surround ('%s' ... '%s')", prepost[0], prepost[1]) } return ff, ctype } // Get the number format func to use for formatting values, // it returns false when fmtID is unknown. func (x *Formatter) Get(fmtID uint16) (FmtFunc, bool) { ff, ok := goFormatters[fmtID] if !ok { fs, ok2 := x.customCodes[fmtID] if ok2 { return fs, true } ff = identFunc } return ff, ok } // Apply the specified number format to the value. // Returns false when fmtID is unknown. func (x *Formatter) Apply(fmtID uint16, val interface{}) (string, bool) { ff, ok := goFormatters[fmtID] if !ok { fs, ok2 := x.customCodes[fmtID] if ok2 { return fs(x, val), true } } return ff(x, val), ok } // builtInFormats are all the built-in number formats for XLS/XLSX. 
var builtInFormats = map[uint16]string{
	0:  `General`,
	1:  `0`,
	2:  `0.00`,
	3:  `#,##0`,
	4:  `#,##0.00`,
	9:  `0%`,
	10: `0.00%`,
	11: `0.00E+00`,
	12: `# ?/?`,
	13: `# ??/??`,
	14: `mm-dd-yy`,
	15: `d-mmm-yy`,
	16: `d-mmm`,
	17: `mmm-yy`,
	18: `h:mm AM/PM`,
	19: `h:mm:ss AM/PM`,
	20: `h:mm`,
	21: `h:mm:ss`,
	22: `m/d/yy h:mm`,
	37: `#,##0 ;(#,##0)`,
	38: `#,##0 ;[Red](#,##0)`,
	39: `#,##0.00;(#,##0.00)`,
	40: `#,##0.00;[Red](#,##0.00)`,
	41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`,
	42: `_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_)`,
	43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`,
	44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`,
	45: `mm:ss`,
	46: `[h]:mm:ss`,
	47: `mmss.0`,
	48: `##0.0E+0`,
	49: `@`,

	// zh-cn format codes
	27: `yyyy"年"m"月"`,
	28: `m"月"d"日"`,
	29: `m"月"d"日"`,
	30: `m-d-yy`,
	31: `yyyy"年"m"月"d"日"`,
	32: `h"时"mm"分"`,
	33: `h"时"mm"分"ss"秒"`,
	34: `上午/下午 h"时"mm"分"`,
	35: `上午/下午 h"时"mm"分"ss"秒"`,
	36: `yyyy"年"m"月"`,
	50: `yyyy"年"m"月"`,
	51: `m"月"d"日"`,
	52: `yyyy"年"m"月"`,
	53: `m"月"d"日"`,
	54: `m"月"d"日"`,
	55: `上午/下午 h"时"mm"分"`,
	// 56 is the same code as 35; the closing quote after 秒 is required,
	// otherwise the text literal is left unterminated.
	56: `上午/下午 h"时"mm"分"ss"秒"`,
	57: `yyyy"年"m"月"`,
	58: `m"月"d"日"`,

	// th-th format codes (in the spec these have a "t" prefix?)
	59: `0`,
	60: `0.00`,
	61: `#,##0`,
	62: `#,##0.00`,
	67: `0%`,
	68: `0.00%`,
	69: `# ?/?`,
	70: `# ??/??`,

	// th format code, but translated to aid the parser
	71: `d/m/yyyy`,      // `ว/ด/ปปปป`,
	72: `d-mmm-yy`,      // `ว-ดดด-ปป`,
	73: `d-mmm`,         // `ว-ดดด`,
	74: `mmm-yy`,        // `ดดด-ปป`,
	75: `h:mm`,          // `ช:นน`,
	76: `h:mm:ss`,       // `ช:นน:ทท`,
	77: `d/m/yyyy h:mm`, // `ว/ด/ปปปป ช:นน`,
	78: `mm:ss`,         // `นน:ทท`,
	79: `[h]:mm:ss`,     // `[ช]:นน:ทท`,
	80: `mm:ss.0`,       // `นน:ทท.0`,
	81: `d/m/bb`,        // `d/m/bb`,
}

// builtInFormatTypes are the underlying datatypes for built-in number formats in XLS/XLSX.
var builtInFormatTypes = map[uint16]CellType{
	// 0 has no defined type
	1:  IntegerCell,
	2:  FloatCell,
	3:  IntegerCell,
	4:  FloatCell,
	9:  FloatCell,
	10: FloatCell,
	11: FloatCell,
	12: FloatCell,
	13: FloatCell,
	14: DateCell,
	15: DateCell,
	16: DateCell,
	17: DateCell,
	18: DateCell,
	19: DateCell,
	20: DateCell,
	21: DateCell,
	22: DateCell,
	37: IntegerCell,
	38: IntegerCell,
	39: FloatCell,
	40: FloatCell,
	41: IntegerCell,
	42: IntegerCell,
	43: FloatCell,
	44: FloatCell,
	45: DateCell, // Durations?
	46: DateCell,
	47: DateCell,
	48: FloatCell,
	49: StringCell,
	27: DateCell,
	28: DateCell,
	29: DateCell,
	30: DateCell,
	31: DateCell,
	32: DateCell,
	33: DateCell,
	34: DateCell,
	35: DateCell,
	36: DateCell,
	50: DateCell,
	51: DateCell,
	52: DateCell,
	53: DateCell,
	54: DateCell,
	55: DateCell,
	56: DateCell,
	57: DateCell,
	58: DateCell,
	59: IntegerCell,
	60: FloatCell,
	61: IntegerCell,
	62: FloatCell,
	67: FloatCell,
	68: FloatCell,
	69: FloatCell,
	70: FloatCell,
	71: DateCell,
	72: DateCell,
	73: DateCell,
	74: DateCell,
	75: DateCell,
	76: DateCell,
	77: DateCell,
	78: DateCell,
	79: DateCell,
	80: DateCell,
	81: DateCell,
}

================================================
FILE: commonxl/frac_test.go
================================================
package commonxl

import (
	"math"
	"testing"
)

// testcaseFrac is one fraction-formatting case: value v is expected to
// render as s when formatted by fracFmtFunc(n).
type testcaseFrac struct {
	v float64
	s string
	n int
}

var fracs = []testcaseFrac{
	{0, "0", 1},
	{0.5, "1/2", 1},
	{-0.5, "-1/2", 1},
	{0.125, "1/8", 1},
	{10, "10", 1},
	{-10, "-10", 1},
	{10.5, "10 1/2", 1},
	{-10.5, "-10 1/2", 1},
	{10.25, "10 1/4", 1},
	{10.75, "10 3/4", 1},
	{10.667, "10 2/3", 1},
	{-10.25, "-10 1/4", 1},
	{-10.75, "-10 3/4", 1},
	{-10.667, "-10 2/3", 1},
	{3.14159, "3 1/7", 1},
	{3.14159, "3 1/7", 2},
	{3.14159, "3 16/113", 3},
	{3.14159, "3 431/3044", 4},
	{3.14159, "3 3432/24239", 5},
	{3.14159, "3 14159/100000", 6},
	{math.Pi, "3 1/7", 1},
	{math.Pi, "3 1/7", 2},
	{math.Pi, "3 16/113", 3}, // err = 2.6e-7
	{math.Pi, "3 16/113", 4}, // better because 431/3044 err = 2.6e-6
	{math.Pi, "3 14093/99532", 5},
	{math.Pi, "3 14093/99532", 6},
	{-math.Pi, "-3 1/7", 1},
	{-math.Pi, "-3 1/7", 2},
	{-math.Pi, "-3 16/113", 3}, // err = 2.6e-7
	{-math.Pi, "-3 16/113", 4}, // better because 431/3044 err = 2.6e-6
	{-math.Pi, "-3 14093/99532", 5},
	{-math.Pi, "-3 14093/99532", 6},

	// TODO: fixed denominator fractions (e.g. "??/8" )
	// TODO: string interpolations (e.g. '0 "pounds and " ??/100 "pence"')
	// examples: https://bettersolutions.com/excel/formatting/number-tab-fractions.htm
}

// TestFractions formats every entry of fracs with fracFmtFunc(c.n) and
// stops at the first rendering that differs from the expected string.
func TestFractions(t *testing.T) {
	for _, c := range fracs {
		ff := fracFmtFunc(c.n)
		fs := ff(nil, c.v)
		if c.s != fs {
			t.Fatalf("fractions failed: got: '%s' expected: '%s' for %T(%v)", fs, c.s, c.v, c.v)
		}
	}
}

================================================
FILE: commonxl/numbers.go
================================================
package commonxl

import (
	"math"
)

// DecimalToWholeFraction converts a floating point value into a whole
// number and fraction approximation with at most nn digits in the numerator
// and nd digits in the denominator.
//
// The fractional part is approximated by magnitude, so num and den are
// never negative here; only whole carries the sign. For values strictly
// between -1 and 0 whole is 0, so the sign cannot be recovered from the
// results alone.
func DecimalToWholeFraction(val float64, nn, nd int) (whole, num, den int) {
	wholeF, part := math.Modf(val)
	if part == 0.0 {
		// exact integer: report it as whole with a 0/1 fraction
		return int(wholeF), 0, 1
	}
	if part < 0.0 {
		// Modf gives the fractional part the sign of val; use its magnitude
		part = -part
	}
	whole = int(wholeF)
	num, den = DecimalToFraction(part, nn, nd)
	return
}

// DecimalToFraction converts a floating point value into a fraction
// approximation with at most nn digits in the numerator and nd
// digits in the denominator.
func DecimalToFraction(val float64, nn, nd int) (num, den int) { // http://web.archive.org/web/20111027100847/http://homepage.smc.edu/kennedy_john/DEC2FRAC.PDF sign := 1 z := val if val < 0 { sign = -1 z = -val } if nn == 0 { nn = 2 } if nd == 0 { nd = 2 } maxn := math.Pow(10.0, float64(nn)) // numerator with nn digits maxd := math.Pow(10.0, float64(nd)) // denominator with nd digits _, fracPart := math.Modf(val) if fracPart == 0.0 { return int(z) * sign, 1 } if fracPart < 1e-9 { return sign, int(1e9) } if fracPart > 1e9 { return int(1e9) * sign, 1 } diff := 1.0 denom := 1.0 numer := 0.0 var lastDenom, lastNumer float64 for diff > 1e-10 && z != math.Floor(z) { z = 1 / (z - math.Floor(z)) tmp := denom denom = (denom * math.Floor(z)) + lastDenom lastDenom = tmp lastNumer = numer numer = math.Round(val * denom) if numer >= maxn || denom >= maxd { return sign * int(lastNumer), int(lastDenom) } diff = val - (numer / denom) if diff < 0.0 { diff = -diff } } return sign * int(numer), int(denom) } ================================================ FILE: commonxl/sheet.go ================================================ package commonxl import ( "fmt" "log" "time" "github.com/pbnjay/grate" ) // Sheet holds raw and rendered values for a spreadsheet. type Sheet struct { Formatter *Formatter NumRows int NumCols int Rows [][]Cell CurRow int } // Resize the sheet for the number of rows and cols given. // Newly added cells default to blank. func (s *Sheet) Resize(rows, cols int) { for i := range s.Rows { if i > rows { break } n := cols - len(s.Rows[i]) if n <= 0 { continue } s.Rows[i] = append(s.Rows[i], make([]Cell, n)...) } if rows <= 0 { rows = 1 } if cols <= 0 { cols = 1 } s.CurRow = 0 s.NumRows = rows s.NumCols = cols for rows >= len(s.Rows) { s.Rows = append(s.Rows, make([]Cell, cols)) } } // Put the value at the cell location given. 
func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) { //log.Println(row, col, value, fmtNum) if row >= s.NumRows || col >= s.NumCols { if grate.Debug { log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d", row, s.NumRows, col, s.NumCols) } // per the spec, this is an invalid Excel file // but we'll resize in place instead of crashing out if row >= s.NumRows { s.NumRows = row + 1 } if col >= s.NumCols { s.NumCols = col + 1 } s.Resize(s.NumRows, s.NumCols) } if spec, ok := value.(string); ok { if spec == grate.EndRowMerged || spec == grate.EndColumnMerged || spec == grate.ContinueRowMerged || spec == grate.ContinueColumnMerged { s.Rows[row][col] = NewCell(value) s.Rows[row][col][1] = StaticCell return } } ct, ok := s.Formatter.getCellType(fmtNum) if !ok || fmtNum == 0 { s.Rows[row][col] = NewCell(value) } else { s.Rows[row][col] = NewCellWithType(value, ct, s.Formatter) } s.Rows[row][col].SetFormatNumber(fmtNum) } // Set changes the value in an existing cell location. // NB Currently only used for populating string results for formulas. func (s *Sheet) Set(row, col int, value interface{}) { if row > s.NumRows || col > s.NumCols { log.Println("grate: cell out of bounds") return } s.Rows[row][col][0] = value s.Rows[row][col][1] = StringCell } // SetURL adds a hyperlink to an existing cell location. func (s *Sheet) SetURL(row, col int, link string) { if row > s.NumRows || col > s.NumCols { log.Println("grate: cell out of bounds") return } s.Rows[row][col].SetURL(link) } // Next advances to the next record of content. // It MUST be called prior to any Scan(). func (s *Sheet) Next() bool { if (s.CurRow + 1) > len(s.Rows) { return false } s.CurRow++ return true } // Raw extracts the raw Cell interfaces underlying the current row. func (s *Sheet) Raw() []Cell { rr := make([]Cell, s.NumCols) for i, cell := range s.Rows[s.CurRow-1] { rr[i] = cell.Clone() } return rr } // Strings extracts values from the current record into a list of strings. 
func (s *Sheet) Strings() []string { res := make([]string, s.NumCols) for i, cell := range s.Rows[s.CurRow-1] { if cell.Type() == BlankCell { res[i] = "" continue } if cell.Type() == StaticCell { res[i] = cell.Value().(string) continue } val := cell.Value() fs, ok := s.Formatter.Apply(cell.FormatNo(), val) if !ok { fs = fmt.Sprint(val) } res[i] = fs } return res } // Types extracts the data types from the current record into a list. // options: "boolean", "integer", "float", "string", "date", // and special cases: "blank", "hyperlink" which are string types func (s *Sheet) Types() []string { res := make([]string, s.NumCols) for i, cell := range s.Rows[s.CurRow-1] { res[i] = cell.Type().String() } return res } // Formats extracts the format code for the current record into a list. func (s *Sheet) Formats() []string { ok := true res := make([]string, s.NumCols) for i, cell := range s.Rows[s.CurRow-1] { res[i], ok = builtInFormats[cell.FormatNo()] if !ok { res[i] = fmt.Sprint(cell.FormatNo()) } } return res } // Scan extracts values from the current record into the provided arguments // Arguments must be pointers to one of 5 supported types: // bool, int64, float64, string, or time.Time // If invalid, returns ErrInvalidScanType func (s *Sheet) Scan(args ...interface{}) error { row := s.Rows[s.CurRow-1] for i, a := range args { val := row[i].Value() switch v := a.(type) { case bool, int64, float64, string, time.Time: return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i) case *bool: if x, ok := val.(bool); ok { *v = x } else { return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val) } case *int64: if x, ok := val.(int64); ok { *v = x } else { return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val) } case *float64: if x, ok := val.(float64); ok { *v = x } else { return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val) } case *string: if x, ok := val.(string); ok { *v = x } else { return 
fmt.Errorf("scan destination %d expected *%T, not *string", i, val) } case *time.Time: if x, ok := val.(time.Time); ok { *v = x } else { return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val) } default: return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a) } } return nil } // IsEmpty returns true if there are no data values. func (s *Sheet) IsEmpty() bool { return (s.NumCols <= 1 && s.NumRows <= 1) } // Err returns the last error that occured. func (s *Sheet) Err() error { return nil } ================================================ FILE: errs.go ================================================ package grate import "errors" var ( // configure at build time by adding go build arguments: // -ldflags="-X github.com/pbnjay/grate.loglevel=debug" loglevel string = "warn" // Debug should be set to true to expose detailed logging. Debug bool = (loglevel == "debug") ) // ErrInvalidScanType is returned by Scan for invalid arguments. var ErrInvalidScanType = errors.New("grate: Scan only supports *bool, *int, *float64, *string, *time.Time arguments") // ErrNotInFormat is used to auto-detect file types using the defined OpenFunc // It is returned by OpenFunc when the code does not detect correct file formats. var ErrNotInFormat = errors.New("grate: file is not in this format") // ErrUnknownFormat is used when grate does not know how to open a file format. var ErrUnknownFormat = errors.New("grate: file format is not known/supported") type errx struct { errs []error } func (e errx) Error() string { return e.errs[0].Error() } func (e errx) Unwrap() error { if len(e.errs) > 1 { return e.errs[1] } return nil } // WrapErr wraps a set of errors. 
func WrapErr(e ...error) error { if len(e) == 1 { return e[0] } return errx{errs: e} } ================================================ FILE: go.mod ================================================ module github.com/pbnjay/grate go 1.16 ================================================ FILE: grate.go ================================================ // Package grate opens tabular data files (such as spreadsheets and delimited plaintext files) // and allows programmatic access to the data contents in a consistent interface. package grate import ( "errors" "log" "sort" ) // Source represents a set of data collections. type Source interface { // List the individual data tables within this source. List() ([]string, error) // Get a Collection from the source by name. Get(name string) (Collection, error) // Close the source and discard memory. Close() error } // Collection represents an iterable collection of records. type Collection interface { // Next advances to the next record of content. // It MUST be called prior to any Scan(). Next() bool // Strings extracts values from the current record into a list of strings. Strings() []string // Types extracts the data types from the current record into a list. // options: "boolean", "integer", "float", "string", "date", // and special cases: "blank", "hyperlink" which are string types Types() []string // Formats extracts the format codes for the current record into a list. Formats() []string // Scan extracts values from the current record into the provided arguments // Arguments must be pointers to one of 5 supported types: // bool, int64, float64, string, or time.Time // If invalid, returns ErrInvalidScanType Scan(args ...interface{}) error // IsEmpty returns true if there are no data values. IsEmpty() bool // Err returns the last error that occured. Err() error } // OpenFunc defines a Source's instantiation function. // It should return ErrNotInFormat immediately if filename is not of the correct file type. 
type OpenFunc func(filename string) (Source, error) // Open a tabular data file and return a Source for accessing it's contents. func Open(filename string) (Source, error) { for _, o := range srcTable { src, err := o.op(filename) if err == nil { return src, nil } if !errors.Is(err, ErrNotInFormat) { return nil, err } if Debug { log.Println(" ", filename, "is not in", o.name, "format") } } return nil, ErrUnknownFormat } type srcOpenTab struct { name string pri int op OpenFunc } var srcTable = make([]*srcOpenTab, 0, 20) // Register the named source as a grate datasource implementation. func Register(name string, priority int, opener OpenFunc) error { if Debug { log.Println("Registering the", name, "format at priority", priority) } srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener}) sort.Slice(srcTable, func(i, j int) bool { return srcTable[i].pri < srcTable[j].pri }) return nil } const ( // ContinueColumnMerged marks a continuation column within a merged cell. ContinueColumnMerged = "→" // EndColumnMerged marks the last column of a merged cell. EndColumnMerged = "⇥" // ContinueRowMerged marks a continuation row within a merged cell. ContinueRowMerged = "↓" // EndRowMerged marks the last row of a merged cell. EndRowMerged = "⤓" ) ================================================ FILE: simple/csv.go ================================================ package simple import ( "encoding/csv" "os" "github.com/pbnjay/grate" ) var _ = grate.Register("csv", 15, OpenCSV) // OpenCSV defines a Source's instantiation function. // It should return ErrNotInFormat immediately if filename is not of the correct file type. 
func OpenCSV(filename string) (grate.Source, error) { f, err := os.Open(filename) if err != nil { return nil, err } defer f.Close() t := &simpleFile{ filename: filename, iterRow: -1, } s := csv.NewReader(f) s.FieldsPerRecord = -1 total := 0 ncols := make(map[int]int) rec, err := s.Read() for ; err == nil; rec, err = s.Read() { ncols[len(rec)]++ total++ t.rows = append(t.rows, rec) } if err != nil { switch perr := err.(type) { case *csv.ParseError: return nil, grate.WrapErr(perr, grate.ErrNotInFormat) } if total < 10 { // probably? not in this format return nil, grate.WrapErr(err, grate.ErrNotInFormat) } return nil, err } // kinda arbitrary metrics for detecting CSV looksGood := 0 for c, n := range ncols { if c <= 1 { continue } if n > 10 && float64(n)/float64(total) > 0.8 { // more than 80% of rows have the same number of columns, we're good looksGood = 2 } else if n > 25 && looksGood == 0 { looksGood = 1 } } if looksGood == 1 { return t, grate.ErrNotInFormat } return t, nil } ================================================ FILE: simple/simple.go ================================================ package simple import ( "errors" "fmt" "path/filepath" "strconv" "strings" "time" "github.com/pbnjay/grate" ) // represents a set of data collections. type simpleFile struct { filename string rows [][]string iterRow int } // List the individual data tables within this source. func (t *simpleFile) List() ([]string, error) { return []string{filepath.Base(t.filename)}, nil } func (t *simpleFile) Close() error { return nil } // Get a Collection from the source by name. func (t *simpleFile) Get(name string) (grate.Collection, error) { return t, nil } // Next advances to the next record of content. // It MUST be called prior to any Scan(). func (t *simpleFile) Next() bool { t.iterRow++ return t.iterRow < len(t.rows) } // Strings extracts values from the current record into a list of strings. 
func (t *simpleFile) Strings() []string {
	return t.rows[t.iterRow]
}

// Formats extracts the format code for the current record into a list.
// Plain-text files carry no formatting, so every column is "General".
func (t *simpleFile) Formats() []string {
	res := make([]string, len(t.rows[t.iterRow]))
	for i := range res {
		res[i] = "General"
	}
	return res
}

// Types extracts the data types from the current record into a list.
// options: "boolean", "integer", "float", "string", "date",
// and special cases: "blank", "hyperlink" which are string types
// Only "blank" (empty value) and "string" are ever reported here.
func (t *simpleFile) Types() []string {
	res := make([]string, len(t.rows[t.iterRow]))
	for i, v := range t.rows[t.iterRow] {
		if v == "" {
			res[i] = "blank"
		} else {
			res[i] = "string"
		}
	}
	return res
}

// Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of the supported types:
//     bool, int, int64, float64, or string
// time.Time destinations are rejected with an error, and any other
// destination type yields ErrInvalidScanType. The number of destinations
// must equal the number of fields in the current record.
func (t *simpleFile) Scan(args ...interface{}) error {
	if t.iterRow < 0 || t.iterRow >= len(t.rows) {
		// before the first Next, or after Next returned false
		return errors.New("grate/simple: Scan called without a current record (call Next first)")
	}

	row := t.rows[t.iterRow]
	if len(row) != len(args) {
		return fmt.Errorf("grate/simple: expected %d Scan destinations, got %d", len(row), len(args))
	}

	for i, a := range args {
		var err error
		switch v := a.(type) {
		case *bool:
			switch strings.ToLower(row[i]) {
			case "1", "t", "true", "y", "yes":
				*v = true
			default:
				*v = false
			}
		case *int:
			var n int64
			n, err = strconv.ParseInt(row[i], 10, 64)
			*v = int(n)
		case *int64:
			// int64 is the integer type named by the Collection contract
			*v, err = strconv.ParseInt(row[i], 10, 64)
		case *float64:
			*v, err = strconv.ParseFloat(row[i], 64)
		case *string:
			*v = row[i]
		case *time.Time:
			return errors.New("grate/simple: time.Time not supported, you must parse date strings manually")
		default:
			return grate.ErrInvalidScanType
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// IsEmpty returns true if there are no data values.
func (t *simpleFile) IsEmpty() bool {
	return len(t.rows) == 0
}

// Err returns the last error that occurred.
func (t *simpleFile) Err() error { return nil } ================================================ FILE: simple/tsv.go ================================================ package simple import ( "bufio" "os" "strings" "github.com/pbnjay/grate" ) var _ = grate.Register("tsv", 10, OpenTSV) // OpenTSV defines a Source's instantiation function. // It should return ErrNotInFormat immediately if filename is not of the correct file type. func OpenTSV(filename string) (grate.Source, error) { f, err := os.Open(filename) if err != nil { return nil, err } defer f.Close() t := &simpleFile{ filename: filename, iterRow: -1, } s := bufio.NewScanner(f) total := 0 ncols := make(map[int]int) for s.Scan() { r := strings.Split(s.Text(), "\t") ncols[len(r)]++ total++ t.rows = append(t.rows, r) } if s.Err() != nil { // this can only be read errors, not format return nil, s.Err() } // kinda arbitrary metrics for detecting TSV looksGood := 0 for c, n := range ncols { if c <= 1 { continue } if n > 10 && float64(n)/float64(total) > 0.8 { // more than 80% of rows have the same number of columns, we're good looksGood = 2 } else if n > 25 && looksGood == 0 { looksGood = 1 } } if looksGood == 1 { return t, grate.ErrNotInFormat } return t, nil } ================================================ FILE: xls/cfb/cfb.go ================================================ // Package cfb implements the Microsoft Compound File Binary File Format. 
package cfb // https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b // Note for myself: // Storage = Directory // Stream = File import ( "bytes" "encoding/binary" "errors" "io" "io/ioutil" "log" "unicode/utf16" "github.com/pbnjay/grate" ) const fullAssertions = true const ( secFree uint32 = 0xFFFFFFFF // FREESECT secEndOfChain uint32 = 0xFFFFFFFE // ENDOFCHAIN secFAT uint32 = 0xFFFFFFFD // FATSECT secDIFAT uint32 = 0xFFFFFFFC // DIFSECT secReserved uint32 = 0xFFFFFFFB secMaxRegular uint32 = 0xFFFFFFFA // MAXREGSECT ) // Header of the Compound File MUST be at the beginning of the file (offset 0). type header struct { Signature uint64 // Identification signature for the compound file structure, and MUST be set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1. ClassID [2]uint64 // Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL). MinorVersion uint16 // Version number for nonbreaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004. MajorVersion uint16 // Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4). ByteOrder uint16 // This field MUST be set to 0xFFFE. This field is a byte order mark for all integer fields, specifying little-endian byte order. SectorShift uint16 // This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2. MiniSectorShift uint16 // This field MUST be set to 0x0006. This field specifies the sector size of the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes. Reserved1 [6]byte // This field MUST be set to all zeroes. NumDirectorySectors int32 // This integer field contains the count of the number of directory sectors in the compound file. 
NumFATSectors int32 // This integer field contains the count of the number of FAT sectors in the compound file. FirstDirectorySectorLocation uint32 // This integer field contains the starting sector number for the directory stream. TransactionSignature int32 // This integer field MAY contain a sequence number that is incremented every time the compound file is saved by an implementation that supports file transactions. This is the field that MUST be set to all zeroes if file transactions are not implemented.<1> MiniStreamCutoffSize int32 // This integer field MUST be set to 0x00001000. This field specifies the maximum size of a user-defined data stream that is allocated from the mini FAT and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than or equal to this cutoff size must be allocated as normal sectors from the FAT. FirstMiniFATSectorLocation uint32 // This integer field contains the starting sector number for the mini FAT. NumMiniFATSectors int32 // This integer field contains the count of the number of mini FAT sectors in the compound file. FirstDIFATSectorLocation uint32 // This integer field contains the starting sector number for the DIFAT. NumDIFATSectors int32 // This integer field contains the count of the number of DIFAT sectors in the compound file. DIFAT [109]uint32 // This array of 32-bit integer fields contains the first 109 FAT sector locations of the compound file. 
} type objectType byte const ( typeUnknown objectType = 0x00 typeStorage objectType = 0x01 typeStream objectType = 0x02 typeRootStorage objectType = 0x05 ) type directory struct { Name [32]uint16 // 32 utf16 characters NameByteLen int16 // length of Name in bytes ObjectType objectType ColorFlag byte // 0=red, 1=black LeftSiblingID uint32 // stream ids RightSiblingID uint32 ChildID uint32 ClassID [2]uint64 // GUID StateBits uint32 CreationTime int64 ModifiedTime int64 StartingSectorLocation int32 StreamSize uint64 } func (d *directory) String() string { if (d.NameByteLen&1) == 1 || d.NameByteLen > 64 { return "" } r16 := utf16.Decode(d.Name[:int(d.NameByteLen)/2]) // trim off null terminator return string(r16[:len(r16)-1]) } // Document represents a Compound File Binary Format document. type Document struct { // the entire file, loaded into memory data []byte // pre-parsed info header *header dir []*directory // lookup tables for all the sectors fat []uint32 minifat []uint32 ministreamstart uint32 ministreamsize uint32 } func (d *Document) load(rx io.ReadSeeker) error { var err error d.data, err = ioutil.ReadAll(rx) if err != nil { return err } br := bytes.NewReader(d.data) h := &header{} err = binary.Read(br, binary.LittleEndian, h) if h.Signature != 0xe11ab1a1e011cfd0 { return grate.ErrNotInFormat // errors.New("ole2: invalid format") } if h.ByteOrder != 0xFFFE { return grate.ErrNotInFormat //errors.New("ole2: invalid format") } if fullAssertions { if h.ClassID[0] != 0 || h.ClassID[1] != 0 { return grate.ErrNotInFormat //errors.New("ole2: invalid CLSID") } if h.MajorVersion != 3 && h.MajorVersion != 4 { return errors.New("ole2: unknown major version") } if h.MinorVersion != 0x3B && h.MinorVersion != 0x3E { log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion) //return errors.New("ole2: unknown minor version") } for _, v := range h.Reserved1 { if v != 0 { return errors.New("ole2: reserved section is non-zero") } } if h.MajorVersion == 3 { if 
h.SectorShift != 9 { return errors.New("ole2: invalid sector size") } if h.NumDirectorySectors != 0 { return errors.New("ole2: version 3 does not support directory sectors") } } if h.MajorVersion == 4 { if h.SectorShift != 12 { return errors.New("ole2: invalid sector size") } } if h.MiniSectorShift != 6 { return errors.New("ole2: invalid mini sector size") } if h.MiniStreamCutoffSize != 0x00001000 { return errors.New("ole2: invalid mini sector cutoff") } } d.header = h numFATentries := (1 << (h.SectorShift - 2)) le := binary.LittleEndian d.fat = make([]uint32, 0, numFATentries*int(1+d.header.NumFATSectors)) d.minifat = make([]uint32, 0, numFATentries*int(1+h.NumMiniFATSectors)) // step 1: read the DIFAT sector list for i := 0; i < 109; i++ { sid := h.DIFAT[i] if sid == secFree { break } offs := int64(1+sid) << int32(h.SectorShift) if offs >= int64(len(d.data)) { return errors.New("xls/cfb: unable to load file") } sector := d.data[offs:] for j := 0; j < numFATentries; j++ { sid2 := le.Uint32(sector) d.fat = append(d.fat, sid2) sector = sector[4:] } } if h.NumDIFATSectors > 0 { sid1 := h.FirstDIFATSectorLocation for sid1 != secEndOfChain { offs := int64(1+sid1) << int32(h.SectorShift) difatSector := d.data[offs:] for i := 0; i < numFATentries-1; i++ { sid2 := le.Uint32(difatSector) if sid2 == secFree || sid2 == secEndOfChain { difatSector = difatSector[4:] continue } offs := int64(1+sid2) << int32(h.SectorShift) if offs >= int64(len(d.data)) { return errors.New("xls/cfb: unable to load file") } sector := d.data[offs:] for j := 0; j < numFATentries; j++ { sid3 := le.Uint32(sector) d.fat = append(d.fat, sid3) sector = sector[4:] } difatSector = difatSector[4:] } // chain the next DIFAT sector sid1 = le.Uint32(difatSector) } } // step 2: read the mini FAT sid := h.FirstMiniFATSectorLocation for sid != secEndOfChain { offs := int64(1+sid) << int32(h.SectorShift) if offs >= int64(len(d.data)) { return errors.New("xls/cfb: unable to load file") } sector := d.data[offs:] 
for j := 0; j < numFATentries; j++ { sid = le.Uint32(sector) d.minifat = append(d.minifat, sid) sector = sector[4:] } if len(d.minifat) >= int(h.NumMiniFATSectors) { break } // chain the next mini FAT sector sid = le.Uint32(sector) } // step 3: read the Directory Entries err = d.buildDirs(br) return err } func (d *Document) buildDirs(br *bytes.Reader) error { h := d.header le := binary.LittleEndian // step 2: read the Directory sid := h.FirstDirectorySectorLocation offs := int64(1+sid) << int64(h.SectorShift) br.Seek(offs, io.SeekStart) for j := 0; j < 4; j++ { dirent := &directory{} binary.Read(br, le, dirent) if d.header.MajorVersion == 3 { // mask out upper 32bits dirent.StreamSize = dirent.StreamSize & 0xFFFFFFFF } switch dirent.ObjectType { case typeRootStorage: d.ministreamstart = uint32(dirent.StartingSectorLocation) d.ministreamsize = uint32(dirent.StreamSize) case typeStorage: //log.Println("got a storage? what to do now?") case typeStream: /* var freader io.Reader if dirent.StreamSize < uint64(d.header.MiniStreamCutoffSize) { freader = d.getMiniStreamReader(uint32(dirent.StartingSectorLocation), dirent.StreamSize) } else if dirent.StreamSize != 0 { freader = d.getStreamReader(uint32(dirent.StartingSectorLocation), dirent.StreamSize) } */ case typeUnknown: return nil } d.dir = append(d.dir, dirent) } return nil } func (d *Document) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) { // NB streamData is a slice of slices of the raw data, so this is the // only allocation - for the (much smaller) list of sector slices streamData := make([][]byte, 1+(size>>d.header.SectorShift)) x := 0 secSize := int64(1) << int32(d.header.SectorShift) for sid != secEndOfChain && sid != secFree { offs := int64(1+sid) << int64(d.header.SectorShift) if offs > int64(len(d.data)) { return nil, errors.New("ole2: corrupt data format") } slice := d.data[offs : offs+secSize] if size < uint64(len(slice)) { slice = slice[:size] size = 0 } else { size -= uint64(len(slice)) 
} streamData[x] = slice if size == 0 { break } sid = d.fat[sid] x++ } if size != 0 { return nil, errors.New("ole2: incomplete read") } return &SliceReader{Data: streamData}, nil } func (d *Document) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) { // TODO: move into a separate cache so we don't recalculate it each time fatStreamData := make([][]byte, 1+(d.ministreamsize>>d.header.SectorShift)) // NB streamData is a slice of slices of the raw data, so this is the // only allocation - for the (much smaller) list of sector slices streamData := make([][]byte, 1+(size>>d.header.MiniSectorShift)) x := 0 fsid := d.ministreamstart fsize := uint64(d.ministreamsize) secSize := int64(1) << int64(d.header.SectorShift) for fsid != secEndOfChain && fsid != secFree { offs := int64(1+fsid) << int64(d.header.SectorShift) slice := d.data[offs : offs+secSize] if fsize < uint64(len(slice)) { slice = slice[:fsize] fsize = 0 } else { fsize -= uint64(len(slice)) } fatStreamData[x] = slice x++ fsid = d.fat[fsid] } x = 0 miniSecSize := int64(1) << int64(d.header.MiniSectorShift) for sid != secEndOfChain && sid != secFree { offs := int64(sid) << int64(d.header.MiniSectorShift) so, si := offs/secSize, offs%secSize data := fatStreamData[so] slice := data[si : si+miniSecSize] if size < uint64(len(slice)) { slice = slice[:size] size = 0 } else { size -= uint64(len(slice)) } streamData[x] = slice x++ sid = d.minifat[sid] } return &SliceReader{Data: streamData}, nil } ================================================ FILE: xls/cfb/interface.go ================================================ package cfb import ( "fmt" "io" "os" ) // Open a Compound File Binary Format document. func Open(filename string) (*Document, error) { d := &Document{} f, err := os.Open(filename) if err != nil { return nil, err } err = d.load(f) if err != nil { return nil, err } return d, nil } // List the streams contained in the document. 
func (d *Document) List() ([]string, error) { var res []string for _, e := range d.dir { if e.ObjectType == typeStream { res = append(res, e.String()) } } return res, nil } // Open the named stream contained in the document. func (d *Document) Open(name string) (io.ReadSeeker, error) { for _, e := range d.dir { if e.String() == name && e.ObjectType == typeStream { if e.StreamSize < uint64(d.header.MiniStreamCutoffSize) { return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize) } else if e.StreamSize != 0 { return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize) } } } return nil, fmt.Errorf("cfb: stream '%s' not found", name) } ================================================ FILE: xls/cfb/simple_test.go ================================================ package cfb import ( "io" "io/ioutil" "log" "os" "testing" ) func TestHeader(t *testing.T) { d := &Document{} f, _ := os.Open("../../testdata/test.xls") err := d.load(f) if err != nil { t.Fatal(err) } } func TestHeader2(t *testing.T) { d := &Document{} f, _ := os.Open("../../testdata/test2.xls") err := d.load(f) if err != nil { t.Fatal(err) } } func TestHeader3(t *testing.T) { d := &Document{} f, _ := os.Open("../../testdata/test3.xls") err := d.load(f) if err != nil { t.Fatal(err) } } func TestHeader4(t *testing.T) { d := &Document{} f, _ := os.Open("../../testdata/test4.xls") err := d.load(f) if err != nil { t.Fatal(err) } log.Println(d.List()) r, err := d.Open("Workbook") if err != nil { t.Fatal(err) } book, err := ioutil.ReadAll(r) if err != nil { t.Fatal(err) } log.Println(len(book)) r, err = d.Open("\x05DocumentSummaryInformation") if err != nil { t.Fatal(err) } data, err := ioutil.ReadAll(r) if err != nil { t.Fatal(err) } log.Println(len(data)) } var testSlices = [][]byte{ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, {20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, {30, 31, 32, 33, 34, 35, 36, 37, 38, 39}, {40, 41, 42, 43, 44, 45, 46, 47, 48, 49}, } 
func TestSliceReader(t *testing.T) { sr := &SliceReader{ Data: testSlices, } var uno, old [1]byte _, err := sr.Read(uno[:]) for err == nil { old[0] = uno[0] _, err = sr.Read(uno[:]) if err == nil && uno[0] != (old[0]+1) { log.Printf("read data out of order new=%d, old=%d", old[0], uno[0]) t.Fail() } } sr.Seek(0, io.SeekStart) _, err = sr.Read(uno[:]) for err == nil { old[0] = uno[0] _, err = sr.Read(uno[:]) if err == nil && uno[0] != (old[0]+1) { log.Printf("read data out of order new=%d, old=%d", old[0], uno[0]) t.Fail() } } sr.Seek(10, io.SeekStart) _, err = sr.Read(uno[:]) if uno[0] != 10 { log.Printf("unexpected element %d (expected %d)", uno[0], 10) t.Fail() } sr.Seek(35, io.SeekStart) _, err = sr.Read(uno[:]) if uno[0] != 35 { log.Printf("unexpected element %d (expected %d)", uno[0], 35) t.Fail() } sr.Seek(7, io.SeekCurrent) _, err = sr.Read(uno[:]) if uno[0] != 43 { log.Printf("unexpected element %d (expected %d)", uno[0], 43) t.Fail() } sr.Seek(-9, io.SeekCurrent) _, err = sr.Read(uno[:]) if uno[0] != 35 { log.Printf("unexpected element %d (expected %d)", uno[0], 35) t.Fail() } } ================================================ FILE: xls/cfb/slicereader.go ================================================ package cfb import ( "errors" "io" ) // SliceReader wraps a list of slices as a io.ReadSeeker that // can transparently merge them into a single coherent stream. type SliceReader struct { CSize []int64 Data [][]byte Index uint Offset uint } // Read implements the io.Reader interface. func (s *SliceReader) Read(b []byte) (int, error) { if s.Index >= uint(len(s.Data)) { return 0, io.EOF } n := copy(b, s.Data[s.Index][s.Offset:]) if n > 0 { s.Offset += uint(n) if s.Offset == uint(len(s.Data[s.Index])) { s.Offset = 0 s.Index++ } return n, nil } return 0, io.EOF } var x io.Seeker // Seek implements the io.Seeker interface. 
func (s *SliceReader) Seek(offset int64, whence int) (int64, error) { if len(s.CSize) != len(s.Data) { // calculate the cumulative block size cache s.CSize = make([]int64, len(s.Data)) sz := int64(0) for i, d := range s.Data { s.CSize[i] = sz sz += int64(len(d)) } } if s.Index >= uint(len(s.CSize)) { s.Index = uint(len(s.CSize) - 1) s.Offset = uint(len(s.Data[s.Index])) } // current offset in stream trueOffset := int64(s.Offset) + s.CSize[int(s.Index)] if offset == 0 && whence == io.SeekCurrent { // just asking for current position return trueOffset, nil } switch whence { case io.SeekStart: if offset < 0 { return -1, errors.New("xls: invalid seek offset") } s.Index = 0 s.Offset = 0 trueOffset = 0 case io.SeekEnd: if offset > 0 { return -1, errors.New("xls: invalid seek offset") } s.Index = uint(len(s.Data) - 1) s.Offset = uint(len(s.Data[s.Index])) trueOffset = int64(s.Offset) + s.CSize[s.Index] default: // current position already defined } wantOffset := offset + trueOffset for trueOffset != wantOffset { loOffset := s.CSize[int(s.Index)] hiOffset := s.CSize[int(s.Index)] + int64(len(s.Data[s.Index])) if wantOffset > loOffset && wantOffset < hiOffset { s.Offset = uint(wantOffset - loOffset) return wantOffset, nil } if trueOffset > wantOffset { s.Index-- s.Offset = 0 trueOffset = s.CSize[int(s.Index)] } else if trueOffset < wantOffset { s.Index++ s.Offset = 0 trueOffset = s.CSize[int(s.Index)] } } return wantOffset, nil } ================================================ FILE: xls/comp_test.go ================================================ package xls import ( "os" "path/filepath" "strings" "testing" ) func TestAllFiles(t *testing.T) { err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error { if info.IsDir() { return nil } if !strings.HasSuffix(info.Name(), ".xls") { return nil } wb, err := Open(p) if err != nil { return err } sheets, err := wb.List() if err != nil { return err } for _, s := range sheets { sheet, err := wb.Get(s) if 
err != nil { return err } for sheet.Next() { sheet.Strings() } } return wb.Close() }) if err != nil { t.Fatal(err) } } ================================================ FILE: xls/crypto/crypto.go ================================================ // Package crypto implements excel encryption algorithms from the // MS-OFFCRYPTO design specs. Currently only standard/basic RC4 // "obfuscation" is supported. package crypto import ( "bytes" "encoding/binary" "fmt" ) // Decryptor describes methods to decrypt an excel sheet. type Decryptor interface { // SetPassword for the decryption. SetPassword(password []byte) // Read implements the io.Reader interface. Read(p []byte) (n int, err error) // Write implements the io.Writer interface. Write(p []byte) (n int, err error) // Bytes returns the decrypted data. Bytes() []byte // Flush tells the decryptor to decrypt the latest block. Flush() // Reset the decryptor, and clear all written and readable data. Reset() } // Algorithms designed based on specs in MS-OFFCRYPTO: // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/3c34d72a-1a61-4b52-a893-196f9157f083 // Important notes from MS-XLS section 2.2.10: // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06 // When obfuscating or encrypting BIFF records in these streams the record type and // record size components MUST NOT be obfuscated or encrypted. // In addition the following records MUST NOT be obfuscated or encrypted: // BOF (section 2.4.21), FilePass (section 2.4.117), UsrExcl (section 2.4.339), // FileLock (section 2.4.116), InterfaceHdr (section 2.4.146), RRDInfo (section 2.4.227), // and RRDHead (section 2.4.226). Additionally, the lbPlyPos field of the BoundSheet8 // record (section 2.4.28) MUST NOT be encrypted. 
// For RC4 encryption and RC4 CryptoAPI encryption, the Unicode password string is used
// to generate the encryption key as specified in [MS-OFFCRYPTO] section 2.3.6.2 or
// [MS-OFFCRYPTO] section 2.3.5.2 depending on the RC4 algorithm used. The record data
// is then encrypted by the specific RC4 algorithm in 1024-byte blocks. The block number
// is set to zero at the beginning of every BIFF record stream, and incremented by one
// at each 1024-byte boundary. Bytes to be encrypted are passed into the RC4 encryption
// function and then written to the stream. For unencrypted records and the record
// headers consisting of the record type and record size, a byte buffer of all zeros,
// of the same size as the section of unencrypted bytes, is passed into the RC4
// encryption function. The results are then ignored and the unencrypted bytes are
// written to the stream.

// Background for DefaultXLSPassword, quoted from note <100> of section 2.4.191:
// If the value of the wPassword field of the Password record in
// the Globals Substream is not 0x0000, Excel 97, Excel 2000, Excel 2002, Office Excel
// 2003, Office Excel 2007, and Excel 2010 encrypt the document as specified in [MS-OFFCRYPTO],
// section 2.3. If an encryption password is not specified or the workbook or sheet is only
// protected, the document is encrypted with the default password ("VelvetSweatshop").

// DefaultXLSPassword is the default Excel encryption password. It is the
// password Excel uses when a document is encrypted without an explicit
// password (see the quoted note above).
var DefaultXLSPassword = "VelvetSweatshop"

/////////////
// 2.3.6.1

// basicRC4Encryption is the fixed-size header that NewBasicRC4 decodes with
// encoding/binary in little-endian order. Its fields add up to the 52 bytes
// NewBasicRC4 requires (2+2+16+16+16).
// NOTE(review): "2.3.6.1" above presumably refers to the MS-OFFCRYPTO section
// describing this header - confirm against the spec.
type basicRC4Encryption struct {
	MajorVersion uint16
	MinorVersion uint16   // NewBasicRC4 rejects any value other than 1
	Salt         [16]byte // copied into the decryptor for key generation
	Verifier     [16]byte // passed to Verify together with VerifierHash
	VerifierHash [16]byte
}

// NewBasicRC4 implements the standard RC4 decryption.
func NewBasicRC4(data []byte) (Decryptor, error) { h := basicRC4Encryption{} b := bytes.NewReader(data) err := binary.Read(b, binary.LittleEndian, &h) if err != nil { return nil, err } if h.MinorVersion != 1 { return nil, fmt.Errorf("xls: unknown basic-RC4 minor version %d (%d byte record)", h.MinorVersion, len(data)) } if len(data) != 52 { return nil, fmt.Errorf("xls: data length is invalid (expected 52 bytes, got %d)", len(data)) } d := &rc4Writer{ Salt: make([]byte, len(h.Salt)), } copy(d.Salt, h.Salt[:]) return d, d.Verify(h.Verifier[:], h.VerifierHash[:]) } ================================================ FILE: xls/crypto/rc4.go ================================================ package crypto import ( "bytes" "crypto/md5" "crypto/rc4" "encoding/binary" "fmt" ) var _ Decryptor = &rc4Writer{} func (d *rc4Writer) Write(data []byte) (n int, err error) { x := len(data) for len(data) > 0 { n := copy(d.bytes[d.offset:], data) d.offset += n if d.offset >= 1024 { if d.offset != 1024 { panic("invalid offset from write") } d.Flush() } data = data[n:] } return x, nil } func (d *rc4Writer) Read(data []byte) (n int, err error) { return d.buf.Read(data) } // Reset to block 0, and clear all written and readable data. func (d *rc4Writer) Reset() { d.block = 0 d.offset = 0 d.buf.Reset() } // Flush tells the decryptor to decrypt the latest block. func (d *rc4Writer) Flush() { var zeros [1024]byte endpad := 0 if d.offset < 1024 { endpad = copy(d.bytes[d.offset:], zeros[:]) d.offset += endpad } if d.offset != 1024 { panic("invalid offset fill") } // decrypt and write results to output buffer d.startBlock() d.dec.XORKeyStream(d.bytes[:], d.bytes[:]) d.buf.Write(d.bytes[:1024-endpad]) d.offset = 0 d.block++ } // SetPassword for the decryption. 
func (d *rc4Writer) SetPassword(password []byte) { d.Password = make([]rune, len(password)) for i, p := range password { d.Password[i] = rune(p) } /// compute the first part of the encryption key result := generateStd97Key(d.Password, d.Salt) d.encKey = make([]byte, len(result)) copy(d.encKey, result) } type rc4Writer struct { block uint32 offset int bytes [1024]byte // records the decrypted data buf bytes.Buffer /////// // decrypter for RC4 content streams dec *rc4.Cipher cipherKey []byte // H1 per 2.3.6.2 encKey []byte // Hfinal per 2.3.6.2 Salt []byte Password []rune } func (d *rc4Writer) Bytes() []byte { return d.buf.Bytes() } func (d *rc4Writer) Verify(everifier, everifierHash []byte) error { d.Reset() d.startBlock() var temp1 [16]byte var temp2 [16]byte d.dec.XORKeyStream(temp1[:], everifier) d.dec.XORKeyStream(temp2[:], everifierHash) newhash := md5.Sum(temp1[:]) for i, c := range newhash { if temp2[i] != c { return fmt.Errorf("verification failed") } } return nil } ///////////////////// func (d *rc4Writer) startBlock() { if d.encKey == nil { d.SetPassword([]byte(DefaultXLSPassword)) } d.cipherKey = make([]byte, 16) copy(d.cipherKey, d.encKey[:5]) binary.LittleEndian.PutUint32(d.cipherKey[5:], d.block) mhash := md5.Sum(d.cipherKey[:9]) d.dec, _ = rc4.NewCipher(mhash[:]) } func generateStd97Key(passData []rune, salt []byte) []byte { if len(passData) == 0 || len(salt) != 16 { panic("invalid keygen material") } passBytes := make([]byte, len(passData)*2) for i, c := range passData { binary.LittleEndian.PutUint16(passBytes[2*i:], uint16(c)) } // digest the IV then copy back into pKeyData h0 := md5.Sum(passBytes) // now do the final set of keygen ops msum := md5.New() for i := 0; i < 16; i++ { msum.Write(h0[:5]) msum.Write(salt) } // return H1 temp := make([]byte, 0, 16) temp = msum.Sum(temp) return temp } ================================================ FILE: xls/hyperlinks.go ================================================ package xls import ( "encoding/binary" 
"errors" "fmt" "strings" "unicode/utf16" ) func decodeHyperlinks(raw []byte) (displayText, linkText string, err error) { raw = raw[16:] // skip classid slen := binary.LittleEndian.Uint32(raw[:4]) if slen != 2 { return "", "", errors.New("xls: unknown hyperlink version") } flags := binary.LittleEndian.Uint32(raw[4:8]) raw = raw[8:] if (flags & hlstmfHasDisplayName) != 0 { slen = binary.LittleEndian.Uint32(raw[:4]) raw = raw[4:] us := make([]uint16, slen) for i := 0; i < int(slen); i++ { us[i] = binary.LittleEndian.Uint16(raw) raw = raw[2:] } displayText = string(utf16.Decode(us)) } if (flags & hlstmfHasFrameName) != 0 { // skip a HyperlinkString containing target Frame slen = binary.LittleEndian.Uint32(raw[:4]) raw = raw[4+(slen*2):] } if (flags & hlstmfHasMoniker) != 0 { if (flags & hlstmfMonikerSavedAsStr) != 0 { // read HyperlinkString containing the URL slen = binary.LittleEndian.Uint32(raw[:4]) raw = raw[4:] us := make([]uint16, slen) for i := 0; i < int(slen); i++ { us[i] = binary.LittleEndian.Uint16(raw) raw = raw[2:] } linkText = string(utf16.Decode(us)) } else { n := 0 var err error linkText, n, err = parseHyperlinkMoniker(raw) raw = raw[n:] if err != nil { return "", "", err } } } if (flags & hlstmfHasLocationStr) != 0 { slen = binary.LittleEndian.Uint32(raw[:4]) raw = raw[4:] us := make([]uint16, slen) for i := 0; i < int(slen); i++ { us[i] = binary.LittleEndian.Uint16(raw) raw = raw[2:] } linkText = string(utf16.Decode(us)) } linkText = strings.Trim(linkText, " \v\f\t\r\n\x00") displayText = strings.Trim(displayText, " \v\f\t\r\n\x00") return } func parseHyperlinkMoniker(raw []byte) (string, int, error) { classid := raw[:16] no := 16 isURLMoniker := true isFileMoniker := true urlMonikerClassID := [16]byte{0xE0, 0xC9, 0xEA, 0x79, 0xF9, 0xBA, 0xCE, 0x11, 0x8C, 0x82, 0x00, 0xAA, 0x00, 0x4B, 0xA9, 0x0B} fileMonikerClassID := [16]byte{0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46} for i, b := range classid { if 
urlMonikerClassID[i] != b { isURLMoniker = false } if fileMonikerClassID[i] != b { isFileMoniker = false } } if isURLMoniker { length := binary.LittleEndian.Uint32(raw[no:]) no += 4 length /= 2 buf := make([]uint16, length) for i := 0; i < int(length); i++ { buf[i] = binary.LittleEndian.Uint16(raw[no:]) no += 2 } if length > 12 && buf[length-13] == 0 { buf = buf[:length-12] } return string(utf16.Decode(buf)), no, nil } if isFileMoniker { //x := binary.LittleEndian.Uint16(raw[no:]) //cAnti length := binary.LittleEndian.Uint32(raw[no+2:]) //ansiLength no += 6 buf := raw[no : no+int(length)] // skip 24 more bytes for misc fixed properties no += int(length) + 24 length = binary.LittleEndian.Uint32(raw[no:]) // cbUnicodePathSize no += 4 if length > 0 { no += 6 length -= 6 buf2 := make([]uint16, length/2) for i := 0; i < int(length/2); i++ { buf2[i] = binary.LittleEndian.Uint16(raw[no:]) no += 2 } return string(utf16.Decode(buf2)), no, nil } return string(buf), no, nil } return "", 0, fmt.Errorf("xls: unknown moniker classid") } // HLink flags const ( hlstmfHasMoniker = uint32(0x001) hlstmfIsAbsolute = uint32(0x002) hlstmfSiteGaveDisplayName = uint32(0x004) hlstmfHasLocationStr = uint32(0x008) hlstmfHasDisplayName = uint32(0x010) hlstmfHasGUID = uint32(0x020) hlstmfHasCreationTime = uint32(0x040) hlstmfHasFrameName = uint32(0x080) hlstmfMonikerSavedAsStr = uint32(0x100) hlstmfAbsFromGetdataRel = uint32(0x200) ) ================================================ FILE: xls/records.go ================================================ package xls import "fmt" type recordType uint16 // Record types defined by the XLS specification document, section 2.3/2.4. 
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/43684742-8fcd-4fcd-92df-157d8d7241f9 const ( RecTypeFormula recordType = 6 // per section 2.4.127 RecTypeEOF recordType = 10 // section 2.4.103 RecTypeCalcCount recordType = 12 // section 2.4.31 RecTypeCalcMode recordType = 13 // section 2.4.34 RecTypeCalcPrecision recordType = 14 // section 2.4.35 RecTypeCalcRefMode recordType = 15 // section 2.4.36 RecTypeCalcDelta recordType = 16 // section 2.4.32 RecTypeCalcIter recordType = 17 // section 2.4.33 RecTypeProtect recordType = 18 // section 2.4.207 RecTypePassword recordType = 19 // section 2.4.191 RecTypeHeader recordType = 20 // section 2.4.136 RecTypeFooter recordType = 21 // section 2.4.124 RecTypeExternSheet recordType = 23 // section 2.4.106 RecTypeLbl recordType = 24 // section 2.4.150 RecTypeWinProtect recordType = 25 // section 2.4.347 RecTypeVerticalPageBreaks recordType = 26 // section 2.4.343 RecTypeHorizontalPageBreaks recordType = 27 // section 2.4.142 RecTypeNote recordType = 28 // section 2.4.179 RecTypeSelection recordType = 29 // section 2.4.248 RecTypeDate1904 recordType = 34 // section 2.4.77 RecTypeExternName recordType = 35 // section 2.4.105 RecTypeLeftMargin recordType = 38 // section 2.4.151 RecTypeRightMargin recordType = 39 // section 2.4.219 RecTypeTopMargin recordType = 40 // section 2.4.328 RecTypeBottomMargin recordType = 41 // section 2.4.27 RecTypePrintRowCol recordType = 42 // section 2.4.203 RecTypePrintGrid recordType = 43 // section 2.4.202 RecTypeFilePass recordType = 47 // section 2.4.117 RecTypeFont recordType = 49 // section 2.4.122 RecTypePrintSize recordType = 51 // section 2.4.204 RecTypeContinue recordType = 60 // section 2.4.58 RecTypeWindow1 recordType = 61 // section 2.4.345 RecTypeBackup recordType = 64 // section 2.4.14 RecTypePane recordType = 65 // section 2.4.189 RecTypeCodePage recordType = 66 // section 2.4.52 RecTypePls recordType = 77 // section 2.4.199 RecTypeDCon recordType = 80 // 
section 2.4.82 RecTypeDConRef recordType = 81 // section 2.4.86 RecTypeDConName recordType = 82 // section 2.4.85 RecTypeDefColWidth recordType = 85 // section 2.4.89 RecTypeXCT recordType = 89 // section 2.4.352 RecTypeCRN recordType = 90 // section 2.4.65 RecTypeFileSharing recordType = 91 // section 2.4.118 RecTypeWriteAccess recordType = 92 // section 2.4.349 RecTypeObj recordType = 93 // section 2.4.181 RecTypeUncalced recordType = 94 // section 2.4.331 RecTypeCalcSaveRecalc recordType = 95 // section 2.4.37 RecTypeTemplate recordType = 96 // section 2.4.323 RecTypeIntl recordType = 97 // section 2.4.147 RecTypeObjProtect recordType = 99 // section 2.4.183 RecTypeColInfo recordType = 125 // section 2.4.53 RecTypeGuts recordType = 128 // section 2.4.134 RecTypeWsBool recordType = 129 // section 2.4.351 RecTypeGridSet recordType = 130 // section 2.4.132 RecTypeHCenter recordType = 131 // section 2.4.135 RecTypeVCenter recordType = 132 // section 2.4.342 RecTypeBoundSheet8 recordType = 133 // section 2.4.28 RecTypeWriteProtect recordType = 134 // section 2.4.350 RecTypeCountry recordType = 140 // section 2.4.63 RecTypeHideObj recordType = 141 // section 2.4.139 RecTypeSort recordType = 144 // section 2.4.263 RecTypePalette recordType = 146 // section 2.4.188 RecTypeSync recordType = 151 // section 2.4.318 RecTypeLPr recordType = 152 // section 2.4.158 RecTypeDxGCol recordType = 153 // section 2.4.98 RecTypeFnGroupName recordType = 154 // section 2.4.120 RecTypeFilterMode recordType = 155 // section 2.4.119 RecTypeBuiltInFnGroupCount recordType = 156 // section 2.4.30 RecTypeAutoFilterInfo recordType = 157 // section 2.4.8 RecTypeAutoFilter recordType = 158 // section 2.4.6 RecTypeScl recordType = 160 // section 2.4.247 RecTypeSetup recordType = 161 // section 2.4.257 RecTypeScenMan recordType = 174 // section 2.4.246 RecTypeSCENARIO recordType = 175 // section 2.4.244 RecTypeSxView recordType = 176 // section 2.4.313 RecTypeSxvd recordType = 177 // section 
2.4.309 RecTypeSXVI recordType = 178 // section 2.4.312 RecTypeSxIvd recordType = 180 // section 2.4.292 RecTypeSXLI recordType = 181 // section 2.4.293 RecTypeSXPI recordType = 182 // section 2.4.298 RecTypeDocRoute recordType = 184 // section 2.4.91 RecTypeRecipName recordType = 185 // section 2.4.216 RecTypeMulRk recordType = 189 // section 2.4.175 RecTypeMulBlank recordType = 190 // section 2.4.174 RecTypeMms recordType = 193 // section 2.4.169 RecTypeSXDI recordType = 197 // section 2.4.278 RecTypeSXDB recordType = 198 // section 2.4.275 RecTypeSXFDB recordType = 199 // section 2.4.283 RecTypeSXDBB recordType = 200 // section 2.4.276 RecTypeSXNum recordType = 201 // section 2.4.296 RecTypeSxBool recordType = 202 // section 2.4.274 RecTypeSxErr recordType = 203 // section 2.4.281 RecTypeSXInt recordType = 204 // section 2.4.289 RecTypeSXString recordType = 205 // section 2.4.304 RecTypeSXDtr recordType = 206 // section 2.4.279 RecTypeSxNil recordType = 207 // section 2.4.295 RecTypeSXTbl recordType = 208 // section 2.4.305 RecTypeSXTBRGIITM recordType = 209 // section 2.4.307 RecTypeSxTbpg recordType = 210 // section 2.4.306 RecTypeObProj recordType = 211 // section 2.4.185 RecTypeSXStreamID recordType = 213 // section 2.4.303 RecTypeDBCell recordType = 215 // section 2.4.78 RecTypeSXRng recordType = 216 // section 2.4.300 RecTypeSxIsxoper recordType = 217 // section 2.4.290 RecTypeBookBool recordType = 218 // section 2.4.22 RecTypeDbOrParamQry recordType = 220 // section 2.4.79 RecTypeScenarioProtect recordType = 221 // section 2.4.245 RecTypeOleObjectSize recordType = 222 // section 2.4.187 RecTypeXF recordType = 224 // section 2.4.353 RecTypeInterfaceHdr recordType = 225 // section 2.4.146 RecTypeInterfaceEnd recordType = 226 // section 2.4.145 RecTypeSXVS recordType = 227 // section 2.4.317 RecTypeMergeCells recordType = 229 // section 2.4.168 RecTypeBkHim recordType = 233 // section 2.4.19 RecTypeMsoDrawingGroup recordType = 235 // section 2.4.171 
RecTypeMsoDrawing recordType = 236 // section 2.4.170 RecTypeMsoDrawingSelection recordType = 237 // section 2.4.172 RecTypePhoneticInfo recordType = 239 // section 2.4.192 RecTypeSxRule recordType = 240 // section 2.4.301 RecTypeSXEx recordType = 241 // section 2.4.282 RecTypeSxFilt recordType = 242 // section 2.4.285 RecTypeSxDXF recordType = 244 // section 2.4.280 RecTypeSxItm recordType = 245 // section 2.4.291 RecTypeSxName recordType = 246 // section 2.4.294 RecTypeSxSelect recordType = 247 // section 2.4.302 RecTypeSXPair recordType = 248 // section 2.4.297 RecTypeSxFmla recordType = 249 // section 2.4.286 RecTypeSxFormat recordType = 251 // section 2.4.287 RecTypeSST recordType = 252 // section 2.4.265 RecTypeLabelSst recordType = 253 // section 2.4.149 RecTypeExtSST recordType = 255 // section 2.4.107 RecTypeSXVDEx recordType = 256 // section 2.4.310 RecTypeSXFormula recordType = 259 // section 2.4.288 RecTypeSXDBEx recordType = 290 // section 2.4.277 RecTypeRRDInsDel recordType = 311 // section 2.4.228 RecTypeRRDHead recordType = 312 // section 2.4.226 RecTypeRRDChgCell recordType = 315 // section 2.4.223 RecTypeRRTabID recordType = 317 // section 2.4.241 RecTypeRRDRenSheet recordType = 318 // section 2.4.234 RecTypeRRSort recordType = 319 // section 2.4.240 RecTypeRRDMove recordType = 320 // section 2.4.231 RecTypeRRFormat recordType = 330 // section 2.4.238 RecTypeRRAutoFmt recordType = 331 // section 2.4.222 RecTypeRRInsertSh recordType = 333 // section 2.4.239 RecTypeRRDMoveBegin recordType = 334 // section 2.4.232 RecTypeRRDMoveEnd recordType = 335 // section 2.4.233 RecTypeRRDInsDelBegin recordType = 336 // section 2.4.229 RecTypeRRDInsDelEnd recordType = 337 // section 2.4.230 RecTypeRRDConflict recordType = 338 // section 2.4.224 RecTypeRRDDefName recordType = 339 // section 2.4.225 RecTypeRRDRstEtxp recordType = 340 // section 2.4.235 RecTypeLRng recordType = 351 // section 2.4.159 RecTypeUsesELFs recordType = 352 // section 2.4.337 RecTypeDSF 
recordType = 353 // section 2.4.94 RecTypeCUsr recordType = 401 // section 2.4.72 RecTypeCbUsr recordType = 402 // section 2.4.40 RecTypeUsrInfo recordType = 403 // section 2.4.340 RecTypeUsrExcl recordType = 404 // section 2.4.339 RecTypeFileLock recordType = 405 // section 2.4.116 RecTypeRRDInfo recordType = 406 // section 2.4.227 RecTypeBCUsrs recordType = 407 // section 2.4.16 RecTypeUsrChk recordType = 408 // section 2.4.338 RecTypeUserBView recordType = 425 // section 2.4.333 RecTypeUserSViewBegin recordType = 426 // section 2.4.334 RecTypeUserSViewBeginChart recordType = 426 // section 2.4.335 RecTypeUserSViewEnd recordType = 427 // section 2.4.336 RecTypeRRDUserView recordType = 428 // section 2.4.237 RecTypeQsi recordType = 429 // section 2.4.208 RecTypeSupBook recordType = 430 // section 2.4.271 RecTypeProt4Rev recordType = 431 // section 2.4.205 RecTypeCondFmt recordType = 432 // section 2.4.56 RecTypeCF recordType = 433 // section 2.4.42 RecTypeDVal recordType = 434 // section 2.4.96 RecTypeDConBin recordType = 437 // section 2.4.83 RecTypeTxO recordType = 438 // section 2.4.329 RecTypeRefreshAll recordType = 439 // section 2.4.217 RecTypeHLink recordType = 440 // section 2.4.140 RecTypeLel recordType = 441 // section 2.4.154 RecTypeCodeName recordType = 442 // section 2.4.51 RecTypeSXFDBType recordType = 443 // section 2.4.284 RecTypeProt4RevPass recordType = 444 // section 2.4.206 RecTypeObNoMacros recordType = 445 // section 2.4.184 RecTypeDv recordType = 446 // section 2.4.95 RecTypeExcel9File recordType = 448 // section 2.4.104 RecTypeRecalcID recordType = 449 // section 2.4.215 RecTypeEntExU2 recordType = 450 // section 2.4.102 RecTypeDimensions recordType = 512 // section 2.4.90 RecTypeBlank recordType = 513 // section 2.4.20 RecTypeNumber recordType = 515 // section 2.4.180 RecTypeLabel recordType = 516 // section 2.4.148 RecTypeBoolErr recordType = 517 // section 2.4.24 RecTypeString recordType = 519 // section 2.4.268 RecTypeRow recordType = 
520 // section 2.4.221 RecTypeIndex recordType = 523 // section 2.4.144 RecTypeArray recordType = 545 // section 2.4.4 RecTypeDefaultRowHeight recordType = 549 // section 2.4.87 RecTypeTable recordType = 566 // section 2.4.319 RecTypeWindow2 recordType = 574 // section 2.4.346 RecTypeRK recordType = 638 // section 2.4.220 RecTypeStyle recordType = 659 // section 2.4.269 RecTypeBigName recordType = 1048 // section 2.4.18 RecTypeFormat recordType = 1054 // section 2.4.126 RecTypeContinueBigName recordType = 1084 // section 2.4.59 RecTypeShrFmla recordType = 1212 // section 2.4.260 RecTypeHLinkTooltip recordType = 2048 // section 2.4.141 RecTypeWebPub recordType = 2049 // section 2.4.344 RecTypeQsiSXTag recordType = 2050 // section 2.4.211 RecTypeDBQueryExt recordType = 2051 // section 2.4.81 RecTypeExtString recordType = 2052 // section 2.4.108 RecTypeTxtQry recordType = 2053 // section 2.4.330 RecTypeQsir recordType = 2054 // section 2.4.210 RecTypeQsif recordType = 2055 // section 2.4.209 RecTypeRRDTQSIF recordType = 2056 // section 2.4.236 RecTypeBOF recordType = 2057 // section 2.4.21 RecTypeOleDbConn recordType = 2058 // section 2.4.186 RecTypeWOpt recordType = 2059 // section 2.4.348 RecTypeSXViewEx recordType = 2060 // section 2.4.314 RecTypeSXTH recordType = 2061 // section 2.4.308 RecTypeSXPIEx recordType = 2062 // section 2.4.299 RecTypeSXVDTEx recordType = 2063 // section 2.4.311 RecTypeSXViewEx9 recordType = 2064 // section 2.4.315 RecTypeContinueFrt recordType = 2066 // section 2.4.60 RecTypeRealTimeData recordType = 2067 // section 2.4.214 RecTypeChartFrtInfo recordType = 2128 // section 2.4.49 RecTypeFrtWrapper recordType = 2129 // section 2.4.130 RecTypeStartBlock recordType = 2130 // section 2.4.266 RecTypeEndBlock recordType = 2131 // section 2.4.100 RecTypeStartObject recordType = 2132 // section 2.4.267 RecTypeEndObject recordType = 2133 // section 2.4.101 RecTypeCatLab recordType = 2134 // section 2.4.38 RecTypeYMult recordType = 2135 // section 
2.4.356 RecTypeSXViewLink recordType = 2136 // section 2.4.316 RecTypePivotChartBits recordType = 2137 // section 2.4.196 RecTypeFrtFontList recordType = 2138 // section 2.4.129 RecTypeSheetExt recordType = 2146 // section 2.4.259 RecTypeBookExt recordType = 2147 // section 2.4.23 RecTypeSXAddl recordType = 2148 // section 2.4.273.2 RecTypeCrErr recordType = 2149 // section 2.4.64 RecTypeHFPicture recordType = 2150 // section 2.4.138 RecTypeFeatHdr recordType = 2151 // section 2.4.112 RecTypeFeat recordType = 2152 // section 2.4.111 RecTypeDataLabExt recordType = 2154 // section 2.4.75 RecTypeDataLabExtContents recordType = 2155 // section 2.4.76 RecTypeCellWatch recordType = 2156 // section 2.4.41 RecTypeFeatHdr11 recordType = 2161 // section 2.4.113 RecTypeFeature11 recordType = 2162 // section 2.4.114 RecTypeDropDownObjIds recordType = 2164 // section 2.4.93 RecTypeContinueFrt11 recordType = 2165 // section 2.4.61 RecTypeDConn recordType = 2166 // section 2.4.84 RecTypeList12 recordType = 2167 // section 2.4.157 RecTypeFeature12 recordType = 2168 // section 2.4.115 RecTypeCondFmt12 recordType = 2169 // section 2.4.57 RecTypeCF12 recordType = 2170 // section 2.4.43 RecTypeCFEx recordType = 2171 // section 2.4.44 RecTypeXFCRC recordType = 2172 // section 2.4.354 RecTypeXFExt recordType = 2173 // section 2.4.355 RecTypeAutoFilter12 recordType = 2174 // section 2.4.7 RecTypeContinueFrt12 recordType = 2175 // section 2.4.62 RecTypeMDTInfo recordType = 2180 // section 2.4.162 RecTypeMDXStr recordType = 2181 // section 2.4.166 RecTypeMDXTuple recordType = 2182 // section 2.4.167 RecTypeMDXSet recordType = 2183 // section 2.4.165 RecTypeMDXProp recordType = 2184 // section 2.4.164 RecTypeMDXKPI recordType = 2185 // section 2.4.163 RecTypeMDB recordType = 2186 // section 2.4.161 RecTypePLV recordType = 2187 // section 2.4.200 RecTypeCompat12 recordType = 2188 // section 2.4.54 RecTypeDXF recordType = 2189 // section 2.4.97 RecTypeTableStyles recordType = 2190 // section 
2.4.322 RecTypeTableStyle recordType = 2191 // section 2.4.320 RecTypeTableStyleElement recordType = 2192 // section 2.4.321 RecTypeStyleExt recordType = 2194 // section 2.4.270 RecTypeNamePublish recordType = 2195 // section 2.4.178 RecTypeNameCmt recordType = 2196 // section 2.4.176 RecTypeSortData recordType = 2197 // section 2.4.264 RecTypeTheme recordType = 2198 // section 2.4.326 RecTypeGUIDTypeLib recordType = 2199 // section 2.4.133 RecTypeFnGrp12 recordType = 2200 // section 2.4.121 RecTypeNameFnGrp12 recordType = 2201 // section 2.4.177 RecTypeMTRSettings recordType = 2202 // section 2.4.173 RecTypeCompressPictures recordType = 2203 // section 2.4.55 RecTypeHeaderFooter recordType = 2204 // section 2.4.137 RecTypeCrtLayout12 recordType = 2205 // section 2.4.66 RecTypeCrtMlFrt recordType = 2206 // section 2.4.70 RecTypeCrtMlFrtContinue recordType = 2207 // section 2.4.71 RecTypeForceFullCalculation recordType = 2211 // section 2.4.125 RecTypeShapePropsStream recordType = 2212 // section 2.4.258 RecTypeTextPropsStream recordType = 2213 // section 2.4.325 RecTypeRichTextStream recordType = 2214 // section 2.4.218 RecTypeCrtLayout12A recordType = 2215 // section 2.4.67 RecTypeUnits recordType = 4097 // section 2.4.332 RecTypeChart recordType = 4098 // section 2.4.45 RecTypeSeries recordType = 4099 // section 2.4.252 RecTypeDataFormat recordType = 4102 // section 2.4.74 RecTypeLineFormat recordType = 4103 // section 2.4.156 RecTypeMarkerFormat recordType = 4105 // section 2.4.160 RecTypeAreaFormat recordType = 4106 // section 2.4.3 RecTypePieFormat recordType = 4107 // section 2.4.195 RecTypeAttachedLabel recordType = 4108 // section 2.4.5 RecTypeSeriesText recordType = 4109 // section 2.4.254 RecTypeChartFormat recordType = 4116 // section 2.4.48 RecTypeLegend recordType = 4117 // section 2.4.152 RecTypeSeriesList recordType = 4118 // section 2.4.253 RecTypeBar recordType = 4119 // section 2.4.15 RecTypeLine recordType = 4120 // section 2.4.155 RecTypePie 
recordType = 4121 // section 2.4.194 RecTypeArea recordType = 4122 // section 2.4.2 RecTypeScatter recordType = 4123 // section 2.4.243 RecTypeCrtLine recordType = 4124 // section 2.4.68 RecTypeAxis recordType = 4125 // section 2.4.11 RecTypeTick recordType = 4126 // section 2.4.327 RecTypeValueRange recordType = 4127 // section 2.4.341 RecTypeCatSerRange recordType = 4128 // section 2.4.39 RecTypeAxisLine recordType = 4129 // section 2.4.12 RecTypeCrtLink recordType = 4130 // section 2.4.69 RecTypeDefaultText recordType = 4132 // section 2.4.88 RecTypeText recordType = 4133 // section 2.4.324 RecTypeFontX recordType = 4134 // section 2.4.123 RecTypeObjectLink recordType = 4135 // section 2.4.182 RecTypeFrame recordType = 4146 // section 2.4.128 RecTypeBegin recordType = 4147 // section 2.4.17 RecTypeEnd recordType = 4148 // section 2.4.99 RecTypePlotArea recordType = 4149 // section 2.4.197 RecTypeChart3d recordType = 4154 // section 2.4.46 RecTypePicF recordType = 4156 // section 2.4.193 RecTypeDropBar recordType = 4157 // section 2.4.92 RecTypeRadar recordType = 4158 // section 2.4.212 RecTypeSurf recordType = 4159 // section 2.4.272 RecTypeRadarArea recordType = 4160 // section 2.4.213 RecTypeAxisParent recordType = 4161 // section 2.4.13 RecTypeLegendException recordType = 4163 // section 2.4.153( RecTypeShtProps recordType = 4164 // section 2.4.261 RecTypeSerToCrt recordType = 4165 // section 2.4.256 RecTypeAxesUsed recordType = 4166 // section 2.4.10 RecTypeSBaseRef recordType = 4168 // section 2.4.242 RecTypeSerParent recordType = 4170 // section 2.4.255 RecTypeSerAuxTrend recordType = 4171 // section 2.4.250 RecTypeIFmtRecord recordType = 4174 // section 2.4.143 RecTypePos recordType = 4175 // section 2.4.201 RecTypeAlRuns recordType = 4176 // section 2.4.1 RecTypeBRAI recordType = 4177 // section 2.4.29 RecTypeSerAuxErrBar recordType = 4187 // section 2.4.249 RecTypeClrtClient recordType = 4188 // section 2.4.50 RecTypeSerFmt recordType = 4189 // section 
2.4.251
	RecTypeChart3DBarShape recordType = 4191 // section 2.4.47
	RecTypeFbi             recordType = 4192 // section 2.4.109
	RecTypeBopPop          recordType = 4193 // section 2.4.25
	RecTypeAxcExt          recordType = 4194 // section 2.4.9
	RecTypeDat             recordType = 4195 // section 2.4.73
	RecTypePlotGrowth      recordType = 4196 // section 2.4.198
	RecTypeSIIndex         recordType = 4197 // section 2.4.262
	RecTypeGelFrame        recordType = 4198 // section 2.4.131
	RecTypeBopPopCustom    recordType = 4199 // section 2.4.26
	RecTypeFbi2            recordType = 4200 // section 2.4.110
)

// String returns a human-readable label for the record type in the form
// "Name (id)", where id is the decimal record number. Any value without a
// matching case is rendered as "unknown (<decimal> 0x<hex>)".
func (r recordType) String() string {
	switch r {
	case RecTypeFormula:
		return "Formula (6)"
	case RecTypeEOF:
		return "EOF (10)"
	case RecTypeCalcCount:
		return "CalcCount (12)"
	case RecTypeCalcMode:
		return "CalcMode (13)"
	case RecTypeCalcPrecision:
		return "CalcPrecision (14)"
	case RecTypeCalcRefMode:
		return "CalcRefMode (15)"
	case RecTypeCalcDelta:
		return "CalcDelta (16)"
	case RecTypeCalcIter:
		return "CalcIter (17)"
	case RecTypeProtect:
		return "Protect (18)"
	case RecTypePassword:
		return "Password (19)"
	case RecTypeHeader:
		return "Header (20)"
	case RecTypeFooter:
		return "Footer (21)"
	case RecTypeExternSheet:
		return "ExternSheet (23)"
	case RecTypeLbl:
		return "Lbl (24)"
	case RecTypeWinProtect:
		return "WinProtect (25)"
	case RecTypeVerticalPageBreaks:
		return "VerticalPageBreaks (26)"
	case RecTypeHorizontalPageBreaks:
		return "HorizontalPageBreaks (27)"
	case RecTypeNote:
		return "Note (28)"
	case RecTypeSelection:
		return "Selection (29)"
	case RecTypeDate1904:
		return "Date1904 (34)"
	case RecTypeExternName:
		return "ExternName (35)"
	case RecTypeLeftMargin:
		return "LeftMargin (38)"
	case RecTypeRightMargin:
		return "RightMargin (39)"
	case RecTypeTopMargin:
		return "TopMargin (40)"
	case RecTypeBottomMargin:
		return "BottomMargin (41)"
	case RecTypePrintRowCol:
		return "PrintRowCol (42)"
	case RecTypePrintGrid:
		return "PrintGrid (43)"
	case RecTypeFilePass:
		return "FilePass (47)"
	case RecTypeFont:
		return "Font (49)"
	case RecTypePrintSize:
		return "PrintSize (51)"
	case RecTypeContinue:
		return "Continue (60)"
	case RecTypeWindow1:
		return "Window1 (61)"
	case RecTypeBackup:
		return "Backup (64)"
	case RecTypePane:
		return "Pane (65)"
	case RecTypeCodePage:
		return "CodePage (66)"
	case RecTypePls:
		return "Pls (77)"
	case RecTypeDCon:
		return "DCon (80)"
	case RecTypeDConRef:
		return "DConRef (81)"
	case RecTypeDConName:
		return "DConName (82)"
	case RecTypeDefColWidth:
		return "DefColWidth (85)"
	case RecTypeXCT:
		return "XCT (89)"
	case RecTypeCRN:
		return "CRN (90)"
	case RecTypeFileSharing:
		return "FileSharing (91)"
	case RecTypeWriteAccess:
		return "WriteAccess (92)"
	case RecTypeObj:
		return "Obj (93)"
	case RecTypeUncalced:
		return "Uncalced (94)"
	case RecTypeCalcSaveRecalc:
		return "CalcSaveRecalc (95)"
	case RecTypeTemplate:
		return "Template (96)"
	case RecTypeIntl:
		return "Intl (97)"
	case RecTypeObjProtect:
		return "ObjProtect (99)"
	case RecTypeColInfo:
		return "ColInfo (125)"
	case RecTypeGuts:
		return "Guts (128)"
	case RecTypeWsBool:
		return "WsBool (129)"
	case RecTypeGridSet:
		return "GridSet (130)"
	case RecTypeHCenter:
		return "HCenter (131)"
	case RecTypeVCenter:
		return "VCenter (132)"
	case RecTypeBoundSheet8:
		return "BoundSheet8 (133)"
	case RecTypeWriteProtect:
		return "WriteProtect (134)"
	case RecTypeCountry:
		return "Country (140)"
	case RecTypeHideObj:
		return "HideObj (141)"
	case RecTypeSort:
		return "Sort (144)"
	case RecTypePalette:
		return "Palette (146)"
	case RecTypeSync:
		return "Sync (151)"
	case RecTypeLPr:
		return "LPr (152)"
	case RecTypeDxGCol:
		return "DxGCol (153)"
	case RecTypeFnGroupName:
		return "FnGroupName (154)"
	case RecTypeFilterMode:
		return "FilterMode (155)"
	case RecTypeBuiltInFnGroupCount:
		return "BuiltInFnGroupCount (156)"
	case RecTypeAutoFilterInfo:
		return "AutoFilterInfo (157)"
	case RecTypeAutoFilter:
		return "AutoFilter (158)"
	case RecTypeScl:
		return "Scl (160)"
	case RecTypeSetup:
		return "Setup (161)"
	case RecTypeScenMan:
		return "ScenMan (174)"
	case RecTypeSCENARIO:
		return "SCENARIO (175)"
	case RecTypeSxView:
		return "SxView (176)"
	case RecTypeSxvd:
		return "Sxvd (177)"
	case RecTypeSXVI:
		return "SXVI (178)"
	case RecTypeSxIvd:
		return "SxIvd (180)"
	case RecTypeSXLI:
		return "SXLI (181)"
	case RecTypeSXPI:
		return "SXPI (182)"
	case RecTypeDocRoute:
		return "DocRoute (184)"
	case RecTypeRecipName:
		return "RecipName (185)"
	case RecTypeMulRk:
		return "MulRk (189)"
	case RecTypeMulBlank:
		return "MulBlank (190)"
	case RecTypeMms:
		return "Mms (193)"
	case RecTypeSXDI:
		return "SXDI (197)"
	case RecTypeSXDB:
		return "SXDB (198)"
	case RecTypeSXFDB:
		return "SXFDB (199)"
	case RecTypeSXDBB:
		return "SXDBB (200)"
	case RecTypeSXNum:
		return "SXNum (201)"
	case RecTypeSxBool:
		return "SxBool (202)"
	case RecTypeSxErr:
		return "SxErr (203)"
	case RecTypeSXInt:
		return "SXInt (204)"
	case RecTypeSXString:
		return "SXString (205)"
	case RecTypeSXDtr:
		return "SXDtr (206)"
	case RecTypeSxNil:
		return "SxNil (207)"
	case RecTypeSXTbl:
		return "SXTbl (208)"
	case RecTypeSXTBRGIITM:
		return "SXTBRGIITM (209)"
	case RecTypeSxTbpg:
		return "SxTbpg (210)"
	case RecTypeObProj:
		return "ObProj (211)"
	case RecTypeSXStreamID:
		return "SXStreamID (213)"
	case RecTypeDBCell:
		return "DBCell (215)"
	case RecTypeSXRng:
		return "SXRng (216)"
	case RecTypeSxIsxoper:
		return "SxIsxoper (217)"
	case RecTypeBookBool:
		return "BookBool (218)"
	case RecTypeDbOrParamQry:
		return "DbOrParamQry (220)"
	case RecTypeScenarioProtect:
		return "ScenarioProtect (221)"
	case RecTypeOleObjectSize:
		return "OleObjectSize (222)"
	case RecTypeXF:
		return "XF (224)"
	case RecTypeInterfaceHdr:
		return "InterfaceHdr (225)"
	case RecTypeInterfaceEnd:
		return "InterfaceEnd (226)"
	case RecTypeSXVS:
		return "SXVS (227)"
	case RecTypeMergeCells:
		return "MergeCells (229)"
	case RecTypeBkHim:
		return "BkHim (233)"
	case RecTypeMsoDrawingGroup:
		return "MsoDrawingGroup (235)"
	case RecTypeMsoDrawing:
		return "MsoDrawing (236)"
	case RecTypeMsoDrawingSelection:
		return "MsoDrawingSelection (237)"
	case RecTypePhoneticInfo:
		return "PhoneticInfo (239)"
	case RecTypeSxRule:
		return "SxRule (240)"
	case RecTypeSXEx:
		return "SXEx (241)"
	case RecTypeSxFilt:
		return "SxFilt (242)"
	case RecTypeSxDXF:
		return "SxDXF (244)"
	case RecTypeSxItm:
		return "SxItm (245)"
	case RecTypeSxName:
		return "SxName (246)"
	case RecTypeSxSelect:
		return "SxSelect (247)"
	case RecTypeSXPair:
		return "SXPair (248)"
	case RecTypeSxFmla:
		return "SxFmla (249)"
	case RecTypeSxFormat:
		return "SxFormat (251)"
	case RecTypeSST:
		return "SST (252)"
	case RecTypeLabelSst:
		return "LabelSst (253)"
	case RecTypeExtSST:
		return "ExtSST (255)"
	case RecTypeSXVDEx:
		return "SXVDEx (256)"
	case RecTypeSXFormula:
		return "SXFormula (259)"
	case RecTypeSXDBEx:
		return "SXDBEx (290)"
	case RecTypeRRDInsDel:
		return "RRDInsDel (311)"
	case RecTypeRRDHead:
		return "RRDHead (312)"
	case RecTypeRRDChgCell:
		return "RRDChgCell (315)"
	case RecTypeRRTabID:
		return "RRTabID (317)"
	case RecTypeRRDRenSheet:
		return "RRDRenSheet (318)"
	case RecTypeRRSort:
		return "RRSort (319)"
	case RecTypeRRDMove:
		return "RRDMove (320)"
	case RecTypeRRFormat:
		return "RRFormat (330)"
	case RecTypeRRAutoFmt:
		return "RRAutoFmt (331)"
	case RecTypeRRInsertSh:
		return "RRInsertSh (333)"
	case RecTypeRRDMoveBegin:
		return "RRDMoveBegin (334)"
	case RecTypeRRDMoveEnd:
		return "RRDMoveEnd (335)"
	case RecTypeRRDInsDelBegin:
		return "RRDInsDelBegin (336)"
	case RecTypeRRDInsDelEnd:
		return "RRDInsDelEnd (337)"
	case RecTypeRRDConflict:
		return "RRDConflict (338)"
	case RecTypeRRDDefName:
		return "RRDDefName (339)"
	case RecTypeRRDRstEtxp:
		return "RRDRstEtxp (340)"
	case RecTypeLRng:
		return "LRng (351)"
	case RecTypeUsesELFs:
		return "UsesELFs (352)"
	case RecTypeDSF:
		return "DSF (353)"
	case RecTypeCUsr:
		return "CUsr (401)"
	case RecTypeCbUsr:
		return "CbUsr (402)"
	case RecTypeUsrInfo:
		return "UsrInfo (403)"
	case RecTypeUsrExcl:
		return "UsrExcl (404)"
	case RecTypeFileLock:
		return "FileLock (405)"
	case RecTypeRRDInfo:
		return "RRDInfo (406)"
	case RecTypeBCUsrs:
		return "BCUsrs (407)"
	case RecTypeUsrChk:
		return "UsrChk (408)"
	case RecTypeUserBView:
		return "UserBView (425)"
	case RecTypeUserSViewBegin:
		return "UserSViewBegin[Chart] (426)"
	case RecTypeUserSViewEnd:
		return "UserSViewEnd (427)"
	case RecTypeRRDUserView:
		return "RRDUserView (428)"
	case RecTypeQsi:
		return "Qsi (429)"
	case RecTypeSupBook:
		return "SupBook (430)"
	case RecTypeProt4Rev:
		return "Prot4Rev (431)"
	case RecTypeCondFmt:
		return "CondFmt (432)"
	case RecTypeCF:
		return "CF (433)"
	case RecTypeDVal:
		return "DVal (434)"
	case RecTypeDConBin:
		return "DConBin (437)"
	case RecTypeTxO:
		return "TxO (438)"
	case RecTypeRefreshAll:
		return "RefreshAll (439)"
	case RecTypeHLink:
		return "HLink (440)"
	case RecTypeLel:
		return "Lel (441)"
	case RecTypeCodeName:
		return "CodeName (442)"
	case RecTypeSXFDBType:
		return "SXFDBType (443)"
	case RecTypeProt4RevPass:
		return "Prot4RevPass (444)"
	case RecTypeObNoMacros:
		return "ObNoMacros (445)"
	case RecTypeDv:
		return "Dv (446)"
	case RecTypeExcel9File:
		return "Excel9File (448)"
	case RecTypeRecalcID:
		return "RecalcID (449)"
	case RecTypeEntExU2:
		return "EntExU2 (450)"
	case RecTypeDimensions:
		return "Dimensions (512)"
	case RecTypeBlank:
		return "Blank (513)"
	case RecTypeNumber:
		return "Number (515)"
	case RecTypeLabel:
		return "Label (516)"
	case RecTypeBoolErr:
		return "BoolErr (517)"
	case RecTypeString:
		return "String (519)"
	case RecTypeRow:
		return "Row (520)"
	case RecTypeIndex:
		return "Index (523)"
	case RecTypeArray:
		return "Array (545)"
	case RecTypeDefaultRowHeight:
		return "DefaultRowHeight (549)"
	case RecTypeTable:
		return "Table (566)"
	case RecTypeWindow2:
		return "Window2 (574)"
	case RecTypeRK:
		return "RK (638)"
	case RecTypeStyle:
		return "Style (659)"
	case RecTypeBigName:
		return "BigName (1048)"
	case RecTypeFormat:
		return "Format (1054)"
	case RecTypeContinueBigName:
		return "ContinueBigName (1084)"
	case RecTypeShrFmla:
		return "ShrFmla (1212)"
	case RecTypeHLinkTooltip:
		return "HLinkTooltip (2048)"
	case RecTypeWebPub:
		return "WebPub (2049)"
	case RecTypeQsiSXTag:
		return "QsiSXTag (2050)"
	case RecTypeDBQueryExt:
		return "DBQueryExt (2051)"
	case RecTypeExtString:
		return "ExtString (2052)"
	case RecTypeTxtQry:
		return "TxtQry (2053)"
	case RecTypeQsir:
		return "Qsir (2054)"
	case RecTypeQsif:
		return "Qsif (2055)"
	case RecTypeRRDTQSIF:
		return "RRDTQSIF (2056)"
	case RecTypeBOF:
		return "BOF (2057)"
	case RecTypeOleDbConn:
		return "OleDbConn (2058)"
	case RecTypeWOpt:
		return "WOpt (2059)"
	case RecTypeSXViewEx:
		return "SXViewEx (2060)"
	case RecTypeSXTH:
		return "SXTH (2061)"
	case RecTypeSXPIEx:
		return "SXPIEx (2062)"
	case RecTypeSXVDTEx:
		return "SXVDTEx (2063)"
	case RecTypeSXViewEx9:
		return "SXViewEx9 (2064)"
	case RecTypeContinueFrt:
		return "ContinueFrt (2066)"
	case RecTypeRealTimeData:
		return "RealTimeData (2067)"
	case RecTypeChartFrtInfo:
		return "ChartFrtInfo (2128)"
	case RecTypeFrtWrapper:
		return "FrtWrapper (2129)"
	case RecTypeStartBlock:
		return "StartBlock (2130)"
	case RecTypeEndBlock:
		return "EndBlock (2131)"
	case RecTypeStartObject:
		return "StartObject (2132)"
	case RecTypeEndObject:
		return "EndObject (2133)"
	case RecTypeCatLab:
		return "CatLab (2134)"
	case RecTypeYMult:
		return "YMult (2135)"
	case RecTypeSXViewLink:
		return "SXViewLink (2136)"
	case RecTypePivotChartBits:
		return "PivotChartBits (2137)"
	case RecTypeFrtFontList:
		return "FrtFontList (2138)"
	case RecTypeSheetExt:
		return "SheetExt (2146)"
	case RecTypeBookExt:
		return "BookExt (2147)"
	case RecTypeSXAddl:
		return "SXAddl (2148)"
	case RecTypeCrErr:
		return "CrErr (2149)"
	case RecTypeHFPicture:
		return "HFPicture (2150)"
	case RecTypeFeatHdr:
		return "FeatHdr (2151)"
	case RecTypeFeat:
		return "Feat (2152)"
	case RecTypeDataLabExt:
		return "DataLabExt (2154)"
	case RecTypeDataLabExtContents:
		return "DataLabExtContents (2155)"
	case RecTypeCellWatch:
		return "CellWatch (2156)"
	case RecTypeFeatHdr11:
		return "FeatHdr11 (2161)"
	case RecTypeFeature11:
		return "Feature11 (2162)"
	case RecTypeDropDownObjIds:
		return "DropDownObjIds (2164)"
	case RecTypeContinueFrt11:
		return "ContinueFrt11 (2165)"
	case RecTypeDConn:
		return "DConn (2166)"
	case RecTypeList12:
		return "List12 (2167)"
	case RecTypeFeature12:
		return "Feature12 (2168)"
	case RecTypeCondFmt12:
		return "CondFmt12 (2169)"
	case RecTypeCF12:
		return "CF12 (2170)"
	case RecTypeCFEx:
		return "CFEx (2171)"
	case RecTypeXFCRC:
		return "XFCRC (2172)"
	case RecTypeXFExt:
		return "XFExt (2173)"
	case RecTypeAutoFilter12:
		return "AutoFilter12 (2174)"
	case RecTypeContinueFrt12:
		return "ContinueFrt12 (2175)"
	case RecTypeMDTInfo:
		return "MDTInfo (2180)"
	case RecTypeMDXStr:
		return "MDXStr (2181)"
	case RecTypeMDXTuple:
		return "MDXTuple (2182)"
	case RecTypeMDXSet:
		return "MDXSet (2183)"
	case RecTypeMDXProp:
		return "MDXProp (2184)"
	case RecTypeMDXKPI:
		return "MDXKPI (2185)"
	case RecTypeMDB:
		return "MDB (2186)"
	case RecTypePLV:
		return "PLV (2187)"
	case RecTypeCompat12:
		return "Compat12 (2188)"
	case RecTypeDXF:
		return "DXF (2189)"
	case RecTypeTableStyles:
		return "TableStyles (2190)"
	case RecTypeTableStyle:
		return "TableStyle (2191)"
	case RecTypeTableStyleElement:
		return "TableStyleElement (2192)"
	case RecTypeStyleExt:
		return "StyleExt (2194)"
	case RecTypeNamePublish:
		return "NamePublish (2195)"
	case RecTypeNameCmt:
		return "NameCmt (2196)"
	case RecTypeSortData:
		return "SortData (2197)"
	case RecTypeTheme:
		return "Theme (2198)"
	case RecTypeGUIDTypeLib:
		return "GUIDTypeLib (2199)"
	case RecTypeFnGrp12:
		return "FnGrp12 (2200)"
	case RecTypeNameFnGrp12:
		return "NameFnGrp12 (2201)"
	case RecTypeMTRSettings:
		return "MTRSettings (2202)"
	case RecTypeCompressPictures:
		return "CompressPictures (2203)"
	case RecTypeHeaderFooter:
		return "HeaderFooter (2204)"
	case RecTypeCrtLayout12:
		return "CrtLayout12 (2205)"
	case RecTypeCrtMlFrt:
		return "CrtMlFrt (2206)"
	case RecTypeCrtMlFrtContinue:
		return "CrtMlFrtContinue (2207)"
	case RecTypeForceFullCalculation:
		return "ForceFullCalculation (2211)"
	case RecTypeShapePropsStream:
		return "ShapePropsStream (2212)"
	case RecTypeTextPropsStream:
		return "TextPropsStream (2213)"
	case RecTypeRichTextStream:
		return "RichTextStream (2214)"
	case RecTypeCrtLayout12A:
		return "CrtLayout12A (2215)"
	case RecTypeUnits:
		return "Units (4097)"
	case RecTypeChart:
		return "Chart (4098)"
	case RecTypeSeries:
		return "Series (4099)"
	case RecTypeDataFormat:
		return "DataFormat (4102)"
	case RecTypeLineFormat:
		return "LineFormat (4103)"
	case RecTypeMarkerFormat:
		return "MarkerFormat (4105)"
	case RecTypeAreaFormat:
		return "AreaFormat (4106)"
	case RecTypePieFormat:
		return "PieFormat (4107)"
	case RecTypeAttachedLabel:
		return "AttachedLabel (4108)"
	case RecTypeSeriesText:
		return "SeriesText (4109)"
	case RecTypeChartFormat:
		return "ChartFormat (4116)"
	case RecTypeLegend:
		return "Legend (4117)"
	case RecTypeSeriesList:
		return "SeriesList (4118)"
	case RecTypeBar:
		return "Bar (4119)"
	case RecTypeLine:
		return "Line (4120)"
	case RecTypePie:
		return "Pie (4121)"
	case RecTypeArea:
		return "Area (4122)"
	case RecTypeScatter:
		return "Scatter (4123)"
	case RecTypeCrtLine:
		return "CrtLine (4124)"
	case RecTypeAxis:
		return "Axis (4125)"
	case RecTypeTick:
		return "Tick (4126)"
	case RecTypeValueRange:
		return "ValueRange (4127)"
	case RecTypeCatSerRange:
		return "CatSerRange (4128)"
	case RecTypeAxisLine:
		return "AxisLine (4129)"
	case RecTypeCrtLink:
		return "CrtLink (4130)"
	case RecTypeDefaultText:
		return "DefaultText (4132)"
	case RecTypeText:
		return "Text (4133)"
	case RecTypeFontX:
		return "FontX (4134)"
	case RecTypeObjectLink:
		return "ObjectLink (4135)"
	case RecTypeFrame:
		return "Frame (4146)"
	case RecTypeBegin:
		return "Begin (4147)"
	case RecTypeEnd:
		return "End (4148)"
	case RecTypePlotArea:
		return "PlotArea (4149)"
	case RecTypeChart3d:
		return "Chart3d (4154)"
	case RecTypePicF:
		return "PicF (4156)"
	case RecTypeDropBar:
		return "DropBar (4157)"
	case RecTypeRadar:
		return "Radar (4158)"
	case RecTypeSurf:
		return "Surf (4159)"
	case RecTypeRadarArea:
		return "RadarArea (4160)"
	case RecTypeAxisParent:
		return "AxisParent (4161)"
	case RecTypeLegendException:
		return "LegendException (4163)"
	case RecTypeShtProps:
		return "ShtProps (4164)"
	case RecTypeSerToCrt:
		return "SerToCrt (4165)"
	case RecTypeAxesUsed:
		return "AxesUsed (4166)"
	case RecTypeSBaseRef:
		return "SBaseRef (4168)"
	case RecTypeSerParent:
		return "SerParent (4170)"
	case RecTypeSerAuxTrend:
		return "SerAuxTrend (4171)"
	case RecTypeIFmtRecord:
		return "IFmtRecord (4174)"
	case RecTypePos:
		return "Pos (4175)"
	case RecTypeAlRuns:
		return "AlRuns (4176)"
	case RecTypeBRAI:
		return "BRAI (4177)"
	case RecTypeSerAuxErrBar:
		return "SerAuxErrBar (4187)"
	case RecTypeClrtClient:
		return "ClrtClient (4188)"
	case RecTypeSerFmt:
		return "SerFmt (4189)"
	case RecTypeChart3DBarShape:
		return "Chart3DBarShape (4191)"
	case RecTypeFbi:
		return "Fbi (4192)"
	case RecTypeBopPop:
		return "BopPop (4193)"
	case RecTypeAxcExt:
		return "AxcExt (4194)"
	case RecTypeDat:
		return "Dat (4195)"
	case RecTypePlotGrowth:
		return "PlotGrowth (4196)"
	case RecTypeSIIndex:
		return "SIIndex (4197)"
	case RecTypeGelFrame:
		return "GelFrame (4198)"
	case RecTypeBopPopCustom:
		return "BopPopCustom (4199)"
	case RecTypeFbi2:
		return "Fbi2 (4200)"
	}
	// Not a known record type: show the raw number in decimal and hex.
	return fmt.Sprintf("unknown (%d 0x%x)", uint16(r), uint16(r))
}

================================================
FILE: xls/sheets.go
================================================
package xls

import (
	"encoding/binary"
	"errors"
	"log"
	"math"
	"unicode/utf16"

	"github.com/pbnjay/grate"
	"github.com/pbnjay/grate/commonxl"
)

// List returns the names of the visible sheets in the workbook, i.e. those
// whose HiddenState has neither of its two low bits set. The returned error
// is always nil.
func (b *WorkBook) List() ([]string, error) {
	res := make([]string, 0, len(b.sheets))
	for _, s := range b.sheets {
		if (s.HiddenState & 0x03) == 0 {
			res = append(res, s.Name)
		}
	}
	return res, nil
}

// ListHidden returns the names of the sheets in the workbook that are not
// visible, i.e. those whose HiddenState has either of its two low bits set.
// The returned error is always nil.
func (b *WorkBook) ListHidden() ([]string, error) {
	res := make([]string, 0, len(b.sheets))
	for _, s := range b.sheets {
		if (s.HiddenState & 0x03) != 0 {
			res = append(res, s.Name)
		}
	}
	return res, nil
}

// Get opens the named worksheet and returns an iterator for its contents.
func (b *WorkBook) Get(sheetName string) (grate.Collection, error) { for _, s := range b.sheets { if s.Name == sheetName { ss := b.pos2substream[int64(s.Position)] return b.parseSheet(s, ss) } } return nil, errors.New("xls: sheet not found") } func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { res := &commonxl.Sheet{ Formatter: &b.nfmt, } var minRow, maxRow uint32 var minCol, maxCol uint16 // temporary string buffer us := make([]uint16, 8224) inSubstream := 0 for idx, r := range b.substreams[ss] { if inSubstream > 0 { if r.RecType == RecTypeEOF { inSubstream-- } continue } switch r.RecType { case RecTypeBOF: // a BOF inside a sheet usually means embedded content like a chart // (which we aren't interested in). So we we set a flag and wait // for the EOF for that content block. if idx > 0 { inSubstream++ continue } case RecTypeWsBool: if (r.Data[1] & 0x10) != 0 { // it's a dialog return nil, nil } case RecTypeDimensions: // max = 0-based index of the row AFTER the last valid index minRow = binary.LittleEndian.Uint32(r.Data[:4]) maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000 minCol = binary.LittleEndian.Uint16(r.Data[8:10]) maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100 if grate.Debug { log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)", minCol, minRow, maxCol, maxRow) } if minRow > 0x0000FFFF || maxRow > 0x00010000 { log.Println("invalid dimensions") } if minCol > 0x00FF || maxCol > 0x0100 { log.Println("invalid dimensions") } // pre-allocate cells res.Resize(int(maxRow), int(maxCol)) } } inSubstream = 0 var formulaRow, formulaCol uint16 for ridx, r := range b.substreams[ss] { if inSubstream > 0 { if r.RecType == RecTypeEOF { inSubstream-- } else if grate.Debug { log.Println(" Unhandled sheet substream record type:", r.RecType, ridx) } continue } // sec 2.1.7.20.6 Common Productions ABNF: /* CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2 CELL = FORMULA / Blank / MulBlank / RK / MulRk / BoolErr / 
Number / LabelSst FORMULA = [Uncalced] Formula [Array / Table / ShrFmla / SUB] [String *Continue] Not parsed form the list above: DBCell, EntExU2, Uncalced, Array, Table,ShrFmla NB: no idea what "SUB" is */ switch r.RecType { case RecTypeBOF: if ridx > 0 { inSubstream++ continue } case RecTypeBoolErr: rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) if r.Data[7] == 0 { // Boolean value bv := false if r.Data[6] == 1 { bv = true } var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } res.Put(rowIndex, colIndex, bv, fno) //log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv) } else { // it's an error, load the label be, ok := berrLookup[r.Data[6]] if !ok { be = "" } res.Put(rowIndex, colIndex, be, 0) //log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be) } case RecTypeMulRk: // MulRk encodes multiple RK values in a row nrk := int((r.RecSize - 6) / 6) rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) for i := 0; i < nrk; i++ { off := 4 + i*6 ixfe := int(binary.LittleEndian.Uint16(r.Data[off:])) value := RKNumber(binary.LittleEndian.Uint32(r.Data[off+2:])) var rval interface{} if value.IsInteger() { rval = value.Int() } else { rval = value.Float64() } var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } res.Put(rowIndex, colIndex+i, rval, fno) } //log.Printf("mulrow spec: %+v", *mr) case RecTypeNumber: rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) xnum := binary.LittleEndian.Uint64(r.Data[6:]) value := math.Float64frombits(xnum) var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } res.Put(rowIndex, colIndex, value, fno) //log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value) case RecTypeRK: rowIndex := 
int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:])) value := RKNumber(binary.LittleEndian.Uint32(r.Data[6:])) var rval interface{} if value.IsInteger() { rval = value.Int() } else { rval = value.Float64() } var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } res.Put(rowIndex, colIndex, rval, fno) //log.Printf("RK spec: %d %d = %+v", rowIndex, colIndex, rval) case RecTypeFormula: formulaRow = binary.LittleEndian.Uint16(r.Data[:2]) formulaCol = binary.LittleEndian.Uint16(r.Data[2:4]) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) fdata := r.Data[6:] var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } if fdata[6] == 0xFF && fdata[7] == 0xFF { switch fdata[0] { case 0: // string in next record // put placeholder now to record the numFmt res.Put(int(formulaRow), int(formulaCol), "", fno) case 1: // boolean bv := false if fdata[2] != 0 { bv = true } res.Put(int(formulaRow), int(formulaCol), bv, fno) case 2: // error value be, ok := berrLookup[fdata[2]] if !ok { be = "" } res.Put(int(formulaRow), int(formulaCol), be, 0) case 3: // blank string default: log.Printf("unknown formula value type %d", fdata[0]) } } else { xnum := binary.LittleEndian.Uint64(fdata) value := math.Float64frombits(xnum) res.Put(int(formulaRow), int(formulaCol), value, fno) } //log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data) case RecTypeString: // String is the previously rendered value of a formula // NB similar to the workbook SST, this can continue over // addition records up to 32k characters. A 1-byte flag // at each gap indicates if the encoding switches // to/from 8/16-bit characters. 
charCount := binary.LittleEndian.Uint16(r.Data[:2]) flags := r.Data[2] fstr := "" if (flags & 1) == 0 { fstr = string(r.Data[3:]) } else { raw := r.Data[3:] if int(charCount) > cap(us) { us = make([]uint16, charCount) } us = us[:charCount] for i := 0; i < int(charCount); i++ { us[i] = binary.LittleEndian.Uint16(raw) raw = raw[2:] } fstr = string(utf16.Decode(us)) } if (ridx + 1) < len(b.substreams[ss]) { ridx2 := ridx + 1 nrecs := len(b.substreams[ss]) for ridx2 < nrecs { r2 := b.substreams[ss][ridx2] if r2.RecType != RecTypeContinue { break } if (r2.Data[0] & 1) == 0 { fstr += string(r2.Data[1:]) } else { raw := r2.Data[1:] slen := len(raw) / 2 us = us[:slen] for i := 0; i < slen; i++ { us[i] = binary.LittleEndian.Uint16(raw) raw = raw[2:] } fstr += string(utf16.Decode(us)) } ridx2++ } } res.Set(int(formulaRow), int(formulaCol), fstr) //log.Printf("String direct: %d %d '%s'", int(formulaRow), int(formulaCol), fstr) case RecTypeLabelSst: rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:])) if sstIndex > len(b.strings) { return nil, errors.New("xls: invalid sst index") } var fno uint16 if ixfe < len(b.xfs) { fno = b.xfs[ixfe] } if b.strings[sstIndex] != "" { res.Put(rowIndex, colIndex, b.strings[sstIndex], fno) } //log.Printf("SST spec: %d %d = [%d] '%s' %d", rowIndex, colIndex, sstIndex, b.strings[sstIndex], fno) case RecTypeHLink: firstRow := binary.LittleEndian.Uint16(r.Data[:2]) lastRow := binary.LittleEndian.Uint16(r.Data[2:4]) firstCol := binary.LittleEndian.Uint16(r.Data[4:6]) lastCol := binary.LittleEndian.Uint16(r.Data[6:]) if int(firstCol) > int(maxCol) { //log.Println("invalid hyperlink column") continue } if int(firstRow) > int(maxRow) { //log.Println("invalid hyperlink row") continue } if lastRow == 0xFFFF { // placeholder value indicate "last" lastRow = uint16(maxRow) - 1 } if lastCol 
== 0xFF { // placeholder value indicate "last" lastCol = uint16(maxCol) - 1 } // decode the hyperlink datastructure and try to find the // display text and separate the URL itself. displayText, linkText, err := decodeHyperlinks(r.Data[8:]) if err != nil { log.Println(err) continue } // apply merge cell rules (see RecTypeMergeCells below) for rn := int(firstRow); rn <= int(lastRow); rn++ { for cn := int(firstCol); cn <= int(lastCol); cn++ { if rn == int(firstRow) && cn == int(firstCol) { // TODO: provide custom hooks for how to handle links in output res.Put(rn, cn, displayText+" <"+linkText+">", 0) } else if cn == int(firstCol) { // first and last column MAY be the same if rn == int(lastRow) { res.Put(rn, cn, grate.EndRowMerged, 0) } else { res.Put(rn, cn, grate.ContinueRowMerged, 0) } } else if cn == int(lastCol) { // first and last column are NOT the same res.Put(rn, cn, grate.EndColumnMerged, 0) } else { res.Put(rn, cn, grate.ContinueColumnMerged, 0) } } } case RecTypeMergeCells: // To keep cells aligned, Merged cells are handled by placing // special characters in each cell covered by the merge block. // // The contents of the cell are always in the top left position. // A "down arrow" (↓) indicates the left side of the merge block, and a // "down arrow with stop line" (⤓) indicates the last row of the merge. // A "right arrow" (→) indicates that the columns span horizontally, // and a "right arrow with stop line" (⇥) indicates the rightmost // column of the merge. 
// cmcs := binary.LittleEndian.Uint16(r.Data[:2]) raw := r.Data[2:] for i := 0; i < int(cmcs); i++ { firstRow := binary.LittleEndian.Uint16(raw[:2]) lastRow := binary.LittleEndian.Uint16(raw[2:4]) firstCol := binary.LittleEndian.Uint16(raw[4:6]) lastCol := binary.LittleEndian.Uint16(raw[6:]) raw = raw[8:] if lastRow == 0xFFFF { // placeholder value indicate "last" lastRow = uint16(maxRow) - 1 } if lastCol == 0xFF { // placeholder value indicate "last" lastCol = uint16(maxCol) - 1 } for rn := int(firstRow); rn <= int(lastRow); rn++ { for cn := int(firstCol); cn <= int(lastCol); cn++ { if rn == int(firstRow) && cn == int(firstCol) { // should be a value there already! } else if cn == int(firstCol) { // first and last column MAY be the same if rn == int(lastRow) { res.Put(rn, cn, grate.EndRowMerged, 0) } else { res.Put(rn, cn, grate.ContinueRowMerged, 0) } } else if cn == int(lastCol) { // first and last column are NOT the same res.Put(rn, cn, grate.EndColumnMerged, 0) } else { res.Put(rn, cn, grate.ContinueColumnMerged, 0) } } } } /* case RecTypeBlank, RecTypeMulBlank: // cells default value is blank, no need for these case RecTypeContinue: // the only situation so far is when used in RecTypeString above case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool: // handled in initial pass default: if grate.Debug { log.Println(" Unhandled sheet record type:", r.RecType, ridx) } */ } } return res, nil } var berrLookup = map[byte]string{ 0x00: "#NULL!", 0x07: "#DIV/0!", 0x0F: "#VALUE!", 0x17: "#REF!", 0x1D: "#NAME?", 0x24: "#NUM!", 0x2A: "#N/A", 0x2B: "#GETTING_DATA", } ================================================ FILE: xls/simple_test.go ================================================ package xls import ( "bufio" "log" "os" "strings" "testing" "github.com/pbnjay/grate/commonxl" ) var testFilePairs = [][]string{ {"../testdata/basic.xls", "../testdata/basic.tsv"}, {"../testdata/testing.xls", "../testdata/testing.tsv"}, // TODO: custom formatter support 
//{"../testdata/basic2.xls", "../testdata/basic2.tsv"}, // TODO: datetime and fraction formatter support //{"../testdata/multi_test.xls", "../testdata/multi_test.tsv"}, } func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) { f, err := os.Open(fn) if err != nil { return nil, err } xs := &commonxl.Sheet{ Formatter: ff, } row := 0 s := bufio.NewScanner(f) for s.Scan() { record := strings.Split(s.Text(), "\t") for i, val := range record { xs.Put(row, i, val, 0) } row++ } return xs, f.Close() } func TestBasic(t *testing.T) { for _, fnames := range testFilePairs { var trueData *commonxl.Sheet log.Println("Testing ", fnames[0]) wb, err := Open(fnames[0]) if err != nil { t.Fatal(err) } sheets, err := wb.List() if err != nil { t.Fatal(err) } firstLoad := true for _, s := range sheets { sheet, err := wb.Get(s) if err != nil { t.Fatal(err) } xsheet := sheet.(*commonxl.Sheet) if firstLoad { trueData, err = loadTestData(fnames[1], xsheet.Formatter) if err != nil { t.Fatal(err) } firstLoad = false } for xrow, xdata := range xsheet.Rows { for xcol, xval := range xdata { //t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol]) if !trueData.Rows[xrow][xcol].Equal(xval) { t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol, xval, trueData.Rows[xrow][xcol]) t.Fail() } } } } err = wb.Close() if err != nil { t.Fatal(err) } } } ================================================ FILE: xls/strings.go ================================================ package xls import ( "encoding/binary" "errors" "io" "io/ioutil" "unicode/utf16" ) // 2.5.240 func decodeShortXLUnicodeString(raw []byte) (string, int, error) { // identical to decodeXLUnicodeString except for cch=8bits instead of 16 cch := int(raw[0]) flags := raw[1] raw = raw[2:] content := make([]uint16, cch) if (flags & 0x1) == 0 { // 16-bit characters but only the bottom 8bits contentBytes := raw[:cch] for i, x := range contentBytes { content[i] = uint16(x) } 
cch += 2 // to return the offset } else { // 16-bit characters for i := 0; i < cch; i++ { content[i] = binary.LittleEndian.Uint16(raw[:2]) raw = raw[2:] } cch += cch + 2 // to return the offset } return string(utf16.Decode(content)), cch, nil } // 2.5.294 func decodeXLUnicodeString(raw []byte) (string, int, error) { // identical to decodeShortXLUnicodeString except for cch=16bits instead of 8 cch := int(binary.LittleEndian.Uint16(raw[:2])) flags := raw[2] raw = raw[3:] content := make([]uint16, cch) if (flags & 0x1) == 0 { // 16-bit characters but only the bottom 8bits contentBytes := raw[:cch] for i, x := range contentBytes { content[i] = uint16(x) } cch += 3 // to return the offset } else { // 16-bit characters for i := 0; i < cch; i++ { content[i] = binary.LittleEndian.Uint16(raw[:2]) raw = raw[2:] } cch += cch + 3 // to return the offset } return string(utf16.Decode(content)), cch, nil } // 2.5.293 func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) { var cch, cRun uint16 var flags uint8 var cbExtRs int32 err := binary.Read(r, binary.LittleEndian, &cch) if err != nil { return "", err } err = binary.Read(r, binary.LittleEndian, &flags) if err != nil { return "", err } if (flags & 0x8) != 0 { // rich formating data is present err = binary.Read(r, binary.LittleEndian, &cRun) if err != nil { return "", err } } if (flags & 0x4) != 0 { // phonetic string data is present err = binary.Read(r, binary.LittleEndian, &cbExtRs) if err != nil { return "", err } } content := make([]uint16, cch) if (flags & 0x1) == 0 { // 16-bit characters but only the bottom 8bits contentBytes := make([]byte, cch) n, err2 := io.ReadFull(r, contentBytes) if n == 0 && err2 != io.ErrUnexpectedEOF { err = err2 } if uint16(n) < cch { contentBytes = contentBytes[:n] content = content[:n] } for i, x := range contentBytes { content[i] = uint16(x) } } else { // 16-bit characters err = binary.Read(r, binary.LittleEndian, content) } if err != nil { return "", err } ////// if cRun > 0 { 
// rich formating data is present _, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4) if err != nil { return "", err } } if cbExtRs > 0 { // phonetic string data is present _, err = io.CopyN(ioutil.Discard, r, int64(cbExtRs)) if err != nil { return "", err } } ////// return string(utf16.Decode(content)), nil } // read in an array of XLUnicodeRichExtendedString s func parseSST(recs []*rec) ([]string, error) { // The quirky thing about this code is that when strings cross a record // boundary, there's an intervening flags byte that MAY change the string // from an 8-bit encoding to 16-bit or vice versa. //totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4]) numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8]) all := make([]string, 0, numStrings) current := make([]uint16, 32*1024) buf := recs[0].Data[8:] for i := 0; i < len(recs); { var cRunBytes int var flags byte var cbExtRs uint32 for len(buf) > 0 { slen := binary.LittleEndian.Uint16(buf) buf = buf[2:] flags = buf[0] buf = buf[1:] if (flags & 0x8) != 0 { // rich formating data is present cRun := binary.LittleEndian.Uint16(buf) cRunBytes = int(cRun) * 4 buf = buf[2:] } if (flags & 0x4) != 0 { // phonetic string data is present cbExtRs = binary.LittleEndian.Uint32(buf) buf = buf[4:] } /////// blx := len(buf) bly := len(buf) - 5 if blx > 5 { blx = 5 } if bly < 0 { bly = 0 } // this block will read the string data, but transparently // handle continuing across records if int(slen) > cap(current) { current = make([]uint16, slen) } else { current = current[:slen] } for j := 0; j < int(slen); j++ { if len(buf) == 0 { i++ if (recs[i].Data[0] & 1) == 0 { flags &= 0xFE } else { flags |= 1 } buf = recs[i].Data[1:] } if (flags & 1) == 0 { //8-bit current[j] = uint16(buf[0]) buf = buf[1:] } else { //16-bit current[j] = uint16(binary.LittleEndian.Uint16(buf[:2])) buf = buf[2:] if len(buf) == 1 { return nil, errors.New("xls: off by one") } } } s := string(utf16.Decode(current)) all = append(all, s) /////// for 
cRunBytes > 0 { if len(buf) >= int(cRunBytes) { buf = buf[cRunBytes:] cRunBytes = 0 } else { cRunBytes -= len(buf) i++ buf = recs[i].Data } } for cbExtRs > 0 { if len(buf) >= int(cbExtRs) { buf = buf[cbExtRs:] cbExtRs = 0 } else { cbExtRs -= uint32(len(buf)) i++ buf = recs[i].Data } } } i++ if i < len(recs) { buf = recs[i].Data } } return all, nil } ================================================ FILE: xls/structs.go ================================================ package xls import ( "fmt" "math" ) type header struct { Version uint16 // An unsigned integer that specifies the BIFF version of the file. The value MUST be 0x0600. DocType uint16 //An unsigned integer that specifies the document type of the substream of records following this record. For more information about the layout of the sub-streams in the workbook stream see File Structure. RupBuild uint16 // An unsigned integer that specifies the build identifier. RupYear uint16 // An unsigned integer that specifies the year when this BIFF version was first created. The value MUST be 0x07CC or 0x07CD. MiscBits uint64 // lots of miscellaneous bits and flags we're not going to check } // 2.1.4 type rec struct { RecType recordType // RecSize uint16 // must be between 0 and 8224 Data []byte // len(rec.data) = rec.recsize } type boundSheet struct { Position uint32 // A FilePointer as specified in [MS-OSHARED] section 2.2.1.5 that specifies the stream position of the start of the BOF record for the sheet. HiddenState byte // (2 bits) An unsigned integer that specifies the hidden state of the sheet. MUST be a value from the following table: SheetType byte // An unsigned integer that specifies the sheet type. 00=worksheet Name string } /////// type shRow struct { RowIndex uint16 // 0-based FirstCol uint16 // 0-based LastCol uint16 // 1-based! 
Height uint16 Reserved uint32 Flags uint32 } type shRef8 struct { FirstRow uint16 // 0-based LastRow uint16 // 0-based FirstCol uint16 // 0-based LastCol uint16 // 0-based } type shMulRK struct { RowIndex uint16 // 0-based FirstCol uint16 // 0-based Values []RkRec LastCol uint16 // 0-based? } type RkRec struct { IXFCell uint16 Value RKNumber } type shRK struct { RowIndex uint16 // 0-based Col uint16 // 0-based IXFCell uint16 Value RKNumber } type RKNumber uint32 func (r RKNumber) IsInteger() bool { if (r & 1) != 0 { // has 2 decimals return false } if (r & 2) == 0 { // is part of a float return false } return true } func (r RKNumber) Int() int { val := int32(r) >> 2 if (r&1) == 0 && (r&2) != 0 { return int(val) } if (r&1) != 0 && (r&2) != 0 { return int(val / 100) } return 0 } func (r RKNumber) Float64() float64 { val := int32(r) >> 2 v2 := math.Float64frombits(uint64(val) << 34) if (r&1) == 0 && (r&2) == 0 { return v2 } if (r&1) != 0 && (r&2) == 0 { return v2 / 100.0 } return 0.0 } func (r RKNumber) String() string { if r.IsInteger() { return fmt.Sprint(r.Int()) } return fmt.Sprint(r.Float64()) } ================================================ FILE: xls/xls.go ================================================ // Package xls implements the Microsoft Excel Binary File Format (.xls) Structure. // More specifically, it contains just enough detail to extract cell contents, // data types, and last-calculated formula values. In particular, it does NOT // implement formatting or formula calculations. package xls // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06 import ( "context" "encoding/binary" "errors" "io" "log" "sync" "github.com/pbnjay/grate" "github.com/pbnjay/grate/commonxl" "github.com/pbnjay/grate/xls/cfb" "github.com/pbnjay/grate/xls/crypto" ) var _ = grate.Register("xls", 1, Open) // WorkBook represents an Excel workbook containing 1 or more sheets. 
type WorkBook struct { filename string ctx context.Context doc *cfb.Document prot bool h *header sheets []*boundSheet codepage uint16 dateMode uint16 strings []string password string substreams [][]*rec fpos int64 pos2substream map[int64]int nfmt commonxl.Formatter xfs []uint16 } func (b *WorkBook) IsProtected() bool { return b.prot } func Open(filename string) (grate.Source, error) { doc, err := cfb.Open(filename) if err != nil { return nil, err } b := &WorkBook{ filename: filename, doc: doc, pos2substream: make(map[int64]int, 16), xfs: make([]uint16, 0, 128), } rdr, err := doc.Open("Workbook") if err != nil { return nil, grate.WrapErr(err, grate.ErrNotInFormat) } raw, err := io.ReadAll(rdr) if err != nil { return nil, err } err = b.loadFromStream(raw) return b, err } func (b *WorkBook) loadFromStream(raw []byte) error { return b.loadFromStream2(raw, false) } func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor) error { // interestingly (insecurely) BIFF8 keeps Record Types and sizes in the clear, // has a few records that are not encrypted, and has 1 record type that does // not encrypt the 32bit integer position at the beginning (while encrypting // the rest). It also resets the encryption block counter every 1024 bytes // (counting all the "skipped" bytes described above). // // So this code streams the records through the decryption, but also records // a set of overlays applied to the final result which restore the // "cleartext" contents in line with the decrypted content. 
if grate.Debug { log.Println(" Decrypting xls stream with standard RC4") } pos := 0 zeros := [8224]byte{} type overlay struct { Pos int RecType recordType DataBytes uint16 Data []byte // NB len() not necessarily = DataBytes } replaceBlocks := []overlay{} var err error for err == nil && len(raw[pos:]) > 4 { o := overlay{} o.Pos = pos o.RecType = recordType(binary.LittleEndian.Uint16(raw[pos : pos+2])) o.DataBytes = binary.LittleEndian.Uint16(raw[pos+2 : pos+4]) pos += 4 // copy to output and decryption stream binary.Write(dec, binary.LittleEndian, o.RecType) binary.Write(dec, binary.LittleEndian, o.DataBytes) tocopy := int(o.DataBytes) switch o.RecType { case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead: // untouched data goes directly into output o.Data = raw[pos : pos+int(o.DataBytes)] pos += int(o.DataBytes) dec.Write(zeros[:int(o.DataBytes)]) tocopy = 0 case RecTypeBoundSheet8: // copy 32-bit position to output o.Data = raw[pos : pos+4] pos += 4 dec.Write(zeros[:4]) tocopy -= 4 } if tocopy > 0 { _, err = dec.Write(raw[pos : pos+tocopy]) pos += tocopy } replaceBlocks = append(replaceBlocks, o) } dec.Flush() alldata := dec.Bytes() for _, o := range replaceBlocks { offs := int(o.Pos) binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType)) binary.LittleEndian.PutUint16(alldata[offs+2:], uint16(o.DataBytes)) if len(o.Data) > 0 { offs += 4 copy(alldata[offs:], o.Data) } } // recurse into the stream parser now that things are decrypted return b.loadFromStream2(alldata, true) } func (b *WorkBook) Close() error { // return records to the pool for reuse for i, sub := range b.substreams { for _, r := range sub { r.Data = nil // allow GC recPool.Put(r) } b.substreams[i] = b.substreams[i][:0] } b.substreams = b.substreams[:0] return nil } func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error { b.h = &header{} substr := -1 nestedBOF := 0 b.pos2substream = make(map[int64]int, 10) 
b.fpos = 0 // IMPORTANT: if there are any existing records, we need to return them to the pool for i, sub := range b.substreams { for _, r := range sub { recPool.Put(r) } b.substreams[i] = b.substreams[i][:0] } b.substreams = b.substreams[:0] rawfull := raw nr, no, err := b.nextRecord(raw) for err == nil { raw = raw[no:] switch nr.RecType { case RecTypeEOF: nestedBOF-- case RecTypeBOF: // when substreams are nested, keep them in the same grouping if nestedBOF == 0 { substr = len(b.substreams) b.substreams = append(b.substreams, []*rec{}) b.pos2substream[b.fpos] = substr } nestedBOF++ } b.fpos += int64(4 + len(nr.Data)) // if there's a FilePass record, the data is encrypted if nr.RecType == RecTypeFilePass && !isDecrypted { etype := binary.LittleEndian.Uint16(nr.Data) switch etype { case 1: dec, err := crypto.NewBasicRC4(nr.Data[2:]) if err != nil { log.Println("xls: rc4 encryption failed to set up", err) return err } return b.loadFromStreamWithDecryptor(rawfull, dec) case 2, 3, 4: log.Println("need Crypto API RC4 decryptor") return errors.New("xls: unsupported Crypto API encryption method") default: return errors.New("xls: unsupported encryption method") } } b.substreams[substr] = append(b.substreams[substr], nr) nr, no, err = b.nextRecord(raw) } if err == io.EOF { err = nil } if err != nil { return err } for ss, records := range b.substreams { if grate.Debug { log.Printf(" Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records)) } for i, nr := range records { if len(nr.Data) == 0 { continue } switch nr.RecType { case RecTypeSST: // Shared String Table is often continued across multiple records, // so we want to gather them all before starting to parse (some // strings may span the gap between records) recSet := []*rec{nr} lastIndex := i for len(records) > (lastIndex+1) && records[lastIndex+1].RecType == RecTypeContinue { lastIndex++ recSet = append(recSet, records[lastIndex]) } b.strings, err = parseSST(recSet) if err != nil { return err } 
case RecTypeContinue: // no-op (used above) case RecTypeEOF: // done case RecTypeBOF: b.h = &header{ Version: binary.LittleEndian.Uint16(nr.Data[0:2]), DocType: binary.LittleEndian.Uint16(nr.Data[2:4]), RupBuild: binary.LittleEndian.Uint16(nr.Data[4:6]), RupYear: binary.LittleEndian.Uint16(nr.Data[6:8]), MiscBits: binary.LittleEndian.Uint64(nr.Data[8:16]), } if b.h.Version != 0x0600 { return errors.New("xls: invalid file version") } if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD { return errors.New("xls: unsupported biff version") } /* if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 { // we only support the workbook or worksheet substreams log.Println("xls: unsupported document type") //break } */ case RecTypeCodePage: // BIFF8 is entirely UTF-16LE so this is actually ignored b.codepage = binary.LittleEndian.Uint16(nr.Data) case RecTypeDate1904: b.dateMode = binary.LittleEndian.Uint16(nr.Data) case RecTypeFormat: // Format maps a format ID to a code string fmtNo := binary.LittleEndian.Uint16(nr.Data) formatStr, _, err := decodeXLUnicodeString(nr.Data[2:]) if err != nil { log.Println("fail2", err) return err } b.nfmt.Add(fmtNo, formatStr) case RecTypeXF: // XF records merge multiple style and format directives to one ID // ignore font id at nr.Data[0:2] fmtNo := binary.LittleEndian.Uint16(nr.Data[2:]) b.xfs = append(b.xfs, fmtNo) case RecTypeBoundSheet8: // Identifies the postition within the stream, visibility state, // and name of a worksheet bs := &boundSheet{} bs.Position = binary.LittleEndian.Uint32(nr.Data[:4]) bs.HiddenState = nr.Data[4] bs.SheetType = nr.Data[5] bs.Name, _, err = decodeShortXLUnicodeString(nr.Data[6:]) if err != nil { return err } b.sheets = append(b.sheets, bs) default: if grate.Debug && ss == 0 { log.Println(" Unhandled record type:", nr.RecType, i) } } } } return err } var recPool = sync.Pool{ New: func() interface{} { return &rec{} }, } func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) { if len(raw) < 4 { return nil, 0, 
io.EOF } rec := recPool.Get().(*rec) rec.RecType = recordType(binary.LittleEndian.Uint16(raw[:2])) rec.RecSize = binary.LittleEndian.Uint16(raw[2:4]) if len(raw[4:]) < int(rec.RecSize) { recPool.Put(rec) return nil, 4, io.ErrUnexpectedEOF } rec.Data = raw[4 : 4+rec.RecSize] return rec, int(4 + rec.RecSize), nil } ================================================ FILE: xlsx/comp_test.go ================================================ package xlsx import ( "os" "path/filepath" "strings" "testing" ) func TestAllFiles(t *testing.T) { err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error { if info.IsDir() { return nil } if !strings.HasSuffix(info.Name(), ".xlsx") { return nil } wb, err := Open(p) if err != nil { return err } sheets, err := wb.List() if err != nil { return err } for _, s := range sheets { sheet, err := wb.Get(s) if err != nil { return err } for sheet.Next() { sheet.Strings() } } return wb.Close() }) if err != nil { t.Fatal(err) } } ================================================ FILE: xlsx/sheets.go ================================================ package xlsx import ( "encoding/xml" "errors" "io" "log" "path/filepath" "strconv" "strings" "github.com/pbnjay/grate" "github.com/pbnjay/grate/commonxl" ) type Sheet struct { d *Document relID string name string docname string err error wrapped *commonxl.Sheet } var errNotLoaded = errors.New("xlsx: sheet not loaded") func (s *Sheet) parseSheet() error { s.wrapped = &commonxl.Sheet{ Formatter: &s.d.fmt, } linkmap := make(map[string]string) base := filepath.Base(s.docname) sub := strings.TrimSuffix(s.docname, base) relsname := filepath.Join(sub, "_rels", base+".rels") dec, clo, err := s.d.openXML(relsname) if err == nil { // rels might not exist for every sheet tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { if v, ok := tok.(xml.StartElement); ok && v.Name.Local == "Relationship" { ax := getAttrs(v.Attr, "Id", "Type", "Target", "TargetMode") if ax[3] == 
"External" && ax[1] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" { linkmap[ax[0]] = ax[2] } } } clo.Close() } dec, clo, err = s.d.openXML(s.docname) if err != nil { return err } defer clo.Close() currentCellType := BlankCellType currentCell := "" var fno uint16 var maxCol, maxRow int tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v := tok.(type) { case xml.CharData: if currentCell == "" { continue } c, r := refToIndexes(currentCell) if c >= 0 && r >= 0 { var val interface{} = string(v) switch currentCellType { case BooleanCellType: if v[0] == '1' { val = true } else { val = false } case DateCellType: log.Println("CELL DATE", val, fno) case NumberCellType: fval, err := strconv.ParseFloat(string(v), 64) if err == nil { val = fval } //log.Println("CELL NUMBER", val, numFormat) case SharedStringCellType: //log.Println("CELL SHSTR", val, currentCellType, numFormat) si, _ := strconv.ParseInt(string(v), 10, 64) val = s.d.strings[si] case BlankCellType: //log.Println("CELL BLANK") // don't place any values continue case ErrorCellType, FormulaStringCellType, InlineStringCellType: //log.Println("CELL ERR/FORM/INLINE", val, currentCellType) default: log.Println("CELL UNKNOWN", val, currentCellType, fno) } s.wrapped.Put(r, c, val, fno) } else { //log.Println("FAIL row/col: ", currentCell) } case xml.StartElement: switch v.Name.Local { case "dimension": ax := getAttrs(v.Attr, "ref") if ax[0] == "A1" { maxCol, maxRow = 1, 1 // short-circuit empty sheet s.wrapped.Resize(1, 1) continue } dims := strings.Split(ax[0], ":") if len(dims) == 1 { maxCol, maxRow = refToIndexes(dims[0]) } else { //minCol, minRow := refToIndexes(dims[0]) maxCol, maxRow = refToIndexes(dims[1]) } s.wrapped.Resize(maxRow, maxCol) //log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol) case "row": //currentRow = ax["r"] // unsigned int row index //log.Println("ROW", currentRow) case "c": ax := getAttrs(v.Attr, "t", "r", 
"s") currentCellType = CellType(ax[0]) if currentCellType == BlankCellType { currentCellType = NumberCellType } currentCell = ax[1] // always an A1 style reference style := ax[2] sid, _ := strconv.ParseInt(style, 10, 64) if len(s.d.xfs) > int(sid) { fno = s.d.xfs[sid] } else { fno = 0 } //log.Println("CELL", currentCell, sid, numFormat, currentCellType) case "v": //log.Println("CELL VALUE", ax) case "mergeCell": ax := getAttrs(v.Attr, "ref") dims := strings.Split(ax[0], ":") startCol, startRow := refToIndexes(dims[0]) endCol, endRow := startCol, startRow if len(dims) > 1 { endCol, endRow = refToIndexes(dims[1]) } if endRow > maxRow { endRow = maxRow } if endCol > maxCol { endCol = maxCol } for r := startRow; r <= endRow; r++ { for c := startCol; c <= endCol; c++ { if r == startRow && c == startCol { // has data already! } else if c == startCol { // first and last column MAY be the same if r == endRow { s.wrapped.Put(r, c, grate.EndRowMerged, 0) } else { s.wrapped.Put(r, c, grate.ContinueRowMerged, 0) } } else if c == endCol { // first and last column are NOT the same s.wrapped.Put(r, c, grate.EndColumnMerged, 0) } else { s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0) } } } case "hyperlink": ax := getAttrs(v.Attr, "ref", "id") col, row := refToIndexes(ax[0]) link := linkmap[ax[1]] s.wrapped.Put(row, col, link, 0) s.wrapped.SetURL(row, col, link) case "worksheet", "mergeCells", "hyperlinks": // containers case "f": //log.Println("start: ", v.Name.Local, v.Attr) default: if grate.Debug { log.Println(" Unhandled sheet xml tag", v.Name.Local, v.Attr) } } case xml.EndElement: switch v.Name.Local { case "c": currentCell = "" case "row": //currentRow = "" } default: if grate.Debug { log.Printf(" Unhandled sheet xml tokens %T %+v", tok, tok) } } } if err == io.EOF { err = nil } return err } ================================================ FILE: xlsx/simple_test.go ================================================ package xlsx import ( "bufio" "log" "os" "strings" 
"testing" "github.com/pbnjay/grate/commonxl" ) var testFilePairs = [][]string{ {"../testdata/basic.xlsx", "../testdata/basic.tsv"}, // TODO: custom formatter support //{"../testdata/basic2.xlsx", "../testdata/basic2.tsv"}, // TODO: datetime and fraction formatter support //{"../testdata/multi_test.xlsx", "../testdata/multi_test.tsv"}, } func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) { f, err := os.Open(fn) if err != nil { return nil, err } xs := &commonxl.Sheet{ Formatter: ff, } row := 0 s := bufio.NewScanner(f) for s.Scan() { record := strings.Split(s.Text(), "\t") for i, val := range record { xs.Put(row, i, val, 0) } row++ } return xs, f.Close() } func TestBasic(t *testing.T) { for _, fnames := range testFilePairs { var trueData *commonxl.Sheet log.Println("Testing ", fnames[0]) wb, err := Open(fnames[0]) if err != nil { t.Fatal(err) } sheets, err := wb.List() if err != nil { t.Fatal(err) } firstLoad := true for _, s := range sheets { sheet, err := wb.Get(s) if err != nil { t.Fatal(err) } xsheet := sheet.(*commonxl.Sheet) if firstLoad { trueData, err = loadTestData(fnames[1], xsheet.Formatter) if err != nil { t.Fatal(err) } firstLoad = false } for xrow, xdata := range xsheet.Rows { for xcol, xval := range xdata { //t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol]) if !trueData.Rows[xrow][xcol].Equal(xval) { t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol, xval, trueData.Rows[xrow][xcol]) t.Fail() } } } } err = wb.Close() if err != nil { t.Fatal(err) } } } ================================================ FILE: xlsx/types.go ================================================ package xlsx import ( "encoding/xml" "strconv" "strings" ) type CellType string // CellTypes define data type in section 18.18.11 const ( BlankCellType CellType = "" BooleanCellType CellType = "b" DateCellType CellType = "d" ErrorCellType CellType = "e" NumberCellType CellType = "n" 
SharedStringCellType CellType = "s" FormulaStringCellType CellType = "str" InlineStringCellType CellType = "inlineStr" ) type staticCellType rune const ( staticBlank staticCellType = 0 // marks a continuation column within a merged cell. continueColumnMerged staticCellType = '→' // marks the last column of a merged cell. endColumnMerged staticCellType = '⇥' // marks a continuation row within a merged cell. continueRowMerged staticCellType = '↓' // marks the last row of a merged cell. endRowMerged staticCellType = '⤓' ) func (s staticCellType) String() string { if s == 0 { return "" } return string([]rune{rune(s)}) } // returns the 0-based index of the column string: // "A"=0, "B"=1, "AA"=26, "BB"=53 func col2int(col string) int { idx := 0 for _, c := range col { idx *= 26 idx += int(c - '@') } return idx - 1 } func refToIndexes(r string) (column, row int) { if len(r) < 2 { return -1, -1 } i1 := strings.IndexAny(r, "0123456789") if i1 <= 0 { return -1, -1 } // A1 Reference mode col1 := r[:i1] i2 := strings.IndexByte(r[i1:], 'C') if i2 == -1 { rn, _ := strconv.ParseInt(r[i1:], 10, 64) return col2int(col1), int(rn) - 1 } // R1C1 Reference Mode col1 = r[i1:i2] row1 := r[i2+1:] cn, _ := strconv.ParseInt(col1, 10, 64) rn, _ := strconv.ParseInt(row1, 10, 64) return int(cn), int(rn) - 1 } func getAttrs(attrs []xml.Attr, keys ...string) []string { res := make([]string, len(keys)) for _, a := range attrs { for i, k := range keys { if a.Name.Local == k { res[i] = a.Value } } } return res } ================================================ FILE: xlsx/workbook.go ================================================ package xlsx import ( "encoding/xml" "errors" "io" "log" "path/filepath" "strconv" "strings" "github.com/pbnjay/grate" ) func (d *Document) parseRels(dec *xml.Decoder, basedir string) error { tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v := tok.(type) { case xml.StartElement: switch v.Name.Local { case "Relationships": // container case 
"Relationship": vals := make(map[string]string, 5) for _, a := range v.Attr { vals[a.Name.Local] = a.Value } if _, ok := d.rels[vals["Type"]]; !ok { d.rels[vals["Type"]] = make(map[string]string) } if strings.HasPrefix(vals["Target"], "/") { // handle malformed "absolute" paths cleanly d.rels[vals["Type"]][vals["Id"]] = vals["Target"][1:] } else { d.rels[vals["Type"]][vals["Id"]] = filepath.Join(basedir, vals["Target"]) } if vals["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" { d.primaryDoc = vals["Target"] } default: if grate.Debug { log.Println(" Unhandled relationship xml tag", v.Name.Local, v.Attr) } } case xml.EndElement: // not needed default: if grate.Debug { log.Printf(" Unhandled relationship xml tokens %T %+v", tok, tok) } } } if err == io.EOF { err = nil } return err } func (d *Document) parseWorkbook(dec *xml.Decoder) error { tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v := tok.(type) { case xml.StartElement: switch v.Name.Local { case "sheet": vals := make(map[string]string, 5) for _, a := range v.Attr { vals[a.Name.Local] = a.Value } sheetID, ok1 := vals["id"] sheetName, ok2 := vals["name"] if !ok1 || !ok2 { return errors.New("xlsx: invalid sheet definition") } s := &Sheet{ d: d, relID: sheetID, name: sheetName, docname: d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"][sheetID], err: errNotLoaded, } d.sheets = append(d.sheets, s) case "workbook", "sheets": // containers default: if grate.Debug { log.Println(" Unhandled workbook xml tag", v.Name.Local, v.Attr) } } case xml.EndElement: // not needed default: if grate.Debug { log.Printf(" Unhandled workbook xml tokens %T %+v", tok, tok) } } } if err == io.EOF { err = nil } return err } func (d *Document) parseStyles(dec *xml.Decoder) error { baseNumFormats := []string{} d.xfs = d.xfs[:0] section := 0 tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v 
:= tok.(type) { case xml.StartElement: switch v.Name.Local { case "styleSheet": // container case "numFmt": ax := getAttrs(v.Attr, "numFmtId", "formatCode") fmtNo, _ := strconv.ParseInt(ax[0], 10, 16) d.fmt.Add(uint16(fmtNo), ax[1]) case "cellStyleXfs": section = 1 case "cellXfs": section = 2 ax := getAttrs(v.Attr, "count") n, _ := strconv.ParseInt(ax[0], 10, 64) d.xfs = make([]uint16, 0, n) case "xf": ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId") if section == 1 { // load base styles, but only save number format if ax[1] == "0" { baseNumFormats = append(baseNumFormats, "0") } else { baseNumFormats = append(baseNumFormats, ax[0]) } } else if section == 2 { // actual referencable cell styles // 1) get base style so we can inherit format properly baseID, _ := strconv.ParseInt(ax[2], 10, 64) numFmtID := "0" if len(baseNumFormats) > int(baseID) { numFmtID = baseNumFormats[baseID] } // 2) check if this XF overrides the base format if ax[1] == "0" { // remove the format (if it was inherited) numFmtID = "0" } else { numFmtID = ax[0] } nfid, _ := strconv.ParseInt(numFmtID, 10, 16) d.xfs = append(d.xfs, uint16(nfid)) } else { panic("wheres is this xf??") } default: if grate.Debug { log.Println(" Unhandled style xml tag", v.Name.Local, v.Attr) } } case xml.EndElement: switch v.Name.Local { case "cellStyleXfs": section = 0 case "cellXfs": section = 0 } default: if grate.Debug { log.Printf(" Unhandled style xml tokens %T %+v", tok, tok) } } } if err == io.EOF { err = nil } return err } func (d *Document) parseSharedStrings(dec *xml.Decoder) error { val := "" tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v := tok.(type) { case xml.CharData: val += string(v) case xml.StartElement: switch v.Name.Local { case "si": val = "" case "t": // no attributes to parse, we only want the CharData ... 
case "sst": // main container default: if grate.Debug { log.Println(" Unhandled SST xml tag", v.Name.Local, v.Attr) } } case xml.EndElement: if v.Name.Local == "si" { d.strings = append(d.strings, val) continue } default: if grate.Debug { log.Printf(" Unhandled SST xml token %T %+v", tok, tok) } } } if err == io.EOF { err = nil } return err } ================================================ FILE: xlsx/xlsx.go ================================================ package xlsx import ( "archive/zip" "encoding/xml" "errors" "io" "log" "os" "path/filepath" "strings" "github.com/pbnjay/grate" "github.com/pbnjay/grate/commonxl" ) var _ = grate.Register("xlsx", 5, Open) // Document contains an Office Open XML document. type Document struct { filename string f *os.File r *zip.Reader primaryDoc string // type => id => filename rels map[string]map[string]string sheets []*Sheet strings []string xfs []uint16 fmt commonxl.Formatter } func (d *Document) Close() error { d.xfs = d.xfs[:0] d.xfs = nil d.strings = d.strings[:0] d.strings = nil d.sheets = d.sheets[:0] d.sheets = nil return d.f.Close() } func Open(filename string) (grate.Source, error) { f, err := os.Open(filename) if err != nil { return nil, err } info, err := f.Stat() if err != nil { return nil, err } z, err := zip.NewReader(f, info.Size()) if err != nil { return nil, grate.WrapErr(err, grate.ErrNotInFormat) } d := &Document{ filename: filename, f: f, r: z, } d.rels = make(map[string]map[string]string, 4) // parse the primary relationships dec, c, err := d.openXML("_rels/.rels") if err != nil { return nil, grate.WrapErr(err, grate.ErrNotInFormat) } err = d.parseRels(dec, "") c.Close() if err != nil { return nil, grate.WrapErr(err, grate.ErrNotInFormat) } if d.primaryDoc == "" { return nil, errors.New("xlsx: invalid document") } // parse the secondary relationships to primary doc base := filepath.Base(d.primaryDoc) sub := strings.TrimSuffix(d.primaryDoc, base) relfn := filepath.Join(sub, "_rels", base+".rels") dec, c, err 
= d.openXML(relfn) if err != nil { return nil, err } err = d.parseRels(dec, sub) c.Close() if err != nil { return nil, err } // parse the workbook structure dec, c, err = d.openXML(d.primaryDoc) if err != nil { return nil, err } err = d.parseWorkbook(dec) c.Close() if err != nil { return nil, err } styn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"] for _, sst := range styn { // parse the shared string table dec, c, err = d.openXML(sst) if err != nil { return nil, err } err = d.parseStyles(dec) c.Close() if err != nil { return nil, err } } ssn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"] for _, sst := range ssn { // parse the shared string table dec, c, err = d.openXML(sst) if err != nil { return nil, err } err = d.parseSharedStrings(dec) c.Close() if err != nil { return nil, err } } return d, nil } func (d *Document) openXML(name string) (*xml.Decoder, io.Closer, error) { if grate.Debug { log.Println(" openXML", name) } for _, zf := range d.r.File { if zf.Name == name { zfr, err := zf.Open() if err != nil { return nil, nil, err } dec := xml.NewDecoder(zfr) return dec, zfr, nil } } return nil, nil, io.EOF } func (d *Document) List() ([]string, error) { res := make([]string, 0, len(d.sheets)) for _, s := range d.sheets { res = append(res, s.name) } return res, nil } func (d *Document) Get(sheetName string) (grate.Collection, error) { for _, s := range d.sheets { if s.name == sheetName { if s.err == errNotLoaded { s.err = s.parseSheet() } return s.wrapped, s.err } } return nil, errors.New("xlsx: sheet not found") }