Repository: pbnjay/grate
Branch: main
Commit: 3f8e65d74a14
Files: 40
Total size: 174.9 KB

Directory structure:
gitextract_81jad5jf/

├── .github/
│   └── workflows/
│       └── go.yml
├── .gitignore
├── LICENSE
├── README.md
├── cmd/
│   ├── grate2tsv/
│   │   └── main.go
│   └── grater/
│       └── main.go
├── commonxl/
│   ├── cell.go
│   ├── dates.go
│   ├── fmt.go
│   ├── fmt_test.go
│   ├── formats.go
│   ├── frac_test.go
│   ├── numbers.go
│   └── sheet.go
├── errs.go
├── go.mod
├── grate.go
├── simple/
│   ├── csv.go
│   ├── simple.go
│   └── tsv.go
├── xls/
│   ├── cfb/
│   │   ├── cfb.go
│   │   ├── interface.go
│   │   ├── simple_test.go
│   │   └── slicereader.go
│   ├── comp_test.go
│   ├── crypto/
│   │   ├── crypto.go
│   │   └── rc4.go
│   ├── hyperlinks.go
│   ├── records.go
│   ├── sheets.go
│   ├── simple_test.go
│   ├── strings.go
│   ├── structs.go
│   └── xls.go
└── xlsx/
    ├── comp_test.go
    ├── sheets.go
    ├── simple_test.go
    ├── types.go
    ├── workbook.go
    └── xlsx.go

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/go.yml
================================================
name: Go

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:

  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2

    - name: Set up Go
      uses: actions/setup-go@v2
      with:
        go-version: 1.17

    - name: Build
      run: go build -v ./...

    - name: Test XLS
      run: go test -v ./xls

    - name: Test XLSX
      run: go test -v ./xlsx

    - name: Test CommonXL
      run: go test -v ./commonxl


================================================
FILE: .gitignore
================================================
cmd/grate2tsv/results
testdata

*.pprof
*.pdf


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2021 Jeremy Jay

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README.md
================================================
# grate

A Go native tabular data extraction package. Currently supports `.xls`, `.xlsx`, `.csv`, `.tsv` formats.

# Why?

Grate focuses on speed and stability first, and makes no attempt to parse charts, figures, or other content types that may be present embedded within the input files. It tries to perform as few allocations as possible and errs on the side of caution.

There are certainly still some bugs and edge cases, but we have run it successfully on a set of 400k `.xls` and `.xlsx` files to catch many bugs and error conditions. Please file an issue with any feedback and additional problem files.

# Usage

Grate provides a simple standard interface for all supported filetypes, allowing access to both named worksheets in spreadsheets and single tables in plaintext formats.

```go
package main

import (
    "fmt"
    "os"
    "strings"

    "github.com/pbnjay/grate"
    _ "github.com/pbnjay/grate/simple" // tsv and csv support
    _ "github.com/pbnjay/grate/xls"
    _ "github.com/pbnjay/grate/xlsx"
)

func main() {
    wb, _ := grate.Open(os.Args[1])  // open the file
    sheets, _ := wb.List()           // list available sheets
    for _, s := range sheets {       // enumerate each sheet name
        sheet, _ := wb.Get(s)        // open the sheet
        for sheet.Next() {           // enumerate each row of data
            row := sheet.Strings()   // get the row's content as []string
            fmt.Println(strings.Join(row, "\t"))
        }
    }
    wb.Close()
}
```

# License

All source code is licensed under the [MIT License](https://raw.github.com/pbnjay/grate/master/LICENSE).


================================================
FILE: cmd/grate2tsv/main.go
================================================
// Command grate2tsv is a highly parallel tabular data extraction tool. It's
// probably not necessary in your situation, but is included here since it
// is a good stress test of the codebase.
//
// Files on the command line will be parsed and extracted to the "results"
// subdirectory under a hierarchical arrangement (to make our filesystems
// more responsive), and a "results.txt" file will be created logging basic
// information and errors for each file.
package main

import (
	"bufio"
	"crypto/md5"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"runtime/pprof"
	"strings"
	"sync"
	"time"

	"github.com/pbnjay/grate"
	_ "github.com/pbnjay/grate/simple"
	_ "github.com/pbnjay/grate/xls"
	_ "github.com/pbnjay/grate/xlsx"
)

// Package-level state: the command-line flags, followed by the shared
// resources used by main, runProcessor and processFile.
var (
	logfile        = flag.String("l", "", "save processing logs to `filename.txt`")
	pretend        = flag.Bool("p", false, "pretend to output .tsv")
	infoFile       = flag.String("i", "results.txt", "`filename` to record stats about the process")
	removeNewlines = flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents")
	trimSpaces     = flag.Bool("w", true, "trim whitespace from cell contents")
	skipBlanks     = flag.Bool("b", true, "discard blank rows from the output")
	cpuprofile     = flag.String("cpuprofile", "", "write cpu profile to file")
	memprofile     = flag.String("memprofile", "", "write memory profile to file")

	// timeFormat is the layout of the timestamp column written to fstats;
	// fstats is the info file (see the -i flag) opened by main.
	timeFormat = "2006-01-02 15:04:05"
	fstats     *os.File

	// procWG counts the running runProcessor goroutines. Finished outputs are
	// sent on cleanup to be flushed and closed by a goroutine started in main,
	// which then returns the *output to outpool for reuse.
	procWG  sync.WaitGroup
	cleanup = make(chan *output, 100)
	outpool = sync.Pool{New: func() interface{} {
		return &output{}
	}}
)

// output pairs a destination .tsv file with the buffered writer wrapping it,
// so both can be flushed and closed together by the cleanup goroutine.
type output struct {
	f *os.File      // underlying file created by processFile
	b *bufio.Writer // buffered writer over f
}

// main parses the flags, sets up optional profiling and logging, opens the
// info file, and fans the filenames given on the command line out to a pool
// of runProcessor goroutines. It returns once every file has been processed
// and every output file has been flushed and closed.
func main() {
	flag.Parse()

	if *memprofile != "" {
		f, err := os.Create(*memprofile)
		if err != nil {
			log.Fatal(err)
		}
		defer func() {
			runtime.GC()
			pprof.WriteHeapProfile(f)
			f.Close()
		}()
	}

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	if *logfile != "" {
		fo, err := os.Create(*logfile)
		if err != nil {
			log.Fatal(err)
		}
		defer fo.Close()
		log.SetOutput(fo)
	}

	// Single goroutine that finalizes output files handed over by processFile.
	done := make(chan int)
	go func() {
		for x := range cleanup {
			x.b.Flush()
			x.f.Close()
			outpool.Put(x)
		}
		done <- 1
	}()

	var err error
	fstats, err = os.OpenFile(*infoFile, os.O_CREATE|os.O_RDWR, 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer fstats.Close()
	// Append to an existing info file; only a brand-new file gets the header.
	pos, err := fstats.Seek(0, io.SeekEnd)
	if err != nil {
		log.Fatal(err)
	}
	if pos == 0 {
		fmt.Fprintf(fstats, "time\tfilename\tsheet\trows\tcolumns\terrors\n")
	}

	filenameChan := make(chan string)

	// fan out to 1/2 of CPU cores
	// (e.g. each file-processor can use 2 cpus)
	outMu := &sync.Mutex{}
	nparallel := runtime.NumCPU() / 2
	if nparallel < 1 {
		// On a single-CPU machine the division yields 0; without at least one
		// worker the unbuffered send below would block forever.
		nparallel = 1
	}
	procWG.Add(nparallel)
	for i := 0; i < nparallel; i++ {
		go runProcessor(filenameChan, outMu)
	}
	for _, fn := range flag.Args() {
		filenameChan <- fn
	}

	close(filenameChan)
	procWG.Wait()
	close(cleanup)
	<-done
}

// runProcessor is the worker loop: it processes every filename received on
// from and appends the outcome to fstats while holding mu. A file-level
// error produces one line with dashes in the sheet/rows/columns columns;
// otherwise one line is written per sheet. procWG is released when from is
// closed and drained.
func runProcessor(from chan string, mu *sync.Mutex) {
	for fn := range from {
		stamp := time.Now().Format(timeFormat)
		results, err := processFile(fn)

		mu.Lock()
		if err == nil {
			for _, res := range results {
				msg := "-"
				if res.Err != nil {
					msg = res.Err.Error()
				}
				fmt.Fprintf(fstats, "%s\t%s\t%s\t%d\t%d\t%s\n", stamp, res.Filename, res.SheetName,
					res.NumRows, res.NumCols, msg)
			}
		} else {
			// returned errors are fatal for the whole file
			fmt.Fprintf(fstats, "%s\t%s\t-\t-\t-\t%s\n", stamp, fn, err.Error())
		}
		mu.Unlock()
	}
	procWG.Done()
}

var (
	// sanitize matches runs of characters other than ASCII letters and digits;
	// processFile replaces them with "_" when building output filenames.
	sanitize = regexp.MustCompile("[^a-zA-Z0-9]+")
	// newlines matches runs of spaces, newlines, carriage returns and tabs;
	// processFile collapses them to a single space when -r is set.
	newlines = regexp.MustCompile("[ \n\r\t]+")
)

// stats records the outcome of extracting a single sheet from a file.
type stats struct {
	Filename  string // input path as given on the command line
	Hash      string // first 8 hex digits of the MD5 of the base filename
	SheetName string // name of the sheet as listed by the workbook
	NumRows   int    // number of rows written to the output
	NumCols   int    // column extent of the non-blank data, as computed by processFile
	Err       error  // sheet-level error, or nil
}

// processFile opens the workbook fn and writes each non-empty sheet to
// results/<h0h1>/<h2h3>/<base>.<sheet>.tsv, where h is the hex MD5 of the
// base filename (without extension). With -p no files are created and the
// rows are discarded after being counted.
//
// It returns one stats entry per sheet. A non-nil error means the file as a
// whole could not be processed (open/list failure, or an output file that
// could not be created); per-sheet failures are reported in stats.Err.
func processFile(fn string) ([]stats, error) {
	wb, err := grate.Open(fn)
	if err != nil {
		return nil, err
	}
	defer wb.Close()

	results := []stats{}

	ext := filepath.Ext(fn)
	fn2 := filepath.Base(strings.TrimSuffix(fn, ext))
	subparts := fmt.Sprintf("%x", md5.Sum([]byte(fn2)))
	subdir := filepath.Join("results", subparts[:2], subparts[2:4])
	// A failure here is not fatal by itself: it resurfaces as an os.Create
	// error below whenever an output file is actually needed.
	os.MkdirAll(subdir, 0755)
	log.Printf("%s  Processing file '%s'", subparts[:8], fn2)

	sheets, err := wb.List()
	if err != nil {
		return nil, err
	}
	for _, s := range sheets {
		ps := stats{
			Filename:  fn,
			Hash:      subparts[:8],
			SheetName: s,
		}
		log.Printf("%s  Opening Sheet '%s'...", subparts[:8], s)
		sheet, err := wb.Get(s)
		if err != nil {
			ps.Err = err
			results = append(results, ps)
			continue
		}
		if sheet.IsEmpty() {
			log.Println(subparts[:8] + "    Empty sheet. Skipping.")
			results = append(results, ps)
			continue
		}
		s2 := sanitize.ReplaceAllString(s, "_")
		if s == fn {
			// presumably single-table formats name their only sheet after the file
			s2 = "main"
		}
		var ox *output
		var w io.Writer = ioutil.Discard
		if !*pretend {
			f, err := os.Create(filepath.Join(subdir, fn2+"."+s2+".tsv"))
			if err != nil {
				return nil, err
			}
			ox = outpool.Get().(*output)
			ox.f = f
			ox.b = bufio.NewWriter(f)
			w = ox.b
		}

		for sheet.Next() {
			row := sheet.Strings()
			nonblank := false
			for i, x := range row {
				if *removeNewlines {
					x = newlines.ReplaceAllString(x, " ")
				}
				if *trimSpaces {
					x = strings.TrimSpace(x)
				}
				// Store the cleaned value regardless of which options were
				// applied, so -r takes effect even when -w is disabled.
				row[i] = x
				if x != "" {
					nonblank = true
					// i is 0-based; record a column count, not an index.
					if ps.NumCols < i+1 {
						ps.NumCols = i + 1
					}
				}
			}
			if nonblank || !*skipBlanks {
				for i, v := range row {
					if i != 0 {
						w.Write([]byte{'\t'})
					}
					w.Write([]byte(v))
				}
				w.Write([]byte{'\n'})
				ps.NumRows++
			}
		}
		results = append(results, ps)
		if ox != nil {
			// hand the file over to the cleanup goroutine for flush + close
			cleanup <- ox
		}
	}
	return results, nil
}


================================================
FILE: cmd/grater/main.go
================================================
// Command grater extracts contents of the tabular files to stdout.
package main

import (
	"flag"
	"fmt"
	"os"
	"strings"

	"github.com/pbnjay/grate"
	_ "github.com/pbnjay/grate/simple" // tsv and csv support
	_ "github.com/pbnjay/grate/xls"
	_ "github.com/pbnjay/grate/xlsx"
)

// main dumps every sheet of every file named on the command line to stdout
// as tab-separated rows. With -v, library debug logging is enabled and each
// row is preceded by a line listing its cell types. Errors are reported on
// stderr and the offending file or sheet is skipped.
func main() {
	verbose := flag.Bool("v", false, "debug log")
	flag.Parse()
	if flag.NArg() == 0 {
		fmt.Fprintf(os.Stderr, "USAGE: %s [file1.xls file2.xlsx file3.tsv ...]\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "       Extracts contents of the tabular files to stdout\n")
		os.Exit(1)
	}
	grate.Debug = *verbose

	for _, path := range flag.Args() {
		book, err := grate.Open(path)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue
		}

		names, err := book.List()
		if err != nil {
			book.Close()
			fmt.Fprintln(os.Stderr, err)
			continue
		}

		for _, name := range names {
			sheet, err := book.Get(name)
			if err != nil {
				fmt.Fprintln(os.Stderr, err)
				continue
			}

			for sheet.Next() {
				if *verbose {
					fmt.Println(strings.Join(sheet.Types(), "\t"))
				}
				fmt.Println(strings.Join(sheet.Strings(), "\t"))
			}
		}
		book.Close()
	}
}


================================================
FILE: commonxl/cell.go
================================================
package commonxl

import (
	"fmt"
	"math"
	"net/url"
	"strconv"
	"time"
	"unicode/utf16"
)

// CellType annotates the type of data extracted in the cell.
type CellType uint16

// CellType annotations for various cell value types.
// The values are assigned sequentially starting at zero, so the zero value
// of CellType is BlankCell.
const (
	BlankCell CellType = iota
	IntegerCell
	FloatCell
	StringCell
	BooleanCell
	DateCell

	HyperlinkStringCell // internal type to separate URLs
	StaticCell          // placeholder, internal use only
)

// String returns a short lowercase name for the cell data type. StringCell
// and any value outside the declared constants are reported as "string".
func (c CellType) String() string {
	name := "string"
	switch c {
	case BlankCell:
		name = "blank"
	case IntegerCell:
		name = "integer"
	case FloatCell:
		name = "float"
	case BooleanCell:
		name = "boolean"
	case DateCell:
		name = "date"
	case HyperlinkStringCell:
		name = "hyperlink"
	case StaticCell:
		name = "static"
	}
	return name
}

// Cell represents a single cell value.
type Cell []interface{}

// internally, it is a slice sized 2, 3 or 4:
//   [Value, CellType], [Value, CellType, FormatNumber] or, once SetURL has
//   been called, [Value, CellType, FormatNumber, URL string]
// where FormatNumber is a uint16 (0 meaning "no format").

// Value returns the raw cell contents, or the empty string for a Cell with
// no elements.
func (c Cell) Value() interface{} {
	if len(c) > 0 {
		return c[0]
	}
	return ""
}

// SetURL attaches a URL hyperlink to the cell and marks it as a
// HyperlinkStringCell. The link is stored in the fourth slot; a zero format
// number is inserted if the cell had none. Calling SetURL again replaces the
// previously stored link.
//
// The cell must already hold a value and type (len >= 2).
func (c *Cell) SetURL(link string) {
	(*c)[1] = HyperlinkStringCell
	switch len(*c) {
	case 2:
		*c = append(*c, uint16(0), link)
	case 3:
		*c = append(*c, link)
	default:
		// A link is already present: overwrite it rather than appending a
		// fifth element that URL() would never read.
		(*c)[3] = link
	}
}

// URL returns the parsed hyperlink of the cell. The boolean is false when
// the cell is not a hyperlink cell, carries no link, or the link fails to
// parse.
func (c Cell) URL() (*url.URL, bool) {
	if c.Type() != HyperlinkStringCell || len(c) < 4 {
		return nil, false
	}
	parsed, err := url.Parse(c[3].(string))
	return parsed, err == nil
}

// Type returns the CellType of the value; a Cell with fewer than two
// elements is reported as BlankCell.
func (c Cell) Type() CellType {
	if len(c) >= 2 {
		return c[1].(CellType)
	}
	return BlankCell
}

// FormatNo returns the NumberFormat used for display, or 0 when the cell has
// none. The format number lives in the third slot, which is also present on
// hyperlink cells (length 4), so any cell with at least three elements is
// inspected.
func (c Cell) FormatNo() uint16 {
	if len(c) >= 3 {
		if f, ok := c[2].(uint16); ok {
			return f
		}
	}
	return 0
}

// Clone returns a shallow copy of this Cell backed by its own slice.
func (c Cell) Clone() Cell {
	dup := make(Cell, len(c))
	copy(dup, c)
	return dup
}

///////

// boolStrings maps the textual spellings recognised as booleans to their
// value. Only all-lowercase and all-uppercase spellings are listed; a lookup
// of any other string yields false (the map's zero value).
var boolStrings = map[string]bool{
	"yes": true, "true": true, "t": true, "y": true, "1": true, "on": true,
	"no": false, "false": false, "f": false, "n": false, "0": false, "off": false,
	"YES": true, "TRUE": true, "T": true, "Y": true, "1.0": true, "ON": true,
	"NO": false, "FALSE": false, "F": false, "N": false, "0.0": false, "OFF": false,
}

// NewCellWithType creates a new cell value with the given type, coercing as
// necessary.
//
// The value is first wrapped with NewCell (which panics on unsupported
// types) and then converted from its natural type to t:
//   - booleans become 0/1 or "TRUE"/"FALSE";
//   - numbers are converted between int64 and float64, or to "non-zero" booleans;
//   - strings are parsed as numbers (0 when unparseable) or looked up in
//     boolStrings;
//   - anything requested as StringCell is rendered with fmt.Sprint, except a
//     blank value, which becomes the empty string;
//   - numbers requested as DateCell are converted with f.ConvertToDate, so f
//     must be non-nil in that case.
//
// Combinations not listed keep the original value and, for DateCell only,
// have their type overwritten.
func NewCellWithType(value interface{}, t CellType, f *Formatter) Cell {
	c := NewCell(value)
	if c[1] == t {
		// fast path if it was already typed correctly
		return c
	}

	switch c[1].(CellType) {
	case BooleanCell:
		b := c[0].(bool)
		switch t {
		case IntegerCell:
			if b {
				c[0] = int64(1)
			} else {
				c[0] = int64(0)
			}
			c[1] = IntegerCell
		case FloatCell:
			if b {
				c[0] = float64(1.0)
			} else {
				c[0] = float64(0.0)
			}
			c[1] = FloatCell
		case StringCell:
			if b {
				c[0] = "TRUE"
			} else {
				c[0] = "FALSE"
			}
			c[1] = StringCell
		}

	case FloatCell:
		v := c[0].(float64)
		switch t {
		case IntegerCell:
			c[0] = int64(v)
			c[1] = IntegerCell
		case BooleanCell:
			c[0] = v != 0.0
			c[1] = BooleanCell
		}

	case IntegerCell:
		v := c[0].(int64)
		switch t {
		case FloatCell:
			c[0] = float64(v)
			c[1] = FloatCell
		case BooleanCell:
			c[0] = v != 0
			c[1] = BooleanCell
		}

	case StringCell:
		s := c[0].(string)
		switch t {
		case IntegerCell:
			// best-effort: unparseable text becomes 0
			x, _ := strconv.ParseInt(s, 10, 64)
			c[0] = x
			c[1] = IntegerCell
		case FloatCell:
			// best-effort: unparseable text becomes 0
			x, _ := strconv.ParseFloat(s, 64)
			c[0] = x
			c[1] = FloatCell
		case BooleanCell:
			c[0] = boolStrings[s]
			c[1] = BooleanCell
		}
	}

	if t == StringCell {
		if c[0] == nil {
			// blank cells hold nil; fmt.Sprint would render that as "<nil>"
			c[0] = ""
		} else {
			c[0] = fmt.Sprint(c[0])
		}
		c[1] = StringCell
	}
	if t == DateCell {
		if c[1] == FloatCell {
			c[0] = f.ConvertToDate(c[0].(float64))
		} else if c[1] == IntegerCell {
			c[0] = f.ConvertToDate(float64(c[0].(int64)))
		}
		c[1] = DateCell
	}
	return c
}

// NewCell creates a new cell value from any builtin type.
//
// Signed and unsigned integers are stored as int64 (IntegerCell); unsigned
// values too large for int64 are stored as float64 (FloatCell) instead.
// Floats are stored as float64. Strings, []byte, []rune, UTF-16 code units
// ([]uint16) and fmt.Stringer values are stored as string (StringCell), or
// as a BlankCell holding nil when empty. time.Time values become DateCell.
//
// NewCell panics if value is of any other type (including nil).
func NewCell(value interface{}) Cell {
	c := make([]interface{}, 2)
	switch v := value.(type) {
	case bool:
		c[0] = v
		c[1] = BooleanCell
	case int:
		c[0] = int64(v)
		c[1] = IntegerCell
	case int8:
		c[0] = int64(v)
		c[1] = IntegerCell
	case int16:
		c[0] = int64(v)
		c[1] = IntegerCell
	case int32:
		c[0] = int64(v)
		c[1] = IntegerCell
	case int64:
		c[0] = int64(v)
		c[1] = IntegerCell
	case uint8:
		c[0] = int64(v)
		c[1] = IntegerCell
	case uint16:
		c[0] = int64(v)
		c[1] = IntegerCell
	case uint32:
		c[0] = int64(v)
		c[1] = IntegerCell

	case uint:
		// Compare in the unsigned domain: converting to int64 first would
		// wrap large values to negative numbers and never exceed MaxInt64.
		if uint64(v) > math.MaxInt64 {
			c[0] = float64(v)
			c[1] = FloatCell
		} else {
			c[0] = int64(v)
			c[1] = IntegerCell
		}
	case uint64:
		if v > math.MaxInt64 {
			c[0] = float64(v)
			c[1] = FloatCell
		} else {
			c[0] = int64(v)
			c[1] = IntegerCell
		}

	case float32:
		c[0] = float64(v)
		c[1] = FloatCell
	case float64:
		c[0] = float64(v)
		c[1] = FloatCell

	case string:
		if len(v) == 0 {
			c[0] = nil
			c[1] = BlankCell
		} else {
			c[0] = v
			c[1] = StringCell
		}
	case []byte:
		if len(v) == 0 {
			c[0] = nil
			c[1] = BlankCell
		} else {
			c[0] = string(v)
			c[1] = StringCell
		}
	case []uint16:
		if len(v) == 0 {
			c[0] = nil
			c[1] = BlankCell
		} else {
			c[0] = string(utf16.Decode(v))
			c[1] = StringCell
		}
	case []rune:
		if len(v) == 0 {
			c[0] = nil
			c[1] = BlankCell
		} else {
			c[0] = string(v)
			c[1] = StringCell
		}
	case time.Time:
		// must precede fmt.Stringer, which time.Time also satisfies
		c[0] = v
		c[1] = DateCell

	case fmt.Stringer:
		s := v.String()
		if len(s) == 0 {
			c[0] = nil
			c[1] = BlankCell
		} else {
			c[0] = s
			c[1] = StringCell
		}
	default:
		panic("grate: data type not handled")
	}
	return Cell(c)
}

// SetFormatNumber changes the number format stored with the cell. A format
// of 0 removes the stored format. On a cell that also carries a hyperlink
// (length 4) the format slot is reset to 0 in place, so the link stored
// after it is preserved.
//
// The cell must already hold a value and type (len >= 2).
func (c *Cell) SetFormatNumber(f uint16) {
	if f == 0 && len(*c) <= 3 {
		*c = (*c)[:2]
		return
	}

	if len(*c) == 2 {
		*c = append(*c, f)
		return
	}
	// Format slot already exists (with or without a trailing URL).
	(*c)[2] = f
}

// Equal reports whether two cells hold equal values.
//
// If either cell is an IntegerCell or FloatCell the comparison is numeric:
// both values are converted to float64, with non-numeric values parsed from
// their printed form (and treated as 0 when that fails). Otherwise the cells
// are equal when neither is Less than the other.
//
// Both cells must be non-empty; Less panics on value types it does not
// recognise.
func (c Cell) Equal(other Cell) bool {
	if c.Type() == FloatCell || other.Type() == FloatCell ||
		c.Type() == IntegerCell || other.Type() == IntegerCell {
		asFloat := func(v interface{}) float64 {
			switch n := v.(type) {
			case float64:
				return n
			case int64:
				return float64(n)
			}
			var parsed float64
			// best-effort: parsed stays 0 when the text is not numeric
			fmt.Sscanf(fmt.Sprint(v), "%g", &parsed)
			return parsed
		}
		// Each side is converted from its own value; the right-hand side
		// must be read from other, not from c.
		return asFloat(c[0]) == asFloat(other[0])
	}

	return c.Less(other) == other.Less(c)
}

// Less reports whether the value of c sorts before the value of other.
//
// The comparison dispatches on the dynamic type of both values (nil, bool,
// int64, float64, string), coercing the right-hand value towards the type of
// the left-hand one. An empty c, a nil value in c, or a nil value in other
// always yields false. Strings compared against numbers are parsed as
// numbers when possible and compared as text otherwise.
//
// Less panics if either value has any other dynamic type (for example
// time.Time). other must be non-empty.
func (c Cell) Less(other Cell) bool {
	if len(c) == 0 {
		return false
	}
	switch v1 := c[0].(type) {
	case nil:
		return false
	case bool:
		// F < T = T
		// F < F = F
		// T < T = F
		// T < F = F
		if v1 {
			return false
		}

		// if v2 is truthy, return true
		switch v2 := other[0].(type) {
		case nil:
			return false
		case bool:
			return v2
		case int64:
			return v2 != 0
		case float64:
			return v2 != 0.0
		case string:
			return boolStrings[v2]
		}

	case int64:
		// v1 < v2

		switch v2 := other[0].(type) {
		case nil:
			return false
		case bool:
			x := int64(0)
			if v2 {
				x = 1
			}
			return v1 < x
		case int64:
			return v1 < v2
		case float64:
			// floats outside the int64 range decide the result directly
			if v2 < math.MinInt64 {
				return false
			}
			if v2 > math.MaxInt64 {
				return true
			}
			return float64(v1) < v2
		case string:
			// numeric comparison when the text parses, textual otherwise
			var x int64
			_, err := fmt.Sscanf(v2, "%d", &x)
			if err == nil {
				return v1 < x
			}
			return fmt.Sprint(v1) < v2
		}
	case float64:
		switch v2 := other[0].(type) {
		case nil:
			return false
		case bool:
			x := float64(0.0)
			if v2 {
				x = 1.0
			}
			return v1 < x
		case int64:
			// floats outside the int64 range decide the result directly
			if v1 < math.MinInt64 {
				return true
			}
			if v1 > math.MaxInt64 {
				return false
			}
			return v1 < float64(v2)
		case float64:
			return v1 < v2
		case string:
			// numeric comparison when the text parses, textual otherwise
			var x float64
			_, err := fmt.Sscanf(v2, "%g", &x)
			if err == nil {
				return v1 < x
			}
			return fmt.Sprint(v1) < v2
		}
	case string:
		//return v1 < fmt.Sprint(other[0])

		switch v2 := other[0].(type) {
		case nil:
			return false
		case bool:
			// a string is less than a bool only when it is falsy and the bool is true
			return v2 && !boolStrings[v1]
		case int64:
			var x int64
			_, err := fmt.Sscanf(v1, "%d", &x)
			if err == nil {
				return x < v2
			}
			return v1 < fmt.Sprint(v2)
		case float64:
			var x float64
			_, err := fmt.Sscanf(v1, "%g", &x)
			if err == nil {
				return x < v2
			}
			return v1 < fmt.Sprint(v2)
		case string:
			return v1 < v2
		}

	}

	// reached when either value has a dynamic type not handled above
	panic("unable to compare cells (invalid internal type)")
}


================================================
FILE: commonxl/dates.go
================================================
package commonxl

import (
	"strings"
	"time"
)

// ConvertToDate converts a floating-point value using the
// Excel date serialization conventions.
//
// The integer part of val is a day count and the fractional part a fraction
// of a 24-hour day. The epoch depends on whether the Formatter is in 1904
// mode. Serial values whose integer part is below 61 are converted through a
// Julian-date calculation; all other values are added to a fixed epoch date.
// The result is always in UTC.
func (x *Formatter) ConvertToDate(val float64) time.Time {
	// http://web.archive.org/web/20190808062235/http://aa.usno.navy.mil/faq/docs/JD_Formula.php
	v := int(val)
	if v < 61 {
		// NOTE(review): with these offsets a 1900-mode serial of 1 appears to
		// resolve to 1899-12-31, i.e. the same result as the 1899-12-30 epoch
		// used below — confirm this is the intended treatment of the serials
		// preceding Excel's fictitious 1900-02-29.
		jdate := val + 0.5
		if (x.flags & fMode1904) != 0 {
			jdate += 2416480.5
		} else {
			jdate += 2415018.5
		}
		// split into whole Julian day number and fraction of a day
		JD := int(jdate)
		frac := jdate - float64(JD)

		// integer Julian-day-number to Gregorian calendar date conversion
		L := JD + 68569
		N := 4 * L / 146097
		L = L - (146097*N+3)/4
		I := 4000 * (L + 1) / 1461001
		L = L - 1461*I/4 + 31
		J := 80 * L / 2447
		day := L - 2447*J/80
		L = J / 11
		month := time.Month(J + 2 - 12*L)
		year := 100*(N-49) + I + L

		t := time.Duration(float64(time.Hour*24) * frac)
		return time.Date(year, month, day, 0, 0, 0, 0, time.UTC).Add(t)
	}
	// general case: whole days past the epoch plus the fractional time of day
	frac := val - float64(v)
	date := time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC)
	if (x.flags & fMode1904) == 0 {
		date = time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC)
	}

	t := time.Duration(float64(time.Hour*24) * frac)
	return date.AddDate(0, 0, v).Add(t)
}

// timeFmtFunc returns a FmtFunc that renders its value with the Go time
// layout f. A time.Time is formatted directly; any value accepted by
// convertToFloat64 is first converted with the Formatter's ConvertToDate.
// Other values yield a fixed diagnostic string.
func timeFmtFunc(f string) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		if t, isTime := v.(time.Time); isTime {
			return t.Format(f)
		}
		serial, ok := convertToFloat64(v)
		if !ok {
			return "MUST BE time.Time OR numeric TO FORMAT CORRECTLY"
		}
		return x.ConvertToDate(serial).Format(f)
	}
}

// cnTimeFmtFunc behaves like timeFmtFunc, but the first "AM" and the first
// "PM" in the formatted output are replaced by their Chinese translations.
// TODO: implement others
func cnTimeFmtFunc(f string) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		when, isTime := v.(time.Time)
		if !isTime {
			serial, ok := convertToFloat64(v)
			if !ok {
				return "MUST BE time.Time OR numeric TO FORMAT CORRECTLY"
			}
			when = x.ConvertToDate(serial)
		}
		out := strings.Replace(when.Format(f), `AM`, `上午`, 1)
		out = strings.Replace(out, `PM`, `下午`, 1)
		return out
	}
}


================================================
FILE: commonxl/fmt.go
================================================
package commonxl

import (
	"fmt"
	"strconv"
	"strings"
)

// FmtFunc will format a value according to the designated style.
// It receives the Formatter in use (some implementations ignore it, others
// call its ConvertToDate method) and the raw cell value, and returns the
// display string.
type FmtFunc func(*Formatter, interface{}) string

// staticFmtFunc returns a FmtFunc that ignores its input and always
// produces s.
func staticFmtFunc(s string) FmtFunc {
	return func(*Formatter, interface{}) string {
		return s
	}
}

// surround returns a FmtFunc that wraps the output of ff between the fixed
// strings pre and post.
func surround(pre string, ff FmtFunc, post string) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		inner := ff(x, v)
		return pre + inner + post
	}
}

// addNegParens returns a FmtFunc that rewrites a leading minus sign in the
// output of ff as surrounding parentheses, e.g. "-12" becomes "(12)".
// Output without a leading minus (including the empty string) is returned
// unchanged.
func addNegParens(ff FmtFunc) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		s1 := ff(x, v)
		// HasPrefix is safe on an empty string, unlike indexing s1[0].
		if strings.HasPrefix(s1, "-") {
			return "(" + s1[1:] + ")"
		}
		return s1
	}
}

// addCommas returns a FmtFunc that inserts thousands separators into the
// output of ff. Commas are placed every three characters, counting leftwards
// from the first '.', 'e' or 'E' (or from the end of the string when none is
// present). A leading minus sign is kept in front of the grouped digits.
// Empty output is returned unchanged.
func addCommas(ff FmtFunc) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		s1 := ff(x, v)
		// HasPrefix is safe on an empty string, unlike indexing s1[0].
		isNeg := strings.HasPrefix(s1, "-")
		if isNeg {
			s1 = s1[1:]
		}
		endIndex := strings.IndexAny(s1, ".eE")
		if endIndex < 0 {
			endIndex = len(s1)
		}
		for endIndex > 3 {
			endIndex -= 3
			s1 = s1[:endIndex] + "," + s1[endIndex:]
		}
		if isNeg {
			return "-" + s1
		}
		return s1
	}
}

// identFunc is the "general" formatter. Strings and fmt.Stringer values are
// returned as-is and booleans as "TRUE"/"FALSE". An int64 is printed in
// decimal and a float64 in plain decimal notation when the result is at most
// 11 characters (12 for a negative float); a longer float is retried with 6
// significant digits. Whatever does not fit, and every other type, is
// rendered with fmt.Sprint.
func identFunc(x *Formatter, v interface{}) string {
	const maxWidth = 11
	switch val := v.(type) {
	case string:
		return val
	case bool:
		if val {
			return "TRUE"
		}
		return "FALSE"
	case int64:
		if digits := strconv.FormatInt(val, 10); len(digits) <= maxWidth {
			return digits
		}
	case float64:
		plain := strconv.FormatFloat(val, 'f', -1, 64)
		if len(plain) <= maxWidth || (val < 0 && len(plain) == maxWidth+1) {
			return plain
		}
		if short := strconv.FormatFloat(val, 'g', 6, 64); len(short) <= maxWidth {
			return short
		}
	case fmt.Stringer:
		return val.String()
	}
	return fmt.Sprint(v)
}

// sprintfFunc returns a FmtFunc that scales a numeric value by mul and
// renders it with the fmt format string fs. fs must contain exactly one
// verb, either %d or a floating-point verb.
//
// float64 values and the integer types int, uint, int16, uint16, int32,
// uint32, int64 and uint64 are scaled and converted to match the verb, so an
// integer cell formats correctly under a float verb and percentages are
// applied to integers too. Any other value is rendered with fmt.Sprint.
func sprintfFunc(fs string, mul int) FmtFunc {
	wantInt64 := strings.Contains(fs, "%d")
	return func(x *Formatter, v interface{}) string {
		switch val := v.(type) {
		case int, uint, int64, uint64, int32, uint32, uint16, int16:
			if wantInt64 && mul == 1 {
				// integer value, integer verb, no scaling: format directly
				// to keep full 64-bit precision
				return fmt.Sprintf(fs, v)
			}
			// always succeeds for the integer types listed above
			f, _ := convertToFloat64(v)
			f *= float64(mul)
			if wantInt64 {
				return fmt.Sprintf(fs, int64(f))
			}
			return fmt.Sprintf(fs, f)

		case float64:
			val *= float64(mul)
			if wantInt64 {
				v2 := int64(val)
				return fmt.Sprintf(fs, v2)
			}
			return fmt.Sprintf(fs, val)
		}
		return fmt.Sprint(v)
	}
}

// convertToInt64 converts v with convertToFloat64 and truncates the result
// to int64. The boolean reports whether the conversion succeeded.
func convertToInt64(v interface{}) (int64, bool) {
	f, ok := convertToFloat64(v)
	n := int64(f)
	return n, ok
}

// convertToFloat64 converts any builtin numeric value, a bool (true = 1,
// false = 0) or a string parseable by strconv.ParseFloat to float64. The
// boolean is false when v has another type or the string does not parse.
func convertToFloat64(v interface{}) (float64, bool) {
	switch n := v.(type) {
	// floating point
	case float64:
		return n, true
	case float32:
		return float64(n), true

	// signed integers
	case int:
		return float64(n), true
	case int8:
		return float64(n), true
	case int16:
		return float64(n), true
	case int32:
		return float64(n), true
	case int64:
		return float64(n), true

	// unsigned integers
	case uint:
		return float64(n), true
	case uint8:
		return float64(n), true
	case uint16:
		return float64(n), true
	case uint32:
		return float64(n), true
	case uint64:
		return float64(n), true

	case bool:
		if !n {
			return 0.0, true
		}
		return 1.0, true

	case string:
		parsed, err := strconv.ParseFloat(n, 64)
		return parsed, err == nil
	}
	return 0.0, false
}

// zeroDashFunc returns a FmtFunc that renders a numeric zero as "-" and
// delegates every other numeric value to ff. Values that cannot be converted
// to a number are rendered with fmt.Sprint.
func zeroDashFunc(ff FmtFunc) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		switch num, isNum := convertToFloat64(v); {
		case !isNum:
			// strings etc returned as-is
			return fmt.Sprint(v)
		case num == 0.0:
			return "-"
		default:
			return ff(x, v)
		}
	}
}

// fracFmtFunc returns a FmtFunc that renders a number as a whole part plus a
// fraction ("W N/D"), using DecimalToWholeFraction with n passed for both of
// its limit arguments. A zero numerator prints only the whole part; a zero
// whole part prints only the fraction, carrying the sign of the input.
// Non-numeric values yield a fixed diagnostic string.
func fracFmtFunc(n int) FmtFunc {
	return func(x *Formatter, v interface{}) string {
		val, ok := convertToFloat64(v)
		if !ok {
			return "MUST BE numeric TO FORMAT CORRECTLY"
		}
		whole, num, den := DecimalToWholeFraction(val, n, n)
		switch {
		case num == 0:
			return fmt.Sprintf("%d", whole)
		case whole != 0:
			return fmt.Sprintf("%d %d/%d", whole, num, den)
		}
		// no whole part: the sign has to be shown on the numerator
		if val < 0 && num > 0 {
			num = -num
		}
		return fmt.Sprintf("%d/%d", num, den)
	}
}

// switchFmtFunc combines up to four section formatters, given in the order
// positive;negative;zero;other, into one FmtFunc. Missing negative and zero
// formatters default to pos; a missing "other" formatter (used for values
// that are not numeric) defaults to identFunc.
func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc {
	negFF, zeroFF, stringFF := pos, pos, FmtFunc(identFunc)
	if len(others) >= 1 {
		negFF = others[0]
	}
	if len(others) >= 2 {
		zeroFF = others[1]
	}
	if len(others) >= 3 {
		stringFF = others[2]
	}

	return func(x *Formatter, v interface{}) string {
		val, ok := convertToFloat64(v)
		switch {
		case !ok:
			return stringFF(x, v)
		case val == 0.0:
			return zeroFF(x, v)
		case val < 0.0:
			return negFF(x, v)
		default:
			return pos(x, v)
		}
	}
}

// goFormatters maps the standard built-in number format IDs to the FmtFunc
// implementing them.
//
// Date and time entries are Go time layouts. Go copies any layout text it
// does not recognise verbatim into the output, so CJK unit characters are
// written directly: wrapping them in Excel-style double quotes would make
// the quote characters appear in the formatted result.
var goFormatters = map[uint16]FmtFunc{
	0:  identFunc, // FIXME: better "general" formatter
	49: identFunc,

	14: timeFmtFunc(`01-02-06`),
	15: timeFmtFunc(`2-Jan-06`),
	16: timeFmtFunc(`2-Jan`),
	17: timeFmtFunc(`Jan-06`),
	20: timeFmtFunc(`15:04`),
	21: timeFmtFunc(`15:04:05`),
	22: timeFmtFunc(`1/2/06 15:04`),
	45: timeFmtFunc(`04:05`),
	46: timeFmtFunc(`3:04:05`),
	47: timeFmtFunc(`04:05.9`), // minutes:seconds.tenths, same layout as 80
	27: timeFmtFunc(`2006年1月`),
	28: timeFmtFunc(`1月2日`),
	29: timeFmtFunc(`1月2日`),
	30: timeFmtFunc(`1-2-06`),
	31: timeFmtFunc(`2006年1月2日`),
	32: timeFmtFunc(`15时04分`),
	33: timeFmtFunc(`15时04分05秒`),
	36: timeFmtFunc(`2006年1月`), // year and month, same layout as 27
	50: timeFmtFunc(`2006年1月`), // year and month, same layout as 27
	51: timeFmtFunc(`1月2日`),
	52: timeFmtFunc(`2006年1月`),
	53: timeFmtFunc(`1月2日`),
	54: timeFmtFunc(`1月2日`),
	57: timeFmtFunc(`2006年1月`),
	58: timeFmtFunc(`1月2日`),
	71: timeFmtFunc(`2/1/2006`),
	72: timeFmtFunc(`2-Jan-06`),
	73: timeFmtFunc(`2-Jan`),
	74: timeFmtFunc(`Jan-06`),
	75: timeFmtFunc(`15:04`),
	76: timeFmtFunc(`15:04:05`),
	77: timeFmtFunc(`2/1/2006 15:04`),
	78: timeFmtFunc(`04:05`),
	79: timeFmtFunc(`15:04:05`),
	80: timeFmtFunc(`04:05.9`),
	81: timeFmtFunc(`2/1/06`),
	18: timeFmtFunc(`3:04 PM`),
	19: timeFmtFunc(`3:04:05 PM`),

	// AM/PM markers are translated by cnTimeFmtFunc after formatting.
	34: cnTimeFmtFunc(`PM 3时04分`),
	35: cnTimeFmtFunc(`PM 3时04分05秒`),
	55: cnTimeFmtFunc(`PM 3时04分`),
	56: cnTimeFmtFunc(`PM 3时04分05秒`),

	12: fracFmtFunc(1),
	13: fracFmtFunc(2),

	69: fracFmtFunc(1),
	70: fracFmtFunc(2),

	1:  sprintfFunc(`%d`, 1),
	2:  sprintfFunc(`%4.2f`, 1),
	59: sprintfFunc(`%d`, 1),
	60: sprintfFunc(`%4.2f`, 1),

	// percentages: value is multiplied by 100 before formatting
	9:  sprintfFunc(`%d%%`, 100),
	10: sprintfFunc(`%4.2f%%`, 100),
	67: sprintfFunc(`%d%%`, 100),
	68: sprintfFunc(`%4.2f%%`, 100),

	3:  addCommas(sprintfFunc("%d", 1)),
	61: addCommas(sprintfFunc("%d", 1)),
	37: addNegParens(addCommas(sprintfFunc("%d", 1))),
	38: addNegParens(addCommas(sprintfFunc("%d", 1))),

	4:  addCommas(sprintfFunc("%4.2f", 1)),
	62: addCommas(sprintfFunc("%4.2f", 1)),
	39: addNegParens(addCommas(sprintfFunc("%4.2f", 1))),
	40: addNegParens(addCommas(sprintfFunc("%4.2f", 1))),

	11: sprintfFunc(`%4.2E`, 1),
	48: sprintfFunc(`%3.1E`, 1),

	41: zeroDashFunc(addCommas(sprintfFunc("%d", 1))),
	43: zeroDashFunc(addCommas(sprintfFunc("%4.2f", 1))),

	// accounting formats: positive; negative (in parentheses); zero
	42: switchFmtFunc(
		surround("$", addCommas(sprintfFunc("%d", 1)), ""),
		surround("$(", addCommas(sprintfFunc("%d", 1)), ")"),
		staticFmtFunc("$-")),
	44: switchFmtFunc(
		surround("$", addCommas(sprintfFunc("%4.2f", 1)), ""),
		surround("$(", addCommas(sprintfFunc("%4.2f", 1)), ")"),
		staticFmtFunc("$-")),
}


================================================
FILE: commonxl/fmt_test.go
================================================
package commonxl

import (
	"log"
	"testing"
	"time"
)

// testcaseNums pairs an input value with the string a formatter is expected
// to produce for it.
type testcaseNums struct {
	v interface{} // value handed to the formatter
	s string      // expected formatted output
}

// commas lists the expectations checked by TestCommas for
// addCommas(identFunc), covering positive and negative values of several
// numeric types at each order of magnitude from 10 to 100,000,000.
//
// NOTE(review): the float64 expectations in exponent form (e.g. "1e+06",
// "1.00000012345e+06") look like fmt.Sprint output, whereas identFunc first
// tries strconv.FormatFloat(x, 'f', -1, 64) and then a 6-significant-digit
// 'g' rendering — confirm these entries still match the current identFunc.
var commas = []testcaseNums{
	{10, "10"},
	{float64(10), "10"},
	{float64(10) + 0.12345, "10.12345"},
	{-10, "-10"},
	{float64(-10), "-10"},
	{float64(-10) + 0.12345, "-9.87655"},
	{uint16(10), "10"},
	{100, "100"},
	{float64(100), "100"},
	{float64(100) + 0.12345, "100.12345"},
	{-100, "-100"},
	{float64(-100), "-100"},
	{float64(-100) + 0.12345, "-99.87655"},
	{uint16(100), "100"},
	{1000, "1,000"},
	{float64(1000), "1,000"},
	{float64(1000) + 0.12345, "1,000.12345"},
	{-1000, "-1,000"},
	{float64(-1000), "-1,000"},
	{float64(-1000) + 0.12345, "-999.87655"},
	{uint16(1000), "1,000"},
	{10000, "10,000"},
	{float64(10000), "10,000"},
	{float64(10000) + 0.12345, "10,000.12345"},
	{-10000, "-10,000"},
	{float64(-10000), "-10,000"},
	{float64(-10000) + 0.12345, "-9,999.87655"},
	{uint16(10000), "10,000"},
	{100000, "100,000"},
	{float64(100000), "100,000"},
	{float64(100000) + 0.12345, "100,000.12345"},
	{-100000, "-100,000"},
	{float64(-100000), "-100,000"},
	{float64(-100000) + 0.12345, "-99,999.87655"},
	{uint64(100000), "100,000"},
	{1000000, "1,000,000"},
	{float64(1000000), "1e+06"},
	{float64(1000000) + 0.12345, "1.00000012345e+06"},
	{-1000000, "-1,000,000"},
	{float64(-1000000), "-1e+06"},
	{float64(-1000000) + 0.12345, "-999,999.87655"},
	{uint64(1000000), "1,000,000"},
	{10000000, "10,000,000"},
	{float64(10000000), "1e+07"},
	{float64(10000000) + 0.12345, "1.000000012345e+07"},
	{-10000000, "-10,000,000"},
	{float64(-10000000), "-1e+07"},
	{float64(-10000000) + 0.12345, "-9.99999987655e+06"},
	{uint64(10000000), "10,000,000"},
	{100000000, "100,000,000"},
	{float64(100000000), "1e+08"},
	{float64(100000000) + 0.12345, "1.0000000012345e+08"},
	{-100000000, "-100,000,000"},
	{float64(-100000000), "-1e+08"},
	{float64(-100000000) + 0.12345, "-9.999999987655e+07"},
	{uint64(100000000), "100,000,000"},
}

// TestCommas checks addCommas(identFunc) against every entry of the commas
// table, stopping at the first mismatch.
func TestCommas(t *testing.T) {
	format := addCommas(identFunc)
	for i := range commas {
		tc := commas[i]
		got := format(nil, tc.v)
		if got == tc.s {
			continue
		}
		t.Fatalf("commas failed: get '%s' but expected '%s' for %T(%v)",
			got, tc.s, tc.v, tc.v)
	}
}

// TestDateFormats renders a handful of dates with every built-in format that
// is typed as DateCell. It is primarily a smoke test: the formatters must
// exist and must not panic.
func TestDateFormats(t *testing.T) {
	var testDates = []time.Time{
		time.Date(1901, 7, 11, 1, 5, 0, 0, time.UTC),
		time.Date(1905, 7, 11, 4, 10, 0, 0, time.UTC),
		time.Date(1904, 7, 11, 8, 15, 0, 0, time.UTC),
		time.Date(1993, 7, 11, 12, 20, 0, 0, time.UTC),
		time.Date(1983, 7, 11, 16, 30, 0, 0, time.UTC),
		time.Date(1983, 7, 11, 20, 45, 0, 0, time.UTC),
		time.Date(2000, 12, 31, 23, 59, 0, 0, time.UTC),
		time.Date(2002, 12, 31, 23, 59, 0, 0, time.UTC),
		time.Date(2012, 3, 10, 9, 30, 0, 0, time.UTC),
		time.Date(2014, 3, 27, 9, 37, 0, 0, time.UTC),
	}

	fx := &Formatter{}
	// The loop variable is deliberately not named t so that the *testing.T
	// stays usable inside the loop.
	for _, date := range testDates {
		for fid, ctype := range builtInFormatTypes {
			if ctype != DateCell {
				continue
			}
			ff, ok := goFormatters[fid]
			if !ok || ff == nil {
				// Report the gap instead of crashing on a nil func call.
				t.Errorf("no formatter registered for built-in date format %d", fid)
				continue
			}
			// mainly testing these don't crash...
			log.Println(ff(fx, date))
		}
	}
}
// TestBoolFormats checks a three-section format code: true and non-zero
// numbers must render as "yes", while false and zero must render as "no".
func TestBoolFormats(t *testing.T) {
	ff, _ := makeFormatter(`"yes";"yes";"no"`)

	cases := []struct {
		in   interface{}
		want string
		msg  string
	}{
		{false, "no", `false should be "no"`},
		{0, "no", `0 should be "no"`},
		{0.0, "no", `0.0 should be "no"`},

		{true, "yes", `true should be "yes"`},
		{99, "yes", `99 should be "yes"`},
		{-4, "yes", `-4 should be "yes"`},

		{4.0, "yes", `4.0 should be "yes"`},
		{-99.0, "yes", `-99.0 should be "yes"`},
	}

	for _, c := range cases {
		if ff(nil, c.in) != c.want {
			t.Fatal(c.msg)
		}
	}
}


================================================
FILE: commonxl/formats.go
================================================
package commonxl

import (
	"errors"
	"fmt"
	"regexp"
	"strings"
)

// Formatter contains formatting methods common to Excel spreadsheets.
// Custom number formats are registered with Add and looked up by Get/Apply.
type Formatter struct {
	flags           uint64              // bit set of f* flags (see fMode1904)
	customCodes     map[uint16]FmtFunc  // custom format funcs keyed by format ID; allocated lazily by Add
	customCodeTypes map[uint16]CellType // cell type for each custom format ID; allocated lazily by Add
}

// Bit flags stored in Formatter.flags.
const (
	// fMode1904 is set and cleared by Formatter.Mode1904.
	fMode1904 uint64 = 1
)

// Mode1904 switches the 1904 date system on or off. When enabled, dates
// start on Jan 1, 1904; this setting was used in early MacOS Excel
// applications.
func (x *Formatter) Mode1904(enabled bool) {
	if !enabled {
		x.flags &^= fMode1904
		return
	}
	x.flags |= fMode1904
}

// Add registers a custom number format under fmtID.
//
// A format code equal to "general" (in any letter case) is mapped to built-in
// formatter 0 and stored without any further checks. Any other code is
// rejected with an error when fmtID belongs to a built-in formatter or has
// already been registered on this Formatter.
func (x *Formatter) Add(fmtID uint16, formatCode string) error {
	if x.customCodes == nil {
		x.customCodes = make(map[uint16]FmtFunc)
		x.customCodeTypes = make(map[uint16]CellType)
	}

	if strings.EqualFold(formatCode, "general") {
		x.customCodes[fmtID] = goFormatters[0]
		return nil
	}

	if _, builtin := goFormatters[fmtID]; builtin {
		return errors.New("grate/commonxl: cannot replace default number formats")
	}
	if _, exists := x.customCodes[fmtID]; exists {
		return errors.New("grate/commonxl: cannot replace existing number formats")
	}

	ff, ctype := makeFormatter(formatCode)
	x.customCodes[fmtID] = ff
	x.customCodeTypes[fmtID] = ctype
	return nil
}

// getCellType reports the cell type associated with fmtID. Built-in format
// types take precedence over custom ones; the bool is false when the ID is
// known to neither table.
func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) {
	ct, found := builtInFormatTypes[fmtID]
	if found {
		return ct, true
	}
	if x.customCodeTypes == nil {
		return 0, false
	}
	ct, found = x.customCodeTypes[fmtID]
	return ct, found
}

// Regular expressions used by makeFormatter to pick apart format codes.
var (
	// minsMatch finds a span that runs from an "h" through an "m" to an "s";
	// makeFormatter treats the first "mm"/"m" inside that span as minutes.
	minsMatch = regexp.MustCompile("h.*m.*s")
	// nonEsc matches a double quote at the start of the string or after any
	// non-quote character; replacing with `$1` drops the quote itself.
	nonEsc    = regexp.MustCompile(`([^"]|^)"`)
	// squash matches "*" or "_" together with the single character after it.
	squash    = regexp.MustCompile(`[*_].`)
	// fixEsc matches a backslash and captures the character it escapes.
	fixEsc    = regexp.MustCompile(`\\(.)`)

	// formatMatchBrackets matches a bracketed section such as "[Red]".
	formatMatchBrackets    = regexp.MustCompile(`\[[^\]]*\]`)
	// formatMatchTextLiteral matches a double-quoted text literal.
	formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`)
)

// makeFormatter translates an Excel number format code into a FmtFunc and
// the CellType of the values it renders.
//
// Processing steps:
//  1. Bracketed sections (e.g. colour marks) are removed.
//  2. A code containing ";" is split into sections; each section is compiled
//     recursively and combined with switchFmtFunc. The cell type of the
//     first section is returned.
//  3. If the code, ignoring escaped characters and quoted literals, contains
//     any of "ymdhs" it is converted to a Go time layout (DateCell).
//  4. Otherwise a Sprintf-based float or integer formatter is built,
//     optionally decorated with thousands separators and with the literal
//     text surrounding the digit placeholders.
func makeFormatter(s string) (FmtFunc, CellType) {
	//log.Printf("makeFormatter('%s')", s)
	// remove any coloring marks
	s = formatMatchBrackets.ReplaceAllString(s, "")
	if strings.Contains(s, ";") {
		parts := strings.Split(s, ";")
		posFF, ctypePos := makeFormatter(parts[0])
		rem := make([]FmtFunc, len(parts)-1)
		for i, ps := range parts[1:] {
			rem[i], _ = makeFormatter(ps)
		}
		return switchFmtFunc(posFF, rem...), ctypePos
	}

	// Strip escaped characters AND quoted text before looking for date
	// letters. Both replacements must be chained on s2: re-reading s in the
	// second step would throw away the escape removal, so a code such as
	// `0\s` would be mistaken for a date/time format.
	s2 := fixEsc.ReplaceAllString(s, "")
	s2 = formatMatchTextLiteral.ReplaceAllString(s2, "")

	if strings.ContainsAny(s2, "ymdhs") {
		// it's a date/time format

		if loc := minsMatch.FindStringIndex(s); loc != nil {
			// m or mm in loc[0]:loc[1] is a minute format
			inner := s[loc[0]:loc[1]]
			inner = strings.Replace(inner, "mm", "04", 1)
			inner = strings.Replace(inner, "m", "4", 1)
			s = s[:loc[0]] + inner + s[loc[1]:]
		}
		// Ordered longest-first so that e.g. "mmmm" is consumed before "mm".
		dfreps := [][]string{
			{"hh", "15"}, {"h", "15"},
			{"ss", "05"}, {"s", "5"},
			{"mmmmm", "Jan"}, // super ambiguous, replace with 3-letter month
			{"mmmm", "January"}, {"mmm", "Jan"},
			{"mm", "01"}, {"m", "1"},
			{"dddd", "Monday"}, {"ddd", "Mon"},
			{"dd", "02"}, {"d", "2"},
			{"yyyy", "2006"}, {"yy", "06"},
		}
		if strings.Contains(s, "AM") || strings.Contains(s, "PM") {
			// 12-hour clock layouts
			dfreps[0][1] = "03"
			dfreps[1][1] = "3"
		}
		for _, dfr := range dfreps {
			s = strings.Replace(s, dfr[0], dfr[1], 1)
		}

		s = nonEsc.ReplaceAllString(s, `$1`)
		s = squash.ReplaceAllString(s, ``)
		s = fixEsc.ReplaceAllString(s, `$1`)

		//log.Printf("   made time formatter '%s'", s)
		return timeFmtFunc(s), DateCell
	}

	var ff FmtFunc
	var ctype CellType
	if strings.ContainsAny(s, ".Ee") {
		verb := "f"
		if strings.ContainsAny(s, "Ee") {
			verb = "E"
		}
		s = regexp.MustCompile("[eE]+[+-]0+").ReplaceAllString(s, "")
		s2 := strings.ReplaceAll(s, ",", "")
		i1 := strings.IndexAny(s2, "0")
		i2 := strings.IndexByte(s2, '.')
		i3 := strings.LastIndexAny(s2, "0.")
		mul := 1
		if strings.Contains(s2, "%") {
			mul = 100
		}
		// width and precision are derived from the placeholder positions
		sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb)
		//log.Printf("   made float formatter '%s'", sf)
		ff = sprintfFunc(sf, mul)
		ctype = FloatCell
	} else {
		s2 := strings.ReplaceAll(s, ",", "")
		i1 := strings.IndexAny(s2, "0")
		i2 := strings.LastIndexAny(s2, "0.")
		mul := 1
		if strings.Contains(s2, "%") {
			mul = 100
		}
		sf := fmt.Sprintf("%%%dd", i2-i1)
		if (i2 - i1) == 0 {
			sf = "%d"
		}
		//log.Printf("   made int formatter '%s'", sf)
		ff = sprintfFunc(sf, mul)
		ctype = IntegerCell
	}

	if strings.Contains(s, ",") {
		ff = addCommas(ff)
		//log.Printf("   added commas")
	}

	// Split around the first run of digit placeholders to find the literal
	// prefix and suffix of the number.
	surReg := regexp.MustCompile(`[0#?,.]+`)
	prepost := surReg.Split(s, 2)
	if len(prepost) > 0 && len(prepost[0]) > 0 {
		prepost[0] = nonEsc.ReplaceAllString(prepost[0], `$1`)
		prepost[0] = squash.ReplaceAllString(prepost[0], ``)
		prepost[0] = fixEsc.ReplaceAllString(prepost[0], `$1`)
	}
	if len(prepost) == 1 {
		// no digit placeholders at all: text ("@") or a static literal
		if prepost[0] == "@" {
			return identFunc, StringCell
		}
		//log.Printf("   added static ('%s')", prepost[0])
		return staticFmtFunc(prepost[0]), StringCell
	}
	if len(prepost[0]) > 0 || len(prepost[1]) > 0 {
		prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`)
		prepost[1] = squash.ReplaceAllString(prepost[1], ``)
		prepost[1] = fixEsc.ReplaceAllString(prepost[1], `$1`)

		ff = surround(prepost[0], ff, prepost[1])
		//log.Printf("   added surround ('%s' ... '%s')", prepost[0], prepost[1])
	}

	return ff, ctype
}

// Get returns the format func registered for fmtID. Built-in formatters are
// consulted first, then the custom formats added to x. For an unknown fmtID
// it returns identFunc together with false.
func (x *Formatter) Get(fmtID uint16) (FmtFunc, bool) {
	if builtin, ok := goFormatters[fmtID]; ok {
		return builtin, true
	}
	if custom, ok := x.customCodes[fmtID]; ok {
		return custom, true
	}
	return identFunc, false
}

// Apply formats val with the number format identified by fmtID.
//
// Built-in formatters are consulted first, then the custom formats added to
// x. When fmtID is unknown, Apply returns fmt.Sprint(val) and false rather
// than invoking a nil formatter (which would panic).
func (x *Formatter) Apply(fmtID uint16, val interface{}) (string, bool) {
	if ff, ok := goFormatters[fmtID]; ok {
		return ff(x, val), true
	}
	if fs, ok := x.customCodes[fmtID]; ok {
		return fs(x, val), true
	}
	return fmt.Sprint(val), false
}

// builtInFormats are all the built-in number formats for XLS/XLSX, keyed by
// number format ID. Sheet.Formats uses this table to report format codes.
var builtInFormats = map[uint16]string{
	0:  `General`,
	1:  `0`,
	2:  `0.00`,
	3:  `#,##0`,
	4:  `#,##0.00`,
	9:  `0%`,
	10: `0.00%`,

	11: `0.00E+00`,
	12: `# ?/?`,
	13: `# ??/??`,
	14: `mm-dd-yy`,
	15: `d-mmm-yy`,
	16: `d-mmm`,
	17: `mmm-yy`,
	18: `h:mm AM/PM`,
	19: `h:mm:ss AM/PM`,
	20: `h:mm`,
	21: `h:mm:ss`,
	22: `m/d/yy h:mm`,
	37: `#,##0 ;(#,##0)`,
	38: `#,##0 ;[Red](#,##0)`,
	39: `#,##0.00;(#,##0.00)`,
	40: `#,##0.00;[Red](#,##0.00)`,

	41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`,
	42: `_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_)`,
	43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`,
	44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`,

	45: `mm:ss`,
	46: `[h]:mm:ss`,
	47: `mmss.0`,
	48: `##0.0E+0`,
	49: `@`,

	// zh-cn format codes
	27: `yyyy"年"m"月"`,
	28: `m"月"d"日"`,
	29: `m"月"d"日"`,
	30: `m-d-yy`,
	31: `yyyy"年"m"月"d"日"`,
	32: `h"时"mm"分"`,
	33: `h"时"mm"分"ss"秒"`,
	34: `上午/下午 h"时"mm"分"`,
	35: `上午/下午 h"时"mm"分"ss"秒"`,
	36: `yyyy"年"m"月"`,
	50: `yyyy"年"m"月"`,
	51: `m"月"d"日"`,
	52: `yyyy"年"m"月"`,
	53: `m"月"d"日"`,
	54: `m"月"d"日"`,
	55: `上午/下午 h"时"mm"分"`,
	56: `上午/下午 h"时"mm"分"ss"秒"`, // same code as 35; the closing quote after 秒 is required
	57: `yyyy"年"m"月"`,
	58: `m"月"d"日"`,

	// th-th format codes (in the spec these have a "t" prefix?)
	59: `0`,
	60: `0.00`,
	61: `#,##0`,
	62: `#,##0.00`,
	67: `0%`,
	68: `0.00%`,
	69: `# ?/?`,
	70: `# ??/??`,

	// th format code, but translated to aid the parser
	71: `d/m/yyyy`,      // `ว/ด/ปปปป`,
	72: `d-mmm-yy`,      // `ว-ดดด-ปป`,
	73: `d-mmm`,         // `ว-ดดด`,
	74: `mmm-yy`,        // `ดดด-ปป`,
	75: `h:mm`,          // `ช:นน`,
	76: `h:mm:ss`,       // `ช:นน:ทท`,
	77: `d/m/yyyy h:mm`, // `ว/ด/ปปปป ช:นน`,
	78: `mm:ss`,         // `นน:ทท`,
	79: `[h]:mm:ss`,     // `[ช]:นน:ทท`,
	80: `mm:ss.0`,       // `นน:ทท.0`,
	81: `d/m/bb`,        // `d/m/bb`,
}

// builtInFormatTypes maps each built-in XLS/XLSX number format ID to the
// underlying data type of the values it formats. Entries are listed in
// ascending ID order.
var builtInFormatTypes = map[uint16]CellType{
	// 0 has no defined type

	// plain numbers and percentages
	1:  IntegerCell,
	2:  FloatCell,
	3:  IntegerCell,
	4:  FloatCell,
	9:  FloatCell,
	10: FloatCell,
	11: FloatCell,
	12: FloatCell,
	13: FloatCell,

	// dates and times
	14: DateCell,
	15: DateCell,
	16: DateCell,
	17: DateCell,
	18: DateCell,
	19: DateCell,
	20: DateCell,
	21: DateCell,
	22: DateCell,
	27: DateCell,
	28: DateCell,
	29: DateCell,
	30: DateCell,
	31: DateCell,
	32: DateCell,
	33: DateCell,
	34: DateCell,
	35: DateCell,
	36: DateCell,

	// accounting-style numbers
	37: IntegerCell,
	38: IntegerCell,
	39: FloatCell,
	40: FloatCell,
	41: IntegerCell,
	42: IntegerCell,
	43: FloatCell,
	44: FloatCell,

	45: DateCell, // Durations?
	46: DateCell,
	47: DateCell,
	48: FloatCell,
	49: StringCell,

	50: DateCell,
	51: DateCell,
	52: DateCell,
	53: DateCell,
	54: DateCell,
	55: DateCell,
	56: DateCell,
	57: DateCell,
	58: DateCell,

	59: IntegerCell,
	60: FloatCell,
	61: IntegerCell,
	62: FloatCell,
	67: FloatCell,
	68: FloatCell,
	69: FloatCell,
	70: FloatCell,

	71: DateCell,
	72: DateCell,
	73: DateCell,
	74: DateCell,
	75: DateCell,
	76: DateCell,
	77: DateCell,
	78: DateCell,
	79: DateCell,
	80: DateCell,
	81: DateCell,
}


================================================
FILE: commonxl/frac_test.go
================================================
package commonxl

import (
	"math"
	"testing"
)

// testcaseFrac is one row of the fraction formatting table.
type testcaseFrac struct {
	v float64 // value to format
	s string  // expected rendered text
	n int     // argument passed to fracFmtFunc
}

// fracs lists the inputs and expected outputs exercised by TestFractions.
// The third field is handed to fracFmtFunc; judging by the expectations it
// presumably bounds the number of fraction digits (verify in fracFmtFunc).
var fracs = []testcaseFrac{
	{0, "0", 1},
	{0.5, "1/2", 1},
	{-0.5, "-1/2", 1},
	{0.125, "1/8", 1},

	{10, "10", 1},
	{-10, "-10", 1},
	{10.5, "10 1/2", 1},
	{-10.5, "-10 1/2", 1},

	{10.25, "10 1/4", 1},
	{10.75, "10 3/4", 1},
	{10.667, "10 2/3", 1},

	{-10.25, "-10 1/4", 1},
	{-10.75, "-10 3/4", 1},
	{-10.667, "-10 2/3", 1},

	{3.14159, "3 1/7", 1},
	{3.14159, "3 1/7", 2},
	{3.14159, "3 16/113", 3},
	{3.14159, "3 431/3044", 4},
	{3.14159, "3 3432/24239", 5},
	{3.14159, "3 14159/100000", 6},

	{math.Pi, "3 1/7", 1},
	{math.Pi, "3 1/7", 2},
	{math.Pi, "3 16/113", 3}, // err = 2.6e-7
	{math.Pi, "3 16/113", 4}, // better because 431/3044 err = 2.6e-6
	{math.Pi, "3 14093/99532", 5},
	{math.Pi, "3 14093/99532", 6},

	{-math.Pi, "-3 1/7", 1},
	{-math.Pi, "-3 1/7", 2},
	{-math.Pi, "-3 16/113", 3}, // err = 2.6e-7
	{-math.Pi, "-3 16/113", 4}, // better because 431/3044 err = 2.6e-6
	{-math.Pi, "-3 14093/99532", 5},
	{-math.Pi, "-3 14093/99532", 6},

	// TODO: fixed denominator fractions (e.g. "??/8" )
	// TODO: string interpolations (e.g. '0 "pounds and " ??/100 "pence"')
	// examples: https://bettersolutions.com/excel/formatting/number-tab-fractions.htm
}

// TestFractions formats every value in fracs with the formatter returned by
// fracFmtFunc and stops at the first mismatch.
func TestFractions(t *testing.T) {
	for i := range fracs {
		tc := fracs[i]
		got := fracFmtFunc(tc.n)(nil, tc.v)
		if got == tc.s {
			continue
		}
		t.Fatalf("fractions failed: got: '%s' expected: '%s' for %T(%v)",
			got, tc.s, tc.v, tc.v)
	}
}


================================================
FILE: commonxl/numbers.go
================================================
package commonxl

import (
	"math"
)

// DecimalToWholeFraction splits val into its integer part and a fraction
// approximating the remainder, using at most nn digits in the numerator and
// nd digits in the denominator.
//
// The fraction is computed from the absolute value of the remainder, so num
// is never negative; the sign is carried only by whole. An integral val
// yields the fraction 0/1.
func DecimalToWholeFraction(val float64, nn, nd int) (whole, num, den int) {
	intPart, frac := math.Modf(val)
	whole = int(intPart)
	if frac == 0.0 {
		return whole, 0, 1
	}
	num, den = DecimalToFraction(math.Abs(frac), nn, nd)
	return whole, num, den
}

// DecimalToFraction converts a floating point value into a fraction
// approximation with at most nn digits in the numerator and nd
// digits in the denominator. A zero nn or nd defaults to 2 digits.
//
// The approximation is computed on the magnitude of val using continued
// fraction convergents; the sign of val is applied to the returned
// numerator, so the denominator is always positive. When the next
// convergent would exceed the digit limits, the previous one is returned.
func DecimalToFraction(val float64, nn, nd int) (num, den int) {
	// http://web.archive.org/web/20111027100847/http://homepage.smc.edu/kennedy_john/DEC2FRAC.PDF
	sign := 1
	if val < 0 {
		sign = -1
	}
	// All arithmetic below works on the magnitude. Mixing the signed value
	// into the guards and the loop made every negative non-integer input
	// return -1/1e9.
	mag := math.Abs(val)

	if nn == 0 {
		nn = 2
	}
	if nd == 0 {
		nd = 2
	}
	maxn := math.Pow(10.0, float64(nn)) // numerator with nn digits
	maxd := math.Pow(10.0, float64(nd)) // denominator with nd digits

	_, fracPart := math.Modf(mag)
	if fracPart == 0.0 {
		return int(mag) * sign, 1
	}
	if fracPart < 1e-9 {
		return sign, int(1e9)
	}
	// NOTE(review): fracPart is always below 1, so this guard cannot fire;
	// it is kept unchanged pending a decision on the intended upper clamp.
	if fracPart > 1e9 {
		return int(1e9) * sign, 1
	}

	z := mag
	diff := 1.0
	denom := 1.0
	numer := 0.0
	var lastDenom, lastNumer float64
	for diff > 1e-10 && z != math.Floor(z) {
		z = 1 / (z - math.Floor(z))
		tmp := denom
		denom = (denom * math.Floor(z)) + lastDenom
		lastDenom = tmp
		lastNumer = numer
		numer = math.Round(mag * denom)
		if numer >= maxn || denom >= maxd {
			// too many digits: fall back to the previous convergent
			return sign * int(lastNumer), int(lastDenom)
		}
		diff = math.Abs(mag - (numer / denom))
	}
	return sign * int(numer), int(denom)
}


================================================
FILE: commonxl/sheet.go
================================================
package commonxl

import (
	"fmt"
	"log"
	"time"

	"github.com/pbnjay/grate"
)

// Sheet holds raw and rendered values for a spreadsheet.
type Sheet struct {
	Formatter *Formatter // number formatter used to type and render cell values
	NumRows   int        // row count, set by Resize and grown by Put when needed
	NumCols   int        // column count, set by Resize and grown by Put when needed
	Rows      [][]Cell   // cell storage indexed as Rows[row][col]

	CurRow int // iteration cursor advanced by Next; accessors read Rows[CurRow-1]
}

// Resize the sheet for the number of rows and cols given.
// Existing rows with index up to rows are widened to cols cells, and row
// slices are appended until len(s.Rows) exceeds rows. Non-positive sizes are
// clamped to 1, and the iteration cursor is reset.
// Newly added cells default to blank.
func (s *Sheet) Resize(rows, cols int) {
	for i := 0; i < len(s.Rows) && i <= rows; i++ {
		if missing := cols - len(s.Rows[i]); missing > 0 {
			s.Rows[i] = append(s.Rows[i], make([]Cell, missing)...)
		}
	}

	if rows < 1 {
		rows = 1
	}
	if cols < 1 {
		cols = 1
	}
	s.CurRow, s.NumRows, s.NumCols = 0, rows, cols

	for len(s.Rows) <= rows {
		s.Rows = append(s.Rows, make([]Cell, cols))
	}
}

// Put stores value at the given cell location and records fmtNum as its
// number format. A location outside the current bounds grows the sheet
// first. Merged-cell marker strings are stored as StaticCell and skip the
// format lookup.
func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) {
	if row >= s.NumRows || col >= s.NumCols {
		if grate.Debug {
			log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d",
				row, s.NumRows, col, s.NumCols)
		}

		// per the spec, this is an invalid Excel file
		// but we'll resize in place instead of crashing out
		if row >= s.NumRows {
			s.NumRows = row + 1
		}
		if col >= s.NumCols {
			s.NumCols = col + 1
		}
		s.Resize(s.NumRows, s.NumCols)
	}

	if marker, isString := value.(string); isString {
		switch marker {
		case grate.EndRowMerged, grate.EndColumnMerged,
			grate.ContinueRowMerged, grate.ContinueColumnMerged:
			s.Rows[row][col] = NewCell(value)
			s.Rows[row][col][1] = StaticCell
			return
		}
	}

	// Format 0 and unknown formats let NewCell infer the type.
	if ct, known := s.Formatter.getCellType(fmtNum); known && fmtNum != 0 {
		s.Rows[row][col] = NewCellWithType(value, ct, s.Formatter)
	} else {
		s.Rows[row][col] = NewCell(value)
	}
	s.Rows[row][col].SetFormatNumber(fmtNum)
}

// Set changes the value in an existing cell location and marks the cell as
// a StringCell. A location that does not exist in s.Rows is logged and
// ignored.
// NB Currently only used for populating string results for formulas.
func (s *Sheet) Set(row, col int, value interface{}) {
	// Check against the actual storage: comparing with NumRows/NumCols using
	// ">" let col == NumCols (and negative indexes) through to a panic.
	if row < 0 || row >= len(s.Rows) || col < 0 || col >= len(s.Rows[row]) {
		log.Println("grate: cell out of bounds")
		return
	}

	s.Rows[row][col][0] = value
	s.Rows[row][col][1] = StringCell
}

// SetURL adds a hyperlink to an existing cell location. A location that
// does not exist in s.Rows is logged and ignored.
func (s *Sheet) SetURL(row, col int, link string) {
	// Check against the actual storage: comparing with NumRows/NumCols using
	// ">" let col == NumCols (and negative indexes) through to a panic.
	if row < 0 || row >= len(s.Rows) || col < 0 || col >= len(s.Rows[row]) {
		log.Println("grate: cell out of bounds")
		return
	}

	s.Rows[row][col].SetURL(link)
}

// Next advances to the next record of content and reports whether one
// exists. It MUST be called prior to any Scan().
func (s *Sheet) Next() bool {
	if s.CurRow >= len(s.Rows) {
		return false
	}
	s.CurRow++
	return true
}

// Raw returns clones of the Cell values underlying the current row, in a
// slice of NumCols entries.
func (s *Sheet) Raw() []Cell {
	out := make([]Cell, s.NumCols)
	current := s.Rows[s.CurRow-1]
	for i := range current {
		out[i] = current[i].Clone()
	}
	return out
}

// Strings renders the current record as a list of strings. Blank cells
// become "", static cells are returned verbatim, and every other cell is
// rendered with its number format (falling back to fmt.Sprint when the
// format is unknown).
func (s *Sheet) Strings() []string {
	out := make([]string, s.NumCols)
	for i, cell := range s.Rows[s.CurRow-1] {
		switch cell.Type() {
		case BlankCell:
			out[i] = ""
		case StaticCell:
			out[i] = cell.Value().(string)
		default:
			val := cell.Value()
			if text, ok := s.Formatter.Apply(cell.FormatNo(), val); ok {
				out[i] = text
			} else {
				out[i] = fmt.Sprint(val)
			}
		}
	}
	return out
}

// Types extracts the data types from the current record into a list.
// options: "boolean", "integer", "float", "string", "date",
// and special cases: "blank", "hyperlink" which are string types
func (s *Sheet) Types() []string {
	names := make([]string, s.NumCols)
	current := s.Rows[s.CurRow-1]
	for i := range current {
		names[i] = current[i].Type().String()
	}
	return names
}

// Formats extracts the format code for the current record into a list.
// Cells whose format number is not a built-in format are reported by the
// number itself.
func (s *Sheet) Formats() []string {
	out := make([]string, s.NumCols)
	for i, cell := range s.Rows[s.CurRow-1] {
		id := cell.FormatNo()
		if code, known := builtInFormats[id]; known {
			out[i] = code
		} else {
			out[i] = fmt.Sprint(id)
		}
	}
	return out
}

// Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of 5 supported types:
//     bool, int64, float64, string, or time.Time
// The stored value must already have the destination's type; no conversion
// is attempted. A descriptive error is returned when a destination is not a
// pointer, has an unsupported type, does not match the cell's value type,
// or when more destinations are given than the row has cells.
func (s *Sheet) Scan(args ...interface{}) error {
	row := s.Rows[s.CurRow-1]
	if len(args) > len(row) {
		// Without this guard row[i] below would panic.
		return fmt.Errorf("grate: %d scan destinations given but the current row has only %d cells",
			len(args), len(row))
	}

	for i, a := range args {
		val := row[i].Value()

		switch v := a.(type) {
		case bool, int64, float64, string, time.Time:
			return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i)
		case *bool:
			if x, ok := val.(bool); ok {
				*v = x
			} else {
				return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val)
			}
		case *int64:
			if x, ok := val.(int64); ok {
				*v = x
			} else {
				return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val)
			}
		case *float64:
			if x, ok := val.(float64); ok {
				*v = x
			} else {
				return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val)
			}
		case *string:
			if x, ok := val.(string); ok {
				*v = x
			} else {
				return fmt.Errorf("scan destination %d expected *%T, not *string", i, val)
			}
		case *time.Time:
			if x, ok := val.(time.Time); ok {
				*v = x
			} else {
				return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val)
			}
		default:
			return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a)
		}
	}
	return nil
}

// IsEmpty reports whether the sheet holds no data values, i.e. it has at
// most one row and at most one column.
func (s *Sheet) IsEmpty() bool {
	return s.NumRows <= 1 && s.NumCols <= 1
}

// Err returns the last error that occurred.
// This implementation records no errors and always returns nil.
func (s *Sheet) Err() error {
	return nil
}


================================================
FILE: errs.go
================================================
package grate

import "errors"

var (
	// loglevel defaults to "warn" and can be configured at build time by
	// adding go build arguments:
	//   -ldflags="-X github.com/pbnjay/grate.loglevel=debug"
	loglevel string = "warn"

	// Debug should be set to true to expose detailed logging.
	// It is initialised to true when loglevel is "debug".
	Debug bool = (loglevel == "debug")
)

// ErrInvalidScanType is returned by Scan for invalid arguments.
// NOTE(review): the message lists *int while the Collection docs list
// int64 as the supported integer type; confirm which is intended.
var ErrInvalidScanType = errors.New("grate: Scan only supports *bool, *int, *float64, *string, *time.Time arguments")

// ErrNotInFormat is used to auto-detect file types using the defined OpenFunc.
// It is returned by OpenFunc when the code does not detect the correct file
// format; Open then moves on to the next registered format.
var ErrNotInFormat = errors.New("grate: file is not in this format")

// ErrUnknownFormat is used when grate does not know how to open a file format.
// Open returns it after every registered format has declined the file.
var ErrUnknownFormat = errors.New("grate: file format is not known/supported")

// errx is the error produced by WrapErr for two or more errors. The first
// error supplies the message and the second is exposed through Unwrap.
type errx struct {
	errs []error
}

// Error returns the message of the first error, or "" when there is none.
func (e errx) Error() string {
	if len(e.errs) == 0 {
		return ""
	}
	return e.errs[0].Error()
}

// Unwrap returns the second error so that errors.Is/errors.As can match it,
// or nil when there is no second error.
func (e errx) Unwrap() error {
	if len(e.errs) > 1 {
		return e.errs[1]
	}
	return nil
}

// WrapErr wraps a set of errors.
// With no arguments it returns nil, and a single error is returned as-is.
// Otherwise the result reports the first error's message and unwraps to the
// second error.
func WrapErr(e ...error) error {
	switch len(e) {
	case 0:
		// Previously this produced a non-nil error whose Error() panicked.
		return nil
	case 1:
		return e[0]
	}
	return errx{errs: e}
}


================================================
FILE: go.mod
================================================
module github.com/pbnjay/grate

go 1.16


================================================
FILE: grate.go
================================================
// Package grate opens tabular data files (such as spreadsheets and delimited plaintext files)
// and allows programmatic access to the data contents in a consistent interface.
package grate

import (
	"errors"
	"log"
	"sort"
)

// Source represents a set of data collections, such as the sheets of a
// workbook or the single table of a delimited text file.
type Source interface {
	// List the individual data tables within this source.
	List() ([]string, error)

	// Get a Collection from the source by name.
	Get(name string) (Collection, error)

	// Close the source and discard memory.
	Close() error
}

// Collection represents an iterable collection of records.
// Call Next to advance to a record, then read it with Strings, Types,
// Formats or Scan.
type Collection interface {
	// Next advances to the next record of content.
	// It MUST be called prior to any Scan().
	Next() bool

	// Strings extracts values from the current record into a list of strings.
	Strings() []string

	// Types extracts the data types from the current record into a list.
	// options: "boolean", "integer", "float", "string", "date",
	// and special cases: "blank", "hyperlink" which are string types
	Types() []string

	// Formats extracts the format codes for the current record into a list.
	Formats() []string

	// Scan extracts values from the current record into the provided arguments
	// Arguments must be pointers to one of 5 supported types:
	//     bool, int64, float64, string, or time.Time
	// If invalid, returns ErrInvalidScanType
	Scan(args ...interface{}) error

	// IsEmpty returns true if there are no data values.
	IsEmpty() bool

	// Err returns the last error that occurred.
	Err() error
}

// OpenFunc defines a Source's instantiation function.
// It should return ErrNotInFormat immediately if filename is not of the correct file type,
// which lets Open move on to the next registered format.
type OpenFunc func(filename string) (Source, error)

// Open a tabular data file and return a Source for accessing its contents.
// Registered formats are tried in srcTable order. The first opener that
// succeeds wins; an error other than ErrNotInFormat aborts the search, and
// ErrUnknownFormat is returned when every format declines the file.
func Open(filename string) (Source, error) {
	for _, entry := range srcTable {
		src, err := entry.op(filename)
		switch {
		case err == nil:
			return src, nil
		case !errors.Is(err, ErrNotInFormat):
			return nil, err
		}
		if Debug {
			log.Println(" ", filename, "is not in", entry.name, "format")
		}
	}
	return nil, ErrUnknownFormat
}

// srcOpenTab is one registered format in srcTable.
type srcOpenTab struct {
	name string   // format name, used in debug logging
	pri  int      // priority; srcTable is sorted by ascending pri
	op   OpenFunc // opener invoked by Open
}

// srcTable holds the registered formats; Register keeps it sorted by
// priority and Open walks it in order.
var srcTable = make([]*srcOpenTab, 0, 20)

// Register the named source as a grate datasource implementation.
// Formats with a lower priority value are tried first by Open.
// It always returns nil.
func Register(name string, priority int, opener OpenFunc) error {
	if Debug {
		log.Println("Registering the", name, "format at priority", priority)
	}

	entry := &srcOpenTab{name: name, pri: priority, op: opener}
	srcTable = append(srcTable, entry)

	byPriority := func(a, b int) bool {
		return srcTable[a].pri < srcTable[b].pri
	}
	sort.Slice(srcTable, byPriority)
	return nil
}

// Marker strings stored in place of a value for cells that belong to a
// merged range.
const (
	// ContinueColumnMerged marks a continuation column within a merged cell.
	ContinueColumnMerged = "→"
	// EndColumnMerged marks the last column of a merged cell.
	EndColumnMerged = "⇥"

	// ContinueRowMerged marks a continuation row within a merged cell.
	ContinueRowMerged = "↓"
	// EndRowMerged marks the last row of a merged cell.
	EndRowMerged = "⤓"
)


================================================
FILE: simple/csv.go
================================================
package simple

import (
	"encoding/csv"
	"io"
	"os"

	"github.com/pbnjay/grate"
)

// Register the CSV opener with grate at priority 15 when the package is loaded.
var _ = grate.Register("csv", 15, OpenCSV)

// OpenCSV defines a Source's instantiation function.
// It should return ErrNotInFormat immediately if filename is not of the correct file type.
//
// The whole file is read into memory. A CSV parse error, or any other read
// error within the first 10 records, is reported as ErrNotInFormat; later
// read errors are returned as-is. Reaching the end of the input is the
// normal way for reading to finish and is not an error.
func OpenCSV(filename string) (grate.Source, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	t := &simpleFile{
		filename: filename,
		iterRow:  -1,
	}

	s := csv.NewReader(f)
	s.FieldsPerRecord = -1 // rows may have differing numbers of fields

	total := 0
	ncols := make(map[int]int) // number of fields -> number of rows
	rec, err := s.Read()
	for ; err == nil; rec, err = s.Read() {
		ncols[len(rec)]++
		total++
		t.rows = append(t.rows, rec)
	}
	// The loop always ends with a non-nil error; io.EOF only signals that
	// the input is exhausted and must not be treated as a failure.
	if err != nil && err != io.EOF {
		switch perr := err.(type) {
		case *csv.ParseError:
			return nil, grate.WrapErr(perr, grate.ErrNotInFormat)
		}
		if total < 10 {
			// probably? not in this format
			return nil, grate.WrapErr(err, grate.ErrNotInFormat)
		}
		return nil, err
	}

	// kinda arbitrary metrics for detecting CSV
	looksGood := 0
	for c, n := range ncols {
		if c <= 1 {
			continue
		}
		if n > 10 && float64(n)/float64(total) > 0.8 {
			// more than 80% of rows have the same number of columns, we're good
			looksGood = 2
		} else if n > 25 && looksGood == 0 {
			looksGood = 1
		}
	}
	if looksGood == 1 {
		return t, grate.ErrNotInFormat
	}

	return t, nil
}


================================================
FILE: simple/simple.go
================================================
package simple

import (
	"errors"
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/pbnjay/grate"
)

// simpleFile represents a delimited text file held fully in memory. It is
// returned both as the Source and as its single Collection.
type simpleFile struct {
	filename string     // path the file was opened from
	rows     [][]string // every record, split into fields
	iterRow  int        // index of the current record; openers start it at -1
}

// List the individual data tables within this source. A simple file has
// exactly one table, named after the base name of the file.
func (t *simpleFile) List() ([]string, error) {
	name := filepath.Base(t.filename)
	return []string{name}, nil
}

// Close is a no-op that always returns nil.
func (t *simpleFile) Close() error {
	return nil
}

// Get a Collection from the source by name.
// The name is ignored: the file itself is its only collection.
func (t *simpleFile) Get(name string) (grate.Collection, error) {
	return t, nil
}

// Next advances to the next record of content and reports whether that
// record exists. It MUST be called prior to any Scan().
func (t *simpleFile) Next() bool {
	t.iterRow++
	hasRecord := t.iterRow < len(t.rows)
	return hasRecord
}

// Strings extracts values from the current record into a list of strings.
// The returned slice is the stored record itself, not a copy.
func (t *simpleFile) Strings() []string {
	return t.rows[t.iterRow]
}

// Formats extracts the format code for the current record into a list.
// Every field of a simple file is reported as "General".
func (t *simpleFile) Formats() []string {
	width := len(t.rows[t.iterRow])
	codes := make([]string, 0, width)
	for len(codes) < width {
		codes = append(codes, "General")
	}
	return codes
}

// Types extracts the data types from the current record into a list.
// A simple file only distinguishes "blank" (empty field) from "string".
func (t *simpleFile) Types() []string {
	record := t.rows[t.iterRow]
	kinds := make([]string, len(record))
	for i := range record {
		kind := "string"
		if record[i] == "" {
			kind = "blank"
		}
		kinds[i] = kind
	}
	return kinds
}

// Scan extracts values from the current record into the provided arguments.
// Exactly one destination per field is required. Supported destinations are
// *bool, *int, *float64 and *string; *time.Time is rejected with an error
// and any other type yields grate.ErrInvalidScanType. Numeric parse errors
// are returned after the destination has been assigned.
func (t *simpleFile) Scan(args ...interface{}) error {
	row := t.rows[t.iterRow]
	if len(row) != len(args) {
		return fmt.Errorf("grate/simple: expected %d Scan destinations, got %d", len(row), len(args))
	}

	for i, dest := range args {
		field := row[i]
		switch p := dest.(type) {
		case *bool:
			lower := strings.ToLower(field)
			*p = lower == "1" || lower == "t" || lower == "true" || lower == "y" || lower == "yes"
		case *int:
			n, err := strconv.ParseInt(field, 10, 64)
			*p = int(n)
			if err != nil {
				return err
			}
		case *float64:
			f, err := strconv.ParseFloat(field, 64)
			*p = f
			if err != nil {
				return err
			}
		case *string:
			*p = field
		case *time.Time:
			return errors.New("grate/simple: time.Time not supported, you must parse date strings manually")
		default:
			return grate.ErrInvalidScanType
		}
	}
	return nil
}

// IsEmpty returns true if there are no data values, i.e. no records were read.
func (t *simpleFile) IsEmpty() bool {
	return len(t.rows) == 0
}

// Err returns the last error that occurred.
// This implementation records no errors and always returns nil.
func (t *simpleFile) Err() error {
	return nil
}


================================================
FILE: simple/tsv.go
================================================
package simple

import (
	"bufio"
	"os"
	"strings"

	"github.com/pbnjay/grate"
)

// Register the TSV opener with grate at priority 10 when the package is loaded.
var _ = grate.Register("tsv", 10, OpenTSV)

// OpenTSV defines a Source's instantiation function.
// It should return ErrNotInFormat immediately if filename is not of the correct file type.
//
// The whole file is read line by line and split on tabs. Any error reported
// by the line scanner is returned unchanged.
func OpenTSV(filename string) (grate.Source, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	src := &simpleFile{filename: filename, iterRow: -1}
	widths := make(map[int]int) // number of fields -> number of lines
	lineCount := 0

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Split(scanner.Text(), "\t")
		src.rows = append(src.rows, fields)
		widths[len(fields)]++
		lineCount++
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}

	// kinda arbitrary metrics for detecting TSV
	verdict := 0
	for width, count := range widths {
		if width <= 1 {
			continue
		}
		switch {
		case count > 10 && float64(count)/float64(lineCount) > 0.8:
			// more than 80% of rows have the same number of columns, we're good
			verdict = 2
		case count > 25 && verdict == 0:
			verdict = 1
		}
	}
	if verdict == 1 {
		return src, grate.ErrNotInFormat
	}

	return src, nil
}


================================================
FILE: xls/cfb/cfb.go
================================================
// Package cfb implements the Microsoft Compound File Binary File Format.
package cfb

// https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b
// Note for myself:
//   Storage = Directory
//   Stream = File

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"log"
	"unicode/utf16"

	"github.com/pbnjay/grate"
)

// fullAssertions is a compile-time switch; presumably it enables extra
// validation of the compound file structure (confirm at its use sites).
const fullAssertions = true

// Special sector numbers; the trailing comments give the names used in the
// MS-CFB specification linked above.
const (
	secFree       uint32 = 0xFFFFFFFF // FREESECT
	secEndOfChain uint32 = 0xFFFFFFFE // ENDOFCHAIN
	secFAT        uint32 = 0xFFFFFFFD // FATSECT
	secDIFAT      uint32 = 0xFFFFFFFC // DIFSECT
	secReserved   uint32 = 0xFFFFFFFB
	secMaxRegular uint32 = 0xFFFFFFFA // MAXREGSECT
)

// header is the fixed-size structure at the beginning of a Compound File
// (offset 0). Document.load decodes it as little-endian data with
// encoding/binary, so the field order and sizes here define the on-disk
// layout (512 bytes in total).
type header struct {
	Signature                    uint64      // Identification signature for the compound file structure, and MUST be set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
	ClassID                      [2]uint64   // Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL).
	MinorVersion                 uint16      // Version number for nonbreaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004.
	MajorVersion                 uint16      // Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4).
	ByteOrder                    uint16      // This field MUST be set to 0xFFFE. This field is a byte order mark for all integer fields, specifying little-endian byte order.
	SectorShift                  uint16      // This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2.
	MiniSectorShift              uint16      // This field MUST be set to 0x0006. This field specifies the sector size of the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes.
	Reserved1                    [6]byte     // This field MUST be set to all zeroes.
	NumDirectorySectors          int32       // This integer field contains the count of the number of directory sectors in the compound file.
	NumFATSectors                int32       // This integer field contains the count of the number of FAT sectors in the compound file.
	FirstDirectorySectorLocation uint32      // This integer field contains the starting sector number for the directory stream.
	TransactionSignature         int32       // This integer field MAY contain a sequence number that is incremented every time the compound file is saved by an implementation that supports file transactions. This is the field that MUST be set to all zeroes if file transactions are not implemented.<1>
	MiniStreamCutoffSize         int32       // This integer field MUST be set to 0x00001000. This field specifies the maximum size of a user-defined data stream that is allocated from the mini FAT and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than or equal to this cutoff size must be allocated as normal sectors from the FAT.
	FirstMiniFATSectorLocation   uint32      // This integer field contains the starting sector number for the mini FAT.
	NumMiniFATSectors            int32       // This integer field contains the count of the number of mini FAT sectors in the compound file.
	FirstDIFATSectorLocation     uint32      // This integer field contains the starting sector number for the DIFAT.
	NumDIFATSectors              int32       // This integer field contains the count of the number of DIFAT sectors in the compound file.
	DIFAT                        [109]uint32 // This array of 32-bit integer fields contains the first 109 FAT sector locations of the compound file.
}

// objectType identifies what kind of object a directory entry describes.
type objectType byte

// Object types that can appear in the ObjectType field of a directory entry.
const (
	typeUnknown     objectType = 0x00
	typeStorage     objectType = 0x01
	typeStream      objectType = 0x02
	typeRootStorage objectType = 0x05
)

// directory is a single directory entry of a compound file. It is decoded
// with encoding/binary, so the field order and sizes define the on-disk
// layout (128 bytes per entry).
type directory struct {
	Name                   [32]uint16 // 32 utf16 characters
	NameByteLen            int16      // length of Name in bytes
	ObjectType             objectType
	ColorFlag              byte   // 0=red, 1=black
	LeftSiblingID          uint32 // stream ids
	RightSiblingID         uint32
	ChildID                uint32
	ClassID                [2]uint64 // GUID
	StateBits              uint32
	CreationTime           int64
	ModifiedTime           int64
	StartingSectorLocation int32
	StreamSize             uint64
}

// String returns the entry name decoded from UTF-16, without its trailing
// null terminator.
//
// An entry whose NameByteLen is negative, odd, or larger than the 64 bytes
// available in Name yields the placeholder "<invalid utf16 string>". An
// entry with a zero-length name yields the empty string.
func (d *directory) String() string {
	if d.NameByteLen < 0 || (d.NameByteLen&1) == 1 || d.NameByteLen > 64 {
		return "<invalid utf16 string>"
	}
	units := d.Name[:int(d.NameByteLen)/2]
	// trim off null terminator; guarding on the length keeps unused
	// (zero-length) entries from slicing out of range
	if n := len(units); n > 0 && units[n-1] == 0 {
		units = units[:n-1]
	}
	return string(utf16.Decode(units))
}

// Document represents a Compound File Binary Format document.
//
// The complete file is held in memory; the stream readers handed out by the
// document are built from sub-slices of that buffer.
type Document struct {
	// the entire file, loaded into memory
	data []byte

	// pre-parsed info
	header *header      // decoded file header
	dir    []*directory // directory entries collected by buildDirs

	// lookup tables for all the sectors
	fat     []uint32 // next-sector table for regular sectors
	minifat []uint32 // next-sector table for mini stream sectors

	// location and size of the mini stream, taken from the root storage entry
	ministreamstart uint32
	ministreamsize  uint32
}

// load reads the complete compound file from rx into memory, validates the
// header, and then parses the FAT, the mini FAT and the directory entries.
//
// It returns grate.ErrNotInFormat when the data does not start with a valid
// compound file header (including data too short to contain one), and a
// descriptive error when the header is recognised but its contents are
// inconsistent or point outside the file.
func (d *Document) load(rx io.ReadSeeker) error {
	var err error
	d.data, err = ioutil.ReadAll(rx)
	if err != nil {
		return err
	}
	br := bytes.NewReader(d.data)

	h := &header{}
	if err = binary.Read(br, binary.LittleEndian, h); err != nil {
		// too short to hold the fixed-size header, so not a compound file
		return grate.ErrNotInFormat
	}
	if h.Signature != 0xe11ab1a1e011cfd0 {
		return grate.ErrNotInFormat // errors.New("ole2: invalid format")
	}
	if h.ByteOrder != 0xFFFE {
		return grate.ErrNotInFormat //errors.New("ole2: invalid format")
	}
	if fullAssertions {
		if h.ClassID[0] != 0 || h.ClassID[1] != 0 {
			return grate.ErrNotInFormat //errors.New("ole2: invalid CLSID")
		}
		if h.MajorVersion != 3 && h.MajorVersion != 4 {
			return errors.New("ole2: unknown major version")
		}
		if h.MinorVersion != 0x3B && h.MinorVersion != 0x3E {
			log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
			//return errors.New("ole2: unknown minor version")
		}

		for _, v := range h.Reserved1 {
			if v != 0 {
				return errors.New("ole2: reserved section is non-zero")
			}
		}
		if h.MajorVersion == 3 {
			if h.SectorShift != 9 {
				return errors.New("ole2: invalid sector size")
			}
			if h.NumDirectorySectors != 0 {
				return errors.New("ole2: version 3 does not support directory sectors")
			}
		}
		if h.MajorVersion == 4 {
			if h.SectorShift != 12 {
				return errors.New("ole2: invalid sector size")
			}
		}
		if h.MiniSectorShift != 6 {
			return errors.New("ole2: invalid mini sector size")
		}
		if h.MiniStreamCutoffSize != 0x00001000 {
			return errors.New("ole2: invalid mini sector cutoff")
		}
	}
	d.header = h

	le := binary.LittleEndian
	errLoad := errors.New("xls/cfb: unable to load file")

	// each FAT entry is 4 bytes, so a sector holds (sector size / 4) of them
	numFATentries := (1 << (h.SectorShift - 2))
	secSize := int64(numFATentries) * 4

	// Upper bound on the number of sectors the file can contain. It caps the
	// pre-allocation below and detects sector chains that loop.
	maxSectors := len(d.data) >> h.SectorShift

	// The sector counts come straight from the file, so they are only used
	// as a capacity hint when they are plausible for a file of this size.
	sizeHint := func(n int32) int {
		if n < 0 || int(n) > maxSectors {
			return 0
		}
		return numFATentries * (1 + int(n))
	}
	d.fat = make([]uint32, 0, sizeHint(h.NumFATSectors))
	d.minifat = make([]uint32, 0, sizeHint(h.NumMiniFATSectors))

	// readSector returns the bytes of regular sector sid, or nil when the
	// sector does not lie completely inside the file.
	readSector := func(sid uint32) []byte {
		offs := (int64(sid) + 1) << h.SectorShift
		end := offs + secSize
		if end > int64(len(d.data)) {
			return nil
		}
		return d.data[offs:end]
	}
	// appendFAT appends every entry of FAT sector sid to d.fat.
	appendFAT := func(sid uint32) error {
		sector := readSector(sid)
		if sector == nil {
			return errLoad
		}
		for j := 0; j < numFATentries; j++ {
			d.fat = append(d.fat, le.Uint32(sector[4*j:]))
		}
		return nil
	}

	// step 1: read the DIFAT sector list
	for _, sid := range h.DIFAT {
		if sid == secFree {
			break
		}
		if err = appendFAT(sid); err != nil {
			return err
		}
	}
	if h.NumDIFATSectors > 0 {
		sid1 := h.FirstDIFATSectorLocation

		for n := 0; sid1 != secEndOfChain && sid1 != secFree; n++ {
			if n > maxSectors {
				// more links than sectors in the file: the chain loops
				return errLoad
			}
			difatSector := readSector(sid1)
			if difatSector == nil {
				return errLoad
			}

			// all but the last entry of a DIFAT sector name FAT sectors
			for i := 0; i < numFATentries-1; i++ {
				sid2 := le.Uint32(difatSector[4*i:])
				if sid2 == secFree || sid2 == secEndOfChain {
					continue
				}
				if err = appendFAT(sid2); err != nil {
					return err
				}
			}
			// the last entry chains to the next DIFAT sector
			sid1 = le.Uint32(difatSector[4*(numFATentries-1):])
		}
	}

	// step 2: read the mini FAT. Its sectors form an ordinary chain in the
	// FAT, starting at the location given in the header.
	sid := h.FirstMiniFATSectorLocation
	for n := 0; sid != secEndOfChain && sid != secFree; n++ {
		if n > maxSectors {
			return errLoad
		}
		sector := readSector(sid)
		if sector == nil {
			return errLoad
		}
		for j := 0; j < numFATentries; j++ {
			d.minifat = append(d.minifat, le.Uint32(sector[4*j:]))
		}

		// stop once the number of sectors announced by the header was read
		if h.NumMiniFATSectors > 0 && n+1 >= int(h.NumMiniFATSectors) {
			break
		}

		// chain the next mini FAT sector
		if int64(sid) >= int64(len(d.fat)) {
			return errLoad
		}
		sid = d.fat[sid]
	}

	// step 3: read the Directory Entries
	return d.buildDirs(br)
}

// buildDirs parses the directory entries of the document into d.dir and
// records the location and size of the mini stream from the root storage
// entry.
//
// The directory is a chain of sectors linked through the FAT; every sector
// holds (sector size / 128) entries. Unused entries (typeUnknown) are
// skipped. Parsing is best effort: a truncated sector or a chain that leaves
// the loaded FAT ends the scan without an error, keeping the entries found
// so far.
func (d *Document) buildDirs(br *bytes.Reader) error {
	h := d.header
	le := binary.LittleEndian

	const (
		direntSize       = 128        // encoded size of one directory entry
		maxRegularSector = 0xFFFFFFFA // largest id naming a real sector (MAXREGSECT)
	)
	entriesPerSector := (1 << h.SectorShift) / direntSize
	// no valid chain can be longer than the number of sectors in the file
	maxSectors := len(d.data) >> h.SectorShift

	// step 3: read the Directory
	sid := h.FirstDirectorySectorLocation
	for n := 0; sid <= maxRegularSector && n <= maxSectors; n++ {
		offs := (int64(sid) + 1) << int64(h.SectorShift)
		if _, err := br.Seek(offs, io.SeekStart); err != nil {
			return err
		}

		for j := 0; j < entriesPerSector; j++ {
			dirent := &directory{}
			if err := binary.Read(br, le, dirent); err != nil {
				// ran off the end of the file; keep what was parsed
				return nil
			}
			if d.header.MajorVersion == 3 {
				// mask out upper 32bits
				dirent.StreamSize = dirent.StreamSize & 0xFFFFFFFF
			}

			switch dirent.ObjectType {
			case typeRootStorage:
				d.ministreamstart = uint32(dirent.StartingSectorLocation)
				d.ministreamsize = uint32(dirent.StreamSize)
			case typeStorage:
				// storages are recorded but their hierarchy is not used
			case typeStream:
				// streams are opened lazily through Document.Open
			case typeUnknown:
				// unused slot
				continue
			}
			d.dir = append(d.dir, dirent)
		}

		// chain the next directory sector
		if int64(sid) >= int64(len(d.fat)) {
			break
		}
		sid = d.fat[sid]
	}

	return nil
}

// getStreamReader returns a reader over the regular (FAT-allocated) stream
// that starts at sector sid and is size bytes long.
//
// The reader is built from sub-slices of the in-memory file, so no stream
// data is copied. An error is returned when the sector chain leaves the file
// or the FAT, when it is longer than size allows, or when it ends before
// size bytes were collected.
func (d *Document) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
	errCorrupt := errors.New("ole2: corrupt data format")
	dataLen := int64(len(d.data))
	if size > uint64(dataLen) {
		// a stream cannot be larger than the file containing it; rejecting
		// it here also keeps a corrupt size from driving the allocation below
		return nil, errCorrupt
	}

	// NB streamData is a slice of slices of the raw data, so this is the
	// only allocation - for the (much smaller) list of sector slices
	streamData := make([][]byte, 1+(size>>d.header.SectorShift))

	x := 0
	secSize := int64(1) << int32(d.header.SectorShift)
	for sid != secEndOfChain && sid != secFree {
		offs := (int64(sid) + 1) << int64(d.header.SectorShift)
		if offs >= dataLen || x >= len(streamData) {
			return nil, errCorrupt
		}
		// tolerate a final sector that was cut short by the end of the file
		end := offs + secSize
		if end > dataLen {
			end = dataLen
		}
		slice := d.data[offs:end]
		if size < uint64(len(slice)) {
			slice = slice[:size]
			size = 0
		} else {
			size -= uint64(len(slice))
		}
		streamData[x] = slice
		if size == 0 {
			break
		}
		if int64(sid) >= int64(len(d.fat)) {
			return nil, errCorrupt
		}
		sid = d.fat[sid]
		x++
	}
	if size != 0 {
		return nil, errors.New("ole2: incomplete read")
	}

	return &SliceReader{Data: streamData}, nil
}

// getMiniStreamReader returns a reader over the mini stream object that
// starts at mini sector sid and is size bytes long.
//
// Mini sectors live inside the mini stream, which itself is a regular
// sector chain starting at d.ministreamstart. The function first collects
// the sectors of that container and then maps each mini sector of the
// requested stream onto them. The reader is built from sub-slices of the
// in-memory file, so no stream data is copied.
//
// An error is returned when a chain points outside the file, the FAT or the
// mini FAT. A mini sector chain that ends early is not reported; the reader
// is then shorter than size.
func (d *Document) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
	errCorrupt := errors.New("ole2: corrupt data format")
	dataLen := int64(len(d.data))
	if size > uint64(dataLen) {
		// a stream cannot be larger than the file containing it
		return nil, errCorrupt
	}
	secSize := int64(1) << int64(d.header.SectorShift)
	miniSecSize := int64(1) << int64(d.header.MiniSectorShift)
	maxSectors := dataLen >> int64(d.header.SectorShift)

	// TODO: move into a separate cache so we don't recalculate it each time
	var fatStreamData [][]byte
	for fsid := d.ministreamstart; fsid != secEndOfChain && fsid != secFree; {
		offs := (int64(fsid) + 1) << int64(d.header.SectorShift)
		if offs >= dataLen || int64(len(fatStreamData)) > maxSectors {
			// outside the file, or more links than sectors (a loop)
			return nil, errCorrupt
		}
		end := offs + secSize
		if end > dataLen {
			end = dataLen
		}
		fatStreamData = append(fatStreamData, d.data[offs:end])

		if int64(fsid) >= int64(len(d.fat)) {
			return nil, errCorrupt
		}
		fsid = d.fat[fsid]
	}

	// NB streamData is a slice of slices of the raw data, so this is the
	// only allocation - for the (much smaller) list of sector slices
	streamData := make([][]byte, 1+(size>>d.header.MiniSectorShift))

	x := 0
	for sid != secEndOfChain && sid != secFree {
		if x >= len(streamData) {
			return nil, errCorrupt
		}
		offs := int64(sid) << int64(d.header.MiniSectorShift)

		// so = container sector, si = byte offset inside that sector
		so, si := offs/secSize, offs%secSize
		if so >= int64(len(fatStreamData)) {
			return nil, errCorrupt
		}
		data := fatStreamData[so]
		if si >= int64(len(data)) {
			return nil, errCorrupt
		}
		end := si + miniSecSize
		if end > int64(len(data)) {
			end = int64(len(data))
		}

		slice := data[si:end]
		if size < uint64(len(slice)) {
			slice = slice[:size]
			size = 0
		} else {
			size -= uint64(len(slice))
		}
		streamData[x] = slice
		x++
		if size == 0 {
			break
		}
		if int64(sid) >= int64(len(d.minifat)) {
			return nil, errCorrupt
		}
		sid = d.minifat[sid]
	}

	return &SliceReader{Data: streamData}, nil
}


================================================
FILE: xls/cfb/interface.go
================================================
package cfb

import (
	"fmt"
	"io"
	"os"
)

// Open a Compound File Binary Format document.
//
// The file is read into memory completely and closed again before Open
// returns. Errors from opening the file or from parsing it are returned
// unchanged.
func Open(filename string) (*Document, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	// load copies the whole file into memory, so the handle is not needed
	// once it returns
	defer f.Close()

	d := &Document{}
	if err := d.load(f); err != nil {
		return nil, err
	}
	return d, nil
}

// List returns the names of all stream entries found in the document's
// directory, in directory order. The returned error is always nil.
func (d *Document) List() ([]string, error) {
	var names []string
	for i := range d.dir {
		if entry := d.dir[i]; entry.ObjectType == typeStream {
			names = append(names, entry.String())
		}
	}
	return names, nil
}

// Open returns a reader for the first stream entry whose name equals name.
//
// Streams smaller than the header's mini stream cutoff size are read from
// the mini stream, all others from regular sectors. An error is returned
// when no stream with that name exists.
func (d *Document) Open(name string) (io.ReadSeeker, error) {
	for _, e := range d.dir {
		if e.String() != name || e.ObjectType != typeStream {
			continue
		}
		start := uint32(e.StartingSectorLocation)
		switch {
		case e.StreamSize < uint64(d.header.MiniStreamCutoffSize):
			return d.getMiniStreamReader(start, e.StreamSize)
		case e.StreamSize != 0:
			return d.getStreamReader(start, e.StreamSize)
		}
	}
	return nil, fmt.Errorf("cfb: stream '%s' not found", name)
}


================================================
FILE: xls/cfb/simple_test.go
================================================
package cfb

import (
	"io"
	"io/ioutil"
	"log"
	"os"
	"testing"
)

// TestHeader checks that the test.xls fixture loads without error.
func TestHeader(t *testing.T) {
	f, err := os.Open("../../testdata/test.xls")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	d := &Document{}
	if err := d.load(f); err != nil {
		t.Fatal(err)
	}
}

// TestHeader2 checks that the test2.xls fixture loads without error.
func TestHeader2(t *testing.T) {
	f, err := os.Open("../../testdata/test2.xls")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	d := &Document{}
	if err := d.load(f); err != nil {
		t.Fatal(err)
	}
}

// TestHeader3 checks that the test3.xls fixture loads without error.
func TestHeader3(t *testing.T) {
	f, err := os.Open("../../testdata/test3.xls")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	d := &Document{}
	if err := d.load(f); err != nil {
		t.Fatal(err)
	}
}

// TestHeader4 loads the test4.xls fixture and reads two of its streams
// ("Workbook" and "\x05DocumentSummaryInformation") to the end.
func TestHeader4(t *testing.T) {
	f, err := os.Open("../../testdata/test4.xls")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	d := &Document{}
	if err := d.load(f); err != nil {
		t.Fatal(err)
	}

	log.Println(d.List())

	r, err := d.Open("Workbook")
	if err != nil {
		t.Fatal(err)
	}
	book, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	log.Println(len(book))

	r, err = d.Open("\x05DocumentSummaryInformation")
	if err != nil {
		t.Fatal(err)
	}
	data, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	log.Println(len(data))
}

// testSlices is the fixture for TestSliceReader: five slices of ten bytes
// each, holding the values 0 through 49 in order, so every byte equals its
// own offset in the combined stream.
var testSlices = [][]byte{
	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
	{10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
	{20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
	{30, 31, 32, 33, 34, 35, 36, 37, 38, 39},
	{40, 41, 42, 43, 44, 45, 46, 47, 48, 49},
}

// TestSliceReader reads testSlices through a SliceReader byte by byte and
// then checks that absolute and relative seeks land on the expected byte.
func TestSliceReader(t *testing.T) {
	sr := &SliceReader{
		Data: testSlices,
	}

	// readSequential drains the reader one byte at a time and checks that
	// every byte is exactly one larger than the byte before it.
	readSequential := func() {
		var uno, old [1]byte
		_, err := sr.Read(uno[:])
		for err == nil {
			old[0] = uno[0]
			_, err = sr.Read(uno[:])
			if err == nil && uno[0] != (old[0]+1) {
				t.Errorf("read data out of order new=%d, old=%d", uno[0], old[0])
			}
		}
	}

	// expectAt seeks and verifies the byte found at the new position.
	expectAt := func(offset int64, whence int, want byte) {
		var uno [1]byte
		if _, err := sr.Seek(offset, whence); err != nil {
			t.Errorf("seek(%d, %d) failed: %v", offset, whence, err)
			return
		}
		if _, err := sr.Read(uno[:]); err != nil {
			t.Errorf("read after seek(%d, %d) failed: %v", offset, whence, err)
			return
		}
		if uno[0] != want {
			t.Errorf("unexpected element %d (expected %d)", uno[0], want)
		}
	}

	readSequential()
	if _, err := sr.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}
	readSequential()

	expectAt(10, io.SeekStart, 10)
	expectAt(35, io.SeekStart, 35)
	// each expectAt consumes one byte, so the reader now sits at offset 36
	expectAt(7, io.SeekCurrent, 43)
	expectAt(-9, io.SeekCurrent, 35)
}


================================================
FILE: xls/cfb/slicereader.go
================================================
package cfb

import (
	"errors"
	"io"
)

// SliceReader wraps a list of slices as a io.ReadSeeker that
// can transparently merge them into a single coherent stream.
type SliceReader struct {
	// CSize caches the stream offset at which each slice of Data starts.
	// Seek builds it on first use and rebuilds it only when its length no
	// longer matches Data.
	CSize []int64
	// Data holds the pieces of the stream, in order.
	Data [][]byte
	// Index is the slice of Data the next Read starts in; a value of
	// len(Data) or more means the end of the stream was reached.
	Index uint
	// Offset is the read position inside Data[Index].
	Offset uint
}

// SliceReader must provide both halves of io.ReadSeeker.
var _ io.ReadSeeker = (*SliceReader)(nil)

// Read implements the io.Reader interface.
//
// A single call never crosses a slice boundary, so it may return fewer bytes
// than len(b) even when more data follows. Empty slices are skipped. Once
// all data was consumed it returns 0 and io.EOF.
func (s *SliceReader) Read(b []byte) (int, error) {
	if len(b) == 0 {
		return 0, nil
	}
	for s.Index < uint(len(s.Data)) {
		cur := s.Data[s.Index]
		if s.Offset < uint(len(cur)) {
			n := copy(b, cur[s.Offset:])
			s.Offset += uint(n)
			if s.Offset == uint(len(cur)) {
				s.Offset = 0
				s.Index++
			}
			return n, nil
		}
		// empty or already exhausted slice: move on to the next one
		s.Index++
		s.Offset = 0
	}
	return 0, io.EOF
}

// Seek implements the io.Seeker interface.
//
// Positions from the start of the stream up to and including its end are
// valid. A target outside that range, a negative offset with io.SeekStart,
// or a positive offset with io.SeekEnd returns -1 and an error and leaves
// the current position untouched. Any whence value other than io.SeekStart
// and io.SeekEnd is treated as io.SeekCurrent.
func (s *SliceReader) Seek(offset int64, whence int) (int64, error) {
	if len(s.CSize) != len(s.Data) {
		// calculate the cumulative block size cache
		s.CSize = make([]int64, len(s.Data))
		sz := int64(0)
		for i, d := range s.Data {
			s.CSize[i] = sz
			sz += int64(len(d))
		}
	}
	// total length of the stream
	var total int64
	if n := len(s.Data); n > 0 {
		total = s.CSize[n-1] + int64(len(s.Data[n-1]))
	}

	var base int64
	switch whence {
	case io.SeekStart:
		if offset < 0 {
			return -1, errors.New("xls: invalid seek offset")
		}

	case io.SeekEnd:
		if offset > 0 {
			return -1, errors.New("xls: invalid seek offset")
		}
		base = total

	default:
		// relative to the current offset in stream
		base = total
		if s.Index < uint(len(s.Data)) {
			base = s.CSize[s.Index] + int64(s.Offset)
		}
	}

	wantOffset := base + offset
	if wantOffset < 0 || wantOffset > total {
		return -1, errors.New("xls: invalid seek offset")
	}

	// Binary search for the first slice that ends after wantOffset. When
	// wantOffset is the end of the stream no such slice exists and the
	// search yields len(s.Data), which Read treats as EOF.
	lo, hi := 0, len(s.Data)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if s.CSize[mid]+int64(len(s.Data[mid])) > wantOffset {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	s.Index = uint(lo)
	s.Offset = 0
	if lo < len(s.Data) {
		s.Offset = uint(wantOffset - s.CSize[lo])
	}
	return wantOffset, nil
}


================================================
FILE: xls/comp_test.go
================================================
package xls

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestAllFiles opens every ".xls" file below ../testdata and iterates over
// all rows of all sheets. It fails on the first error reported by the walk,
// by opening a workbook, or by listing or fetching its sheets.
func TestAllFiles(t *testing.T) {
	err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error {
		if err != nil {
			// info may be nil when the walk itself failed (for example when
			// the testdata directory does not exist), so stop here
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".xls") {
			return nil
		}
		wb, err := Open(p)
		if err != nil {
			return err
		}

		sheets, err := wb.List()
		if err != nil {
			wb.Close()
			return err
		}
		for _, s := range sheets {
			sheet, err := wb.Get(s)
			if err != nil {
				wb.Close()
				return err
			}

			for sheet.Next() {
				sheet.Strings()
			}
		}

		return wb.Close()
	})
	if err != nil {
		t.Fatal(err)
	}
}


================================================
FILE: xls/crypto/crypto.go
================================================
// Package crypto implements excel encryption algorithms from the
// MS-OFFCRYPTO design specs. Currently only standard/basic RC4
// "obfuscation" is supported.
package crypto

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// Decryptor describes methods to decrypt an excel sheet.
//
// Encrypted bytes are handed over with Write, and the decrypted result is
// obtained with Read or Bytes.
type Decryptor interface {
	// SetPassword for the decryption.
	SetPassword(password []byte)

	// Read implements the io.Reader interface.
	Read(p []byte) (n int, err error)

	// Write implements the io.Writer interface.
	Write(p []byte) (n int, err error)

	// Bytes returns the decrypted data.
	Bytes() []byte

	// Flush tells the decryptor to decrypt the latest block.
	Flush()

	// Reset the decryptor, and clear all written and readable data.
	Reset()
}

// Algorithms designed based on specs in MS-OFFCRYPTO:
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/3c34d72a-1a61-4b52-a893-196f9157f083

// Important notes from MS-XLS section 2.2.10:
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06

// When obfuscating or encrypting BIFF records in these streams the record type and
// record size components MUST NOT be obfuscated or encrypted.
// In addition the following records MUST NOT be obfuscated or encrypted:
// BOF (section 2.4.21), FilePass (section 2.4.117), UsrExcl (section 2.4.339),
// FileLock (section 2.4.116), InterfaceHdr (section 2.4.146), RRDInfo (section 2.4.227),
// and RRDHead (section 2.4.226). Additionally, the lbPlyPos field of the BoundSheet8
// record (section 2.4.28) MUST NOT be encrypted.

// For RC4 encryption and RC4 CryptoAPI encryption, the Unicode password string is used
// to generate the encryption key as specified in [MS-OFFCRYPTO] section 2.3.6.2 or
// [MS-OFFCRYPTO] section 2.3.5.2 depending on the RC4 algorithm used. The record data
// is then encrypted by the specific RC4 algorithm in 1024-byte blocks. The block number
// is set to zero at the beginning of every BIFF record stream, and incremented by one
// at each 1024-byte boundary. Bytes to be encrypted are passed into the RC4 encryption
// function and then written to the stream. For unencrypted records and the record
// headers consisting of the record type and record size, a byte buffer of all zeros,
// of the same size as the section of unencrypted bytes, is passed into the RC4
// encryption function. The results are then ignored and the unencrypted bytes are
// written to the stream.

// Background on the default password, from note <100> of [MS-XLS] section
// 2.4.191: If the value of the wPassword field of the Password record in
// the Globals Substream is not 0x0000, Excel 97, Excel 2000, Excel 2002, Office Excel
// 2003, Office Excel 2007, and Excel 2010 encrypt the document as specified in [MS-OFFCRYPTO],
// section 2.3. If an encryption password is not specified or the workbook or sheet is only
// protected, the document is encrypted with the default password defined below.

// DefaultXLSPassword is the default Excel encryption password. The RC4
// decryptor falls back to it when no password was set before the first
// block is decrypted.
var DefaultXLSPassword = "VelvetSweatshop"

/////////////

// basicRC4Encryption is the RC4 encryption header described in
// [MS-OFFCRYPTO] section 2.3.6.1. NewBasicRC4 decodes it from 52 bytes of
// little-endian data, so the field order and sizes define the layout.
type basicRC4Encryption struct {
	MajorVersion uint16
	MinorVersion uint16
	Salt         [16]byte
	Verifier     [16]byte
	VerifierHash [16]byte
}

// NewBasicRC4 builds a decryptor for the standard RC4 scheme from the raw
// encryption header in data.
//
// The header must decode cleanly, have minor version 1 and be exactly 52
// bytes long; otherwise a nil Decryptor and an error are returned. When the
// header is accepted, the decryptor is returned together with the result of
// checking the header's verifier against its verifier hash.
func NewBasicRC4(data []byte) (Decryptor, error) {
	var hdr basicRC4Encryption
	if err := binary.Read(bytes.NewReader(data), binary.LittleEndian, &hdr); err != nil {
		return nil, err
	}
	if hdr.MinorVersion != 1 {
		return nil, fmt.Errorf("xls: unknown basic-RC4 minor version %d (%d byte record)",
			hdr.MinorVersion, len(data))
	}
	if len(data) != 52 {
		return nil, fmt.Errorf("xls: data length is invalid (expected 52 bytes, got %d)",
			len(data))
	}

	// keep a private copy of the salt
	salt := make([]byte, len(hdr.Salt))
	copy(salt, hdr.Salt[:])
	w := &rc4Writer{Salt: salt}

	return w, w.Verify(hdr.Verifier[:], hdr.VerifierHash[:])
}


================================================
FILE: xls/crypto/rc4.go
================================================
package crypto

import (
	"bytes"
	"crypto/md5"
	"crypto/rc4"
	"encoding/binary"
	"fmt"
)

// compile-time check that *rc4Writer implements Decryptor
var _ Decryptor = &rc4Writer{}

// Write implements the io.Writer interface. The encrypted bytes in data are
// collected in the 1024-byte block buffer; every time the buffer fills up it
// is decrypted via Flush. It always reports len(data) bytes written and a
// nil error.
func (d *rc4Writer) Write(data []byte) (n int, err error) {
	total := len(data)
	for rest := data; len(rest) > 0; {
		copied := copy(d.bytes[d.offset:], rest)
		rest = rest[copied:]
		d.offset += copied

		switch {
		case d.offset > 1024:
			panic("invalid offset from write")
		case d.offset == 1024:
			d.Flush()
		}
	}
	return total, nil
}

// Read implements the io.Reader interface by reading from the buffer of
// already decrypted data.
func (d *rc4Writer) Read(data []byte) (n int, err error) {
	n, err = d.buf.Read(data)
	return n, err
}

// Reset discards all decrypted output and any partially filled block, and
// restarts the block counter at 0.
func (d *rc4Writer) Reset() {
	d.buf.Reset()
	d.offset = 0
	d.block = 0
}

// Flush decrypts the current block and appends the result to the output
// buffer. A block that is not completely filled is padded with zero bytes
// for decryption; the padding is not written to the output. Afterwards the
// block buffer is empty and the block counter is advanced by one.
func (d *rc4Writer) Flush() {
	padding := 0
	if d.offset < 1024 {
		tail := d.bytes[d.offset:]
		for i := range tail {
			tail[i] = 0
		}
		padding = len(tail)
		d.offset += padding
	}
	if d.offset != 1024 {
		panic("invalid offset fill")
	}

	// decrypt in place, then keep only the bytes that were really written
	d.startBlock()
	d.dec.XORKeyStream(d.bytes[:], d.bytes[:])
	d.buf.Write(d.bytes[:1024-padding])

	d.block++
	d.offset = 0
}

// SetPassword stores the password, taking every byte of it as one
// character, and derives the encryption key from it and the salt.
func (d *rc4Writer) SetPassword(password []byte) {
	chars := make([]rune, 0, len(password))
	for _, b := range password {
		chars = append(chars, rune(b))
	}
	d.Password = chars

	// compute the first part of the encryption key and keep a private copy
	derived := generateStd97Key(d.Password, d.Salt)
	key := make([]byte, len(derived))
	copy(key, derived)
	d.encKey = key
}

// rc4Writer is the Decryptor for the basic RC4 scheme. Encrypted data is
// collected in 1024-byte blocks; every block is decrypted with its own key,
// derived from the encryption key and the block number.
type rc4Writer struct {
	block  uint32     // number of the block currently being filled
	offset int        // number of bytes of the current block held in bytes
	bytes  [1024]byte // buffer for the current block

	// records the decrypted data
	buf bytes.Buffer

	///////

	// decrypter for RC4 content streams
	dec *rc4.Cipher

	cipherKey []byte // H1 per 2.3.6.2
	encKey    []byte // Hfinal per 2.3.6.2

	Salt     []byte // salt taken from the encryption header
	Password []rune // password as set by SetPassword
}

// Bytes returns the decrypted data that has not been read yet.
func (d *rc4Writer) Bytes() []byte {
	decrypted := d.buf.Bytes()
	return decrypted
}

// Verify checks the key against the encrypted verifier and the encrypted
// verifier hash from the encryption header: both are decrypted with the key
// of block 0, and the MD5 sum of the decrypted verifier must equal the
// decrypted hash. The decryptor is reset before the check.
func (d *rc4Writer) Verify(everifier, everifierHash []byte) error {
	d.Reset()
	d.startBlock()

	// both values must go through the same cipher state, verifier first
	var verifier, wantHash [16]byte
	d.dec.XORKeyStream(verifier[:], everifier)
	d.dec.XORKeyStream(wantHash[:], everifierHash)

	if md5.Sum(verifier[:]) != wantHash {
		return fmt.Errorf("verification failed")
	}
	return nil
}

/////////////////////

// startBlock prepares the RC4 cipher for the current block. The block key is
// the MD5 sum of the first 5 bytes of the encryption key followed by the
// little-endian block number. When no password has been set yet,
// DefaultXLSPassword is used.
func (d *rc4Writer) startBlock() {
	if d.encKey == nil {
		d.SetPassword([]byte(DefaultXLSPassword))
	}

	key := make([]byte, 16)
	copy(key, d.encKey[:5])
	binary.LittleEndian.PutUint32(key[5:], d.block)
	d.cipherKey = key

	// only the first 9 bytes (5 key bytes + 4 block number bytes) are hashed
	blockKey := md5.Sum(key[:9])
	// the 16-byte digest is always a valid RC4 key size, so the error is
	// not expected to be set
	d.dec, _ = rc4.NewCipher(blockKey[:])
}

// generateStd97Key derives the 16-byte encryption key from a password and a
// 16-byte salt.
//
// The password is encoded as little-endian UTF-16 code units and hashed with
// MD5. The first 5 bytes of that hash followed by the salt are then fed 16
// times into a second MD5 hash, whose sum is the result.
//
// It panics when the password is empty or the salt is not 16 bytes long.
func generateStd97Key(passData []rune, salt []byte) []byte {
	if len(salt) != 16 || len(passData) == 0 {
		panic("invalid keygen material")
	}

	encoded := make([]byte, 2*len(passData))
	for i := range passData {
		binary.LittleEndian.PutUint16(encoded[2*i:2*i+2], uint16(passData[i]))
	}
	h0 := md5.Sum(encoded)

	// one round of input: truncated password hash followed by the salt
	unit := make([]byte, 0, 5+len(salt))
	unit = append(unit, h0[:5]...)
	unit = append(unit, salt...)

	digest := md5.New()
	for round := 0; round < 16; round++ {
		digest.Write(unit)
	}
	// return H1
	return digest.Sum(make([]byte, 0, md5.Size))
}


================================================
FILE: xls/hyperlinks.go
================================================
package xls

import (
	"encoding/binary"
	"errors"
	"fmt"
	"strings"
	"unicode/utf16"
)

// decodeHyperlinks parses a serialized hyperlink object and returns its
// display text and link target, both trimmed of surrounding whitespace and
// null characters.
//
// raw starts with a 16-byte class id, followed by the stream version (which
// must be 2), the flags, and the optional parts selected by the flags. An
// error is returned for an unknown version, an unknown moniker class id, or
// when raw is too short for the data it announces.
//
// NOTE(review): when both a moniker and a location string are present, the
// location replaces the moniker text instead of being combined with it —
// confirm that this is intended.
func decodeHyperlinks(raw []byte) (displayText, linkText string, err error) {
	errShort := errors.New("xls: truncated hyperlink record")

	// 16 bytes class id + 4 bytes version + 4 bytes flags
	if len(raw) < 24 {
		return "", "", errShort
	}
	raw = raw[16:] // skip classid
	if binary.LittleEndian.Uint32(raw[:4]) != 2 {
		return "", "", errors.New("xls: unknown hyperlink version")
	}
	flags := binary.LittleEndian.Uint32(raw[4:8])
	raw = raw[8:]

	// readString consumes one length-prefixed UTF-16 string from the front
	// of raw. The length counts 16-bit units and is checked against the
	// remaining data before anything is allocated.
	readString := func() (string, error) {
		if len(raw) < 4 {
			return "", errShort
		}
		n := uint64(binary.LittleEndian.Uint32(raw[:4]))
		raw = raw[4:]
		if 2*n > uint64(len(raw)) {
			return "", errShort
		}
		us := make([]uint16, n)
		for i := range us {
			us[i] = binary.LittleEndian.Uint16(raw[2*i:])
		}
		raw = raw[2*n:]
		return string(utf16.Decode(us)), nil
	}

	if (flags & hlstmfHasDisplayName) != 0 {
		if displayText, err = readString(); err != nil {
			return "", "", err
		}
	}

	if (flags & hlstmfHasFrameName) != 0 {
		// skip a HyperlinkString containing target Frame
		if _, err = readString(); err != nil {
			return "", "", err
		}
	}

	if (flags & hlstmfHasMoniker) != 0 {
		if (flags & hlstmfMonikerSavedAsStr) != 0 {
			// read HyperlinkString containing the URL
			if linkText, err = readString(); err != nil {
				return "", "", err
			}
		} else {
			var n int
			linkText, n, err = parseHyperlinkMoniker(raw)
			if err != nil {
				return "", "", err
			}
			raw = raw[n:]
		}
	}

	if (flags & hlstmfHasLocationStr) != 0 {
		if linkText, err = readString(); err != nil {
			return "", "", err
		}
	}

	linkText = strings.Trim(linkText, " \v\f\t\r\n\x00")
	displayText = strings.Trim(displayText, " \v\f\t\r\n\x00")
	return displayText, linkText, nil
}

// parseHyperlinkMoniker decodes the moniker at the start of raw. It returns
// the link text and the number of bytes of raw the moniker occupies.
//
// Two kinds of moniker, told apart by their 16-byte class id, are supported:
// URL monikers and file monikers. For a file moniker the unicode path is
// returned when present, the ANSI path otherwise. The text is returned as
// stored, including a trailing null character if there is one.
//
// An error is returned for any other class id, or when raw is too short for
// the data it announces; the returned length is 0 in that case.
func parseHyperlinkMoniker(raw []byte) (string, int, error) {
	errShort := errors.New("xls: truncated hyperlink moniker")
	if len(raw) < 16 {
		return "", 0, errShort
	}
	var classid [16]byte
	copy(classid[:], raw)
	no := 16

	urlMonikerClassID := [16]byte{0xE0, 0xC9, 0xEA, 0x79, 0xF9, 0xBA, 0xCE, 0x11, 0x8C, 0x82, 0x00, 0xAA, 0x00, 0x4B, 0xA9, 0x0B}
	fileMonikerClassID := [16]byte{0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}

	switch classid {
	case urlMonikerClassID:
		if len(raw)-no < 4 {
			return "", 0, errShort
		}
		// the stored length is in bytes; convert to 16-bit units
		length := binary.LittleEndian.Uint32(raw[no:]) / 2
		no += 4
		if 2*uint64(length) > uint64(len(raw)-no) {
			return "", 0, errShort
		}
		buf := make([]uint16, length)
		for i := range buf {
			buf[i] = binary.LittleEndian.Uint16(raw[no:])
			no += 2
		}
		// a null unit 13 positions from the end means the URL is followed
		// by 12 units (24 bytes) of extra data, which are dropped
		if length > 12 && buf[length-13] == 0 {
			buf = buf[:length-12]
		}
		return string(utf16.Decode(buf)), no, nil

	case fileMonikerClassID:
		// cAnti (2 bytes, unused) followed by ansiLength (4 bytes)
		if len(raw)-no < 6 {
			return "", 0, errShort
		}
		ansiLen := uint64(binary.LittleEndian.Uint32(raw[no+2:])) //ansiLength
		no += 6
		// the ANSI path, 24 bytes of misc fixed properties and the 4-byte
		// cbUnicodePathSize must all be present
		if ansiLen+24+4 > uint64(len(raw)-no) {
			return "", 0, errShort
		}
		ansiPath := raw[no : no+int(ansiLen)]

		// skip 24 more bytes for misc fixed properties
		no += int(ansiLen) + 24

		uniSize := binary.LittleEndian.Uint32(raw[no:]) // cbUnicodePathSize
		no += 4
		if uniSize == 0 {
			return string(ansiPath), no, nil
		}
		// the size covers a 6-byte prefix followed by the path itself
		if uniSize < 6 || uint64(uniSize) > uint64(len(raw)-no) {
			return "", 0, errShort
		}
		no += 6
		buf2 := make([]uint16, (uniSize-6)/2)
		for i := range buf2 {
			buf2[i] = binary.LittleEndian.Uint16(raw[no:])
			no += 2
		}
		return string(utf16.Decode(buf2)), no, nil
	}

	return "", 0, fmt.Errorf("xls: unknown moniker classid")
}

// HLink flags: bits of the flags field of a hyperlink object. They tell
// decodeHyperlinks which optional parts follow the fixed header.
const (
	hlstmfHasMoniker          = uint32(0x001)
	hlstmfIsAbsolute          = uint32(0x002)
	hlstmfSiteGaveDisplayName = uint32(0x004)
	hlstmfHasLocationStr      = uint32(0x008)
	hlstmfHasDisplayName      = uint32(0x010)
	hlstmfHasGUID             = uint32(0x020)
	hlstmfHasCreationTime     = uint32(0x040)
	hlstmfHasFrameName        = uint32(0x080)
	hlstmfMonikerSavedAsStr   = uint32(0x100)
	hlstmfAbsFromGetdataRel   = uint32(0x200)
)


================================================
FILE: xls/records.go
================================================
package xls

import "fmt"

// recordType is the 16-bit type identifier of an XLS record; the known
// values are the RecType constants declared below.
type recordType uint16

// Record types defined by the XLS specification document, section 2.3/2.4.
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/43684742-8fcd-4fcd-92df-157d8d7241f9
//
// Each constant holds the numeric type identifier of one record kind, and its
// trailing comment cites the specification section for that record. The list
// is kept in ascending numeric order.
//
// RecTypeUserSViewBegin and RecTypeUserSViewBeginChart share the value 426;
// the String method reports both as "UserSViewBegin[Chart] (426)".
const (
	RecTypeFormula              recordType = 6    // section 2.4.127
	RecTypeEOF                  recordType = 10   // section 2.4.103
	RecTypeCalcCount            recordType = 12   // section 2.4.31
	RecTypeCalcMode             recordType = 13   // section 2.4.34
	RecTypeCalcPrecision        recordType = 14   // section 2.4.35
	RecTypeCalcRefMode          recordType = 15   // section 2.4.36
	RecTypeCalcDelta            recordType = 16   // section 2.4.32
	RecTypeCalcIter             recordType = 17   // section 2.4.33
	RecTypeProtect              recordType = 18   // section 2.4.207
	RecTypePassword             recordType = 19   // section 2.4.191
	RecTypeHeader               recordType = 20   // section 2.4.136
	RecTypeFooter               recordType = 21   // section 2.4.124
	RecTypeExternSheet          recordType = 23   // section 2.4.106
	RecTypeLbl                  recordType = 24   // section 2.4.150
	RecTypeWinProtect           recordType = 25   // section 2.4.347
	RecTypeVerticalPageBreaks   recordType = 26   // section 2.4.343
	RecTypeHorizontalPageBreaks recordType = 27   // section 2.4.142
	RecTypeNote                 recordType = 28   // section 2.4.179
	RecTypeSelection            recordType = 29   // section 2.4.248
	RecTypeDate1904             recordType = 34   // section 2.4.77
	RecTypeExternName           recordType = 35   // section 2.4.105
	RecTypeLeftMargin           recordType = 38   // section 2.4.151
	RecTypeRightMargin          recordType = 39   // section 2.4.219
	RecTypeTopMargin            recordType = 40   // section 2.4.328
	RecTypeBottomMargin         recordType = 41   // section 2.4.27
	RecTypePrintRowCol          recordType = 42   // section 2.4.203
	RecTypePrintGrid            recordType = 43   // section 2.4.202
	RecTypeFilePass             recordType = 47   // section 2.4.117
	RecTypeFont                 recordType = 49   // section 2.4.122
	RecTypePrintSize            recordType = 51   // section 2.4.204
	RecTypeContinue             recordType = 60   // section 2.4.58
	RecTypeWindow1              recordType = 61   // section 2.4.345
	RecTypeBackup               recordType = 64   // section 2.4.14
	RecTypePane                 recordType = 65   // section 2.4.189
	RecTypeCodePage             recordType = 66   // section 2.4.52
	RecTypePls                  recordType = 77   // section 2.4.199
	RecTypeDCon                 recordType = 80   // section 2.4.82
	RecTypeDConRef              recordType = 81   // section 2.4.86
	RecTypeDConName             recordType = 82   // section 2.4.85
	RecTypeDefColWidth          recordType = 85   // section 2.4.89
	RecTypeXCT                  recordType = 89   // section 2.4.352
	RecTypeCRN                  recordType = 90   // section 2.4.65
	RecTypeFileSharing          recordType = 91   // section 2.4.118
	RecTypeWriteAccess          recordType = 92   // section 2.4.349
	RecTypeObj                  recordType = 93   // section 2.4.181
	RecTypeUncalced             recordType = 94   // section 2.4.331
	RecTypeCalcSaveRecalc       recordType = 95   // section 2.4.37
	RecTypeTemplate             recordType = 96   // section 2.4.323
	RecTypeIntl                 recordType = 97   // section 2.4.147
	RecTypeObjProtect           recordType = 99   // section 2.4.183
	RecTypeColInfo              recordType = 125  // section 2.4.53
	RecTypeGuts                 recordType = 128  // section 2.4.134
	RecTypeWsBool               recordType = 129  // section 2.4.351
	RecTypeGridSet              recordType = 130  // section 2.4.132
	RecTypeHCenter              recordType = 131  // section 2.4.135
	RecTypeVCenter              recordType = 132  // section 2.4.342
	RecTypeBoundSheet8          recordType = 133  // section 2.4.28
	RecTypeWriteProtect         recordType = 134  // section 2.4.350
	RecTypeCountry              recordType = 140  // section 2.4.63
	RecTypeHideObj              recordType = 141  // section 2.4.139
	RecTypeSort                 recordType = 144  // section 2.4.263
	RecTypePalette              recordType = 146  // section 2.4.188
	RecTypeSync                 recordType = 151  // section 2.4.318
	RecTypeLPr                  recordType = 152  // section 2.4.158
	RecTypeDxGCol               recordType = 153  // section 2.4.98
	RecTypeFnGroupName          recordType = 154  // section 2.4.120
	RecTypeFilterMode           recordType = 155  // section 2.4.119
	RecTypeBuiltInFnGroupCount  recordType = 156  // section 2.4.30
	RecTypeAutoFilterInfo       recordType = 157  // section 2.4.8
	RecTypeAutoFilter           recordType = 158  // section 2.4.6
	RecTypeScl                  recordType = 160  // section 2.4.247
	RecTypeSetup                recordType = 161  // section 2.4.257
	RecTypeScenMan              recordType = 174  // section 2.4.246
	RecTypeSCENARIO             recordType = 175  // section 2.4.244
	RecTypeSxView               recordType = 176  // section 2.4.313
	RecTypeSxvd                 recordType = 177  // section 2.4.309
	RecTypeSXVI                 recordType = 178  // section 2.4.312
	RecTypeSxIvd                recordType = 180  // section 2.4.292
	RecTypeSXLI                 recordType = 181  // section 2.4.293
	RecTypeSXPI                 recordType = 182  // section 2.4.298
	RecTypeDocRoute             recordType = 184  // section 2.4.91
	RecTypeRecipName            recordType = 185  // section 2.4.216
	RecTypeMulRk                recordType = 189  // section 2.4.175
	RecTypeMulBlank             recordType = 190  // section 2.4.174
	RecTypeMms                  recordType = 193  // section 2.4.169
	RecTypeSXDI                 recordType = 197  // section 2.4.278
	RecTypeSXDB                 recordType = 198  // section 2.4.275
	RecTypeSXFDB                recordType = 199  // section 2.4.283
	RecTypeSXDBB                recordType = 200  // section 2.4.276
	RecTypeSXNum                recordType = 201  // section 2.4.296
	RecTypeSxBool               recordType = 202  // section 2.4.274
	RecTypeSxErr                recordType = 203  // section 2.4.281
	RecTypeSXInt                recordType = 204  // section 2.4.289
	RecTypeSXString             recordType = 205  // section 2.4.304
	RecTypeSXDtr                recordType = 206  // section 2.4.279
	RecTypeSxNil                recordType = 207  // section 2.4.295
	RecTypeSXTbl                recordType = 208  // section 2.4.305
	RecTypeSXTBRGIITM           recordType = 209  // section 2.4.307
	RecTypeSxTbpg               recordType = 210  // section 2.4.306
	RecTypeObProj               recordType = 211  // section 2.4.185
	RecTypeSXStreamID           recordType = 213  // section 2.4.303
	RecTypeDBCell               recordType = 215  // section 2.4.78
	RecTypeSXRng                recordType = 216  // section 2.4.300
	RecTypeSxIsxoper            recordType = 217  // section 2.4.290
	RecTypeBookBool             recordType = 218  // section 2.4.22
	RecTypeDbOrParamQry         recordType = 220  // section 2.4.79
	RecTypeScenarioProtect      recordType = 221  // section 2.4.245
	RecTypeOleObjectSize        recordType = 222  // section 2.4.187
	RecTypeXF                   recordType = 224  // section 2.4.353
	RecTypeInterfaceHdr         recordType = 225  // section 2.4.146
	RecTypeInterfaceEnd         recordType = 226  // section 2.4.145
	RecTypeSXVS                 recordType = 227  // section 2.4.317
	RecTypeMergeCells           recordType = 229  // section 2.4.168
	RecTypeBkHim                recordType = 233  // section 2.4.19
	RecTypeMsoDrawingGroup      recordType = 235  // section 2.4.171
	RecTypeMsoDrawing           recordType = 236  // section 2.4.170
	RecTypeMsoDrawingSelection  recordType = 237  // section 2.4.172
	RecTypePhoneticInfo         recordType = 239  // section 2.4.192
	RecTypeSxRule               recordType = 240  // section 2.4.301
	RecTypeSXEx                 recordType = 241  // section 2.4.282
	RecTypeSxFilt               recordType = 242  // section 2.4.285
	RecTypeSxDXF                recordType = 244  // section 2.4.280
	RecTypeSxItm                recordType = 245  // section 2.4.291
	RecTypeSxName               recordType = 246  // section 2.4.294
	RecTypeSxSelect             recordType = 247  // section 2.4.302
	RecTypeSXPair               recordType = 248  // section 2.4.297
	RecTypeSxFmla               recordType = 249  // section 2.4.286
	RecTypeSxFormat             recordType = 251  // section 2.4.287
	RecTypeSST                  recordType = 252  // section 2.4.265
	RecTypeLabelSst             recordType = 253  // section 2.4.149
	RecTypeExtSST               recordType = 255  // section 2.4.107
	RecTypeSXVDEx               recordType = 256  // section 2.4.310
	RecTypeSXFormula            recordType = 259  // section 2.4.288
	RecTypeSXDBEx               recordType = 290  // section 2.4.277
	RecTypeRRDInsDel            recordType = 311  // section 2.4.228
	RecTypeRRDHead              recordType = 312  // section 2.4.226
	RecTypeRRDChgCell           recordType = 315  // section 2.4.223
	RecTypeRRTabID              recordType = 317  // section 2.4.241
	RecTypeRRDRenSheet          recordType = 318  // section 2.4.234
	RecTypeRRSort               recordType = 319  // section 2.4.240
	RecTypeRRDMove              recordType = 320  // section 2.4.231
	RecTypeRRFormat             recordType = 330  // section 2.4.238
	RecTypeRRAutoFmt            recordType = 331  // section 2.4.222
	RecTypeRRInsertSh           recordType = 333  // section 2.4.239
	RecTypeRRDMoveBegin         recordType = 334  // section 2.4.232
	RecTypeRRDMoveEnd           recordType = 335  // section 2.4.233
	RecTypeRRDInsDelBegin       recordType = 336  // section 2.4.229
	RecTypeRRDInsDelEnd         recordType = 337  // section 2.4.230
	RecTypeRRDConflict          recordType = 338  // section 2.4.224
	RecTypeRRDDefName           recordType = 339  // section 2.4.225
	RecTypeRRDRstEtxp           recordType = 340  // section 2.4.235
	RecTypeLRng                 recordType = 351  // section 2.4.159
	RecTypeUsesELFs             recordType = 352  // section 2.4.337
	RecTypeDSF                  recordType = 353  // section 2.4.94
	RecTypeCUsr                 recordType = 401  // section 2.4.72
	RecTypeCbUsr                recordType = 402  // section 2.4.40
	RecTypeUsrInfo              recordType = 403  // section 2.4.340
	RecTypeUsrExcl              recordType = 404  // section 2.4.339
	RecTypeFileLock             recordType = 405  // section 2.4.116
	RecTypeRRDInfo              recordType = 406  // section 2.4.227
	RecTypeBCUsrs               recordType = 407  // section 2.4.16
	RecTypeUsrChk               recordType = 408  // section 2.4.338
	RecTypeUserBView            recordType = 425  // section 2.4.333
	RecTypeUserSViewBegin       recordType = 426  // section 2.4.334
	RecTypeUserSViewBeginChart  recordType = 426  // section 2.4.335 (same value as RecTypeUserSViewBegin)
	RecTypeUserSViewEnd         recordType = 427  // section 2.4.336
	RecTypeRRDUserView          recordType = 428  // section 2.4.237
	RecTypeQsi                  recordType = 429  // section 2.4.208
	RecTypeSupBook              recordType = 430  // section 2.4.271
	RecTypeProt4Rev             recordType = 431  // section 2.4.205
	RecTypeCondFmt              recordType = 432  // section 2.4.56
	RecTypeCF                   recordType = 433  // section 2.4.42
	RecTypeDVal                 recordType = 434  // section 2.4.96
	RecTypeDConBin              recordType = 437  // section 2.4.83
	RecTypeTxO                  recordType = 438  // section 2.4.329
	RecTypeRefreshAll           recordType = 439  // section 2.4.217
	RecTypeHLink                recordType = 440  // section 2.4.140
	RecTypeLel                  recordType = 441  // section 2.4.154
	RecTypeCodeName             recordType = 442  // section 2.4.51
	RecTypeSXFDBType            recordType = 443  // section 2.4.284
	RecTypeProt4RevPass         recordType = 444  // section 2.4.206
	RecTypeObNoMacros           recordType = 445  // section 2.4.184
	RecTypeDv                   recordType = 446  // section 2.4.95
	RecTypeExcel9File           recordType = 448  // section 2.4.104
	RecTypeRecalcID             recordType = 449  // section 2.4.215
	RecTypeEntExU2              recordType = 450  // section 2.4.102
	RecTypeDimensions           recordType = 512  // section 2.4.90
	RecTypeBlank                recordType = 513  // section 2.4.20
	RecTypeNumber               recordType = 515  // section 2.4.180
	RecTypeLabel                recordType = 516  // section 2.4.148
	RecTypeBoolErr              recordType = 517  // section 2.4.24
	RecTypeString               recordType = 519  // section 2.4.268
	RecTypeRow                  recordType = 520  // section 2.4.221
	RecTypeIndex                recordType = 523  // section 2.4.144
	RecTypeArray                recordType = 545  // section 2.4.4
	RecTypeDefaultRowHeight     recordType = 549  // section 2.4.87
	RecTypeTable                recordType = 566  // section 2.4.319
	RecTypeWindow2              recordType = 574  // section 2.4.346
	RecTypeRK                   recordType = 638  // section 2.4.220
	RecTypeStyle                recordType = 659  // section 2.4.269
	RecTypeBigName              recordType = 1048 // section 2.4.18
	RecTypeFormat               recordType = 1054 // section 2.4.126
	RecTypeContinueBigName      recordType = 1084 // section 2.4.59
	RecTypeShrFmla              recordType = 1212 // section 2.4.260
	RecTypeHLinkTooltip         recordType = 2048 // section 2.4.141
	RecTypeWebPub               recordType = 2049 // section 2.4.344
	RecTypeQsiSXTag             recordType = 2050 // section 2.4.211
	RecTypeDBQueryExt           recordType = 2051 // section 2.4.81
	RecTypeExtString            recordType = 2052 // section 2.4.108
	RecTypeTxtQry               recordType = 2053 // section 2.4.330
	RecTypeQsir                 recordType = 2054 // section 2.4.210
	RecTypeQsif                 recordType = 2055 // section 2.4.209
	RecTypeRRDTQSIF             recordType = 2056 // section 2.4.236
	RecTypeBOF                  recordType = 2057 // section 2.4.21
	RecTypeOleDbConn            recordType = 2058 // section 2.4.186
	RecTypeWOpt                 recordType = 2059 // section 2.4.348
	RecTypeSXViewEx             recordType = 2060 // section 2.4.314
	RecTypeSXTH                 recordType = 2061 // section 2.4.308
	RecTypeSXPIEx               recordType = 2062 // section 2.4.299
	RecTypeSXVDTEx              recordType = 2063 // section 2.4.311
	RecTypeSXViewEx9            recordType = 2064 // section 2.4.315
	RecTypeContinueFrt          recordType = 2066 // section 2.4.60
	RecTypeRealTimeData         recordType = 2067 // section 2.4.214
	RecTypeChartFrtInfo         recordType = 2128 // section 2.4.49
	RecTypeFrtWrapper           recordType = 2129 // section 2.4.130
	RecTypeStartBlock           recordType = 2130 // section 2.4.266
	RecTypeEndBlock             recordType = 2131 // section 2.4.100
	RecTypeStartObject          recordType = 2132 // section 2.4.267
	RecTypeEndObject            recordType = 2133 // section 2.4.101
	RecTypeCatLab               recordType = 2134 // section 2.4.38
	RecTypeYMult                recordType = 2135 // section 2.4.356
	RecTypeSXViewLink           recordType = 2136 // section 2.4.316
	RecTypePivotChartBits       recordType = 2137 // section 2.4.196
	RecTypeFrtFontList          recordType = 2138 // section 2.4.129
	RecTypeSheetExt             recordType = 2146 // section 2.4.259
	RecTypeBookExt              recordType = 2147 // section 2.4.23
	RecTypeSXAddl               recordType = 2148 // section 2.4.273.2
	RecTypeCrErr                recordType = 2149 // section 2.4.64
	RecTypeHFPicture            recordType = 2150 // section 2.4.138
	RecTypeFeatHdr              recordType = 2151 // section 2.4.112
	RecTypeFeat                 recordType = 2152 // section 2.4.111
	RecTypeDataLabExt           recordType = 2154 // section 2.4.75
	RecTypeDataLabExtContents   recordType = 2155 // section 2.4.76
	RecTypeCellWatch            recordType = 2156 // section 2.4.41
	RecTypeFeatHdr11            recordType = 2161 // section 2.4.113
	RecTypeFeature11            recordType = 2162 // section 2.4.114
	RecTypeDropDownObjIds       recordType = 2164 // section 2.4.93
	RecTypeContinueFrt11        recordType = 2165 // section 2.4.61
	RecTypeDConn                recordType = 2166 // section 2.4.84
	RecTypeList12               recordType = 2167 // section 2.4.157
	RecTypeFeature12            recordType = 2168 // section 2.4.115
	RecTypeCondFmt12            recordType = 2169 // section 2.4.57
	RecTypeCF12                 recordType = 2170 // section 2.4.43
	RecTypeCFEx                 recordType = 2171 // section 2.4.44
	RecTypeXFCRC                recordType = 2172 // section 2.4.354
	RecTypeXFExt                recordType = 2173 // section 2.4.355
	RecTypeAutoFilter12         recordType = 2174 // section 2.4.7
	RecTypeContinueFrt12        recordType = 2175 // section 2.4.62
	RecTypeMDTInfo              recordType = 2180 // section 2.4.162
	RecTypeMDXStr               recordType = 2181 // section 2.4.166
	RecTypeMDXTuple             recordType = 2182 // section 2.4.167
	RecTypeMDXSet               recordType = 2183 // section 2.4.165
	RecTypeMDXProp              recordType = 2184 // section 2.4.164
	RecTypeMDXKPI               recordType = 2185 // section 2.4.163
	RecTypeMDB                  recordType = 2186 // section 2.4.161
	RecTypePLV                  recordType = 2187 // section 2.4.200
	RecTypeCompat12             recordType = 2188 // section 2.4.54
	RecTypeDXF                  recordType = 2189 // section 2.4.97
	RecTypeTableStyles          recordType = 2190 // section 2.4.322
	RecTypeTableStyle           recordType = 2191 // section 2.4.320
	RecTypeTableStyleElement    recordType = 2192 // section 2.4.321
	RecTypeStyleExt             recordType = 2194 // section 2.4.270
	RecTypeNamePublish          recordType = 2195 // section 2.4.178
	RecTypeNameCmt              recordType = 2196 // section 2.4.176
	RecTypeSortData             recordType = 2197 // section 2.4.264
	RecTypeTheme                recordType = 2198 // section 2.4.326
	RecTypeGUIDTypeLib          recordType = 2199 // section 2.4.133
	RecTypeFnGrp12              recordType = 2200 // section 2.4.121
	RecTypeNameFnGrp12          recordType = 2201 // section 2.4.177
	RecTypeMTRSettings          recordType = 2202 // section 2.4.173
	RecTypeCompressPictures     recordType = 2203 // section 2.4.55
	RecTypeHeaderFooter         recordType = 2204 // section 2.4.137
	RecTypeCrtLayout12          recordType = 2205 // section 2.4.66
	RecTypeCrtMlFrt             recordType = 2206 // section 2.4.70
	RecTypeCrtMlFrtContinue     recordType = 2207 // section 2.4.71
	RecTypeForceFullCalculation recordType = 2211 // section 2.4.125
	RecTypeShapePropsStream     recordType = 2212 // section 2.4.258
	RecTypeTextPropsStream      recordType = 2213 // section 2.4.325
	RecTypeRichTextStream       recordType = 2214 // section 2.4.218
	RecTypeCrtLayout12A         recordType = 2215 // section 2.4.67
	RecTypeUnits                recordType = 4097 // section 2.4.332
	RecTypeChart                recordType = 4098 // section 2.4.45
	RecTypeSeries               recordType = 4099 // section 2.4.252
	RecTypeDataFormat           recordType = 4102 // section 2.4.74
	RecTypeLineFormat           recordType = 4103 // section 2.4.156
	RecTypeMarkerFormat         recordType = 4105 // section 2.4.160
	RecTypeAreaFormat           recordType = 4106 // section 2.4.3
	RecTypePieFormat            recordType = 4107 // section 2.4.195
	RecTypeAttachedLabel        recordType = 4108 // section 2.4.5
	RecTypeSeriesText           recordType = 4109 // section 2.4.254
	RecTypeChartFormat          recordType = 4116 // section 2.4.48
	RecTypeLegend               recordType = 4117 // section 2.4.152
	RecTypeSeriesList           recordType = 4118 // section 2.4.253
	RecTypeBar                  recordType = 4119 // section 2.4.15
	RecTypeLine                 recordType = 4120 // section 2.4.155
	RecTypePie                  recordType = 4121 // section 2.4.194
	RecTypeArea                 recordType = 4122 // section 2.4.2
	RecTypeScatter              recordType = 4123 // section 2.4.243
	RecTypeCrtLine              recordType = 4124 // section 2.4.68
	RecTypeAxis                 recordType = 4125 // section 2.4.11
	RecTypeTick                 recordType = 4126 // section 2.4.327
	RecTypeValueRange           recordType = 4127 // section 2.4.341
	RecTypeCatSerRange          recordType = 4128 // section 2.4.39
	RecTypeAxisLine             recordType = 4129 // section 2.4.12
	RecTypeCrtLink              recordType = 4130 // section 2.4.69
	RecTypeDefaultText          recordType = 4132 // section 2.4.88
	RecTypeText                 recordType = 4133 // section 2.4.324
	RecTypeFontX                recordType = 4134 // section 2.4.123
	RecTypeObjectLink           recordType = 4135 // section 2.4.182
	RecTypeFrame                recordType = 4146 // section 2.4.128
	RecTypeBegin                recordType = 4147 // section 2.4.17
	RecTypeEnd                  recordType = 4148 // section 2.4.99
	RecTypePlotArea             recordType = 4149 // section 2.4.197
	RecTypeChart3d              recordType = 4154 // section 2.4.46
	RecTypePicF                 recordType = 4156 // section 2.4.193
	RecTypeDropBar              recordType = 4157 // section 2.4.92
	RecTypeRadar                recordType = 4158 // section 2.4.212
	RecTypeSurf                 recordType = 4159 // section 2.4.272
	RecTypeRadarArea            recordType = 4160 // section 2.4.213
	RecTypeAxisParent           recordType = 4161 // section 2.4.13
	RecTypeLegendException      recordType = 4163 // section 2.4.153
	RecTypeShtProps             recordType = 4164 // section 2.4.261
	RecTypeSerToCrt             recordType = 4165 // section 2.4.256
	RecTypeAxesUsed             recordType = 4166 // section 2.4.10
	RecTypeSBaseRef             recordType = 4168 // section 2.4.242
	RecTypeSerParent            recordType = 4170 // section 2.4.255
	RecTypeSerAuxTrend          recordType = 4171 // section 2.4.250
	RecTypeIFmtRecord           recordType = 4174 // section 2.4.143
	RecTypePos                  recordType = 4175 // section 2.4.201
	RecTypeAlRuns               recordType = 4176 // section 2.4.1
	RecTypeBRAI                 recordType = 4177 // section 2.4.29
	RecTypeSerAuxErrBar         recordType = 4187 // section 2.4.249
	RecTypeClrtClient           recordType = 4188 // section 2.4.50
	RecTypeSerFmt               recordType = 4189 // section 2.4.251
	RecTypeChart3DBarShape      recordType = 4191 // section 2.4.47
	RecTypeFbi                  recordType = 4192 // section 2.4.109
	RecTypeBopPop               recordType = 4193 // section 2.4.25
	RecTypeAxcExt               recordType = 4194 // section 2.4.9
	RecTypeDat                  recordType = 4195 // section 2.4.73
	RecTypePlotGrowth           recordType = 4196 // section 2.4.198
	RecTypeSIIndex              recordType = 4197 // section 2.4.262
	RecTypeGelFrame             recordType = 4198 // section 2.4.131
	RecTypeBopPopCustom         recordType = 4199 // section 2.4.26
	RecTypeFbi2                 recordType = 4200 // section 2.4.110
)

func (r recordType) String() string {
	switch r {
	case RecTypeFormula:
		return "Formula (6)"
	case RecTypeEOF:
		return "EOF (10)"
	case RecTypeCalcCount:
		return "CalcCount (12)"
	case RecTypeCalcMode:
		return "CalcMode (13)"
	case RecTypeCalcPrecision:
		return "CalcPrecision (14)"
	case RecTypeCalcRefMode:
		return "CalcRefMode (15)"
	case RecTypeCalcDelta:
		return "CalcDelta (16)"
	case RecTypeCalcIter:
		return "CalcIter (17)"
	case RecTypeProtect:
		return "Protect (18)"
	case RecTypePassword:
		return "Password (19)"
	case RecTypeHeader:
		return "Header (20)"
	case RecTypeFooter:
		return "Footer (21)"
	case RecTypeExternSheet:
		return "ExternSheet (23)"
	case RecTypeLbl:
		return "Lbl (24)"
	case RecTypeWinProtect:
		return "WinProtect (25)"
	case RecTypeVerticalPageBreaks:
		return "VerticalPageBreaks (26)"
	case RecTypeHorizontalPageBreaks:
		return "HorizontalPageBreaks (27)"
	case RecTypeNote:
		return "Note (28)"
	case RecTypeSelection:
		return "Selection (29)"
	case RecTypeDate1904:
		return "Date1904 (34)"
	case RecTypeExternName:
		return "ExternName (35)"
	case RecTypeLeftMargin:
		return "LeftMargin (38)"
	case RecTypeRightMargin:
		return "RightMargin (39)"
	case RecTypeTopMargin:
		return "TopMargin (40)"
	case RecTypeBottomMargin:
		return "BottomMargin (41)"
	case RecTypePrintRowCol:
		return "PrintRowCol (42)"
	case RecTypePrintGrid:
		return "PrintGrid (43)"
	case RecTypeFilePass:
		return "FilePass (47)"
	case RecTypeFont:
		return "Font (49)"
	case RecTypePrintSize:
		return "PrintSize (51)"
	case RecTypeContinue:
		return "Continue (60)"
	case RecTypeWindow1:
		return "Window1 (61)"
	case RecTypeBackup:
		return "Backup (64)"
	case RecTypePane:
		return "Pane (65)"
	case RecTypeCodePage:
		return "CodePage (66)"
	case RecTypePls:
		return "Pls (77)"
	case RecTypeDCon:
		return "DCon (80)"
	case RecTypeDConRef:
		return "DConRef (81)"
	case RecTypeDConName:
		return "DConName (82)"
	case RecTypeDefColWidth:
		return "DefColWidth (85)"
	case RecTypeXCT:
		return "XCT (89)"
	case RecTypeCRN:
		return "CRN (90)"
	case RecTypeFileSharing:
		return "FileSharing (91)"
	case RecTypeWriteAccess:
		return "WriteAccess (92)"
	case RecTypeObj:
		return "Obj (93)"
	case RecTypeUncalced:
		return "Uncalced (94)"
	case RecTypeCalcSaveRecalc:
		return "CalcSaveRecalc (95)"
	case RecTypeTemplate:
		return "Template (96)"
	case RecTypeIntl:
		return "Intl (97)"
	case RecTypeObjProtect:
		return "ObjProtect (99)"
	case RecTypeColInfo:
		return "ColInfo (125)"
	case RecTypeGuts:
		return "Guts (128)"
	case RecTypeWsBool:
		return "WsBool (129)"
	case RecTypeGridSet:
		return "GridSet (130)"
	case RecTypeHCenter:
		return "HCenter (131)"
	case RecTypeVCenter:
		return "VCenter (132)"
	case RecTypeBoundSheet8:
		return "BoundSheet8 (133)"
	case RecTypeWriteProtect:
		return "WriteProtect (134)"
	case RecTypeCountry:
		return "Country (140)"
	case RecTypeHideObj:
		return "HideObj (141)"
	case RecTypeSort:
		return "Sort (144)"
	case RecTypePalette:
		return "Palette (146)"
	case RecTypeSync:
		return "Sync (151)"
	case RecTypeLPr:
		return "LPr (152)"
	case RecTypeDxGCol:
		return "DxGCol (153)"
	case RecTypeFnGroupName:
		return "FnGroupName (154)"
	case RecTypeFilterMode:
		return "FilterMode (155)"
	case RecTypeBuiltInFnGroupCount:
		return "BuiltInFnGroupCount (156)"
	case RecTypeAutoFilterInfo:
		return "AutoFilterInfo (157)"
	case RecTypeAutoFilter:
		return "AutoFilter (158)"
	case RecTypeScl:
		return "Scl (160)"
	case RecTypeSetup:
		return "Setup (161)"
	case RecTypeScenMan:
		return "ScenMan (174)"
	case RecTypeSCENARIO:
		return "SCENARIO (175)"
	case RecTypeSxView:
		return "SxView (176)"
	case RecTypeSxvd:
		return "Sxvd (177)"
	case RecTypeSXVI:
		return "SXVI (178)"
	case RecTypeSxIvd:
		return "SxIvd (180)"
	case RecTypeSXLI:
		return "SXLI (181)"
	case RecTypeSXPI:
		return "SXPI (182)"
	case RecTypeDocRoute:
		return "DocRoute (184)"
	case RecTypeRecipName:
		return "RecipName (185)"
	case RecTypeMulRk:
		return "MulRk (189)"
	case RecTypeMulBlank:
		return "MulBlank (190)"
	case RecTypeMms:
		return "Mms (193)"
	case RecTypeSXDI:
		return "SXDI (197)"
	case RecTypeSXDB:
		return "SXDB (198)"
	case RecTypeSXFDB:
		return "SXFDB (199)"
	case RecTypeSXDBB:
		return "SXDBB (200)"
	case RecTypeSXNum:
		return "SXNum (201)"
	case RecTypeSxBool:
		return "SxBool (202)"
	case RecTypeSxErr:
		return "SxErr (203)"
	case RecTypeSXInt:
		return "SXInt (204)"
	case RecTypeSXString:
		return "SXString (205)"
	case RecTypeSXDtr:
		return "SXDtr (206)"
	case RecTypeSxNil:
		return "SxNil (207)"
	case RecTypeSXTbl:
		return "SXTbl (208)"
	case RecTypeSXTBRGIITM:
		return "SXTBRGIITM (209)"
	case RecTypeSxTbpg:
		return "SxTbpg (210)"
	case RecTypeObProj:
		return "ObProj (211)"
	case RecTypeSXStreamID:
		return "SXStreamID (213)"
	case RecTypeDBCell:
		return "DBCell (215)"
	case RecTypeSXRng:
		return "SXRng (216)"
	case RecTypeSxIsxoper:
		return "SxIsxoper (217)"
	case RecTypeBookBool:
		return "BookBool (218)"
	case RecTypeDbOrParamQry:
		return "DbOrParamQry (220)"
	case RecTypeScenarioProtect:
		return "ScenarioProtect (221)"
	case RecTypeOleObjectSize:
		return "OleObjectSize (222)"
	case RecTypeXF:
		return "XF (224)"
	case RecTypeInterfaceHdr:
		return "InterfaceHdr (225)"
	case RecTypeInterfaceEnd:
		return "InterfaceEnd (226)"
	case RecTypeSXVS:
		return "SXVS (227)"
	case RecTypeMergeCells:
		return "MergeCells (229)"
	case RecTypeBkHim:
		return "BkHim (233)"
	case RecTypeMsoDrawingGroup:
		return "MsoDrawingGroup (235)"
	case RecTypeMsoDrawing:
		return "MsoDrawing (236)"
	case RecTypeMsoDrawingSelection:
		return "MsoDrawingSelection (237)"
	case RecTypePhoneticInfo:
		return "PhoneticInfo (239)"
	case RecTypeSxRule:
		return "SxRule (240)"
	case RecTypeSXEx:
		return "SXEx (241)"
	case RecTypeSxFilt:
		return "SxFilt (242)"
	case RecTypeSxDXF:
		return "SxDXF (244)"
	case RecTypeSxItm:
		return "SxItm (245)"
	case RecTypeSxName:
		return "SxName (246)"
	case RecTypeSxSelect:
		return "SxSelect (247)"
	case RecTypeSXPair:
		return "SXPair (248)"
	case RecTypeSxFmla:
		return "SxFmla (249)"
	case RecTypeSxFormat:
		return "SxFormat (251)"
	case RecTypeSST:
		return "SST (252)"
	case RecTypeLabelSst:
		return "LabelSst (253)"
	case RecTypeExtSST:
		return "ExtSST (255)"
	case RecTypeSXVDEx:
		return "SXVDEx (256)"
	case RecTypeSXFormula:
		return "SXFormula (259)"
	case RecTypeSXDBEx:
		return "SXDBEx (290)"
	case RecTypeRRDInsDel:
		return "RRDInsDel (311)"
	case RecTypeRRDHead:
		return "RRDHead (312)"
	case RecTypeRRDChgCell:
		return "RRDChgCell (315)"
	case RecTypeRRTabID:
		return "RRTabID (317)"
	case RecTypeRRDRenSheet:
		return "RRDRenSheet (318)"
	case RecTypeRRSort:
		return "RRSort (319)"
	case RecTypeRRDMove:
		return "RRDMove (320)"
	case RecTypeRRFormat:
		return "RRFormat (330)"
	case RecTypeRRAutoFmt:
		return "RRAutoFmt (331)"
	case RecTypeRRInsertSh:
		return "RRInsertSh (333)"
	case RecTypeRRDMoveBegin:
		return "RRDMoveBegin (334)"
	case RecTypeRRDMoveEnd:
		return "RRDMoveEnd (335)"
	case RecTypeRRDInsDelBegin:
		return "RRDInsDelBegin (336)"
	case RecTypeRRDInsDelEnd:
		return "RRDInsDelEnd (337)"
	case RecTypeRRDConflict:
		return "RRDConflict (338)"
	case RecTypeRRDDefName:
		return "RRDDefName (339)"
	case RecTypeRRDRstEtxp:
		return "RRDRstEtxp (340)"
	case RecTypeLRng:
		return "LRng (351)"
	case RecTypeUsesELFs:
		return "UsesELFs (352)"
	case RecTypeDSF:
		return "DSF (353)"
	case RecTypeCUsr:
		return "CUsr (401)"
	case RecTypeCbUsr:
		return "CbUsr (402)"
	case RecTypeUsrInfo:
		return "UsrInfo (403)"
	case RecTypeUsrExcl:
		return "UsrExcl (404)"
	case RecTypeFileLock:
		return "FileLock (405)"
	case RecTypeRRDInfo:
		return "RRDInfo (406)"
	case RecTypeBCUsrs:
		return "BCUsrs (407)"
	case RecTypeUsrChk:
		return "UsrChk (408)"
	case RecTypeUserBView:
		return "UserBView (425)"
	case RecTypeUserSViewBegin:
		return "UserSViewBegin[Chart] (426)"
	case RecTypeUserSViewEnd:
		return "UserSViewEnd (427)"
	case RecTypeRRDUserView:
		return "RRDUserView (428)"
	case RecTypeQsi:
		return "Qsi (429)"
	case RecTypeSupBook:
		return "SupBook (430)"
	case RecTypeProt4Rev:
		return "Prot4Rev (431)"
	case RecTypeCondFmt:
		return "CondFmt (432)"
	case RecTypeCF:
		return "CF (433)"
	case RecTypeDVal:
		return "DVal (434)"
	case RecTypeDConBin:
		return "DConBin (437)"
	case RecTypeTxO:
		return "TxO (438)"
	case RecTypeRefreshAll:
		return "RefreshAll (439)"
	case RecTypeHLink:
		return "HLink (440)"
	case RecTypeLel:
		return "Lel (441)"
	case RecTypeCodeName:
		return "CodeName (442)"
	case RecTypeSXFDBType:
		return "SXFDBType (443)"
	case RecTypeProt4RevPass:
		return "Prot4RevPass (444)"
	case RecTypeObNoMacros:
		return "ObNoMacros (445)"
	case RecTypeDv:
		return "Dv (446)"
	case RecTypeExcel9File:
		return "Excel9File (448)"
	case RecTypeRecalcID:
		return "RecalcID (449)"
	case RecTypeEntExU2:
		return "EntExU2 (450)"
	case RecTypeDimensions:
		return "Dimensions (512)"
	case RecTypeBlank:
		return "Blank (513)"
	case RecTypeNumber:
		return "Number (515)"
	case RecTypeLabel:
		return "Label (516)"
	case RecTypeBoolErr:
		return "BoolErr (517)"
	case RecTypeString:
		return "String (519)"
	case RecTypeRow:
		return "Row (520)"
	case RecTypeIndex:
		return "Index (523)"
	case RecTypeArray:
		return "Array (545)"
	case RecTypeDefaultRowHeight:
		return "DefaultRowHeight (549)"
	case RecTypeTable:
		return "Table (566)"
	case RecTypeWindow2:
		return "Window2 (574)"
	case RecTypeRK:
		return "RK (638)"
	case RecTypeStyle:
		return "Style (659)"
	case RecTypeBigName:
		return "BigName (1048)"
	case RecTypeFormat:
		return "Format (1054)"
	case RecTypeContinueBigName:
		return "ContinueBigName (1084)"
	case RecTypeShrFmla:
		return "ShrFmla (1212)"
	case RecTypeHLinkTooltip:
		return "HLinkTooltip (2048)"
	case RecTypeWebPub:
		return "WebPub (2049)"
	case RecTypeQsiSXTag:
		return "QsiSXTag (2050)"
	case RecTypeDBQueryExt:
		return "DBQueryExt (2051)"
	case RecTypeExtString:
		return "ExtString (2052)"
	case RecTypeTxtQry:
		return "TxtQry (2053)"
	case RecTypeQsir:
		return "Qsir (2054)"
	case RecTypeQsif:
		return "Qsif (2055)"
	case RecTypeRRDTQSIF:
		return "RRDTQSIF (2056)"
	case RecTypeBOF:
		return "BOF (2057)"
	case RecTypeOleDbConn:
		return "OleDbConn (2058)"
	case RecTypeWOpt:
		return "WOpt (2059)"
	case RecTypeSXViewEx:
		return "SXViewEx (2060)"
	case RecTypeSXTH:
		return "SXTH (2061)"
	case RecTypeSXPIEx:
		return "SXPIEx (2062)"
	case RecTypeSXVDTEx:
		return "SXVDTEx (2063)"
	case RecTypeSXViewEx9:
		return "SXViewEx9 (2064)"
	case RecTypeContinueFrt:
		return "ContinueFrt (2066)"
	case RecTypeRealTimeData:
		return "RealTimeData (2067)"
	case RecTypeChartFrtInfo:
		return "ChartFrtInfo (2128)"
	case RecTypeFrtWrapper:
		return "FrtWrapper (2129)"
	case RecTypeStartBlock:
		return "StartBlock (2130)"
	case RecTypeEndBlock:
		return "EndBlock (2131)"
	case RecTypeStartObject:
		return "StartObject (2132)"
	case RecTypeEndObject:
		return "EndObject (2133)"
	case RecTypeCatLab:
		return "CatLab (2134)"
	case RecTypeYMult:
		return "YMult (2135)"
	case RecTypeSXViewLink:
		return "SXViewLink (2136)"
	case RecTypePivotChartBits:
		return "PivotChartBits (2137)"
	case RecTypeFrtFontList:
		return "FrtFontList (2138)"
	case RecTypeSheetExt:
		return "SheetExt (2146)"
	case RecTypeBookExt:
		return "BookExt (2147)"
	case RecTypeSXAddl:
		return "SXAddl (2148)"
	case RecTypeCrErr:
		return "CrErr (2149)"
	case RecTypeHFPicture:
		return "HFPicture (2150)"
	case RecTypeFeatHdr:
		return "FeatHdr (2151)"
	case RecTypeFeat:
		return "Feat (2152)"
	case RecTypeDataLabExt:
		return "DataLabExt (2154)"
	case RecTypeDataLabExtContents:
		return "DataLabExtContents (2155)"
	case RecTypeCellWatch:
		return "CellWatch (2156)"
	case RecTypeFeatHdr11:
		return "FeatHdr11 (2161)"
	case RecTypeFeature11:
		return "Feature11 (2162)"
	case RecTypeDropDownObjIds:
		return "DropDownObjIds (2164)"
	case RecTypeContinueFrt11:
		return "ContinueFrt11 (2165)"
	case RecTypeDConn:
		return "DConn (2166)"
	case RecTypeList12:
		return "List12 (2167)"
	case RecTypeFeature12:
		return "Feature12 (2168)"
	case RecTypeCondFmt12:
		return "CondFmt12 (2169)"
	case RecTypeCF12:
		return "CF12 (2170)"
	case RecTypeCFEx:
		return "CFEx (2171)"
	case RecTypeXFCRC:
		return "XFCRC (2172)"
	case RecTypeXFExt:
		return "XFExt (2173)"
	case RecTypeAutoFilter12:
		return "AutoFilter12 (2174)"
	case RecTypeContinueFrt12:
		return "ContinueFrt12 (2175)"
	case RecTypeMDTInfo:
		return "MDTInfo (2180)"
	case RecTypeMDXStr:
		return "MDXStr (2181)"
	case RecTypeMDXTuple:
		return "MDXTuple (2182)"
	case RecTypeMDXSet:
		return "MDXSet (2183)"
	case RecTypeMDXProp:
		return "MDXProp (2184)"
	case RecTypeMDXKPI:
		return "MDXKPI (2185)"
	case RecTypeMDB:
		return "MDB (2186)"
	case RecTypePLV:
		return "PLV (2187)"
	case RecTypeCompat12:
		return "Compat12 (2188)"
	case RecTypeDXF:
		return "DXF (2189)"
	case RecTypeTableStyles:
		return "TableStyles (2190)"
	case RecTypeTableStyle:
		return "TableStyle (2191)"
	case RecTypeTableStyleElement:
		return "TableStyleElement (2192)"
	case RecTypeStyleExt:
		return "StyleExt (2194)"
	case RecTypeNamePublish:
		return "NamePublish (2195)"
	case RecTypeNameCmt:
		return "NameCmt (2196)"
	case RecTypeSortData:
		return "SortData (2197)"
	case RecTypeTheme:
		return "Theme (2198)"
	case RecTypeGUIDTypeLib:
		return "GUIDTypeLib (2199)"
	case RecTypeFnGrp12:
		return "FnGrp12 (2200)"
	case RecTypeNameFnGrp12:
		return "NameFnGrp12 (2201)"
	case RecTypeMTRSettings:
		return "MTRSettings (2202)"
	case RecTypeCompressPictures:
		return "CompressPictures (2203)"
	case RecTypeHeaderFooter:
		return "HeaderFooter (2204)"
	case RecTypeCrtLayout12:
		return "CrtLayout12 (2205)"
	case RecTypeCrtMlFrt:
		return "CrtMlFrt (2206)"
	case RecTypeCrtMlFrtContinue:
		return "CrtMlFrtContinue (2207)"
	case RecTypeForceFullCalculation:
		return "ForceFullCalculation (2211)"
	case RecTypeShapePropsStream:
		return "ShapePropsStream (2212)"
	case RecTypeTextPropsStream:
		return "TextPropsStream (2213)"
	case RecTypeRichTextStream:
		return "RichTextStream (2214)"
	case RecTypeCrtLayout12A:
		return "CrtLayout12A (2215)"
	case RecTypeUnits:
		return "Units (4097)"
	case RecTypeChart:
		return "Chart (4098)"
	case RecTypeSeries:
		return "Series (4099)"
	case RecTypeDataFormat:
		return "DataFormat (4102)"
	case RecTypeLineFormat:
		return "LineFormat (4103)"
	case RecTypeMarkerFormat:
		return "MarkerFormat (4105)"
	case RecTypeAreaFormat:
		return "AreaFormat (4106)"
	case RecTypePieFormat:
		return "PieFormat (4107)"
	case RecTypeAttachedLabel:
		return "AttachedLabel (4108)"
	case RecTypeSeriesText:
		return "SeriesText (4109)"
	case RecTypeChartFormat:
		return "ChartFormat (4116)"
	case RecTypeLegend:
		return "Legend (4117)"
	case RecTypeSeriesList:
		return "SeriesList (4118)"
	case RecTypeBar:
		return "Bar (4119)"
	case RecTypeLine:
		return "Line (4120)"
	case RecTypePie:
		return "Pie (4121)"
	case RecTypeArea:
		return "Area (4122)"
	case RecTypeScatter:
		return "Scatter (4123)"
	case RecTypeCrtLine:
		return "CrtLine (4124)"
	case RecTypeAxis:
		return "Axis (4125)"
	case RecTypeTick:
		return "Tick (4126)"
	case RecTypeValueRange:
		return "ValueRange (4127)"
	case RecTypeCatSerRange:
		return "CatSerRange (4128)"
	case RecTypeAxisLine:
		return "AxisLine (4129)"
	case RecTypeCrtLink:
		return "CrtLink (4130)"
	case RecTypeDefaultText:
		return "DefaultText (4132)"
	case RecTypeText:
		return "Text (4133)"
	case RecTypeFontX:
		return "FontX (4134)"
	case RecTypeObjectLink:
		return "ObjectLink (4135)"
	case RecTypeFrame:
		return "Frame (4146)"
	case RecTypeBegin:
		return "Begin (4147)"
	case RecTypeEnd:
		return "End (4148)"
	case RecTypePlotArea:
		return "PlotArea (4149)"
	case RecTypeChart3d:
		return "Chart3d (4154)"
	case RecTypePicF:
		return "PicF (4156)"
	case RecTypeDropBar:
		return "DropBar (4157)"
	case RecTypeRadar:
		return "Radar (4158)"
	case RecTypeSurf:
		return "Surf (4159)"
	case RecTypeRadarArea:
		return "RadarArea (4160)"
	case RecTypeAxisParent:
		return "AxisParent (4161)"
	case RecTypeLegendException:
		return "LegendException (4163)"
	case RecTypeShtProps:
		return "ShtProps (4164)"
	case RecTypeSerToCrt:
		return "SerToCrt (4165)"
	case RecTypeAxesUsed:
		return "AxesUsed (4166)"
	case RecTypeSBaseRef:
		return "SBaseRef (4168)"
	case RecTypeSerParent:
		return "SerParent (4170)"
	case RecTypeSerAuxTrend:
		return "SerAuxTrend (4171)"
	case RecTypeIFmtRecord:
		return "IFmtRecord (4174)"
	case RecTypePos:
		return "Pos (4175)"
	case RecTypeAlRuns:
		return "AlRuns (4176)"
	case RecTypeBRAI:
		return "BRAI (4177)"
	case RecTypeSerAuxErrBar:
		return "SerAuxErrBar (4187)"
	case RecTypeClrtClient:
		return "ClrtClient (4188)"
	case RecTypeSerFmt:
		return "SerFmt (4189)"
	case RecTypeChart3DBarShape:
		return "Chart3DBarShape (4191)"
	case RecTypeFbi:
		return "Fbi (4192)"
	case RecTypeBopPop:
		return "BopPop (4193)"
	case RecTypeAxcExt:
		return "AxcExt (4194)"
	case RecTypeDat:
		return "Dat (4195)"
	case RecTypePlotGrowth:
		return "PlotGrowth (4196)"
	case RecTypeSIIndex:
		return "SIIndex (4197)"
	case RecTypeGelFrame:
		return "GelFrame (4198)"
	case RecTypeBopPopCustom:
		return "BopPopCustom (4199)"
	case RecTypeFbi2:
		return "Fbi2 (4200)"
	}
	return fmt.Sprintf("unknown (%d 0x%x)", uint16(r), uint16(r))
}


================================================
FILE: xls/sheets.go
================================================
package xls

import (
	"encoding/binary"
	"errors"
	"log"
	"math"
	"unicode/utf16"

	"github.com/pbnjay/grate"
	"github.com/pbnjay/grate/commonxl"
)

// List returns the names of the sheets whose hidden-state bits are clear,
// i.e. the sheets that are visible. The returned error is always nil.
func (b *WorkBook) List() ([]string, error) {
	names := make([]string, 0, len(b.sheets))
	for i := range b.sheets {
		sh := b.sheets[i]
		if (sh.HiddenState & 0x03) != 0 {
			// hidden (any of the two state bits set): not listed here
			continue
		}
		names = append(names, sh.Name)
	}
	return names, nil
}

// ListHidden returns the names of the sheets that have at least one of the
// two hidden-state bits set. The returned error is always nil.
func (b *WorkBook) ListHidden() ([]string, error) {
	names := make([]string, 0, len(b.sheets))
	for i := range b.sheets {
		sh := b.sheets[i]
		if (sh.HiddenState & 0x03) == 0 {
			// visible sheet: reported by List instead
			continue
		}
		names = append(names, sh.Name)
	}
	return names, nil
}

// Get opens the named worksheet and returns an iterator for its contents.
//
// It returns an error when no sheet has that name or when the sheet cannot
// be parsed. When parseSheet yields no sheet at all (it returns a nil sheet
// for dialog sheets) the returned collection is a true nil interface value,
// so callers can detect it with a plain `== nil` comparison.
func (b *WorkBook) Get(sheetName string) (grate.Collection, error) {
	for _, s := range b.sheets {
		if s.Name != sheetName {
			continue
		}
		ss := b.pos2substream[int64(s.Position)]
		res, err := b.parseSheet(s, ss)
		if err != nil {
			return nil, err
		}
		if res == nil {
			// Returning res directly would wrap the nil *commonxl.Sheet in a
			// non-nil interface value (the typed-nil interface trap).
			return nil, nil
		}
		return res, nil
	}
	return nil, errors.New("xls: sheet not found")
}

// parseSheet decodes the records of substream ss into a commonxl.Sheet.
//
// The records are walked twice. The first pass reads the Dimensions record to
// pre-size the sheet and checks WsBool to detect dialog sheets, for which
// (nil, nil) is returned. The second pass decodes the cell records (BoolErr,
// MulRk, Number, RK, Formula/String, LabelSst) plus hyperlinks and merged
// cell ranges. In both passes, nested BOF...EOF blocks (embedded content such
// as charts) are skipped.
//
// The s parameter is not consulted by the body. An error is returned when a
// LabelSst record refers to a shared string that does not exist.
func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) {
	res := &commonxl.Sheet{
		Formatter: &b.nfmt,
	}
	var minRow, maxRow uint32
	var minCol, maxCol uint16

	// temporary string buffer
	us := make([]uint16, 8224)

	inSubstream := 0
	for idx, r := range b.substreams[ss] {
		if inSubstream > 0 {
			if r.RecType == RecTypeEOF {
				inSubstream--
			}
			continue
		}
		switch r.RecType {
		case RecTypeBOF:
			// a BOF inside a sheet usually means embedded content like a chart
			// (which we aren't interested in). So we set a flag and wait
			// for the EOF for that content block.
			if idx > 0 {
				inSubstream++
				continue
			}
		case RecTypeWsBool:
			if (r.Data[1] & 0x10) != 0 {
				// it's a dialog
				return nil, nil
			}

		case RecTypeDimensions:
			// max = 0-based index of the row AFTER the last valid index
			minRow = binary.LittleEndian.Uint32(r.Data[:4])
			maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
			minCol = binary.LittleEndian.Uint16(r.Data[8:10])
			maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
			if grate.Debug {
				log.Printf("    Sheet dimensions (%d, %d) - (%d,%d)",
					minCol, minRow, maxCol, maxRow)
			}
			if minRow > 0x0000FFFF || maxRow > 0x00010000 {
				log.Println("invalid dimensions")
			}
			if minCol > 0x00FF || maxCol > 0x0100 {
				log.Println("invalid dimensions")
			}

			// pre-allocate cells
			res.Resize(int(maxRow), int(maxCol))
		}
	}
	inSubstream = 0

	// position of the most recent Formula record; a following String record
	// carries that formula's text result and is stored at this position.
	var formulaRow, formulaCol uint16
	for ridx, r := range b.substreams[ss] {
		if inSubstream > 0 {
			if r.RecType == RecTypeEOF {
				inSubstream--
			} else if grate.Debug {
				log.Println("      Unhandled sheet substream record type:", r.RecType, ridx)
			}
			continue
		}

		// sec 2.1.7.20.6 Common Productions ABNF:
		/*
			CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2
			CELL = FORMULA / Blank / MulBlank / RK / MulRk / BoolErr / Number / LabelSst
			FORMULA = [Uncalced] Formula [Array / Table / ShrFmla / SUB] [String *Continue]

			Not parsed from the list above:
				DBCell, EntExU2, Uncalced, Array, Table,ShrFmla
				NB: no idea what "SUB" is
		*/

		switch r.RecType {
		case RecTypeBOF:
			if ridx > 0 {
				inSubstream++
				continue
			}

		case RecTypeBoolErr:
			rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
			colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
			ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
			if r.Data[7] == 0 {
				// Boolean value
				bv := false
				if r.Data[6] == 1 {
					bv = true
				}
				var fno uint16
				if ixfe < len(b.xfs) {
					fno = b.xfs[ixfe]
				}
				res.Put(rowIndex, colIndex, bv, fno)
				//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
			} else {
				// it's an error, load the label
				be, ok := berrLookup[r.Data[6]]
				if !ok {
					be = "<unknown error>"
				}
				res.Put(rowIndex, colIndex, be, 0)
				//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
			}

		case RecTypeMulRk:
			// MulRk encodes multiple RK values in a row
			nrk := int((r.RecSize - 6) / 6)
			rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
			colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
			for i := 0; i < nrk; i++ {
				off := 4 + i*6
				ixfe := int(binary.LittleEndian.Uint16(r.Data[off:]))
				value := RKNumber(binary.LittleEndian.Uint32(r.Data[off+2:]))

				var rval interface{}
				if value.IsInteger() {
					rval = value.Int()
				} else {
					rval = value.Float64()
				}
				var fno uint16
				if ixfe < len(b.xfs) {
					fno = b.xfs[ixfe]
				}
				res.Put(rowIndex, colIndex+i, rval, fno)
			}
			//log.Printf("mulrow spec: %+v", *mr)

		case RecTypeNumber:
			rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
			colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
			ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
			xnum := binary.LittleEndian.Uint64(r.Data[6:])

			value := math.Float64frombits(xnum)
			var fno uint16
			if ixfe < len(b.xfs) {
				fno = b.xfs[ixfe]
			}
			res.Put(rowIndex, colIndex, value, fno)
			//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)

		case RecTypeRK:
			rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
			colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
			ixfe := int(binary.LittleEndian.Uint16(r.Data[4:]))
			value := RKNumber(binary.LittleEndian.Uint32(r.Data[6:]))

			var rval interface{}
			if value.IsInteger() {
				rval = value.Int()
			} else {
				rval = value.Float64()
			}
			var fno uint16
			if ixfe < len(b.xfs) {
				fno = b.xfs[ixfe]
			}
			res.Put(rowIndex, colIndex, rval, fno)
			//log.Printf("RK spec: %d %d = %+v", rowIndex, colIndex, rval)

		case RecTypeFormula:
			formulaRow = binary.LittleEndian.Uint16(r.Data[:2])
			formulaCol = binary.LittleEndian.Uint16(r.Data[2:4])
			ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
			fdata := r.Data[6:]
			var fno uint16
			if ixfe < len(b.xfs) {
				fno = b.xfs[ixfe]
			}
			// When the last two bytes of the 8-byte result are 0xFFFF the
			// result is not a number; the first byte selects its type.
			if fdata[6] == 0xFF && fdata[7] == 0xFF {
				switch fdata[0] {
				case 0:
					// string in next record
					// put placeholder now to record the numFmt
					res.Put(int(formulaRow), int(formulaCol), "", fno)
				case 1:
					// boolean
					bv := false
					if fdata[2] != 0 {
						bv = true
					}
					res.Put(int(formulaRow), int(formulaCol), bv, fno)
				case 2:
					// error value
					be, ok := berrLookup[fdata[2]]
					if !ok {
						be = "<unknown error>"
					}
					res.Put(int(formulaRow), int(formulaCol), be, 0)
				case 3:
					// blank string
				default:
					log.Printf("unknown formula value type %d", fdata[0])
				}
			} else {
				xnum := binary.LittleEndian.Uint64(fdata)
				value := math.Float64frombits(xnum)
				res.Put(int(formulaRow), int(formulaCol), value, fno)
			}
			//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)

		case RecTypeString:
			// String is the previously rendered value of a formula
			// NB similar to the workbook SST, this can continue over
			// additional records up to 32k characters. A 1-byte flag
			// at each gap indicates if the encoding switches
			// to/from 8/16-bit characters.

			charCount := binary.LittleEndian.Uint16(r.Data[:2])
			flags := r.Data[2]
			fstr := ""
			if (flags & 1) == 0 {
				fstr = string(r.Data[3:])
			} else {
				raw := r.Data[3:]
				// charCount counts the characters of the whole string. When
				// the string continues in Continue records this record only
				// holds the leading part, so never decode more code units
				// than are actually present here.
				n := int(charCount)
				if avail := len(raw) / 2; n > avail {
					n = avail
				}
				if n > cap(us) {
					us = make([]uint16, n)
				}
				us = us[:n]
				for i := 0; i < n; i++ {
					us[i] = binary.LittleEndian.Uint16(raw[2*i:])
				}
				fstr = string(utf16.Decode(us))
			}

			if (ridx + 1) < len(b.substreams[ss]) {
				ridx2 := ridx + 1
				nrecs := len(b.substreams[ss])
				for ridx2 < nrecs {
					r2 := b.substreams[ss][ridx2]
					if r2.RecType != RecTypeContinue {
						break
					}
					if (r2.Data[0] & 1) == 0 {
						fstr += string(r2.Data[1:])
					} else {
						raw := r2.Data[1:]
						slen := len(raw) / 2
						if slen > cap(us) {
							// re-slicing beyond the capacity would panic
							us = make([]uint16, slen)
						}
						us = us[:slen]
						for i := 0; i < slen; i++ {
							us[i] = binary.LittleEndian.Uint16(raw)
							raw = raw[2:]
						}
						fstr += string(utf16.Decode(us))
					}
					ridx2++
				}
			}
			res.Set(int(formulaRow), int(formulaCol), fstr)
			//log.Printf("String direct: %d %d '%s'", int(formulaRow), int(formulaCol), fstr)

		case RecTypeLabelSst:
			rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
			colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
			ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
			sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:]))
			// valid indexes are 0..len-1; an index equal to len must be
			// rejected too, otherwise the lookup below panics.
			if sstIndex < 0 || sstIndex >= len(b.strings) {
				return nil, errors.New("xls: invalid sst index")
			}
			var fno uint16
			if ixfe < len(b.xfs) {
				fno = b.xfs[ixfe]
			}
			if b.strings[sstIndex] != "" {
				res.Put(rowIndex, colIndex, b.strings[sstIndex], fno)
			}
			//log.Printf("SST spec: %d %d = [%d] '%s' %d", rowIndex, colIndex, sstIndex, b.strings[sstIndex], fno)

		case RecTypeHLink:
			firstRow := binary.LittleEndian.Uint16(r.Data[:2])
			lastRow := binary.LittleEndian.Uint16(r.Data[2:4])
			firstCol := binary.LittleEndian.Uint16(r.Data[4:6])
			lastCol := binary.LittleEndian.Uint16(r.Data[6:])
			if int(firstCol) > int(maxCol) {
				//log.Println("invalid hyperlink column")
				continue
			}
			if int(firstRow) > int(maxRow) {
				//log.Println("invalid hyperlink row")
				continue
			}
			if lastRow == 0xFFFF { // placeholder value indicate "last"
				lastRow = uint16(maxRow) - 1
			}
			if lastCol == 0xFF { // placeholder value indicate "last"
				lastCol = uint16(maxCol) - 1
			}

			// decode the hyperlink datastructure and try to find the
			// display text and separate the URL itself.
			displayText, linkText, err := decodeHyperlinks(r.Data[8:])
			if err != nil {
				log.Println(err)
				continue
			}

			// apply merge cell rules (see RecTypeMergeCells below)
			for rn := int(firstRow); rn <= int(lastRow); rn++ {
				for cn := int(firstCol); cn <= int(lastCol); cn++ {
					if rn == int(firstRow) && cn == int(firstCol) {
						// TODO: provide custom hooks for how to handle links in output
						res.Put(rn, cn, displayText+" <"+linkText+">", 0)
					} else if cn == int(firstCol) {
						// first and last column MAY be the same
						if rn == int(lastRow) {
							res.Put(rn, cn, grate.EndRowMerged, 0)
						} else {
							res.Put(rn, cn, grate.ContinueRowMerged, 0)
						}
					} else if cn == int(lastCol) {
						// first and last column are NOT the same
						res.Put(rn, cn, grate.EndColumnMerged, 0)
					} else {
						res.Put(rn, cn, grate.ContinueColumnMerged, 0)
					}
				}
			}

		case RecTypeMergeCells:
			// To keep cells aligned, Merged cells are handled by placing
			// special characters in each cell covered by the merge block.
			//
			// The contents of the cell are always in the top left position.
			// A "down arrow" (↓) indicates the left side of the merge block, and a
			// "down arrow with stop line" (⤓) indicates the last row of the merge.
			// A "right arrow" (→) indicates that the columns span horizontally,
			// and a "right arrow with stop line" (⇥) indicates the rightmost
			// column of the merge.
			//

			cmcs := binary.LittleEndian.Uint16(r.Data[:2])
			raw := r.Data[2:]
			for i := 0; i < int(cmcs); i++ {
				firstRow := binary.LittleEndian.Uint16(raw[:2])
				lastRow := binary.LittleEndian.Uint16(raw[2:4])
				firstCol := binary.LittleEndian.Uint16(raw[4:6])
				lastCol := binary.LittleEndian.Uint16(raw[6:])
				raw = raw[8:]

				if lastRow == 0xFFFF { // placeholder value indicate "last"
					lastRow = uint16(maxRow) - 1
				}
				if lastCol == 0xFF { // placeholder value indicate "last"
					lastCol = uint16(maxCol) - 1
				}
				for rn := int(firstRow); rn <= int(lastRow); rn++ {
					for cn := int(firstCol); cn <= int(lastCol); cn++ {
						if rn == int(firstRow) && cn == int(firstCol) {
							// should be a value there already!
						} else if cn == int(firstCol) {
							// first and last column MAY be the same
							if rn == int(lastRow) {
								res.Put(rn, cn, grate.EndRowMerged, 0)
							} else {
								res.Put(rn, cn, grate.ContinueRowMerged, 0)
							}
						} else if cn == int(lastCol) {
							// first and last column are NOT the same
							res.Put(rn, cn, grate.EndColumnMerged, 0)
						} else {
							res.Put(rn, cn, grate.ContinueColumnMerged, 0)
						}
					}
				}
			}
			/*
				case RecTypeBlank, RecTypeMulBlank:
					// cells default value is blank, no need for these

				case RecTypeContinue:
					// the only situation so far is when used in RecTypeString above

				case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool:
					// handled in initial pass

				default:
					if grate.Debug {
						log.Println("    Unhandled sheet record type:", r.RecType, ridx)
					}
			*/
		}
	}
	return res, nil
}

// berrLookup maps the one-byte error code stored in BoolErr records and in
// error-valued formula results to the text placed in the cell. Codes missing
// from this table are rendered as "<unknown error>" by parseSheet.
var berrLookup = map[byte]string{
	0x00: "#NULL!",
	0x07: "#DIV/0!",
	0x0F: "#VALUE!",
	0x17: "#REF!",
	0x1D: "#NAME?",
	0x24: "#NUM!",
	0x2A: "#N/A",
	0x2B: "#GETTING_DATA",
}


================================================
FILE: xls/simple_test.go
================================================
package xls

import (
	"bufio"
	"log"
	"os"
	"strings"
	"testing"

	"github.com/pbnjay/grate/commonxl"
)

// testFilePairs lists the fixtures checked by TestBasic. Each entry is a
// two-element slice: the path of the .xls workbook to parse, followed by the
// path of the tab-separated file holding the expected cell contents.
var testFilePairs = [][]string{
	{"../testdata/basic.xls", "../testdata/basic.tsv"},
	{"../testdata/testing.xls", "../testdata/testing.tsv"},

	// TODO: custom formatter support
	//{"../testdata/basic2.xls", "../testdata/basic2.tsv"},

	// TODO: datetime and fraction formatter support
	//{"../testdata/multi_test.xls", "../testdata/multi_test.tsv"},
}

// loadTestData reads the tab-separated file fn into a new commonxl.Sheet that
// uses formatter ff. Each input line becomes one row and each tab-separated
// field one cell, stored as a string with number format 0.
//
// It returns an error if the file cannot be opened, if scanning fails (for
// example a line longer than bufio.Scanner's token limit), or if closing the
// file fails.
func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) {
	f, err := os.Open(fn)
	if err != nil {
		return nil, err
	}
	xs := &commonxl.Sheet{
		Formatter: ff,
	}

	row := 0
	s := bufio.NewScanner(f)
	for s.Scan() {
		record := strings.Split(s.Text(), "\t")
		for i, val := range record {
			xs.Put(row, i, val, 0)
		}
		row++
	}
	// Scan returning false is not necessarily EOF; without this check a read
	// failure would silently truncate the expected data.
	if err := s.Err(); err != nil {
		f.Close() // the scan error is the one worth reporting
		return nil, err
	}
	return xs, f.Close()
}

// TestBasic parses every workbook listed in testFilePairs and compares each
// cell of each visible sheet against the expected values loaded from the
// paired TSV file. The expected data is loaded once per workbook, using the
// formatter of the first sheet, and reused for the remaining sheets.
func TestBasic(t *testing.T) {
	for _, pair := range testFilePairs {
		log.Println("Testing ", pair[0])

		wb, err := Open(pair[0])
		if err != nil {
			t.Fatal(err)
		}

		names, err := wb.List()
		if err != nil {
			t.Fatal(err)
		}

		var expected *commonxl.Sheet
		for _, name := range names {
			coll, err := wb.Get(name)
			if err != nil {
				t.Fatal(err)
			}
			got := coll.(*commonxl.Sheet)

			// lazily load the expected values when the first sheet is seen
			if expected == nil {
				expected, err = loadTestData(pair[1], got.Formatter)
				if err != nil {
					t.Fatal(err)
				}
			}

			for ri, cells := range got.Rows {
				for ci, cell := range cells {
					if expected.Rows[ri][ci].Equal(cell) {
						continue
					}
					t.Errorf("mismatch at %s (%d,%d): '%v' <> '%v' expected", pair[0], ri, ci,
						cell, expected.Rows[ri][ci])
				}
			}
		}

		if err := wb.Close(); err != nil {
			t.Fatal(err)
		}
	}
}


================================================
FILE: xls/strings.go
================================================
package xls

import (
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"unicode/utf16"
)

// decodeShortXLUnicodeString decodes a ShortXLUnicodeString (MS-XLS 2.5.240)
// from the start of raw. The layout is a 1-byte character count, a 1-byte
// flags field and then the characters: one byte each when flag bit 0 is
// clear, or two little-endian bytes each when it is set.
//
// It is identical to decodeXLUnicodeString except that the character count is
// 8 bits wide instead of 16.
//
// It returns the decoded string and the number of bytes of raw consumed
// (header included). An error is returned, instead of panicking, when raw is
// too short to hold the header or the announced number of characters.
func decodeShortXLUnicodeString(raw []byte) (string, int, error) {
	if len(raw) < 2 {
		return "", 0, errors.New("xls: truncated short unicode string header")
	}
	cch := int(raw[0])
	flags := raw[1]
	raw = raw[2:]

	content := make([]uint16, cch)
	if (flags & 0x1) == 0 {
		// 16-bit characters but only the bottom 8bits are stored
		if len(raw) < cch {
			return "", 0, errors.New("xls: truncated short unicode string")
		}
		for i, x := range raw[:cch] {
			content[i] = uint16(x)
		}
		return string(utf16.Decode(content)), cch + 2, nil
	}

	// 16-bit characters
	if len(raw) < 2*cch {
		return "", 0, errors.New("xls: truncated short unicode string")
	}
	for i := range content {
		content[i] = binary.LittleEndian.Uint16(raw[2*i:])
	}
	return string(utf16.Decode(content)), 2*cch + 2, nil
}

// decodeXLUnicodeString decodes an XLUnicodeString (MS-XLS 2.5.294) from the
// start of raw. The layout is a 2-byte little-endian character count, a
// 1-byte flags field and then the characters: one byte each when flag bit 0
// is clear, or two little-endian bytes each when it is set.
//
// It is identical to decodeShortXLUnicodeString except that the character
// count is 16 bits wide instead of 8.
//
// It returns the decoded string and the number of bytes of raw consumed
// (header included). An error is returned, instead of panicking, when raw is
// too short to hold the header or the announced number of characters.
func decodeXLUnicodeString(raw []byte) (string, int, error) {
	if len(raw) < 3 {
		return "", 0, errors.New("xls: truncated unicode string header")
	}
	cch := int(binary.LittleEndian.Uint16(raw[:2]))
	flags := raw[2]
	raw = raw[3:]

	content := make([]uint16, cch)
	if (flags & 0x1) == 0 {
		// 16-bit characters but only the bottom 8bits are stored
		if len(raw) < cch {
			return "", 0, errors.New("xls: truncated unicode string")
		}
		for i, x := range raw[:cch] {
			content[i] = uint16(x)
		}
		return string(utf16.Decode(content)), cch + 3, nil
	}

	// 16-bit characters
	if len(raw) < 2*cch {
		return "", 0, errors.New("xls: truncated unicode string")
	}
	for i := range content {
		content[i] = binary.LittleEndian.Uint16(raw[2*i:])
	}
	return string(utf16.Decode(content)), 2*cch + 3, nil
}

// decodeXLUnicodeRichExtendedString reads one XLUnicodeRichExtendedString
// (MS-XLS 2.5.293) from r and returns its text.
//
// The header is a 16-bit character count and a flags byte, optionally
// followed by a 16-bit formatting-run count (flag 0x8) and a 32-bit size of
// the phonetic block (flag 0x4). The characters follow, one byte each when
// flag 0x1 is clear and two bytes each otherwise. Any trailing formatting
// runs (4 bytes each) and phonetic data are read and discarded.
//
// For 8-bit text a short read that produced at least one byte is tolerated
// and yields the characters read so far; other read failures are returned.
func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
	var (
		charCount uint16
		optBits   uint8
		runCount  uint16
		extBytes  int32
	)
	if err := binary.Read(r, binary.LittleEndian, &charCount); err != nil {
		return "", err
	}
	if err := binary.Read(r, binary.LittleEndian, &optBits); err != nil {
		return "", err
	}
	if (optBits & 0x8) != 0 {
		// rich formatting data is present
		if err := binary.Read(r, binary.LittleEndian, &runCount); err != nil {
			return "", err
		}
	}
	if (optBits & 0x4) != 0 {
		// phonetic string data is present
		if err := binary.Read(r, binary.LittleEndian, &extBytes); err != nil {
			return "", err
		}
	}

	units := make([]uint16, charCount)
	if (optBits & 0x1) != 0 {
		// 16-bit characters
		if err := binary.Read(r, binary.LittleEndian, units); err != nil {
			return "", err
		}
	} else {
		// 16-bit characters but only the bottom 8bits
		packed := make([]byte, charCount)
		got, rerr := io.ReadFull(r, packed)
		if got == 0 && rerr != nil && rerr != io.ErrUnexpectedEOF {
			return "", rerr
		}
		if got < len(packed) {
			// keep whatever was read
			packed = packed[:got]
			units = units[:got]
		}
		for i, c := range packed {
			units[i] = uint16(c)
		}
	}

	if runCount > 0 {
		// skip the rich formatting runs
		if _, err := io.CopyN(ioutil.Discard, r, int64(runCount)*4); err != nil {
			return "", err
		}
	}
	if extBytes > 0 {
		// skip the phonetic string data
		if _, err := io.CopyN(ioutil.Discard, r, int64(extBytes)); err != nil {
			return "", err
		}
	}

	return string(utf16.Decode(units)), nil
}

// parseSST reads an array of XLUnicodeRichExtendedString values (the shared
// string table) out of recs and returns the decoded strings in file order.
//
// recs[0] supplies the table header: the string count is read from bytes 4-8
// and the string data starts at byte 8. The remaining records supply the data
// that did not fit in the first one (presumably Continue records - confirm
// against the caller).
//
// The only error returned is "xls: off by one", raised when a single stray
// byte is left in a record after a 16-bit character was read.
//
// NOTE(review): whenever the data runs out in the middle of a string or of
// its trailing formatting/phonetic data, i is advanced and recs[i] indexed
// without a bounds check, so a truncated table panics rather than returning
// an error.
func parseSST(recs []*rec) ([]string, error) {
	// The quirky thing about this code is that when strings cross a record
	// boundary, there's an intervening flags byte that MAY change the string
	// from an 8-bit encoding to 16-bit or vice versa.

	//totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
	numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])

	all := make([]string, 0, numStrings)
	// scratch buffer for the UTF-16 code units of the string being decoded
	current := make([]uint16, 32*1024)

	buf := recs[0].Data[8:]
	for i := 0; i < len(recs); {
		var cRunBytes int
		var flags byte
		var cbExtRs uint32

		// decode strings until the current record's data is used up
		for len(buf) > 0 {
			// string header: 16-bit character count followed by a flags byte
			slen := binary.LittleEndian.Uint16(buf)
			buf = buf[2:]
			flags = buf[0]
			buf = buf[1:]

			if (flags & 0x8) != 0 {
				// rich formating data is present
				cRun := binary.LittleEndian.Uint16(buf)
				cRunBytes = int(cRun) * 4
				buf = buf[2:]
			}
			if (flags & 0x4) != 0 {
				// phonetic string data is present
				cbExtRs = binary.LittleEndian.Uint32(buf)
				buf = buf[4:]
			}

			///////
			// NOTE(review): blx and bly are clamped here but never read
			// afterwards; this looks like leftover debugging code.
			blx := len(buf)
			bly := len(buf) - 5
			if blx > 5 {
				blx = 5
			}
			if bly < 0 {
				bly = 0
			}

			// this block will read the string data, but transparently
			// handle continuing across records
			if int(slen) > cap(current) {
				current = make([]uint16, slen)
			} else {
				current = current[:slen]
			}
			for j := 0; j < int(slen); j++ {
				if len(buf) == 0 {
					// the string continues in the next record, whose first
					// byte re-states the 8/16-bit encoding flag
					i++
					if (recs[i].Data[0] & 1) == 0 {
						flags &= 0xFE
					} else {
						flags |= 1
					}
					buf = recs[i].Data[1:]
				}

				if (flags & 1) == 0 { //8-bit
					current[j] = uint16(buf[0])
					buf = buf[1:]
				} else { //16-bit
					current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
					buf = buf[2:]
					if len(buf) == 1 {
						return nil, errors.New("xls: off by one")
					}
				}
			}

			s := string(utf16.Decode(current))
			all = append(all, s)

			///////

			// skip the rich formatting runs, which may spill into following
			// records (no flags byte is skipped in that case)
			for cRunBytes > 0 {
				if len(buf) >= int(cRunBytes) {
					buf = buf[cRunBytes:]
					cRunBytes = 0
				} else {
					cRunBytes -= len(buf)
					i++
					buf = recs[i].Data
				}
			}

			// skip the phonetic data the same way
			for cbExtRs > 0 {
				if len(buf) >= int(cbExtRs) {
					buf = buf[cbExtRs:]
					cbExtRs = 0
				} else {
					cbExtRs -= uint32(len(buf))
					i++
					buf = recs[i].Data
				}
			}
		}
		// record exhausted exactly at a string boundary: the next record
		// starts with a fresh string header
		i++
		if i < len(recs) {
			buf = recs[i].Data
		}
	}

	return all, nil
}


================================================
FILE: xls/structs.go
================================================
package xls

import (
	"fmt"
	"math"
)

// header holds the version and document-type fields that introduce a
// substream (presumably the payload of a BOF record - confirm against the
// code that fills it in).
type header struct {
	Version  uint16 // An unsigned integer that specifies the BIFF version of the file. The value MUST be 0x0600.
	DocType  uint16 // An unsigned integer that specifies the document type of the substream of records following this record. For more information about the layout of the sub-streams in the workbook stream see File Structure.
	RupBuild uint16 // An unsigned integer that specifies the build identifier.
	RupYear  uint16 // An unsigned integer that specifies the year when this BIFF version was first created. The value MUST be 0x07CC or 0x07CD.
	MiscBits uint64 // lots of miscellaneous bits and flags we're not going to check
}

// rec is a single record of the workbook stream (MS-XLS section 2.1.4):
// a record type, the payload size and the raw payload bytes.
type rec struct {
	RecType recordType // record type; selects how Data is interpreted
	RecSize uint16     // must be between 0 and 8224
	Data    []byte     // len(rec.data) = rec.recsize
}

// boundSheet describes one sheet of the workbook: where its substream starts,
// whether it is hidden, what kind of sheet it is and its name.
type boundSheet struct {
	Position    uint32 // A FilePointer as specified in [MS-OSHARED] section 2.2.1.5 that specifies the stream position of the start of the BOF record for the sheet.
	HiddenState byte   // (2 bits) An unsigned integer that specifies the hidden state of the sheet; 0 means visible, any other value hidden.
	SheetType   byte   // An unsigned integer that specifies the sheet type. 00=worksheet
	Name        string // sheet name, as matched by WorkBook.Get
}

///////
// shRow describes one row of a sheet: its index, the span of columns in use
// and its height.
// NOTE(review): presumably mirrors the layout of the Row record; it is not
// referenced by the sheet parser in this package - confirm before relying on it.
type shRow struct {
	RowIndex uint16 // 0-based
	FirstCol uint16 // 0-based
	LastCol  uint16 // 1-based!
	Height   uint16
	Reserved uint32
	Flags    uint32
}

// shRef8 is a rectangular cell range given by its first and last row and its
// first and last column, all 0-based. The field order matches the order in
// which the sheet parser reads the ranges of hyperlink and merged-cell records.
type shRef8 struct {
	FirstRow uint16 // 0-based
	LastRow  uint16 // 0-based
	FirstCol uint16 // 0-based
	LastCol  uint16 // 0-based
}
// shMulRK describes a run of RK-encoded values stored in consecutive columns
// of one row, starting at FirstCol.
// NOTE(review): presumably mirrors the MulRk record; the sheet parser decodes
// that record directly from bytes and does not use this type - confirm.
type shMulRK struct {
	RowIndex uint16 // 0-based
	FirstCol uint16 // 0-based
	Values   []RkRec
	LastCol  uint16 // 0-based?
}
// RkRec is one entry of a shMulRK: an RK-encoded value together with a cell
// format index (presumably an index into the workbook's XF table - confirm).
type RkRec struct {
	IXFCell uint16
	Value   RKNumber
}

// shRK describes a single cell holding an RK-encoded value: its position, a
// cell format index and the value.
// NOTE(review): presumably mirrors the RK record; the sheet parser decodes
// that record directly from bytes and does not use this type - confirm.
type shRK struct {
	RowIndex uint16 // 0-based
	Col      uint16 // 0-based
	IXFCell  uint16
	Value    RKNumber
}

// RKNumber is the packed 32-bit number encoding used by RK and MulRk cell
// records.
//
// Bit 0 set means the stored number must be divided by 100. Bit 1 selects
// the representation of the remaining 30 bits: when set they are a signed
// integer, when clear they are the 30 most significant bits of an IEEE 754
// double whose remaining low bits are zero.
type RKNumber uint32

// IsInteger reports whether r is a plain integer: the integer flag is set
// and no division by 100 is requested. Values for which it returns false
// should be read with Float64.
func (r RKNumber) IsInteger() bool {
	return (r&1) == 0 && (r&2) != 0
}

// Int returns the value of an integer-encoded r. If the divide-by-100 flag is
// also set the quotient is truncated toward zero. It returns 0 when r is
// float-encoded; use Float64 for those values.
func (r RKNumber) Int() int {
	if (r & 2) == 0 {
		return 0
	}
	val := int32(r) >> 2 // arithmetic shift keeps the sign of the 30-bit value
	if (r & 1) != 0 {
		return int(val / 100)
	}
	return int(val)
}

// Float64 returns the numeric value of r for every flag combination.
//
// Integer-encoded values are converted as well; in particular an integer
// with the divide-by-100 flag (for example 123 stored for 1.23) yields the
// fractional value rather than 0.
func (r RKNumber) Float64() float64 {
	var v float64
	if (r & 2) != 0 {
		// 30-bit signed integer
		v = float64(int32(r) >> 2)
	} else {
		// the upper 30 bits of r are the upper 30 bits of the double
		v = math.Float64frombits(uint64(r&^3) << 32)
	}
	if (r & 1) != 0 {
		v /= 100.0
	}
	return v
}

// String formats r as an integer when IsInteger reports true and as a
// floating point number otherwise.
func (r RKNumber) String() string {
	if r.IsInteger() {
		return fmt.Sprint(r.Int())
	}
	return fmt.Sprint(r.Float64())
}


================================================
FILE: xls/xls.go
================================================
// Package xls implements the Microsoft Excel Binary File Format (.xls) Structure.
// More specifically, it contains just enough detail to extract cell contents,
// data types, and last-calculated formula values. In particular, it does NOT
// implement formatting or formula calculations.
package xls

// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06

import (
	"context"
	"encoding/binary"
	"errors"
	"io"
	"log"
	"sync"

	"github.com/pbnjay/grate"
	"github.com/pbnjay/grate/commonxl"
	"github.com/pbnjay/grate/xls/cfb"
	"github.com/pbnjay/grate/xls/crypto"
)

// Registers Open with grate under the name "xls" when the package is
// initialized.
// NOTE(review): the second argument (1) looks like a priority/ordering value
// — confirm against grate.Register.
var _ = grate.Register("xls", 1, Open)

// WorkBook represents an Excel workbook containing 1 or more sheets.
//
// It is filled by Open, which reads the "Workbook" stream of the compound
// document and splits it into substreams of raw records.
type WorkBook struct {
	// filename and doc are set by Open; ctx is not used in this file.
	filename string
	ctx      context.Context
	doc      *cfb.Document

	// prot is returned by IsProtected. h is the header decoded from the most
	// recently processed BOF record. sheets has one entry per BoundSheet8
	// record. codepage and dateMode hold the raw values of the CodePage and
	// Date1904 records. strings is the parsed shared string table.
	prot     bool
	h        *header
	sheets   []*boundSheet
	codepage uint16
	dateMode uint16
	strings  []string

	// substreams groups the raw records by top-level BOF..EOF section.
	// password is not used in this file.
	password   string
	substreams [][]*rec

	// fpos is the running byte offset while scanning the stream;
	// pos2substream maps the offset of each top-level BOF record to its
	// index in substreams.
	fpos          int64
	pos2substream map[int64]int

	// nfmt collects the number formats declared by Format records; xfs holds
	// the format number of each XF record, in file order.
	nfmt commonxl.Formatter
	xfs  []uint16
}

// IsProtected returns the workbook's prot flag.
func (b *WorkBook) IsProtected() bool {
	return b.prot
}

// Open reads the named file as a compound document and loads the records of
// its "Workbook" stream into a new WorkBook.
//
// A missing "Workbook" stream is reported wrapped with grate.ErrNotInFormat.
// On any failure the returned Source is nil, so callers never receive a
// partially loaded workbook alongside an error.
func Open(filename string) (grate.Source, error) {
	doc, err := cfb.Open(filename)
	if err != nil {
		return nil, err
	}

	b := &WorkBook{
		filename: filename,
		doc:      doc,

		pos2substream: make(map[int64]int, 16),
		xfs:           make([]uint16, 0, 128),
	}

	rdr, err := doc.Open("Workbook")
	if err != nil {
		return nil, grate.WrapErr(err, grate.ErrNotInFormat)
	}
	raw, err := io.ReadAll(rdr)
	if err != nil {
		return nil, err
	}

	if err := b.loadFromStream(raw); err != nil {
		// hand any records gathered so far back to the pool before bailing
		b.Close()
		return nil, err
	}
	return b, nil
}

// loadFromStream parses the raw workbook stream, treating it as not yet
// decrypted (loadFromStream2 switches to the decrypting path if it finds a
// FilePass record).
func (b *WorkBook) loadFromStream(raw []byte) error {
	return b.loadFromStream2(raw, false)
}

// loadFromStreamWithDecryptor runs the whole raw stream through dec, patches
// the parts that are stored in the clear back into the decrypted output, and
// then parses the result with loadFromStream2.
//
// Truncated or malformed records yield an error instead of a slice-bounds
// panic, and any error reported by the decryptor aborts the load.
func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor) error {
	// interestingly (insecurely) BIFF8 keeps Record Types and sizes in the clear,
	// has a few records that are not encrypted, and has 1 record type that does
	// not encrypt the 32bit integer position at the beginning (while encrypting
	// the rest). It also resets the encryption block counter every 1024 bytes
	// (counting all the "skipped" bytes described above).
	//
	// So this code streams the records through the decryption, but also records
	// a set of overlays applied to the final result which restore the
	// "cleartext" contents in line with the decrypted content.

	if grate.Debug {
		log.Println("  Decrypting xls stream with standard RC4")
	}

	pos := 0
	zeros := [8224]byte{}

	type overlay struct {
		Pos int

		RecType   recordType
		DataBytes uint16
		Data      []byte // NB len() not necessarily = DataBytes
	}
	var replaceBlocks []overlay

	// NOTE(review): a trailing header-only record (exactly 4 bytes left) is
	// not processed by this loop; kept as in the original.
	for len(raw[pos:]) > 4 {
		o := overlay{
			Pos:       pos,
			RecType:   recordType(binary.LittleEndian.Uint16(raw[pos : pos+2])),
			DataBytes: binary.LittleEndian.Uint16(raw[pos+2 : pos+4]),
		}
		pos += 4

		size := int(o.DataBytes)
		if len(raw)-pos < size {
			// the header promises more data than the stream holds
			return io.ErrUnexpectedEOF
		}

		// copy to output and decryption stream
		if err := binary.Write(dec, binary.LittleEndian, o.RecType); err != nil {
			return err
		}
		if err := binary.Write(dec, binary.LittleEndian, o.DataBytes); err != nil {
			return err
		}
		tocopy := size

		switch o.RecType {
		case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
			// untouched data goes directly into output
			o.Data = raw[pos : pos+size]
			pos += size
			pad := zeros[:]
			if size > len(pad) {
				// larger than a well-formed record; still keep the
				// decryptor's byte count in step with the input
				pad = make([]byte, size)
			}
			if _, err := dec.Write(pad[:size]); err != nil {
				return err
			}
			tocopy = 0

		case RecTypeBoundSheet8:
			if size < 4 {
				return errors.New("xls: malformed BoundSheet8 record")
			}
			// copy 32-bit position to output
			o.Data = raw[pos : pos+4]
			pos += 4
			if _, err := dec.Write(zeros[:4]); err != nil {
				return err
			}
			tocopy -= 4
		}

		if tocopy > 0 {
			if _, err := dec.Write(raw[pos : pos+tocopy]); err != nil {
				return err
			}
			pos += tocopy
		}
		replaceBlocks = append(replaceBlocks, o)
	}
	dec.Flush()

	alldata := dec.Bytes()
	for _, o := range replaceBlocks {
		offs := o.Pos
		if offs+4 > len(alldata) {
			// decryptor produced less output than it was fed
			return io.ErrUnexpectedEOF
		}
		binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
		binary.LittleEndian.PutUint16(alldata[offs+2:], o.DataBytes)
		if len(o.Data) > 0 {
			copy(alldata[offs+4:], o.Data)
		}
	}

	// recurse into the stream parser now that things are decrypted
	return b.loadFromStream2(alldata, true)
}

// Close hands every loaded record back to recPool and empties the substream
// list. It always returns nil.
func (b *WorkBook) Close() error {
	for idx := range b.substreams {
		for _, record := range b.substreams[idx] {
			// drop the reference into the stream buffer so it can be collected
			record.Data = nil
			recPool.Put(record)
		}
		b.substreams[idx] = b.substreams[idx][:0]
	}
	b.substreams = b.substreams[:0]
	return nil
}

// loadFromStream2 splits raw into records grouped by top-level BOF..EOF
// substream, then decodes the workbook-level records it understands (SST,
// BOF, CodePage, Date1904, Format, XF, BoundSheet8).
//
// When a FilePass record is found and isDecrypted is false, parsing restarts
// through loadFromStreamWithDecryptor using the full stream. Records that
// are too short for their fixed fields, or that appear before the first BOF,
// produce an error rather than a panic.
func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
	// returned whenever a record carries fewer bytes than its fixed fields need
	errShort := errors.New("xls: record data too short")

	b.h = &header{}
	substr := -1
	nestedBOF := 0
	b.pos2substream = make(map[int64]int, 10)
	b.fpos = 0

	// IMPORTANT: if there are any existing records, we need to return them to the pool
	for i, sub := range b.substreams {
		for _, r := range sub {
			recPool.Put(r)
		}
		b.substreams[i] = b.substreams[i][:0]
	}
	b.substreams = b.substreams[:0]

	rawfull := raw
	nr, no, err := b.nextRecord(raw)
	for err == nil {
		raw = raw[no:]
		switch nr.RecType {
		case RecTypeEOF:
			nestedBOF--
		case RecTypeBOF:
			// when substreams are nested, keep them in the same grouping
			if nestedBOF == 0 {
				substr = len(b.substreams)
				b.substreams = append(b.substreams, []*rec{})
				b.pos2substream[b.fpos] = substr
			}
			nestedBOF++
		}
		b.fpos += int64(4 + len(nr.Data))

		// if there's a FilePass record, the data is encrypted
		if nr.RecType == RecTypeFilePass && !isDecrypted {
			if len(nr.Data) < 2 {
				return errShort
			}
			etype := binary.LittleEndian.Uint16(nr.Data)
			switch etype {
			case 1:
				dec, err := crypto.NewBasicRC4(nr.Data[2:])
				if err != nil {
					log.Println("xls: rc4 encryption failed to set up", err)
					return err
				}
				return b.loadFromStreamWithDecryptor(rawfull, dec)
			case 2, 3, 4:
				log.Println("need Crypto API RC4 decryptor")
				return errors.New("xls: unsupported Crypto API encryption method")
			default:
				return errors.New("xls: unsupported encryption method")
			}
		}

		if substr < 0 {
			// no BOF seen yet, so there is no substream to file this under
			recPool.Put(nr)
			return errors.New("xls: invalid stream, record found before the first BOF")
		}
		b.substreams[substr] = append(b.substreams[substr], nr)
		nr, no, err = b.nextRecord(raw)
	}
	if err == io.EOF {
		err = nil
	}
	if err != nil {
		return err
	}

	for ss, records := range b.substreams {
		if grate.Debug {
			log.Printf("  Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
		}
		for i, nr := range records {
			if len(nr.Data) == 0 {
				continue
			}

			switch nr.RecType {
			case RecTypeSST:
				// Shared String Table is often continued across multiple records,
				// so we want to gather them all before starting to parse (some
				// strings may span the gap between records)
				recSet := []*rec{nr}

				lastIndex := i
				for len(records) > (lastIndex+1) && records[lastIndex+1].RecType == RecTypeContinue {
					lastIndex++
					recSet = append(recSet, records[lastIndex])
				}

				b.strings, err = parseSST(recSet)
				if err != nil {
					return err
				}

			case RecTypeContinue:
				// no-op (used above)
			case RecTypeEOF:
				// done

			case RecTypeBOF:
				if len(nr.Data) < 16 {
					return errShort
				}
				b.h = &header{
					Version:  binary.LittleEndian.Uint16(nr.Data[0:2]),
					DocType:  binary.LittleEndian.Uint16(nr.Data[2:4]),
					RupBuild: binary.LittleEndian.Uint16(nr.Data[4:6]),
					RupYear:  binary.LittleEndian.Uint16(nr.Data[6:8]),
					MiscBits: binary.LittleEndian.Uint64(nr.Data[8:16]),
				}

				if b.h.Version != 0x0600 {
					return errors.New("xls: invalid file version")
				}
				if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD {
					return errors.New("xls: unsupported biff version")
				}
				/*
					if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
						// we only support the workbook or worksheet substreams
						log.Println("xls: unsupported document type")
						//break
					}
				*/

			case RecTypeCodePage:
				if len(nr.Data) < 2 {
					return errShort
				}
				// BIFF8 is entirely UTF-16LE so this is actually ignored
				b.codepage = binary.LittleEndian.Uint16(nr.Data)

			case RecTypeDate1904:
				if len(nr.Data) < 2 {
					return errShort
				}
				b.dateMode = binary.LittleEndian.Uint16(nr.Data)

			case RecTypeFormat:
				if len(nr.Data) < 2 {
					return errShort
				}
				// Format maps a format ID to a code string
				fmtNo := binary.LittleEndian.Uint16(nr.Data)
				formatStr, _, err := decodeXLUnicodeString(nr.Data[2:])
				if err != nil {
					log.Println("fail2", err)
					return err
				}
				b.nfmt.Add(fmtNo, formatStr)

			case RecTypeXF:
				if len(nr.Data) < 4 {
					return errShort
				}
				// XF records merge multiple style and format directives to one ID
				// ignore font id at nr.Data[0:2]
				fmtNo := binary.LittleEndian.Uint16(nr.Data[2:])
				b.xfs = append(b.xfs, fmtNo)

			case RecTypeBoundSheet8:
				if len(nr.Data) < 6 {
					return errShort
				}
				// Identifies the position within the stream, visibility state,
				// and name of a worksheet
				bs := &boundSheet{}
				bs.Position = binary.LittleEndian.Uint32(nr.Data[:4])
				bs.HiddenState = nr.Data[4]
				bs.SheetType = nr.Data[5]

				bs.Name, _, err = decodeShortXLUnicodeString(nr.Data[6:])
				if err != nil {
					return err
				}
				b.sheets = append(b.sheets, bs)
			default:
				if grate.Debug && ss == 0 {
					log.Println("    Unhandled record type:", nr.RecType, i)
				}
			}
		}
	}

	return nil
}

// recPool recycles rec values between loads: nextRecord takes records from
// it, and Close / loadFromStream2 put them back.
var recPool = sync.Pool{
	New: func() interface{} {
		return &rec{}
	},
}

// nextRecord decodes the record at the start of raw.
//
// It returns the record (taken from recPool, with Data aliasing raw), the
// number of bytes consumed, and an error: io.EOF when fewer than 4 bytes
// remain, io.ErrUnexpectedEOF when the header declares more data than raw
// holds.
func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) {
	if len(raw) < 4 {
		return nil, 0, io.EOF
	}

	// Widen the size to int before adding the header length: in uint16
	// arithmetic 4+size wraps around for sizes above 65531.
	size := int(binary.LittleEndian.Uint16(raw[2:4]))
	if len(raw)-4 < size {
		return nil, 4, io.ErrUnexpectedEOF
	}

	r := recPool.Get().(*rec)
	r.RecType = recordType(binary.LittleEndian.Uint16(raw[:2]))
	r.RecSize = uint16(size)
	r.Data = raw[4 : 4+size]
	return r, 4 + size, nil
}


================================================
FILE: xlsx/comp_test.go
================================================
package xlsx

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestAllFiles opens every .xlsx file under ../testdata, loads each sheet and
// reads all of its rows. Any error fails the test.
func TestAllFiles(t *testing.T) {
	err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error {
		if err != nil {
			// info is nil when Walk could not stat the path (for example when
			// the testdata directory is missing), so report that instead of
			// dereferencing it
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".xlsx") {
			return nil
		}
		wb, err := Open(p)
		if err != nil {
			return err
		}

		sheets, err := wb.List()
		if err != nil {
			wb.Close()
			return err
		}
		for _, s := range sheets {
			sheet, err := wb.Get(s)
			if err != nil {
				wb.Close()
				return err
			}

			for sheet.Next() {
				sheet.Strings()
			}
		}

		return wb.Close()
	})
	if err != nil {
		t.Fatal(err)
	}
}


================================================
FILE: xlsx/sheets.go
================================================
package xlsx

import (
	"encoding/xml"
	"errors"
	"io"
	"log"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/pbnjay/grate"
	"github.com/pbnjay/grate/commonxl"
)

// Sheet is one worksheet of a Document. Its contents are parsed on first
// request (see Document.Get) into wrapped.
type Sheet struct {
	d       *Document // owning document
	relID   string    // relationship id taken from the workbook's sheet element
	name    string    // sheet name taken from the workbook's sheet element
	docname string    // name of the sheet's XML part inside the zip archive

	// err is errNotLoaded until parseSheet has run, then holds its result.
	err error

	// wrapped receives the parsed cell values.
	wrapped *commonxl.Sheet
}

// errNotLoaded is the initial value of Sheet.err; Document.Get compares
// against it to decide whether the sheet still has to be parsed.
var errNotLoaded = errors.New("xlsx: sheet not loaded")

// parseSheet reads the sheet's XML part and fills s.wrapped with its cell
// values, merged-cell markers and external hyperlinks.
//
// The sheet's relationship part (if any) is read first to resolve hyperlink
// ids to their targets. It returns any error from opening or tokenizing the
// sheet part; reaching the end of the part is not an error.
func (s *Sheet) parseSheet() error {
	s.wrapped = &commonxl.Sheet{
		Formatter: &s.d.fmt,
	}
	linkmap := make(map[string]string)
	base := filepath.Base(s.docname)
	sub := strings.TrimSuffix(s.docname, base)
	// zip entry names always use forward slashes, whatever the host OS
	relsname := filepath.ToSlash(filepath.Join(sub, "_rels", base+".rels"))
	dec, clo, err := s.d.openXML(relsname)
	if err == nil {
		// rels might not exist for every sheet
		tok, err := dec.RawToken()
		for ; err == nil; tok, err = dec.RawToken() {
			if v, ok := tok.(xml.StartElement); ok && v.Name.Local == "Relationship" {
				ax := getAttrs(v.Attr, "Id", "Type", "Target", "TargetMode")
				if ax[3] == "External" && ax[1] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" {
					linkmap[ax[0]] = ax[2]
				}
			}
		}
		clo.Close()
	}

	dec, clo, err = s.d.openXML(s.docname)
	if err != nil {
		return err
	}
	defer clo.Close()

	currentCellType := BlankCellType
	currentCell := ""
	var fno uint16
	var maxCol, maxRow int

	tok, err := dec.RawToken()
	for ; err == nil; tok, err = dec.RawToken() {
		switch v := tok.(type) {
		case xml.CharData:
			if currentCell == "" {
				continue
			}
			c, r := refToIndexes(currentCell)
			if c >= 0 && r >= 0 {
				var val interface{} = string(v)

				switch currentCellType {
				case BooleanCellType:
					// guard against empty character data
					val = len(v) > 0 && v[0] == '1'
				case DateCellType:
					log.Println("CELL DATE", val, fno)
				case NumberCellType:
					fval, err := strconv.ParseFloat(string(v), 64)
					if err == nil {
						val = fval
					}
					//log.Println("CELL NUMBER", val, numFormat)
				case SharedStringCellType:
					//log.Println("CELL SHSTR", val, currentCellType, numFormat)
					si, perr := strconv.ParseInt(string(v), 10, 64)
					if perr != nil || si < 0 || si >= int64(len(s.d.strings)) {
						// not a usable index (e.g. whitespace between tags or
						// a reference past the table): place nothing rather
						// than panic or substitute entry 0
						if grate.Debug {
							log.Printf("      Skipping invalid shared string index %q at %s", string(v), currentCell)
						}
						continue
					}
					val = s.d.strings[si]
				case BlankCellType:
					//log.Println("CELL BLANK")
					// don't place any values
					continue
				case ErrorCellType, FormulaStringCellType, InlineStringCellType:
					//log.Println("CELL ERR/FORM/INLINE", val, currentCellType)
				default:
					log.Println("CELL UNKNOWN", val, currentCellType, fno)
				}
				s.wrapped.Put(r, c, val, fno)
			}
		case xml.StartElement:
			switch v.Name.Local {
			case "dimension":
				ax := getAttrs(v.Attr, "ref")
				if ax[0] == "A1" {
					maxCol, maxRow = 1, 1
					// short-circuit empty sheet
					s.wrapped.Resize(1, 1)
					continue
				}
				dims := strings.Split(ax[0], ":")
				if len(dims) == 1 {
					maxCol, maxRow = refToIndexes(dims[0])
				} else {
					//minCol, minRow := refToIndexes(dims[0])
					maxCol, maxRow = refToIndexes(dims[1])
				}
				s.wrapped.Resize(maxRow, maxCol)
				//log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol)
			case "row":
				//currentRow = ax["r"] // unsigned int row index
				//log.Println("ROW", currentRow)
			case "c":
				ax := getAttrs(v.Attr, "t", "r", "s")
				currentCellType = CellType(ax[0])
				if currentCellType == BlankCellType {
					// a cell without a type attribute is treated as a number
					currentCellType = NumberCellType
				}
				currentCell = ax[1] // always an A1 style reference
				style := ax[2]
				// a missing or malformed style id parses as 0
				sid, _ := strconv.ParseInt(style, 10, 64)
				if sid >= 0 && int64(len(s.d.xfs)) > sid {
					fno = s.d.xfs[sid]
				} else {
					fno = 0
				}
				//log.Println("CELL", currentCell, sid, numFormat, currentCellType)
			case "v":
				//log.Println("CELL VALUE", ax)

			case "mergeCell":
				ax := getAttrs(v.Attr, "ref")
				dims := strings.Split(ax[0], ":")
				startCol, startRow := refToIndexes(dims[0])
				if startCol < 0 || startRow < 0 {
					// unparseable reference: nothing sensible to mark
					continue
				}
				endCol, endRow := startCol, startRow
				if len(dims) > 1 {
					endCol, endRow = refToIndexes(dims[1])
				}
				if endRow > maxRow {
					endRow = maxRow
				}
				if endCol > maxCol {
					endCol = maxCol
				}
				for r := startRow; r <= endRow; r++ {
					for c := startCol; c <= endCol; c++ {
						if r == startRow && c == startCol {
							// has data already!
						} else if c == startCol {
							// first and last column MAY be the same
							if r == endRow {
								s.wrapped.Put(r, c, grate.EndRowMerged, 0)
							} else {
								s.wrapped.Put(r, c, grate.ContinueRowMerged, 0)
							}
						} else if c == endCol {
							// first and last column are NOT the same
							s.wrapped.Put(r, c, grate.EndColumnMerged, 0)
						} else {
							s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0)
						}
					}
				}

			case "hyperlink":
				ax := getAttrs(v.Attr, "ref", "id")
				link, ok := linkmap[ax[1]]
				if !ok {
					// no external target known for this id (e.g. a link to a
					// location inside the workbook): leave the cell untouched
					// instead of overwriting it with an empty string
					continue
				}
				col, row := refToIndexes(ax[0])
				if col < 0 || row < 0 {
					continue
				}
				s.wrapped.Put(row, col, link, 0)
				s.wrapped.SetURL(row, col, link)

			case "worksheet", "mergeCells", "hyperlinks":
				// containers
			case "f":
				//log.Println("start: ", v.Name.Local, v.Attr)
			default:
				if grate.Debug {
					log.Println("      Unhandled sheet xml tag", v.Name.Local, v.Attr)
				}
			}
		case xml.EndElement:

			switch v.Name.Local {
			case "c":
				currentCell = ""
			case "row":
				//currentRow = ""
			}
		default:
			if grate.Debug {
				log.Printf("      Unhandled sheet xml tokens %T %+v", tok, tok)
			}
		}
	}
	if err == io.EOF {
		err = nil
	}
	return err
}


================================================
FILE: xlsx/simple_test.go
================================================
package xlsx

import (
	"bufio"
	"log"
	"os"
	"strings"
	"testing"

	"github.com/pbnjay/grate/commonxl"
)

// testFilePairs lists the fixtures used by TestBasic: each entry is an .xlsx
// file followed by the tab-separated file holding its expected cell values.
var testFilePairs = [][]string{
	{"../testdata/basic.xlsx", "../testdata/basic.tsv"},

	// TODO: custom formatter support
	//{"../testdata/basic2.xlsx", "../testdata/basic2.tsv"},

	// TODO: datetime and fraction formatter support
	//{"../testdata/multi_test.xlsx", "../testdata/multi_test.tsv"},
}

// loadTestData reads the tab-separated file fn into a new commonxl.Sheet that
// uses formatter ff. Each line becomes one row and each tab-separated field
// one cell, stored with format number 0.
//
// It returns an error if the file cannot be opened, read completely, or
// closed.
func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) {
	f, err := os.Open(fn)
	if err != nil {
		return nil, err
	}
	xs := &commonxl.Sheet{
		Formatter: ff,
	}

	row := 0
	s := bufio.NewScanner(f)
	for s.Scan() {
		for i, val := range strings.Split(s.Text(), "\t") {
			xs.Put(row, i, val, 0)
		}
		row++
	}
	// Scan also stops on read errors and over-long lines; don't hand back a
	// silently truncated sheet in that case
	if err := s.Err(); err != nil {
		f.Close()
		return nil, err
	}
	return xs, f.Close()
}

// TestBasic loads each workbook in testFilePairs and compares every cell of
// every sheet with the expected values from the paired TSV file. The
// expected data is loaded once per workbook, using the first sheet's
// formatter.
func TestBasic(t *testing.T) {
	for _, fnames := range testFilePairs {
		var trueData *commonxl.Sheet
		log.Println("Testing ", fnames[0])

		wb, err := Open(fnames[0])
		if err != nil {
			t.Fatal(err)
		}

		sheets, err := wb.List()
		if err != nil {
			t.Fatal(err)
		}
		firstLoad := true
		for _, s := range sheets {
			sheet, err := wb.Get(s)
			if err != nil {
				t.Fatal(err)
			}
			xsheet, ok := sheet.(*commonxl.Sheet)
			if !ok {
				t.Fatalf("sheet %q of %s has unexpected type %T", s, fnames[0], sheet)
			}
			if firstLoad {
				trueData, err = loadTestData(fnames[1], xsheet.Formatter)
				if err != nil {
					t.Fatal(err)
				}
				firstLoad = false
			}

			for xrow, xdata := range xsheet.Rows {
				for xcol, xval := range xdata {
					// report cells outside the expected data as failures
					// instead of panicking on the index
					if xrow >= len(trueData.Rows) || xcol >= len(trueData.Rows[xrow]) {
						t.Errorf("no expected value at %s (%d,%d) for '%v'", fnames[0], xrow, xcol, xval)
						continue
					}
					//t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol])
					if !trueData.Rows[xrow][xcol].Equal(xval) {
						t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol,
							xval, trueData.Rows[xrow][xcol])
						t.Fail()
					}
				}
			}
		}

		err = wb.Close()
		if err != nil {
			t.Fatal(err)
		}
	}
}


================================================
FILE: xlsx/types.go
================================================
package xlsx

import (
	"encoding/xml"
	"strconv"
	"strings"
)

// CellType is the value of a cell element's "t" attribute, naming how the
// cell's content is to be interpreted.
type CellType string

// CellTypes define data type in section 18.18.11
const (
	BlankCellType         CellType = ""          // no type attribute present
	BooleanCellType       CellType = "b"         // boolean
	DateCellType          CellType = "d"         // date
	ErrorCellType         CellType = "e"         // error
	NumberCellType        CellType = "n"         // number
	SharedStringCellType  CellType = "s"         // index into the shared string table
	FormulaStringCellType CellType = "str"       // formula string
	InlineStringCellType  CellType = "inlineStr" // inline string
)

// staticCellType is a single-rune marker for cells that carry no value of
// their own, such as the filler cells of a merged range.
type staticCellType rune

const (
	// an ordinary blank cell; renders as the empty string.
	staticBlank staticCellType = 0

	// a middle column of a merged cell.
	continueColumnMerged staticCellType = '→'
	// the final column of a merged cell.
	endColumnMerged staticCellType = '⇥'

	// a middle row of a merged cell.
	continueRowMerged staticCellType = '↓'
	// the final row of a merged cell.
	endRowMerged staticCellType = '⤓'
)

// String renders the marker as its rune; staticBlank renders as "".
func (s staticCellType) String() string {
	switch s {
	case staticBlank:
		return ""
	default:
		return string(rune(s))
	}
}

// returns the 0-based index of the column string:
//    "A"=0, "B"=1, "AA"=26, "BB"=53
func col2int(col string) int {
	idx := 0
	for _, c := range col {
		// bijective base 26: 'A' is '@'+1, so it contributes 1
		idx = idx*26 + int(c-'@')
	}
	return idx - 1
}

// refToIndexes converts a cell reference into 0-based column and row indexes.
//
// Two notations are understood:
//   - A1 style ("B3" -> column 1, row 2)
//   - absolute R1C1 style ("R3C2" -> column 1, row 2)
//
// It returns -1, -1 when r is too short, does not start with a letter
// prefix, or looks like R1C1 but is malformed. An A1 reference whose row
// part is not a number yields row -1.
func refToIndexes(r string) (column, row int) {
	if len(r) < 2 {
		return -1, -1
	}
	i1 := strings.IndexAny(r, "0123456789")
	if i1 <= 0 {
		return -1, -1
	}

	prefix, rest := r[:i1], r[i1:]
	i2 := strings.IndexByte(rest, 'C') // relative to rest, not to r
	if i2 == -1 {
		// A1 Reference mode. A failed parse leaves rn at 0, so the row comes
		// back as -1 and callers see it as invalid.
		rn, _ := strconv.ParseInt(rest, 10, 64)
		return col2int(prefix), int(rn) - 1
	}

	// R1C1 Reference Mode: "R<row>C<column>", both numbers 1-based
	if prefix != "R" {
		return -1, -1
	}
	rn, err := strconv.ParseInt(rest[:i2], 10, 64)
	if err != nil {
		return -1, -1
	}
	cn, err := strconv.ParseInt(rest[i2+1:], 10, 64)
	if err != nil {
		return -1, -1
	}
	return int(cn) - 1, int(rn) - 1
}

// getAttrs looks up the attributes named by keys (matched on the local name
// only) and returns their values in the same order as keys. A key with no
// matching attribute yields ""; if an attribute name occurs more than once,
// the last occurrence wins.
func getAttrs(attrs []xml.Attr, keys ...string) []string {
	values := make([]string, len(keys))
	for idx, key := range keys {
		for _, attr := range attrs {
			if attr.Name.Local != key {
				continue
			}
			values[idx] = attr.Value
		}
	}
	return values
}


================================================
FILE: xlsx/workbook.go
================================================
package xlsx

import (
	"encoding/xml"
	"errors"
	"io"
	"log"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/pbnjay/grate"
)

// parseRels reads a relationships part from dec and records each
// Relationship in d.rels as type => id => part name.
//
// Targets are resolved relative to basedir unless they start with "/", in
// which case the leading slash is dropped. Part names are always written
// with forward slashes, matching the names used inside the zip archive. The
// target of the officeDocument relationship is stored in d.primaryDoc.
func (d *Document) parseRels(dec *xml.Decoder, basedir string) error {
	tok, err := dec.RawToken()
	for ; err == nil; tok, err = dec.RawToken() {
		switch v := tok.(type) {
		case xml.StartElement:
			switch v.Name.Local {
			case "Relationships":
				// container
			case "Relationship":
				vals := make(map[string]string, 5)
				for _, a := range v.Attr {
					vals[a.Name.Local] = a.Value
				}
				relType, target := vals["Type"], vals["Target"]
				if _, ok := d.rels[relType]; !ok {
					d.rels[relType] = make(map[string]string)
				}
				if strings.HasPrefix(target, "/") {
					// handle malformed "absolute" paths cleanly
					d.rels[relType][vals["Id"]] = target[1:]
				} else {
					// Join uses the OS separator; zip names need "/"
					d.rels[relType][vals["Id"]] = filepath.ToSlash(filepath.Join(basedir, target))
				}
				if relType == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" {
					// strip the same leading slash here, otherwise the part
					// cannot be found in the archive
					d.primaryDoc = strings.TrimPrefix(target, "/")
				}
			default:
				if grate.Debug {
					log.Println("      Unhandled relationship xml tag", v.Name.Local, v.Attr)
				}
			}
		case xml.EndElement:
			// not needed
		default:
			if grate.Debug {
				log.Printf("      Unhandled relationship xml tokens %T %+v", tok, tok)
			}
		}
	}
	if err == io.EOF {
		err = nil
	}
	return err
}

// parseWorkbook scans the workbook part and appends one not-yet-loaded Sheet
// to d.sheets per "sheet" element. Each sheet's part name is looked up in
// the worksheet relationships by the element's "id" attribute. A sheet
// element without both an "id" and a "name" attribute is an error.
func (d *Document) parseWorkbook(dec *xml.Decoder) error {
	const worksheetRel = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"

	var (
		tok xml.Token
		err error
	)
	for tok, err = dec.RawToken(); err == nil; tok, err = dec.RawToken() {
		switch v := tok.(type) {
		case xml.EndElement:
			// not needed
		case xml.StartElement:
			switch v.Name.Local {
			case "workbook", "sheets":
				// containers
			case "sheet":
				var (
					sheetID, sheetName string
					hasID, hasName     bool
				)
				for _, attr := range v.Attr {
					switch attr.Name.Local {
					case "id":
						sheetID, hasID = attr.Value, true
					case "name":
						sheetName, hasName = attr.Value, true
					}
				}
				if !(hasID && hasName) {
					return errors.New("xlsx: invalid sheet definition")
				}
				d.sheets = append(d.sheets, &Sheet{
					d:       d,
					relID:   sheetID,
					name:    sheetName,
					docname: d.rels[worksheetRel][sheetID],
					err:     errNotLoaded,
				})
			default:
				if grate.Debug {
					log.Println("      Unhandled workbook xml tag", v.Name.Local, v.Attr)
				}
			}
		default:
			if grate.Debug {
				log.Printf("      Unhandled workbook xml tokens %T %+v", tok, tok)
			}
		}
	}
	// the loop only ends on a non-nil error; end of input is the normal case
	if err != io.EOF {
		return err
	}
	return nil
}

// parseStyles reads the styles part from dec. Every numFmt element is added
// to d.fmt, and d.xfs is rebuilt with one number-format id per xf element
// inside cellXfs, in document order.
//
// An xf element outside cellStyleXfs/cellXfs is ignored, and the cellXfs
// "count" attribute is used only as a bounded capacity hint.
func (d *Document) parseStyles(dec *xml.Decoder) error {
	// upper bound for the capacity taken from the untrusted count attribute;
	// longer lists still work, they just grow through append
	const maxXFHint = 1 << 16

	baseNumFormats := []string{}
	d.xfs = d.xfs[:0]

	// 0 = elsewhere, 1 = inside cellStyleXfs, 2 = inside cellXfs
	section := 0
	tok, err := dec.RawToken()
	for ; err == nil; tok, err = dec.RawToken() {
		switch v := tok.(type) {
		case xml.StartElement:
			switch v.Name.Local {
			case "styleSheet":
				// container
			case "numFmt":
				ax := getAttrs(v.Attr, "numFmtId", "formatCode")
				// a malformed id is stored as whatever ParseInt returns (0 or
				// the clamped value), as before
				fmtNo, _ := strconv.ParseInt(ax[0], 10, 16)
				d.fmt.Add(uint16(fmtNo), ax[1])

			case "cellStyleXfs":
				section = 1
			case "cellXfs":
				section = 2
				ax := getAttrs(v.Attr, "count")
				n, perr := strconv.ParseInt(ax[0], 10, 64)
				if perr != nil || n < 0 || n > maxXFHint {
					// a negative capacity would panic in make and a huge one
					// would allocate needlessly
					n = 0
				}
				d.xfs = make([]uint16, 0, n)

			case "xf":
				ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId")
				switch section {
				case 1:
					// load base styles, but only save number format
					if ax[1] == "0" {
						baseNumFormats = append(baseNumFormats, "0")
					} else {
						baseNumFormats = append(baseNumFormats, ax[0])
					}
				case 2:
					// actual referencable cell styles
					// 1) get base style so we can inherit format properly
					// NOTE(review): step 2 always overwrites numFmtID, so the
					// inherited value never takes effect — confirm whether
					// inheritance was intended when numFmtId is absent.
					baseID, _ := strconv.ParseInt(ax[2], 10, 64)
					numFmtID := "0"
					if baseID >= 0 && int64(len(baseNumFormats)) > baseID {
						numFmtID = baseNumFormats[baseID]
					}

					// 2) check if this XF overrides the base format
					if ax[1] == "0" {
						// remove the format (if it was inherited)
						numFmtID = "0"
					} else {
						numFmtID = ax[0]
					}

					// a missing or malformed id falls back to format 0
					nfid, _ := strconv.ParseInt(numFmtID, 10, 16)
					d.xfs = append(d.xfs, uint16(nfid))
				default:
					// an xf outside the two known containers carries nothing
					// we use; a library must not panic on unexpected input
					if grate.Debug {
						log.Println("  Ignoring xf outside cellStyleXfs/cellXfs", v.Attr)
					}
				}
			default:
				if grate.Debug {
					log.Println("  Unhandled style xml tag", v.Name.Local, v.Attr)
				}
			}
		case xml.EndElement:
			switch v.Name.Local {
			case "cellStyleXfs":
				section = 0
			case "cellXfs":
				section = 0
			}
		default:
			if grate.Debug {
				log.Printf("      Unhandled style xml tokens %T %+v", tok, tok)
			}
		}
	}
	if err == io.EOF {
		err = nil
	}
	return err
}

// parseSharedStrings reads the shared string table from dec and appends one
// entry to d.strings per "si" element.
//
// An entry's value is the concatenation of the character data of its "t"
// elements. Character data outside "t" (such as whitespace between tags in
// indented XML) and the text of phonetic runs ("rPh") is not part of the
// value.
func (d *Document) parseSharedStrings(dec *xml.Decoder) error {
	var val strings.Builder
	inText := false // inside a <t> element
	phonetic := 0   // nesting depth of <rPh> elements

	tok, err := dec.RawToken()
	for ; err == nil; tok, err = dec.RawToken() {
		switch v := tok.(type) {
		case xml.CharData:
			if inText && phonetic == 0 {
				val.Write(v)
			}
		case xml.StartElement:
			switch v.Name.Local {
			case "si":
				val.Reset()
			case "t":
				// no attributes to parse, we only want the CharData ...
				inText = true
			case "rPh":
				// phonetic hint text is not part of the string itself
				phonetic++
			case "sst":
				// main container
			default:
				if grate.Debug {
					log.Println("  Unhandled SST xml tag", v.Name.Local, v.Attr)
				}
			}
		case xml.EndElement:
			switch v.Name.Local {
			case "si":
				d.strings = append(d.strings, val.String())
			case "t":
				inText = false
			case "rPh":
				if phonetic > 0 {
					phonetic--
				}
			}
		default:
			if grate.Debug {
				log.Printf("    Unhandled SST xml token %T %+v", tok, tok)
			}
		}
	}
	if err == io.EOF {
		err = nil
	}
	return err
}


================================================
FILE: xlsx/xlsx.go
================================================
package xlsx

import (
	"archive/zip"
	"encoding/xml"
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"

	"github.com/pbnjay/grate"
	"github.com/pbnjay/grate/commonxl"
)

// Registers Open with grate under the name "xlsx" when the package is
// initialized.
// NOTE(review): the second argument (5) looks like a priority/ordering value
// — confirm against grate.Register.
var _ = grate.Register("xlsx", 5, Open)

// Document contains an Office Open XML document.
//
// It is created by Open, which keeps the underlying file open for reading
// parts of the zip archive on demand until Close is called.
type Document struct {
	filename   string      // path passed to Open
	f          *os.File    // open handle backing the zip reader; closed by Close
	r          *zip.Reader // archive view over f
	primaryDoc string      // target of the officeDocument relationship

	// type => id => filename
	rels    map[string]map[string]string
	sheets  []*Sheet           // one entry per sheet element of the workbook, in document order
	strings []string           // shared string table
	xfs     []uint16           // number-format id per cellXfs entry
	fmt     commonxl.Formatter // number formats declared in the styles part
}

// Close drops the parsed style, string and sheet data and closes the
// underlying file, returning the file's close error.
func (d *Document) Close() error {
	d.xfs, d.strings, d.sheets = nil, nil, nil
	return d.f.Close()
}

// Open opens the named .xlsx file and parses its package relationships,
// workbook structure, styles and shared strings. Sheet contents are parsed
// later, on the first call to Get.
//
// Failing to read the zip container or the root relationships is reported
// wrapped with grate.ErrNotInFormat. On every error path the file handle is
// closed before returning.
func Open(filename string) (grate.Source, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	// Document.Close takes over the handle once Open succeeds; until then
	// every early return must release it.
	opened := false
	defer func() {
		if !opened {
			f.Close()
		}
	}()

	info, err := f.Stat()
	if err != nil {
		return nil, err
	}
	z, err := zip.NewReader(f, info.Size())
	if err != nil {
		return nil, grate.WrapErr(err, grate.ErrNotInFormat)
	}
	d := &Document{
		filename: filename,
		f:        f,
		r:        z,
	}

	d.rels = make(map[string]map[string]string, 4)

	// parse the primary relationships
	dec, c, err := d.openXML("_rels/.rels")
	if err != nil {
		return nil, grate.WrapErr(err, grate.ErrNotInFormat)
	}
	err = d.parseRels(dec, "")
	c.Close()
	if err != nil {
		return nil, grate.WrapErr(err, grate.ErrNotInFormat)
	}
	if d.primaryDoc == "" {
		return nil, errors.New("xlsx: invalid document")
	}

	// parse the secondary relationships to primary doc
	base := filepath.Base(d.primaryDoc)
	sub := strings.TrimSuffix(d.primaryDoc, base)
	// zip entry names always use forward slashes, whatever the host OS
	relfn := filepath.ToSlash(filepath.Join(sub, "_rels", base+".rels"))
	dec, c, err = d.openXML(relfn)
	if err != nil {
		return nil, err
	}
	err = d.parseRels(dec, sub)
	c.Close()
	if err != nil {
		return nil, err
	}

	// parse the workbook structure
	dec, c, err = d.openXML(d.primaryDoc)
	if err != nil {
		return nil, err
	}
	err = d.parseWorkbook(dec)
	c.Close()
	if err != nil {
		return nil, err
	}

	styn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"]
	for _, sty := range styn {
		// parse the style sheet (number formats and cell formats)
		dec, c, err = d.openXML(sty)
		if err != nil {
			return nil, err
		}
		err = d.parseStyles(dec)
		c.Close()
		if err != nil {
			return nil, err
		}
	}

	ssn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"]
	for _, sst := range ssn {
		// parse the shared string table
		dec, c, err = d.openXML(sst)
		if err != nil {
			return nil, err
		}
		err = d.parseSharedStrings(dec)
		c.Close()
		if err != nil {
			return nil, err
		}
	}

	opened = true
	return d, nil
}

// openXML opens the archive entry whose name equals name exactly and returns
// an XML decoder reading from it, along with the closer for the entry. If no
// such entry exists the error is io.EOF.
func (d *Document) openXML(name string) (*xml.Decoder, io.Closer, error) {
	if grate.Debug {
		log.Println("    openXML", name)
	}
	for _, entry := range d.r.File {
		if entry.Name != name {
			continue
		}
		rc, err := entry.Open()
		if err != nil {
			return nil, nil, err
		}
		return xml.NewDecoder(rc), rc, nil
	}
	return nil, nil, io.EOF
}

// List returns the names of all sheets, in the order they were read from the
// workbook. The error is always nil.
func (d *Document) List() ([]string, error) {
	names := make([]string, len(d.sheets))
	for i := range d.sheets {
		names[i] = d.sheets[i].name
	}
	return names, nil
}

// Get returns the sheet with the given name, parsing it on first use. The
// parse result is remembered, so later calls return the same collection and
// error without parsing again. An unknown name is an error.
func (d *Document) Get(sheetName string) (grate.Collection, error) {
	for _, sh := range d.sheets {
		if sh.name != sheetName {
			continue
		}
		if sh.err == errNotLoaded {
			sh.err = sh.parseSheet()
		}
		return sh.wrapped, sh.err
	}
	return nil, errors.New("xlsx: sheet not found")
}