From 08d1a86c3a1bffdf431dba6a3d5a3b369ef740a7 Mon Sep 17 00:00:00 2001 From: xuri Date: Tue, 10 Dec 2019 00:16:17 +0800 Subject: Fix #523, add stream writer for generate new worksheet with huge amounts of data --- stream.go | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 stream.go (limited to 'stream.go') diff --git a/stream.go b/stream.go new file mode 100644 index 0000000..0d91ddd --- /dev/null +++ b/stream.go @@ -0,0 +1,219 @@ +// Copyright 2016 - 2019 The excelize Authors. All rights reserved. Use of +// this source code is governed by a BSD-style license that can be found in +// the LICENSE file. +// +// Package excelize providing a set of functions that allow you to write to +// and read from XLSX files. Support reads and writes XLSX file generated by +// Microsoft Excelâ„¢ 2007 and later. Support save file without losing original +// charts of XLSX. This library needs Go version 1.10 or later. + +package excelize + +import ( + "bytes" + "encoding/xml" + "errors" + "fmt" + "io/ioutil" + "os" + "reflect" +) + +// StreamWriter defined the type of stream writer. +type StreamWriter struct { + tmpFile *os.File + File *File + Sheet string + SheetID int + SheetData bytes.Buffer +} + +// NewStreamWriter return stream writer struct by given worksheet name for +// generate new worksheet with large amounts of data. Note that after set +// rows, you must call the 'Flush' method to end the streaming writing +// process and ensure that the order of line numbers is ascending. For +// example, set data for worksheet of size 102400 rows x 50 columns with +// numbers: +// +// file := excelize.NewFile() +// streamWriter, err := file.NewStreamWriter("Sheet1") +// if err != nil { +// panic(err) +// } +// for rowID := 1; rowID <= 102400; rowID++ { +// row := make([]interface{}, 50) +// for colID := 0; colID < 50; colID++ { +// row[colID] = rand.Intn(640000) +// } +// cell, _ := excelize.CoordinatesToCellName(1, rowID) +// if err := streamWriter.SetRow(cell, &row); err != nil { +// panic(err) +// } +// } +// if err := streamWriter.Flush(); err != nil { +// panic(err) +// } +// if err := file.SaveAs("Book1.xlsx"); err != nil { +// panic(err) +// } +// +func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { + sheetID := f.GetSheetIndex(sheet) + if sheetID == 0 { + return nil, fmt.Errorf("sheet %s is not exist", sheet) + } + rsw := &StreamWriter{ + File: f, + Sheet: sheet, + SheetID: sheetID, + } + rsw.SheetData.WriteString("") + return rsw, nil +} + +// SetRow writes an array to streaming row by given worksheet name, starting +// coordinate and a pointer to array type 'slice'. Note that, cell settings +// with styles are not supported currently and after set rows, you must call the +// 'Flush' method to end the streaming writing process. The following +// shows the supported data types: +// +// int +// string +// +func (sw *StreamWriter) SetRow(axis string, slice interface{}) error { + col, row, err := CellNameToCoordinates(axis) + if err != nil { + return err + } + // Make sure 'slice' is a Ptr to Slice + v := reflect.ValueOf(slice) + if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Slice { + return errors.New("pointer to slice expected") + } + v = v.Elem() + sw.SheetData.WriteString(fmt.Sprintf(``, row)) + for i := 0; i < v.Len(); i++ { + axis, err := CoordinatesToCellName(col+i, row) + if err != nil { + return err + } + switch val := v.Index(i).Interface().(type) { + case int: + sw.SheetData.WriteString(fmt.Sprintf(`%d`, axis, val)) + case string: + sw.SheetData.WriteString(sw.setCellStr(axis, val)) + default: + sw.SheetData.WriteString(sw.setCellStr(axis, fmt.Sprint(val))) + } + } + sw.SheetData.WriteString(``) + // Try to use local storage + chunk := 1 << 24 + if sw.SheetData.Len() >= chunk { + if sw.tmpFile == nil { + err := sw.createTmp() + if err != nil { + // can not use local storage + return nil + } + } + // use local storage + _, err := sw.tmpFile.Write(sw.SheetData.Bytes()) + if err != nil { + return nil + } + sw.SheetData.Reset() + } + return err +} + +// Flush ending the streaming writing process. +func (sw *StreamWriter) Flush() error { + sw.SheetData.WriteString(``) + + ws, err := sw.File.workSheetReader(sw.Sheet) + if err != nil { + return err + } + sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) + delete(sw.File.Sheet, sheetXML) + delete(sw.File.checked, sheetXML) + var sheetDataByte []byte + if sw.tmpFile != nil { + // close the local storage file + if err = sw.tmpFile.Close(); err != nil { + return err + } + + file, err := os.Open(sw.tmpFile.Name()) + if err != nil { + return err + } + + sheetDataByte, err = ioutil.ReadAll(file) + if err != nil { + return err + } + + if err := file.Close(); err != nil { + return err + } + + err = os.Remove(sw.tmpFile.Name()) + if err != nil { + return err + } + } + + sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...) + replaceMap := map[string][]byte{ + "XMLName": []byte{}, + "SheetData": sheetDataByte, + } + sw.SheetData.Reset() + sw.File.XLSX[fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)] = + StreamMarshalSheet(ws, replaceMap) + return err +} + +// createTmp creates a temporary file in the operating system default +// temporary directory. +func (sw *StreamWriter) createTmp() (err error) { + sw.tmpFile, err = ioutil.TempFile(os.TempDir(), "excelize-") + return err +} + +// StreamMarshalSheet provides method to serialization worksheets by field as +// streaming. +func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte { + s := reflect.ValueOf(ws).Elem() + typeOfT := s.Type() + var marshalResult []byte + marshalResult = append(marshalResult, []byte(XMLHeader+``)...) + return marshalResult +} + +// setCellStr provides a function to set string type value of a cell as +// streaming. Total number of characters that a cell can contain 32767 +// characters. +func (sw *StreamWriter) setCellStr(axis, value string) string { + if len(value) > 32767 { + value = value[0:32767] + } + // Leading and ending space(s) character detection. + if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) { + return fmt.Sprintf(`%s`, axis, value) + } + return fmt.Sprintf(`%s`, axis, value) +} -- cgit v1.2.1 From a526e90404913f5d649d29a7aeee29f5ac9ff590 Mon Sep 17 00:00:00 2001 From: xuri Date: Mon, 16 Dec 2019 08:32:04 +0800 Subject: Fix #426, handle empty workbook view --- stream.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 0d91ddd..5e74e8e 100644 --- a/stream.go +++ b/stream.go @@ -191,13 +191,12 @@ func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte var marshalResult []byte marshalResult = append(marshalResult, []byte(XMLHeader+``)...) -- cgit v1.2.1 From 1666d04559d9f5b579ab7c850ccc95863c31bd25 Mon Sep 17 00:00:00 2001 From: xuri Date: Tue, 24 Dec 2019 01:09:28 +0800 Subject: optimization: checking error in unit tests --- stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 5e74e8e..1b1bbe3 100644 --- a/stream.go +++ b/stream.go @@ -167,7 +167,7 @@ func (sw *StreamWriter) Flush() error { sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...) replaceMap := map[string][]byte{ - "XMLName": []byte{}, + "XMLName": {}, "SheetData": sheetDataByte, } sw.SheetData.Reset() -- cgit v1.2.1 From 5c87effc7e6c97fff36a56dea1afac8a2f06fb37 Mon Sep 17 00:00:00 2001 From: Cameron Howey Date: Sat, 28 Dec 2019 20:45:10 -0800 Subject: Stream to Excel table (#530) * Support all datatypes for StreamWriter * Support setting styles with StreamWriter **NOTE:** This is a breaking change. Values are now explicitly passed as a []interface{} for simplicity. We also let styles to be set at the same time. * Create function to write stream into a table * Write rows directly to buffer Avoiding the xml.Encoder makes the streamer faster and use less memory. Using the included benchmark, the results went from: > BenchmarkStreamWriter-4 514 2576155 ns/op 454918 B/op 6592 allocs/op down to: > BenchmarkStreamWriter-4 1614 777480 ns/op 147608 B/op 5570 allocs/op * Use AddTable instead of SetTable This requires reading the cells after they have been written, which requires additional structure for the temp file. As a bonus, we now efficiently allocate only one buffer when reading the file back into memory, using the same approach as ioutil.ReadFile. * Use an exported Cell type to handle inline styles for StreamWriter --- stream.go | 495 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 387 insertions(+), 108 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 1b1bbe3..e981f78 100644 --- a/stream.go +++ b/stream.go @@ -12,20 +12,23 @@ package excelize import ( "bytes" "encoding/xml" - "errors" "fmt" + "io" "io/ioutil" "os" "reflect" + "strconv" + "strings" + "time" ) // StreamWriter defined the type of stream writer. type StreamWriter struct { - tmpFile *os.File - File *File - Sheet string - SheetID int - SheetData bytes.Buffer + File *File + Sheet string + SheetID int + rawData bufferedWriter + tableParts string } // NewStreamWriter return stream writer struct by given worksheet name for @@ -46,7 +49,7 @@ type StreamWriter struct { // row[colID] = rand.Intn(640000) // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) -// if err := streamWriter.SetRow(cell, &row); err != nil { +// if err := streamWriter.SetRow(cell, row, nil); err != nil { // panic(err) // } // } @@ -62,157 +65,433 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { if sheetID == 0 { return nil, fmt.Errorf("sheet %s is not exist", sheet) } - rsw := &StreamWriter{ + sw := &StreamWriter{ File: f, Sheet: sheet, SheetID: sheetID, } - rsw.SheetData.WriteString("") - return rsw, nil + + ws, err := f.workSheetReader(sheet) + if err != nil { + return nil, err + } + sw.rawData.WriteString(XMLHeader + ``) + return sw, nil } -// SetRow writes an array to streaming row by given worksheet name, starting -// coordinate and a pointer to array type 'slice'. Note that, cell settings -// with styles are not supported currently and after set rows, you must call the -// 'Flush' method to end the streaming writing process. The following -// shows the supported data types: +// AddTable creates an Excel table for the StreamWriter using the given +// coordinate area and format set. For example, create a table of A1:D5: // -// int -// string +// err := sw.AddTable("A1", "D5", ``) // -func (sw *StreamWriter) SetRow(axis string, slice interface{}) error { - col, row, err := CellNameToCoordinates(axis) +// Create a table of F2:H6 with format set: +// +// err := sw.AddTable("F2", "H6", `{"table_name":"table","table_style":"TableStyleMedium2","show_first_column":true,"show_last_column":true,"show_row_stripes":false,"show_column_stripes":true}`) +// +// Note that the table must be at least two lines including the header. The +// header cells must contain strings and must be unique. +// +// Currently only one table is allowed for a StreamWriter. AddTable must be +// called after the rows are written but before Flush. +// +// See File.AddTable for details on the table format. +func (sw *StreamWriter) AddTable(hcell, vcell, format string) error { + formatSet, err := parseFormatTableSet(format) if err != nil { return err } - // Make sure 'slice' is a Ptr to Slice - v := reflect.ValueOf(slice) - if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Slice { - return errors.New("pointer to slice expected") + + coordinates, err := areaRangeToCoordinates(hcell, vcell) + if err != nil { + return err } - v = v.Elem() - sw.SheetData.WriteString(fmt.Sprintf(``, row)) - for i := 0; i < v.Len(); i++ { - axis, err := CoordinatesToCellName(col+i, row) + sortCoordinates(coordinates) + + // Correct the minimum number of rows, the table at least two lines. + if coordinates[1] == coordinates[3] { + coordinates[3]++ + } + + // Correct table reference coordinate area, such correct C1:B3 to B1:C3. + ref, err := sw.File.coordinatesToAreaRef(coordinates) + if err != nil { + return err + } + + // create table columns using the first row + tableHeaders, err := sw.getRowValues(coordinates[1], coordinates[0], coordinates[2]) + if err != nil { + return err + } + tableColumn := make([]*xlsxTableColumn, len(tableHeaders)) + for i, name := range tableHeaders { + tableColumn[i] = &xlsxTableColumn{ + ID: i + 1, + Name: name, + } + } + + tableID := sw.File.countTables() + 1 + + name := formatSet.TableName + if name == "" { + name = "Table" + strconv.Itoa(tableID) + } + + table := xlsxTable{ + XMLNS: NameSpaceSpreadSheet, + ID: tableID, + Name: name, + DisplayName: name, + Ref: ref, + AutoFilter: &xlsxAutoFilter{ + Ref: ref, + }, + TableColumns: &xlsxTableColumns{ + Count: len(tableColumn), + TableColumn: tableColumn, + }, + TableStyleInfo: &xlsxTableStyleInfo{ + Name: formatSet.TableStyle, + ShowFirstColumn: formatSet.ShowFirstColumn, + ShowLastColumn: formatSet.ShowLastColumn, + ShowRowStripes: formatSet.ShowRowStripes, + ShowColumnStripes: formatSet.ShowColumnStripes, + }, + } + + sheetRelationshipsTableXML := "../tables/table" + strconv.Itoa(tableID) + ".xml" + tableXML := strings.Replace(sheetRelationshipsTableXML, "..", "xl", -1) + + // Add first table for given sheet. + sheetPath, _ := sw.File.sheetMap[trimSheetName(sw.Sheet)] + sheetRels := "xl/worksheets/_rels/" + strings.TrimPrefix(sheetPath, "xl/worksheets/") + ".rels" + rID := sw.File.addRels(sheetRels, SourceRelationshipTable, sheetRelationshipsTableXML, "") + + sw.tableParts = fmt.Sprintf(``, rID) + + sw.File.addContentTypePart(tableID, "table") + + b, _ := xml.Marshal(table) + sw.File.saveFileList(tableXML, b) + return nil +} + +// Extract values from a row in the StreamWriter. +func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err error) { + res = make([]string, vcol-hcol+1) + + r, err := sw.rawData.Reader() + if err != nil { + return nil, err + } + + dec := xml.NewDecoder(r) + for { + token, err := dec.Token() + if err == io.EOF { + return res, nil + } if err != nil { - return err + return nil, err } - switch val := v.Index(i).Interface().(type) { - case int: - sw.SheetData.WriteString(fmt.Sprintf(`%d`, axis, val)) - case string: - sw.SheetData.WriteString(sw.setCellStr(axis, val)) - default: - sw.SheetData.WriteString(sw.setCellStr(axis, fmt.Sprint(val))) + startElement, ok := getRowElement(token, hrow) + if !ok { + continue } - } - sw.SheetData.WriteString(``) - // Try to use local storage - chunk := 1 << 24 - if sw.SheetData.Len() >= chunk { - if sw.tmpFile == nil { - err := sw.createTmp() + // decode cells + var row xlsxRow + if err := dec.DecodeElement(&row, &startElement); err != nil { + return nil, err + } + for _, c := range row.C { + col, _, err := CellNameToCoordinates(c.R) if err != nil { - // can not use local storage - return nil + return nil, err } + if col < hcol || col > vcol { + continue + } + res[col-hcol] = c.V } - // use local storage - _, err := sw.tmpFile.Write(sw.SheetData.Bytes()) - if err != nil { - return nil + return res, nil + } +} + +// Check if the token is an XLSX row with the matching row number. +func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok bool) { + startElement, ok = token.(xml.StartElement) + if !ok { + return + } + ok = startElement.Name.Local == "row" + if !ok { + return + } + ok = false + for _, attr := range startElement.Attr { + if attr.Name.Local != "r" { + continue + } + row, _ := strconv.Atoi(attr.Value) + if row == hrow { + ok = true + return } - sw.SheetData.Reset() } - return err + return } -// Flush ending the streaming writing process. -func (sw *StreamWriter) Flush() error { - sw.SheetData.WriteString(``) +// Cell can be used directly in StreamWriter.SetRow to specify a style and +// a value. +type Cell struct { + StyleID int + Value interface{} +} - ws, err := sw.File.workSheetReader(sw.Sheet) +// SetRow writes an array to stream rows by giving a worksheet name, starting +// coordinate and a pointer to an array of values. Note that you must call the +// 'Flush' method to end the streaming writing process. +// +// As a special case, if Cell is used as a value, then the Cell.StyleID will be +// applied to that cell. +func (sw *StreamWriter) SetRow(axis string, values []interface{}) error { + col, row, err := CellNameToCoordinates(axis) if err != nil { return err } - sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) - delete(sw.File.Sheet, sheetXML) - delete(sw.File.checked, sheetXML) - var sheetDataByte []byte - if sw.tmpFile != nil { - // close the local storage file - if err = sw.tmpFile.Close(); err != nil { - return err - } - file, err := os.Open(sw.tmpFile.Name()) + fmt.Fprintf(&sw.rawData, ``, row) + for i, val := range values { + axis, err := CoordinatesToCellName(col+i, row) if err != nil { return err } - - sheetDataByte, err = ioutil.ReadAll(file) - if err != nil { - return err + c := xlsxC{R: axis} + if v, ok := val.(Cell); ok { + c.S = v.StyleID + val = v.Value + } else if v, ok := val.(*Cell); ok && v != nil { + c.S = v.StyleID + val = v.Value } - - if err := file.Close(); err != nil { - return err + switch val := val.(type) { + case int: + c.T, c.V = setCellInt(val) + case int8: + c.T, c.V = setCellInt(int(val)) + case int16: + c.T, c.V = setCellInt(int(val)) + case int32: + c.T, c.V = setCellInt(int(val)) + case int64: + c.T, c.V = setCellInt(int(val)) + case uint: + c.T, c.V = setCellInt(int(val)) + case uint8: + c.T, c.V = setCellInt(int(val)) + case uint16: + c.T, c.V = setCellInt(int(val)) + case uint32: + c.T, c.V = setCellInt(int(val)) + case uint64: + c.T, c.V = setCellInt(int(val)) + case float32: + c.T, c.V = setCellFloat(float64(val), -1, 32) + case float64: + c.T, c.V = setCellFloat(val, -1, 64) + case string: + c.T, c.V, c.XMLSpace = setCellStr(val) + case []byte: + c.T, c.V, c.XMLSpace = setCellStr(string(val)) + case time.Duration: + c.T, c.V = setCellDuration(val) + case time.Time: + c.T, c.V, _, err = setCellTime(val) + case bool: + c.T, c.V = setCellBool(val) + case nil: + c.T, c.V, c.XMLSpace = setCellStr("") + default: + c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val)) } - - err = os.Remove(sw.tmpFile.Name()) if err != nil { return err } + writeCell(&sw.rawData, c) } + sw.rawData.WriteString(``) + return sw.rawData.Sync() +} - sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...) - replaceMap := map[string][]byte{ - "XMLName": {}, - "SheetData": sheetDataByte, +func writeCell(buf *bufferedWriter, c xlsxC) { + buf.WriteString(``) + if c.V != "" { + buf.WriteString(``) + xml.EscapeText(buf, []byte(c.V)) + buf.WriteString(``) + } + buf.WriteString(``) } -// createTmp creates a temporary file in the operating system default -// temporary directory. -func (sw *StreamWriter) createTmp() (err error) { - sw.tmpFile, err = ioutil.TempFile(os.TempDir(), "excelize-") - return err +// Flush ending the streaming writing process. +func (sw *StreamWriter) Flush() error { + sw.rawData.WriteString(``) + sw.rawData.WriteString(sw.tableParts) + sw.rawData.WriteString(``) + if err := sw.rawData.Flush(); err != nil { + return err + } + + sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) + delete(sw.File.Sheet, sheetXML) + delete(sw.File.checked, sheetXML) + + defer sw.rawData.Close() + b, err := sw.rawData.Bytes() + if err != nil { + return err + } + sw.File.XLSX[sheetXML] = b + return nil } -// StreamMarshalSheet provides method to serialization worksheets by field as -// streaming. -func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte { +// bulkAppendOtherFields bulk-appends fields in a worksheet, skipping the +// specified field names. +func bulkAppendOtherFields(w io.Writer, ws *xlsxWorksheet, skip ...string) { + skipMap := make(map[string]struct{}) + for _, name := range skip { + skipMap[name] = struct{}{} + } + s := reflect.ValueOf(ws).Elem() typeOfT := s.Type() - var marshalResult []byte - marshalResult = append(marshalResult, []byte(XMLHeader+``)...) - return marshalResult } -// setCellStr provides a function to set string type value of a cell as -// streaming. Total number of characters that a cell can contain 32767 -// characters. -func (sw *StreamWriter) setCellStr(axis, value string) string { - if len(value) > 32767 { - value = value[0:32767] +// bufferedWriter uses a temp file to store an extended buffer. Writes are +// always made to an in-memory buffer, which will always succeed. The buffer +// is written to the temp file with Sync, which may return an error. Therefore, +// Sync should be periodically called and the error checked. +type bufferedWriter struct { + tmp *os.File + buf bytes.Buffer +} + +// Write to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) Write(p []byte) (n int, err error) { + return bw.buf.Write(p) +} + +// WriteString wites to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) WriteString(p string) (n int, err error) { + return bw.buf.WriteString(p) +} + +// Reader provides read-access to the underlying buffer/file. +func (bw *bufferedWriter) Reader() (io.Reader, error) { + if bw.tmp == nil { + return bytes.NewReader(bw.buf.Bytes()), nil + } + if err := bw.Flush(); err != nil { + return nil, err + } + fi, err := bw.tmp.Stat() + if err != nil { + return nil, err } - // Leading and ending space(s) character detection. - if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) { - return fmt.Sprintf(`%s`, axis, value) + // os.File.ReadAt does not affect the cursor position and is safe to use here + return io.NewSectionReader(bw.tmp, 0, fi.Size()), nil +} + +// Bytes returns the entire content of the bufferedWriter. If a temp file is +// used, Bytes will efficiently allocate a buffer to prevent re-allocations. +func (bw *bufferedWriter) Bytes() ([]byte, error) { + if bw.tmp == nil { + return bw.buf.Bytes(), nil + } + + if err := bw.Flush(); err != nil { + return nil, err + } + + var buf bytes.Buffer + if fi, err := bw.tmp.Stat(); err == nil { + if size := fi.Size() + bytes.MinRead; size > bytes.MinRead { + if int64(int(size)) == size { + buf.Grow(int(size)) + } else { + return nil, bytes.ErrTooLarge + } + } + } + + if _, err := bw.tmp.Seek(0, 0); err != nil { + return nil, err + } + + _, err := buf.ReadFrom(bw.tmp) + return buf.Bytes(), err +} + +// Sync will write the in-memory buffer to a temp file, if the in-memory buffer +// has grown large enough. Any error will be returned. +func (bw *bufferedWriter) Sync() (err error) { + // Try to use local storage + const chunk = 1 << 24 + if bw.buf.Len() < chunk { + return nil + } + if bw.tmp == nil { + bw.tmp, err = ioutil.TempFile(os.TempDir(), "excelize-") + if err != nil { + // can not use local storage + return nil + } + } + return bw.Flush() +} + +// Flush the entire in-memory buffer to the temp file, if a temp file is being +// used. +func (bw *bufferedWriter) Flush() error { + if bw.tmp == nil { + return nil + } + _, err := bw.buf.WriteTo(bw.tmp) + if err != nil { + return err + } + bw.buf.Reset() + return nil +} + +// Close the underlying temp file and reset the in-memory buffer. +func (bw *bufferedWriter) Close() error { + bw.buf.Reset() + if bw.tmp == nil { + return nil } - return fmt.Sprintf(`%s`, axis, value) + defer os.Remove(bw.tmp.Name()) + return bw.tmp.Close() } -- cgit v1.2.1 From 09485b3f9f0aefc58d51462aed65c2416205c591 Mon Sep 17 00:00:00 2001 From: xuri Date: Sun, 29 Dec 2019 16:02:31 +0800 Subject: Improve code coverage unit tests --- stream.go | 127 +++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 52 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index e981f78..9facf31 100644 --- a/stream.go +++ b/stream.go @@ -1,4 +1,4 @@ -// Copyright 2016 - 2019 The excelize Authors. All rights reserved. Use of +// Copyright 2016 - 2020 The excelize Authors. All rights reserved. Use of // this source code is governed by a BSD-style license that can be found in // the LICENSE file. // @@ -36,20 +36,27 @@ type StreamWriter struct { // rows, you must call the 'Flush' method to end the streaming writing // process and ensure that the order of line numbers is ascending. For // example, set data for worksheet of size 102400 rows x 50 columns with -// numbers: +// numbers and style: // // file := excelize.NewFile() // streamWriter, err := file.NewStreamWriter("Sheet1") // if err != nil { // panic(err) // } -// for rowID := 1; rowID <= 102400; rowID++ { +// styleID, err := file.NewStyle(`{"font":{"color":"#777777"}}`) +// if err != nil { +// panic(err) +// } +// if err := streamWriter.SetRow("A1", []interface{}{excelize.Cell{StyleID: styleID, Value: "Data"}}); err != nil { +// panic(err) +// } +// for rowID := 2; rowID <= 102400; rowID++ { // row := make([]interface{}, 50) // for colID := 0; colID < 50; colID++ { // row[colID] = rand.Intn(640000) // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) -// if err := streamWriter.SetRow(cell, row, nil); err != nil { +// if err := streamWriter.SetRow(cell, row); err != nil { // panic(err) // } // } @@ -107,7 +114,7 @@ func (sw *StreamWriter) AddTable(hcell, vcell, format string) error { if err != nil { return err } - sortCoordinates(coordinates) + _ = sortCoordinates(coordinates) // Correct the minimum number of rows, the table at least two lines. if coordinates[1] == coordinates[3] { @@ -188,7 +195,7 @@ func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err er return nil, err } - dec := xml.NewDecoder(r) + dec := sw.File.xmlNewDecoder(r) for { token, err := dec.Token() if err == io.EOF { @@ -248,7 +255,7 @@ func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok // a value. type Cell struct { StyleID int - Value interface{} + Value interface{} } // SetRow writes an array to stream rows by giving a worksheet name, starting @@ -277,47 +284,8 @@ func (sw *StreamWriter) SetRow(axis string, values []interface{}) error { c.S = v.StyleID val = v.Value } - switch val := val.(type) { - case int: - c.T, c.V = setCellInt(val) - case int8: - c.T, c.V = setCellInt(int(val)) - case int16: - c.T, c.V = setCellInt(int(val)) - case int32: - c.T, c.V = setCellInt(int(val)) - case int64: - c.T, c.V = setCellInt(int(val)) - case uint: - c.T, c.V = setCellInt(int(val)) - case uint8: - c.T, c.V = setCellInt(int(val)) - case uint16: - c.T, c.V = setCellInt(int(val)) - case uint32: - c.T, c.V = setCellInt(int(val)) - case uint64: - c.T, c.V = setCellInt(int(val)) - case float32: - c.T, c.V = setCellFloat(float64(val), -1, 32) - case float64: - c.T, c.V = setCellFloat(val, -1, 64) - case string: - c.T, c.V, c.XMLSpace = setCellStr(val) - case []byte: - c.T, c.V, c.XMLSpace = setCellStr(string(val)) - case time.Duration: - c.T, c.V = setCellDuration(val) - case time.Time: - c.T, c.V, _, err = setCellTime(val) - case bool: - c.T, c.V = setCellBool(val) - case nil: - c.T, c.V, c.XMLSpace = setCellStr("") - default: - c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val)) - } - if err != nil { + if err = setCellValFunc(&c, val); err != nil { + sw.rawData.WriteString(``) return err } writeCell(&sw.rawData, c) @@ -326,6 +294,61 @@ func (sw *StreamWriter) SetRow(axis string, values []interface{}) error { return sw.rawData.Sync() } +// setCellValFunc provides a function to set value of a cell. +func setCellValFunc(c *xlsxC, val interface{}) (err error) { + switch val := val.(type) { + case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: + err = setCellIntFunc(c, val) + case float32: + c.T, c.V = setCellFloat(float64(val), -1, 32) + case float64: + c.T, c.V = setCellFloat(val, -1, 64) + case string: + c.T, c.V, c.XMLSpace = setCellStr(val) + case []byte: + c.T, c.V, c.XMLSpace = setCellStr(string(val)) + case time.Duration: + c.T, c.V = setCellDuration(val) + case time.Time: + c.T, c.V, _, err = setCellTime(val) + case bool: + c.T, c.V = setCellBool(val) + case nil: + c.T, c.V, c.XMLSpace = setCellStr("") + default: + c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val)) + } + return err +} + +// setCellIntFunc is a wrapper of SetCellInt. +func setCellIntFunc(c *xlsxC, val interface{}) (err error) { + switch val := val.(type) { + case int: + c.T, c.V = setCellInt(val) + case int8: + c.T, c.V = setCellInt(int(val)) + case int16: + c.T, c.V = setCellInt(int(val)) + case int32: + c.T, c.V = setCellInt(int(val)) + case int64: + c.T, c.V = setCellInt(int(val)) + case uint: + c.T, c.V = setCellInt(int(val)) + case uint8: + c.T, c.V = setCellInt(int(val)) + case uint16: + c.T, c.V = setCellInt(int(val)) + case uint32: + c.T, c.V = setCellInt(int(val)) + case uint64: + c.T, c.V = setCellInt(int(val)) + default: + } + return +} + func writeCell(buf *bufferedWriter, c xlsxC) { buf.WriteString(` Date: Tue, 31 Dec 2019 01:01:16 +0800 Subject: Fix #551, handle empty rows in streaming reading --- stream.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 9facf31..8398622 100644 --- a/stream.go +++ b/stream.go @@ -41,14 +41,14 @@ type StreamWriter struct { // file := excelize.NewFile() // streamWriter, err := file.NewStreamWriter("Sheet1") // if err != nil { -// panic(err) +// println(err.Error()) // } // styleID, err := file.NewStyle(`{"font":{"color":"#777777"}}`) // if err != nil { -// panic(err) +// println(err.Error()) // } // if err := streamWriter.SetRow("A1", []interface{}{excelize.Cell{StyleID: styleID, Value: "Data"}}); err != nil { -// panic(err) +// println(err.Error()) // } // for rowID := 2; rowID <= 102400; rowID++ { // row := make([]interface{}, 50) @@ -57,14 +57,14 @@ type StreamWriter struct { // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) // if err := streamWriter.SetRow(cell, row); err != nil { -// panic(err) +// println(err.Error()) // } // } // if err := streamWriter.Flush(); err != nil { -// panic(err) +// println(err.Error()) // } // if err := file.SaveAs("Book1.xlsx"); err != nil { -// panic(err) +// println(err.Error()) // } // func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { -- cgit v1.2.1 From 023dba726510a4a7a97838ac9a8f4292a90aa227 Mon Sep 17 00:00:00 2001 From: xuri Date: Thu, 13 Feb 2020 00:00:42 +0800 Subject: Fix #576, serialize by fields order on stream flush --- stream.go | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 8398622..c854d8b 100644 --- a/stream.go +++ b/stream.go @@ -27,6 +27,7 @@ type StreamWriter struct { File *File Sheet string SheetID int + worksheet *xlsxWorksheet rawData bufferedWriter tableParts string } @@ -77,15 +78,15 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { Sheet: sheet, SheetID: sheetID, } - - ws, err := f.workSheetReader(sheet) + var err error + sw.worksheet, err = f.workSheetReader(sheet) if err != nil { return nil, err } sw.rawData.WriteString(XMLHeader + ``) - return sw, nil + return sw, err } // AddTable creates an Excel table for the StreamWriter using the given @@ -373,7 +374,9 @@ func writeCell(buf *bufferedWriter, c xlsxC) { // Flush ending the streaming writing process. func (sw *StreamWriter) Flush() error { sw.rawData.WriteString(``) + bulkAppendFields(&sw.rawData, sw.worksheet, 7, 37) sw.rawData.WriteString(sw.tableParts) + bulkAppendFields(&sw.rawData, sw.worksheet, 39, 39) sw.rawData.WriteString(``) if err := sw.rawData.Flush(); err != nil { return err @@ -392,23 +395,15 @@ func (sw *StreamWriter) Flush() error { return nil } -// bulkAppendOtherFields bulk-appends fields in a worksheet, skipping the -// specified field names. -func bulkAppendOtherFields(w io.Writer, ws *xlsxWorksheet, skip ...string) { - skipMap := make(map[string]struct{}) - for _, name := range skip { - skipMap[name] = struct{}{} - } - +// bulkAppendFields bulk-appends fields in a worksheet by specified field +// names order range. +func bulkAppendFields(w io.Writer, ws *xlsxWorksheet, from, to int) { s := reflect.ValueOf(ws).Elem() - typeOfT := s.Type() enc := xml.NewEncoder(w) for i := 0; i < s.NumField(); i++ { - f := s.Field(i) - if _, ok := skipMap[typeOfT.Field(i).Name]; ok { - continue + if from <= i && i <= to { + enc.Encode(s.Field(i).Interface()) } - enc.Encode(f.Interface()) } } -- cgit v1.2.1 From ad883caa0f77dfc016ae99bd5fbb606953eb99a0 Mon Sep 17 00:00:00 2001 From: xuri Date: Wed, 19 Feb 2020 00:08:10 +0800 Subject: Resolve #580, revert commit https://github.com/360EntSecGroup-Skylar/excelize/commit/5ca7231ed408ac264f509ff52b5d28ff4fbda757 --- stream.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index c854d8b..98cf828 100644 --- a/stream.go +++ b/stream.go @@ -42,14 +42,14 @@ type StreamWriter struct { // file := excelize.NewFile() // streamWriter, err := file.NewStreamWriter("Sheet1") // if err != nil { -// println(err.Error()) +// fmt.Println(err) // } // styleID, err := file.NewStyle(`{"font":{"color":"#777777"}}`) // if err != nil { -// println(err.Error()) +// fmt.Println(err) // } // if err := streamWriter.SetRow("A1", []interface{}{excelize.Cell{StyleID: styleID, Value: "Data"}}); err != nil { -// println(err.Error()) +// fmt.Println(err) // } // for rowID := 2; rowID <= 102400; rowID++ { // row := make([]interface{}, 50) @@ -58,14 +58,14 @@ type StreamWriter struct { // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) // if err := streamWriter.SetRow(cell, row); err != nil { -// println(err.Error()) +// fmt.Println(err) // } // } // if err := streamWriter.Flush(); err != nil { -// println(err.Error()) +// fmt.Println(err) // } // if err := file.SaveAs("Book1.xlsx"); err != nil { -// println(err.Error()) +// fmt.Println(err) // } // func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { -- cgit v1.2.1 From 0f2a9053246c3ae45e6c7ba911a1fb135664abdf Mon Sep 17 00:00:00 2001 From: xuri Date: Thu, 2 Apr 2020 00:41:14 +0800 Subject: Performance improvements --- stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 98cf828..1af0b9f 100644 --- a/stream.go +++ b/stream.go @@ -365,7 +365,7 @@ func writeCell(buf *bufferedWriter, c xlsxC) { buf.WriteString(`>`) if c.V != "" { buf.WriteString(``) - xml.EscapeText(buf, []byte(c.V)) + xml.EscapeText(buf, stringToBytes(c.V)) buf.WriteString(``) } buf.WriteString(``) -- cgit v1.2.1 From 1fe660df648422a53eef0c735657cb2f5237ef7b Mon Sep 17 00:00:00 2001 From: xuri Date: Thu, 23 Apr 2020 02:01:14 +0800 Subject: - Resolve #485 use sheet index instead of ID - added 3 internal function: getSheetID, getActiveSheetID, getSheetNameByID --- stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'stream.go') diff --git a/stream.go b/stream.go index 1af0b9f..838751d 100644 --- a/stream.go +++ b/stream.go @@ -69,7 +69,7 @@ type StreamWriter struct { // } // func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { - sheetID := f.GetSheetIndex(sheet) + sheetID := f.getSheetID(sheet) if sheetID == 0 { return nil, fmt.Errorf("sheet %s is not exist", sheet) } -- cgit v1.2.1