Friday, March 18, 2011

Breaking Down Database Query Results in Chunks of 65536 Rows for Excel 2003 Using Office Interop and VB .Net (Memory Efficient Edition)

So, my prior post had a solution for breaking down database query results from Excel into Worksheets of 65536 rows each. As previously mentioned in the post, it is a memory hog, and probably will blow through your assembly's allocation. I came up with a more memory-friendly solution to this issue. In essence, in this approach, I swap out memory processing for I/O processing.

This solution takes the following approach:
  1. Dump out contents of database query to Excel 2003 XML format using a SqlDataReader, broken by x rows (65536 rows in this case, in the spirit of Excel 2003's limitations)
  2. Open said Excel 2003 XML file using Excel Interop
  3. Do any post-processing formatting and niceties to your spreadsheet
  4. Save the file as a normal Excel Workbook
  5. Delete the source XML file (which does indeed get gigantic)
The result is a process which consumes, at most, 40-50 MB in memory (as opposed to the hundreds of MB in the prior approach) with performance close to the prior approach's in-memory + Excel Interop approach.

Code follows.

Excel XML Header (as referenced in the code below)



<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40">
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Author>enograles</Author>
<LastAuthor>enograles</LastAuthor>
<Created>2011-03-18T16:24:37Z</Created>
<Company></Company>
<Version>12.00</Version>
</DocumentProperties>
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
<WindowHeight>11895</WindowHeight>
<WindowWidth>19020</WindowWidth>
<WindowTopX>120</WindowTopX>
<WindowTopY>105</WindowTopY>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
<Style ss:ID="sDate">
<NumberFormat ss:Format="Short Date"/>
</Style>
</Styles>



Implementation Code



Public Shared Function ExportToExcelXML(ByVal conn As SqlConnection, ByVal sql As String, _
Optional ByVal path As String = Nothing) As String

Dim excel As Application

Try
Dim destination As System.IO.DirectoryInfo
Dim workbookFullPathSource As String
Dim dtResult As New System.Data.DataTable
Dim textWriter As System.IO.TextWriter
Dim pageCount As Integer = 1
Dim rowCount As Integer = 0
Dim worksheetWritten As Boolean

' First grab the result set
Dim cmdResult As New SqlCommand(sql, conn)
Dim rdr As SqlDataReader = cmdResult.ExecuteReader

' Generate a new Guid for the Workbook name.
Dim workbook_name As String = System.Guid.NewGuid().ToString()

' First validate the destination
If String.IsNullOrEmpty(path) = False Then
If System.IO.Directory.Exists(path) Then
destination = New System.IO.DirectoryInfo(path)
Else
' Attempt to create it
destination = System.IO.Directory.CreateDirectory(path)
End If
Else
' Just drop it in the executing assembly's root folder if no path is specified
destination = New System.IO.DirectoryInfo(System.Reflection.Assembly.GetExecutingAssembly.Location.Substring(0, System.Reflection.Assembly.GetExecutingAssembly.Location.LastIndexOf("\")))

End If

' Construct the full path
workbookFullPathSource = destination.FullName & "\" & workbook_name & ".xml"

' Instantiate the writer
textWriter = New System.IO.StreamWriter(workbookFullPathSource, False)

' Write the header
WriteExcelXMLHeader(textWriter)

' Iterate through the results
If rdr.HasRows Then
While rdr.Read

' Iterate the sheet if we've reached the limit
If rowCount = 65536 Then
worksheetWritten = False
pageCount += 1
rowCount = 0 ' Reset the counter to 0
End If

' Define a sheet and write the headers
If worksheetWritten = False Then
If pageCount <> 1 Then
textWriter.WriteLine(" </Table>")
textWriter.WriteLine(" </Worksheet>")
End If

textWriter.WriteLine(" <Worksheet ss:Name=""Page " & pageCount & """>")
textWriter.WriteLine(" <Table ss:ExpandedColumnCount=""" & rdr.FieldCount & """>")
textWriter.WriteLine(" <Row ss:AutoFitHeight=""0"">")

' The headers
For i As Integer = 0 To rdr.FieldCount - 1
textWriter.WriteLine(" <Cell><Data ss:Type=""String"">" & rdr.GetName(i) & "</Data></Cell>")
Next

textWriter.WriteLine(" </Row>")

' Yes, the header counts as a row
worksheetWritten = True
rowCount += 1
End If

' Write the actual data
textWriter.WriteLine(" <Row ss:AutoFitHeight=""0"">")
For i As Integer = 0 To rdr.FieldCount - 1
Dim dataType As String
Dim dataContents As String = rdr.Item(i).ToString()

If TypeOf (rdr.Item(i)) Is String Then
dataType = "String"
ElseIf TypeOf (rdr.Item(i)) Is DateTime Then
dataType = "String"
dataContents = CType(rdr.Item(i), DateTime).ToString("MM/dd/yyyy")
ElseIf IsNumeric(rdr.Item(i)) Then
dataType = "Number"
Else
dataType = "String"
End If

' The data with the proper type
textWriter.WriteLine(" <Cell><Data ss:Type=""" & dataType & """>" & dataContents & "</Data></Cell>")
Next

' Terminate the row
textWriter.WriteLine(" </Row>")

' Iterate row counter
rowCount += 1
End While
Else
textWriter.WriteLine(" <Worksheet ss:Name=""Page " & pageCount & """>")
textWriter.WriteLine(" <Table ss:ExpandedColumnCount=""" & rdr.FieldCount & """>")
textWriter.WriteLine(" <Row ss:AutoFitHeight=""0"">")
textWriter.WriteLine(" <Cell><Data ss:Type=""String"">No Results Found from Query:" & cmdResult.CommandText & "</Data></Cell>")
textWriter.WriteLine(" </Row>")
End If

' Close out the writer
textWriter.WriteLine(" </Table>")
textWriter.WriteLine(" </Worksheet>")
textWriter.WriteLine("</Workbook>")
textWriter.Close()

' Process in Excel Interop for formatting and saving to a proper Excel document
excel = New Application()
excel.DisplayAlerts = False

Dim wbSource As Workbook = excel.Workbooks.Open(workbookFullPathSource)

' For all sheets, autofit columns, bold first rows, freeze panes on all worksheets
For i As Integer = 1 To wbSource.Worksheets.Count
Dim ws As Worksheet = CType(wbSource.Worksheets(i), Worksheet)
ws.Select()
CType(ws.Cells(1, 1), Range).EntireRow.Font.Bold = True
CType(ws.Cells(2, 1), Range).Select()
excel.ActiveWindow.FreezePanes = True
ws.Columns.AutoFit()
Next

' Select the first Worksheet
CType(wbSource.Worksheets(1), Worksheet).Select()

' Save as a workbook, exit out of Excel
wbSource.SaveAs(destination.FullName & "\" & workbook_name & ".xls", FileFormat:=Microsoft.Office.Interop.Excel.XlFileFormat.xlWorkbookNormal)
wbSource.Close()
excel.Quit()

' Delete the XML source
'System.IO.File.Delete(workbookFullPathSource)

Return workbook_name & ".xls" 'workbook_full_path

Catch ex As Exception
Throw ex
Finally
If excel Is Nothing = False Then
excel.Quit()
System.Runtime.InteropServices.Marshal.ReleaseComObject(excel)
excel = Nothing
GC.Collect()
End If
End Try
End Function

''' <summary>
''' This routine will write an Excel XML header before defining worksheets
''' </summary>
''' <param name="textWriter"></param>
''' <remarks></remarks>
Private Shared Sub WriteExcelXMLHeader(ByVal textWriter As System.IO.TextWriter)
textWriter.WriteLine(My.Resources.resMain.ExcelXMLHeader)
End Sub


No comments: