The new version of Office give us some niceties not afforded to the 2003 version. Some neat niceties (the Excel file being a glorified Zip file, with XML standards), and some practical niceties... in the case of the latter, the elimination of that pesky 65536 row limitation. Alas, with my enterprise still on 2003, it is a limitation we developers need to live with. We have various utilities in .Net that export results out to Excel spreadsheets, because let's face it, as an ad-hoc BI tool, Excel is pretty darn good at analyzing volumes of data.
With that in mind, when you have a requirement for large amounts of data (think in the hundreds of thousands of rows) to be published out to an Excel 2003 Workbook, it poses a little bit of a challenge. Specifically, how do you break up your results into Pages of Worksheets (65536 rows in each page) that the end-users can manipulate to their hearts' contentment? A couple of issues you run into: (a) How to break down your results in chunks of 65536 rows and (b) How to insert them blocks at a time without blowing through the Range.Value2's memory limitation (which, as far as I can tell, is not even published)?
I came up with a routine that facilitates this functionality. Just a fair word of warning, it's a bit of a hack, using standard ADO .Net objects and clever pasting of arrays to the Range.Value2 property of the Excel Interop Model while gratuitously calling the Garbage Collector so we don't blow through our memory allocation (because it uses up a bunch to begin with in persisting the results to memory as an ADO .Net DataTable).
Not my neatest of routines, but it gets the job done. An obvious improvement would be to break down the work in multiple subroutines. Another improvement could be the utilization of memory -- I've only tested this routine for the upper bounds of our data requirements (about 900,000 points of data?), so for larger data requirements, you may need to tune it a bit so it will not blow through the assembly's memory allocation. Of course, if you come up with a clever way to clean the code up a bit, I'd be happy to hear about it!
Again, I reiterate my HACK ALERT statement:
Public Shared Function ExportToExcel(ByVal conn As SqlConnection, ByVal sql As String, _
Optional ByVal path As String = Nothing) As String
Dim excel As Application
Try
Dim destination As System.IO.DirectoryInfo
Dim workbook_full_path As String
Dim dtResult As New System.Data.DataTable
Dim lstSheets As New Generic.List(Of String)
' First grab the result set
Dim cmdResult As New SqlCommand(sql, conn)
Dim rdr As SqlDataReader = cmdResult.ExecuteReader
dtResult.Load(rdr)
' How many sheets will we need?
Dim sheet_count As Integer = SheetCount(dtResult.Rows.Count)
' Generate a new Guid for the Workbook name.
Dim workbook_name As String = System.Guid.NewGuid().ToString()
' First validate the destination
If String.IsNullOrEmpty(path) = False Then
If System.IO.Directory.Exists(path) Then
destination = New System.IO.DirectoryInfo(path)
Else
' Attempt to create it
destination = System.IO.Directory.CreateDirectory(path)
End If
Else
' Just drop it in the executing assembly's root folder if no path is specified
destination = New System.IO.DirectoryInfo(System.Reflection.Assembly.GetExecutingAssembly.Location.Substring(0, System.Reflection.Assembly.GetExecutingAssembly.Location.LastIndexOf("\")))
End If
' Construct the full path
workbook_full_path = destination.FullName & "\" & workbook_name & ".xls"
' Create Excel objects
excel = New Application
excel.DisplayAlerts = False
Dim wb As Workbook = excel.Workbooks.Add()
Dim results_processed As Integer = 0
' Tracking dictionary so we can order the sheets
Dim dicSheets As New Generic.Dictionary(Of Integer, Worksheet)
' Only do the Excel processing if we have results
' Create the right number of sheets
' And drop data into each sheet
If sheet_count > 0 Then
For i As Integer = 1 To sheet_count
' Create a sheet
Dim ws As Worksheet
' Put in order
If i > 1 Then
ws = wb.Worksheets.Add(Type.Missing, dicSheets(i - 1))
Else
ws = wb.Worksheets.Add
End If
dicSheets.Add(i, ws)
Dim results_outstanding As Integer = dtResult.Rows.Count - results_processed
ws.Name = "Page " & i.ToString()
lstSheets.Add(ws.Name)
' Start and end positions of the DataTable. Conditions for if we have
' more than 65536 results, we need to know the breakdown for where we
' should start and end (ordinally) on the table based on what Sheet we're on
Dim dt_start_row As Integer
Dim dt_end_row As Integer
Dim sheet_end_row As Integer
' Data Table Bounds
' Positionally on the DataTable, where are we extracting data?
If i > 1 Then
dt_start_row = (i - 1) * 65536 ' Minus one because the first sheet is rows 1-65536
Else
dt_start_row = 1
End If
If results_outstanding < 65536 Then
dt_end_row = dtResult.Rows.Count
sheet_end_row = results_outstanding + 1 ' We need it to be inclusive
Else
dt_end_row = i * 65535
sheet_end_row = 65535
End If
' Create a two dimensional array
' First dimension = rows
' Second dimension = columns
' Add + 1 to rows because the first row is always the column headers
Dim result(sheet_end_row + 1, dtResult.Columns.Count) As Object
' Publish the row header
For col As Integer = 0 To dtResult.Columns.Count - 1
result(0, col) = dtResult.Columns(col).ColumnName
Next
Dim sheet_row_count As Integer = 1
' Propagate the data down in the array
' subtract one since the DataTable is zero based
For result_data_row As Integer = dt_start_row - 1 To dt_end_row - 1
For result_data_col As Integer = 0 To dtResult.Columns.Count - 1
Dim value As Object = dtResult.Rows(result_data_row)(result_data_col)
' DateTimes come up weird on Excel
If TypeOf (value) Is DateTime Then
result(sheet_row_count, result_data_col) = value.ToString() 'dtResult.Rows(result_data_row)(result_data_col).ToString()
Else
result(sheet_row_count, result_data_col) = value 'dtResult.Rows(result_data_row)(result_data_col)
End If
Next
' Iterate the sheet's row counter (not the data table's row counter)
sheet_row_count += 1
Next
' Arbitrary number of rows to drop at a time so Excel's rng.Value2 doesn't exception out. Needs further refining, obviously
If sheet_row_count >= 65535 AndAlso dtResult.Columns.Count > 30 Then
' Drop in Chunks of 700 rows at a time
' We can change this later as we performance tune
Dim row_chunks As Integer = 700
Dim excel_sheet_max As Integer = 65536
For array_row As Integer = 0 To excel_sheet_max - 1 Step (row_chunks - 1)
Dim number_of_rows As Integer
' If we are at the very end, specify as such
' as it might not be a full 700 rows
If array_row + (row_chunks - 1) > excel_sheet_max Then
number_of_rows = excel_sheet_max - array_row
Else
number_of_rows = row_chunks
End If
Dim result_segment(number_of_rows, dtResult.Columns.Count) As Object
Dim rngBegin As Range = ws.Cells(array_row + 1, 1)
Dim rngEnd As Range = ws.Cells(array_row + number_of_rows, dtResult.Columns.Count)
Dim rngAll As Range = ws.Range(rngBegin.Address & ":" & rngEnd.Address)
' Copy the 700 rows of elements to the segment
Try
Array.Copy(result, array_row * (dtResult.Columns.Count + 1), result_segment, 0, number_of_rows * (dtResult.Columns.Count + 1))
rngAll.Value2 = result_segment
Catch e As Exception
End Try
Array.Clear(result_segment, result_segment.GetLowerBound(0), result_segment.Length)
Array.Clear(result_segment, result_segment.GetLowerBound(1), result_segment.Length)
result_segment = Nothing
Next
Else
Dim rngBegin As Range = ws.Cells(1, 1)
Dim rngEnd As Range = ws.Cells(sheet_end_row + 1, dtResult.Columns.Count)
Dim rngAll As Range = ws.Range(rngBegin.Address & ":" & rngEnd.Address)
rngAll.Value2 = result
End If
' Formatting -- bold and freeze header columns, autofit columns
CType(ws.Cells(1, 1), Range).EntireRow.Font.Bold = True
CType(ws.Cells(2, 1), Range).Select()
excel.ActiveWindow.FreezePanes = True
ws.Columns.AutoFit()
' Blank out the array
Array.Clear(result, result.GetLowerBound(0), result.Length)
Array.Clear(result, result.GetLowerBound(1), result.Length)
result = Nothing
GC.Collect()
' Keep track of what we've processed
results_processed += sheet_end_row
Next
' Cleanse sheets - remove Worksheets that aren't named "Page x"
Dim lstRemoveSheets As New Generic.List(Of String)
For Each objWS As Object In wb.Worksheets
Dim ws As Worksheet = CType(objWS, Worksheet)
If lstSheets.Contains(ws.Name) = False Then
lstRemoveSheets.Add(ws.Name)
End If
Next
For Each sheet_to_delete As String In lstRemoveSheets
CType(wb.Worksheets(sheet_to_delete), Worksheet).Delete()
Next
End If
' Select the first cell of the sheet
Dim wsFirst As Worksheet '= dicSheets(1)
' Indicate we received no results
If sheet_count = 0 Then
wsFirst = wb.Worksheets(1)
CType(wsFirst.Cells(1, 1), Range).Value2 = "No results found."
Else
wsFirst = dicSheets(1)
End If
wsFirst.Select()
CType(wsFirst.Cells(1, 1), Range).Select()
' Finally return the workbook name
wb.SaveAs(workbook_full_path)
wb.Close()
' Dispose the dtResult
dtResult.Dispose()
dtResult = Nothing
Return workbook_name & ".xls" 'workbook_full_path
Catch ex As Exception
Throw New Exception("Error encountered when exporting to Excel: " & ex.Message & " " & ex.StackTrace)
'Return Nothing ' A nothing returned implies unsuccessful creation
Finally
If excel Is Nothing = False Then
excel.Quit()
System.Runtime.InteropServices.Marshal.ReleaseComObject(excel)
excel = Nothing
GC.Collect()
End If
End Try
End Function
No comments:
Post a Comment