Monday, June 7, 2010

Create an Excel workbook from a DataSet using Open XML SDK

Here is a simple code snippet for creating Excel workbooks from a DataSet using Open XML SDK. It will create a tab for each DataTable object and apply some basic formatting.

Steps
1.  Open a project in Visual Studio. In my case, I used a Web Application project.
2.  Add a reference to the following assemblies:
     Document.OpenXml (C:\Program Files\Open XML SDK\V2.0\lib)
          (Download here: Open XML SDK 2.0 for Microsoft Office)
     DocumentFormat.OpenXml.Extensions
            
(Download here: Simple OOXML)
     WindowsBase (GAC)
3.  Add a class called ExcelUtility with the following code:
using System.Data;
using System.IO;
using System.Linq;
using DocumentFormat.OpenXml.Extensions;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

namespace AcmeCompany
{
    public class ExcelUtility
   
{
        public static MemoryStream GetStreamFromDataSet(DataSet ds)
        {
            MemoryStream stream = SpreadsheetReader.Create();
            using (SpreadsheetDocument spreadSheet =
                SpreadsheetDocument.Open(stream, true))
            {
                for (int i = 0; i < ds.Tables.Count; i++)
                {
                    WorksheetPart newWorkSheetPart =
                        spreadSheet.WorkbookPart.AddNewPart<WorksheetPart>();
                    newWorkSheetPart.Worksheet = new Worksheet(new SheetData());
                    newWorkSheetPart.Worksheet.Save();
                    Sheets sheets =
                        spreadSheet.WorkbookPart.Workbook.GetFirstChild<Sheets>();
                    string relationshipId =
                        spreadSheet.WorkbookPart.GetIdOfPart(newWorkSheetPart);
                    uint sheetId = (uint)i;
                    if (sheets.Elements<Sheet>().Count() > 0)
                    {
                        sheetId = sheets.Elements<Sheet>().Select(s =>
                            s.SheetId.Value).Max() + 1;
                    }
                    Sheet sheet = new Sheet()
                        {
                            Id = relationshipId, SheetId = sheetId,
                            Name = ds.Tables[i].TableName
                        };
                    sheets.Append(sheet);
                    WorksheetWriter writer =
                        new WorksheetWriter(spreadSheet, newWorkSheetPart);
                    SpreadsheetStyle style =
                        SpreadsheetReader.GetDefaultStyle(spreadSheet);
                    style.SetBorder("000000", BorderStyleValues.Thin);
                    style.IsBold = true;
                    for (int x = 0; x < ds.Tables[i].Columns.Count; x++)
                    {
                        string columnName = GetExcelColumnValue(x + 1);                      
                        writer.PasteText(columnName + "1",
                            ds.Tables[i].Columns[x].ColumnName, style);
                    }
                    style.IsBold = false;
                    writer.PasteDataTable(ds.Tables[i], "A2", style);
                    spreadSheet.WorkbookPart.Workbook.Save();
                }
                //Remove first 3 default tabs (Sheet1 ~ Sheet3)
               
spreadSheet.WorkbookPart.Workbook.Sheets.FirstChild.Remove();
                spreadSheet.WorkbookPart.Workbook.Sheets.FirstChild.Remove();
                spreadSheet.WorkbookPart.Workbook.Sheets.FirstChild.Remove();
            };
            return stream;
        }

        private static string GetExcelColumnValue(int columnNumber)
        {
            if (columnNumber <= 26)
            {
                return ((char)(columnNumber + 64)).ToString();
            }
            columnNumber--;
            return GetExcelColumnValue(columnNumber / 26) +
                GetExcelColumnValue((columnNumber % 26) + 1);
        }       
    }
}

4.  Create some test code:

using System;
using System.Data;
using System.IO;

namespace AcmeCompany
{
    public partial class _Default : System.Web.UI.Page
   
{
        protected void Page_Load(object sender, EventArgs e)
        {
            DataSet ds = GetSampleData();
            string fileName = "test.xlsx";
            MemoryStream ms = ExcelUtility.GetStreamFromDataSet(ds);
            Response.Clear();
            Response.AddHeader("content-disposition", String.Format("attachment;filename={0}", fileName));
            Response.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            ms.WriteTo(Response.OutputStream);
            Response.End();

            //string filePath = @"c:\test.xlsx";
            //FileStream fs = File.OpenWrite(filePath);
            //fs.Write(ms.ToArray(), 0, ms.ToArray().Length);
            //fs.Close();
       
}

        public DataSet GetSampleData()
        {
            DataSet ds = new DataSet();

            DataTable dt1 = new DataTable();
            dt1.TableName = "Tab 1";
            dt1.Columns.Add("Column 1");
            dt1.Columns.Add("Column 2");
            dt1.Columns.Add("Column 3");
            DataRow r1 = dt1.NewRow();
            r1.ItemArray = new object[] { "asdf1", "asdf1", "asdf1" };
            dt1.Rows.Add(r1);
            r1 = dt1.NewRow();
            r1.ItemArray = new object[] { "asdf2", "asdf2", "asdf2" };
            dt1.Rows.Add(r1);
            r1 = dt1.NewRow();
            r1.ItemArray = new object[] { "asdf3", "asdf3", "asdf3" };
            dt1.Rows.Add(r1);
            r1 = dt1.NewRow();
            r1.ItemArray = new object[] { "asdf3", "asdf3", "asdf3" };
            dt1.Rows.Add(r1);

            DataTable dt2 = new DataTable();
            dt2.TableName = "Tab 2";
            dt2.Columns.Add("Column a");
            dt2.Columns.Add("Column b");
            dt2.Columns.Add("Column c");
            DataRow r = dt2.NewRow();
            r.ItemArray = new object[] { "asdf12", "asdf12", "asdf12" };
            dt2.Rows.Add(r);
            r = dt2.NewRow();
            r.ItemArray = new object[] { "asdf22", "asdf22", "asdf22" };
            dt2.Rows.Add(r);
            r = dt2.NewRow();
            r.ItemArray = new object[] { "asdf32", "asdf32", "asdf32" };
            dt2.Rows.Add(r);
            r = dt2.NewRow();
            r.ItemArray = new object[] { "asdf32", "asdf32", "asdf32" };
            dt2.Rows.Add(r);

            ds.Tables.Add(dt1);
            ds.Tables.Add(dt2);

            return ds;
        }
    }
}

Screenshots

image

image

15 comments:

  1. Thanks for the tutorial, but is it me or is it really slow for large tables? For example, a 1-column, 120K-row table takes over an hour to convert on my machine. Is that to be expected? Here's the test code:

    DataTable table = new DataTable();
    table.Columns.Add("Col_ID", typeof(Int32));
    table.Columns.Add("Col_Name", typeof(string));
    for (int x = 0; x <= 120000; x++)
    {
    DataRow dr = table.NewRow();
    dr["Col_ID"] = x;
    dr["Col_Name"] = "Data" + x.ToString();
    table.Rows.Add(dr);
    }

    DataSet dataSet = new DataSet();
    dataSet.Tables.Add(table);
    MemoryStream memoryStream = ExcelUtility.GetStreamFromDataSet(dataSet);

    ReplyDelete
  2. Wow, that is pretty horrendous. I have only used it for smaller datasets and it worked fine. If you are just using a DataTable, you may consider CSV instead. I needed a separate tab created for each DataTable in the DataSet in this case.

    ReplyDelete
  3. Thanx for this awesome snippet! happy coding :)

    ReplyDelete
  4. How can you format the style of the worksheet and freeze the top row?

    ReplyDelete
  5. Not entirely sure...I'd definitely check out the Open XML SDK to see if they have anything.

    ReplyDelete
  6. Thanks for the code John. Have you done the reverse, take a memory stream with an Excel and create a dataset?

    ReplyDelete
  7. No, I have not, but I'd be willing to bet you can use the same SDK.

    ReplyDelete
  8. Thanks a million!

    ReplyDelete
  9. I am using this code now on one of our production intranet sites. and the

    writer.PasteDataTable(ds.Tables[i], "A2", style);

    call is very slow for large data tables > 1000 rows.

    I even tried walking through each row and cell and using PasteText() instead, but thats slow also

    is this slow behavior to be expected?

    ReplyDelete
  10. Thanks John
    Leaving the first purple line?

    ReplyDelete
  11. How to freeze the header row of xls?

    ReplyDelete