kuujinbo_dot_info

Posted 2007-03

In Database Updates with SqlBulkCopy the example code showed how to extract data from an Excel file and dump it into a SQL Server database table using the SqlBulkCopy class. It was assumed that you knew the worksheet name(s) required for the OdbcCommand object used to pull the data from Excel. But what if you don't know the worksheet names? It would also be nice if we could get the schema information from Excel so that we could map the .NET System.Type to it's corresponding ADO.NET data provider Type in the database table(s).

using System;
using System.Data;
using System.Data.Odbc;
using System.IO;
using System.Text.RegularExpressions;

public class xl_schema {
  public static void Main(string[] args) {
    xl_schema x = new xl_schema(args);
    x.print_schema();
  }

  public xl_schema(string[] args) {
    // !! pass *full* path to excel file !!
    _xl_file = args[0];

    // sanity checks, file: [1] exists, and [2] has expected named extension
    if ( _xl_file == null || !File.Exists(_xl_file) )
      throw new ArgumentException("Path to Excel file required");
    if ( !Regex.IsMatch(_xl_file, @"\.xls$", RegexOptions.IgnoreCase) )
      throw new ArgumentException("Valid Excel file required");
  }

  // http://www.connectionstrings.com/?carrier=excel
  private readonly string _cs = 
  @"Driver={{Microsoft Excel Driver (*.xls)}};DriverId=790;Dbq={0}";
  private readonly string _sql = @"SELECT * FROM [{0}]";
  private string _xl_file;

  public void print_schema() {
    using (OdbcConnection c = new OdbcConnection(String.Format(_cs, _xl_file)) ) {
      c.Open();

      // get *ALL* sheet names
      DataTable sheets = c.GetSchema(OdbcMetaDataCollectionNames.Tables);
      foreach (DataRow dr in sheets.Rows) {
        string sheet_name = "";
        foreach (DataColumn dc in sheets.Columns) {
          sheet_name = dr[dc].ToString();
          // lot of schema info we *don't* need, so filter.
          // dumped all values to see which are needed; couldn't find
          // (.NET SDK) documentation on this topic
          if (
            sheet_name == "" 
            || dc.ColumnName != "TABLE_NAME"
            || sheet_name.EndsWith("Print_Area")
          ) continue;
          Console.WriteLine("Sheet Name: {0}", sheet_name);
          Console.WriteLine("========================================");

          // from sheet name dump schema/data types:
          // [1] column name, [2] .NET type
          using (OdbcCommand cmd = new OdbcCommand(String.Format(_sql, sheet_name), c) ) {
            OdbcDataAdapter da = new OdbcDataAdapter();
            da.SelectCommand = cmd;
            DataTable schema = da.FillSchema(new DataTable(), SchemaType.Mapped);
            foreach (DataColumn schema_col in schema.Columns) {
              Type t = schema_col.DataType;
              Console.WriteLine("{0} :: {1}", schema_col.ColumnName, t);
            }
            Console.WriteLine("\n");
          }
        }
      }
    }
  }
}

Running it on an example file produces the following results:

Sheet Name: Sheet1$
========================================
Order ID :: System.Double
Product ID :: System.Double
Quantity :: System.Double


Sheet Name: Sheet2$
========================================
Product ID :: System.Double
Product Name :: System.String

Notes: