Posted 2007-03
In Database Updates with SqlBulkCopy the example code showed how to extract data from an Excel file and dump it into a SQL Server database table using the SqlBulkCopy class. It was assumed that you knew the worksheet name(s) required for the OdbcCommand object used to pull the data from Excel. But what if you don't know the worksheet names? It would also be nice if we could get the schema information from Excel so that we could map the .NET System.Type to it's corresponding ADO.NET data provider Type in the database table(s).
using System;
using System.Data;
using System.Data.Odbc;
using System.IO;
using System.Text.RegularExpressions;
public class xl_schema {
public static void Main(string[] args) {
xl_schema x = new xl_schema(args);
x.print_schema();
}
public xl_schema(string[] args) {
// !! pass *full* path to excel file !!
_xl_file = args[0];
// sanity checks, file: [1] exists, and [2] has expected named extension
if ( _xl_file == null || !File.Exists(_xl_file) )
throw new ArgumentException("Path to Excel file required");
if ( !Regex.IsMatch(_xl_file, @"\.xls$", RegexOptions.IgnoreCase) )
throw new ArgumentException("Valid Excel file required");
}
// http://www.connectionstrings.com/?carrier=excel
private readonly string _cs =
@"Driver={{Microsoft Excel Driver (*.xls)}};DriverId=790;Dbq={0}";
private readonly string _sql = @"SELECT * FROM [{0}]";
private string _xl_file;
public void print_schema() {
using (OdbcConnection c = new OdbcConnection(String.Format(_cs, _xl_file)) ) {
c.Open();
// get *ALL* sheet names
DataTable sheets = c.GetSchema(OdbcMetaDataCollectionNames.Tables);
foreach (DataRow dr in sheets.Rows) {
string sheet_name = "";
foreach (DataColumn dc in sheets.Columns) {
sheet_name = dr[dc].ToString();
// lot of schema info we *don't* need, so filter.
// dumped all values to see which are needed; couldn't find
// (.NET SDK) documentation on this topic
if (
sheet_name == ""
|| dc.ColumnName != "TABLE_NAME"
|| sheet_name.EndsWith("Print_Area")
) continue;
Console.WriteLine("Sheet Name: {0}", sheet_name);
Console.WriteLine("========================================");
// from sheet name dump schema/data types:
// [1] column name, [2] .NET type
using (OdbcCommand cmd = new OdbcCommand(String.Format(_sql, sheet_name), c) ) {
OdbcDataAdapter da = new OdbcDataAdapter();
da.SelectCommand = cmd;
DataTable schema = da.FillSchema(new DataTable(), SchemaType.Mapped);
foreach (DataColumn schema_col in schema.Columns) {
Type t = schema_col.DataType;
Console.WriteLine("{0} :: {1}", schema_col.ColumnName, t);
}
Console.WriteLine("\n");
}
}
}
}
}
}
Running it on an example file produces the following results:
Sheet Name: Sheet1$
========================================
Order ID :: System.Double
Product ID :: System.Double
Quantity :: System.Double
Sheet Name: Sheet2$
========================================
Product ID :: System.Double
Product Name :: System.String
Notes: