Parsing Simple HTML using iTextSharp's HTMLWorker
Posted 2010-08-28
Lately I have been contributing to the ASP.NET forums. A user had figured out how to export a GridView to PDF, but wanted to explicitly set the column widths of the table. So I whipped up a quick and simple example:
<%@ WebHandler Language="C#" Class="HtmlColumnWidths" %>
using System;
using System.Collections.Generic;
using System.IO;
using System.Web;
using iTextSharp.text;
using iTextSharp.text.html.simpleparser;
using iTextSharp.text.pdf;
/// <summary>
/// Generic HTTP handler that parses a small hard-coded HTML table with
/// iTextSharp's <c>HTMLWorker</c>, assigns explicit relative column widths to
/// every parsed <c>PdfPTable</c>, and streams the resulting PDF to the response.
/// </summary>
public class HtmlColumnWidths : IHttpHandler {
    /// <summary>
    /// Writes the PDF to <paramref name="context"/>'s response output stream.
    /// </summary>
    /// <param name="context">The current HTTP context; its response receives the PDF bytes.</param>
    public void ProcessRequest (HttpContext context) {
        context.Response.ContentType = "application/pdf";
        string html = @"
<table>
<tr><td>r1-c1</td><td>r1-c2</td><td>r1-c3</td></tr>
<tr><td>r2-c1</td><td>r2-c2</td><td>r2-c3</td></tr>
</table>
";
        Document document = new Document();
        PdfWriter.GetInstance(document, context.Response.OutputStream);
        document.Open();
        try {
            /*
             * HTMLWorker hands back a flat list of parsed elements;
             * the reader is disposed as soon as parsing is done
             */
            List<IElement> objects;
            using (StringReader reader = new StringReader(html)) {
                objects = HTMLWorker.ParseToList(reader, null);
            }
            foreach (IElement element in objects) {
                /*
                 * only tables get explicit widths; every other element
                 * is added to the document untouched
                 */
                PdfPTable tbl = element as PdfPTable;
                if (tbl != null) {
                    /*
                     * relative widths, one entry per column
                     * (the sample table has three columns)
                     */
                    tbl.SetWidths(new int[]{4, 2, 1});
                }
                document.Add(element);
            }
        }
        finally {
            /*
             * close the document even if parsing or adding an element
             * throws, so it is not left open on the error path
             */
            document.Close();
        }
    }

    /// <summary>
    /// Always <c>false</c>: this handler instance is not marked as reusable.
    /// </summary>
    public bool IsReusable {
        get { return false; }
    }
}