kuujinbo_dot_info

Parsing Simple HTML using iTextSharp's HTMLWorker

Posted 2010-08-28

Lately I have been contributing to the ASP.NET forums. A user was able to figure out how to export a GridView to PDF, but wanted to explicitly set the column widths of the table. So I whipped up a quick and simple example:

<%@ WebHandler Language="C#" Class="HtmlColumnWidths" %>
using System;
using System.Collections.Generic;
using System.IO;
using System.Web;
using iTextSharp.text;
using iTextSharp.text.html.simpleparser;
using iTextSharp.text.pdf;

/*
 * Generic handler that parses a small hard-coded HTML table with
 * HTMLWorker and writes the result to the response as a PDF, setting
 * the column widths on every PdfPTable found along the way.
 */
public class HtmlColumnWidths : IHttpHandler {
  /*
   * Reports that this handler instance is not reusable.
   */
  public bool IsReusable {
    get { return false; }
  }

  /*
   * Builds the PDF and writes it to context.Response.OutputStream.
   */
  public void ProcessRequest (HttpContext context) {
    string markup = @"
<table>
<tr><td>r1-c1</td><td>r1-c2</td><td>r1-c3</td></tr>
<tr><td>r2-c1</td><td>r2-c2</td><td>r2-c3</td></tr>
</table>
    ";
    context.Response.ContentType = "application/pdf";

    Document pdf = new Document();
    PdfWriter.GetInstance(pdf, context.Response.OutputStream);
    pdf.Open();

    /*
     * HTMLWorker hands back every parsed element; no style sheet is
     * supplied (second argument is null).
     */
    StringReader source = new StringReader(markup);
    List<IElement> parsed = HTMLWorker.ParseToList(source, null);

    /*
     * One width entry per column of the three-column table above.
     */
    int[] columnWidths = new int[]{4, 2, 1};

    foreach (IElement item in parsed) {
      /*
       * Only tables get the explicit widths; every element, table
       * or not, is still added to the document.
       */
      if (item is PdfPTable) {
        ((PdfPTable) item).SetWidths(columnWidths);
      }
      pdf.Add(item);
    }

    pdf.Close();
  }
}