<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>jesal gadhia &#187; html</title>
	<atom:link href="http://jesal.us/tag/html/feed/" rel="self" type="application/rss+xml" />
	<link>http://jesal.us</link>
	<description></description>
	<lastBuildDate>Fri, 19 Mar 2010 05:32:28 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.9.2</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>How to easily parse HTML without RegEx</title>
		<link>http://jesal.us/2008/05/how-to-easily-parse-html-without-regex/</link>
		<comments>http://jesal.us/2008/05/how-to-easily-parse-html-without-regex/#comments</comments>
		<pubDate>Tue, 06 May 2008 16:50:04 +0000</pubDate>
		<dc:creator>J</dc:creator>
				<category><![CDATA[c#]]></category>
		<category><![CDATA[html]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://jesal.us/blog/?p=91</guid>
		<description><![CDATA[I recently discovered an absolutely amazing HTML parsing library for .NET called HtmlAgilityPack. It completely takes away the pain of parsing complicated HTML with regular expressions.
Here&#8217;s a very simple example of what you can do with it &#8211; I&#8217;m just extracting the inner HTML of every element in an HTML file whose CSS class [...]]]></description>
			<content:encoded><![CDATA[<p>I recently discovered an absolutely amazing HTML parsing library for .NET called <a href="http://www.codeplex.com/htmlagilitypack" target="_blank">HtmlAgilityPack</a>. It completely takes away the pain of parsing complicated HTML with regular expressions.</p>
<p>Here&#8217;s a very simple example of what you can do with it &#8211; I&#8217;m just extracting the inner HTML of every element in an HTML file whose CSS class is &#8220;scrape&#8221;:<br />
<br />
<!-- code formatted by http://manoli.net/csharpformat/ --></p>
<pre class="csharpcode">
<span class="kwrd">using</span> HtmlAgilityPack;

<span class="kwrd">public</span> <span class="kwrd">partial</span> <span class="kwrd">class</span> _Default : System.Web.UI.Page
{
    <span class="kwrd">protected</span> <span class="kwrd">void</span> Page_Load(<span class="kwrd">object</span> sender, EventArgs e)
    {
        HtmlDocument doc = <span class="kwrd">new</span> HtmlDocument();
        doc.Load(Server.MapPath(filePath));
        Parse(doc.DocumentNode);
    }
    <span class="kwrd">private</span> <span class="kwrd">void</span> Parse(HtmlNode n)
    {
        <span class="kwrd">foreach</span> (HtmlAttribute atr <span class="kwrd">in</span> n.Attributes)
        {
            <span class="kwrd">if</span> (atr.Name == <span class="str">"class"</span> &amp;&amp; atr.Value == <span class="str">"scrape"</span>)
            {
                Response.Write(n.InnerHtml);
            }
        }

        <span class="kwrd">if</span> (n.HasChildNodes)
        {
            <span class="kwrd">foreach</span> (HtmlNode cn <span class="kwrd">in</span> n.ChildNodes)
            {
                Parse(cn);
            }
        }
    }
}
</pre>
<p>
That&#8217;s just a very small part of what it can do. I&#8217;ll expand on this and post a few more examples in the future showing other interesting things you can do with the library. </p>
]]></content:encoded>
			<wfw:commentRss>http://jesal.us/2008/05/how-to-easily-parse-html-without-regex/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>How to extract URLs (href property) from HTML</title>
		<link>http://jesal.us/2008/01/how-to-extract-urls-href-property-from-html/</link>
		<comments>http://jesal.us/2008/01/how-to-extract-urls-href-property-from-html/#comments</comments>
		<pubDate>Sat, 05 Jan 2008 07:13:09 +0000</pubDate>
		<dc:creator>J</dc:creator>
				<category><![CDATA[c#]]></category>
		<category><![CDATA[html]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://jesal.us/blog/2008/01/05/how-to-extract-urls-href-property-from-html/</guid>
		<description><![CDATA[

protected ArrayList getURL(string txtIn)
{
    ArrayList outURL = new ArrayList();
    Regex r = new Regex("href\\s*=\\s*(?:(?:\\\"(?&#60;url&#62;[^\\\"]*)\\\")&#124;(?&#60;url&#62;[^\\s]* ))");
    MatchCollection mc1 = r.Matches(txtIn);

    foreach (Match m1 in mc1)
    {
        foreach (Group g in m1.Groups)
    [...]]]></description>
			<content:encoded><![CDATA[<p><!-- code formatted by http://manoli.net/csharpformat/ --></p>
<pre class="csharpcode" style="overflow-y:hidden">
<span class="kwrd">protected</span> ArrayList getURL(<span class="kwrd">string</span> txtIn)
{
    ArrayList outURL = <span class="kwrd">new</span> ArrayList();
    Regex r = <span class="kwrd">new</span> Regex(<span class="str">"href\\s*=\\s*(?:(?:\\\"(?&lt;url&gt;[^\\\"]*)\\\")|(?&lt;url&gt;[^\\s]* ))"</span>);
    MatchCollection mc1 = r.Matches(txtIn);

    <span class="kwrd">foreach</span> (Match m1 <span class="kwrd">in</span> mc1)
    {
        <span class="kwrd">foreach</span> (Group g <span class="kwrd">in</span> m1.Groups)
        {
            outURL.Add(g.Value);
        }
    }

    <span class="kwrd">return</span> outURL;
}</pre>
]]></content:encoded>
			<wfw:commentRss>http://jesal.us/2008/01/how-to-extract-urls-href-property-from-html/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
		</item>
		<item>
		<title>Strip out HTML tags using RegEx</title>
		<link>http://jesal.us/2007/11/strip-out-html-tags-via-regex/</link>
		<comments>http://jesal.us/2007/11/strip-out-html-tags-via-regex/#comments</comments>
		<pubDate>Wed, 14 Nov 2007 04:16:51 +0000</pubDate>
		<dc:creator>J</dc:creator>
				<category><![CDATA[c#]]></category>
		<category><![CDATA[html]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://jesal.us/blog/2007/11/13/strip-out-html-tags-via-regex/</guid>
		<description><![CDATA[This code strips out all the HTML tags and then truncates the text to about newLength characters, cutting at the last space and appending an ellipsis (...).


public static string TruncateText(string txtIn, int newLength)
{
    string txtOut = txtIn;
    string pattern = @"&#60;(.&#124;\n)*?&#62;";

    //Strip out HTML tags
    if (Regex.IsMatch(txtIn, pattern, RegexOptions.None))
      [...]]]></description>
			<content:encoded><![CDATA[<p>This code strips out all the HTML tags and then truncates the text to about <code>newLength</code> characters, cutting at the last space and appending an ellipsis (...).</p>
<p><!-- code formatted by http://manoli.net/csharpformat/ --></p>
<pre class="csharpcode">
<span class="kwrd">public</span> <span class="kwrd">static</span> <span class="kwrd">string</span> TruncateText(<span class="kwrd">string</span> txtIn, <span class="kwrd">int</span> newLength)
{
    <span class="kwrd">string</span> txtOut = txtIn;
    <span class="kwrd">string</span> pattern = <span class="str">@"&lt;(.|\n)*?&gt;"</span>;

    <span class="rem">//Strip out HTML tags</span>
    <span class="kwrd">if</span> (Regex.IsMatch(txtIn, pattern, RegexOptions.None))
        txtOut = Regex.Replace(txtIn, pattern, <span class="kwrd">string</span>.Empty, RegexOptions.Multiline).Trim();

    <span class="kwrd">if</span> (txtOut.Length &gt; newLength)
    {
        <span class="kwrd">int</span> endPos = txtOut.LastIndexOf(<span class="str">" "</span>, newLength);
        txtOut = txtOut.Substring(0, endPos) + <span class="str">"..."</span>;
    }

    <span class="kwrd">return</span> txtOut;
}</pre>
]]></content:encoded>
			<wfw:commentRss>http://jesal.us/2007/11/strip-out-html-tags-via-regex/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
