January 5, 2008 3

How to extract URLs (href property) from HTML

By J in Tags: , ,

/// <summary>
/// Extracts the values of all <c>href</c> attributes found in an HTML fragment.
/// </summary>
/// <param name="txtIn">HTML text to scan. May be null or empty.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per <c>href</c> attribute, in document
/// order. Only the attribute value is returned (no <c>href=</c> prefix and no
/// surrounding quotes). The list is empty when <paramref name="txtIn"/> is null,
/// empty, or contains no <c>href</c> attributes.
/// </returns>
protected ArrayList getURL(string txtIn)
{
    ArrayList outURL = new ArrayList();

    // Nothing to scan; Regex.Matches would throw on a null input.
    if (string.IsNullOrEmpty(txtIn))
    {
        return outURL;
    }

    // Three alternatives, all captured into the same named group "url":
    //   "..."   double-quoted value
    //   '...'   single-quoted value
    //   bare    unquoted value, terminated by whitespace, a quote, or '>'
    // IgnoreCase so that HREF= / Href= are matched as well.
    Regex hrefPattern = new Regex(
        @"\bhref\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))",
        RegexOptions.IgnoreCase);

    foreach (Match hrefMatch in hrefPattern.Matches(txtIn))
    {
        // Take only the "url" group. Iterating over every group would also add
        // group 0, i.e. the whole href="..." text, alongside each URL.
        outURL.Add(hrefMatch.Groups["url"].Value);
    }

    return outURL;
}

Tags: , ,

3 Responses to “How to extract URLs (href property) from HTML”

  1. koperfild says:

    Good site!
    brainybusiness.info

  2. Edita says:

    Interestingly, even for accountants :) ))))

Leave a Reply