Wednesday, July 28, 2010

Removing HTML from strings

Removing HTML from strings

There are three ways to do this, shown below; they differ mainly in performance.

using System;
using System.Text.RegularExpressions;

/// <summary>
/// Methods to remove HTML from strings.
/// </summary>
/// <remarks>
/// Every method treats the text from a '&lt;' up to the next '&gt;' as a tag and
/// drops it, including tags that span several lines. These are plain text
/// strippers, not HTML parsers: entities are not decoded and the content of
/// elements such as script or style is kept.
/// The regex-based methods only remove a '&lt;' that has a closing '&gt;';
/// <see cref="StripTagsCharArray"/> additionally drops everything after an
/// unclosed '&lt;' and drops any stray '&gt;' characters.
/// </remarks>
public static class HtmlRemoval
{
    /// <summary>
    /// Matches one tag: '&lt;', any run of characters other than '&gt;', then '&gt;'.
    /// The negated character class also matches line breaks, which "." does not,
    /// so tags broken across lines are matched too.
    /// </summary>
    private const string TagPattern = "<[^>]*>";

    /// <summary>
    /// Remove HTML from string with Regex.
    /// </summary>
    /// <param name="source">Text that may contain HTML tags.</param>
    /// <returns><paramref name="source"/> with every tag removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string StripTagsRegex(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        return Regex.Replace(source, TagPattern, string.Empty);
    }

    /// <summary>
    /// Compiled regular expression for performance.
    /// </summary>
    private static readonly Regex _htmlRegex = new Regex(TagPattern, RegexOptions.Compiled);

    /// <summary>
    /// Remove HTML from string with compiled Regex.
    /// </summary>
    /// <param name="source">Text that may contain HTML tags.</param>
    /// <returns><paramref name="source"/> with every tag removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string StripTagsRegexCompiled(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        return _htmlRegex.Replace(source, string.Empty);
    }

    /// <summary>
    /// Remove HTML tags from string using char array.
    /// </summary>
    /// <param name="source">Text that may contain HTML tags.</param>
    /// <returns>
    /// The characters of <paramref name="source"/> that lie outside any
    /// '&lt;' ... '&gt;' pair, in their original order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string StripTagsCharArray(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        // The output can never be longer than the input, so one buffer of the
        // input's length is always large enough.
        char[] array = new char[source.Length];
        int arrayIndex = 0;
        bool inside = false;

        for (int i = 0; i < source.Length; i++)
        {
            char let = source[i];
            if (let == '<')
            {
                inside = true;
                continue;
            }
            if (let == '>')
            {
                inside = false;
                continue;
            }
            if (!inside)
            {
                array[arrayIndex] = let;
                arrayIndex++;
            }
        }

        // Only the first arrayIndex slots of the buffer were written.
        return new string(array, 0, arrayIndex);
    }
}

ASP Net - Stripping html tags from text

 ASP Net - Stripping html tags from text

Remove any tag from a html string.

Extract a specific tag in html using c#

Solution:-

It is pretty simple: just pass the HTML string and the name of the tag you want to remove or extract to this function, and it will do that for you.

For example, if you want to remove all occurrences of an img tag from an HTML string, pass your HTML string and the tag name img, and your result is ready.


/// <summary>
/// Removes every opening (or self-closing) tag with the given name from an
/// HTML string. Matching is case-insensitive.
/// </summary>
/// <remarks>
/// Only the tag itself, from '&lt;' through the next '&gt;', is removed.
/// Closing tags such as &lt;/img&gt; and any text between tags are left in place.
/// Removal is a single pass over the input; this is a text helper, not an
/// HTML sanitizer.
/// </remarks>
/// <param name="htmlString">The HTML text to process.</param>
/// <param name="tag">
/// The literal tag name, for example "img". Regex metacharacters in the name
/// are escaped, so it is matched as plain text.
/// </param>
/// <returns><paramref name="htmlString"/> with the matching tags removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlString"/> is null.</exception>
private string GetImagesInHTMLString(string htmlString, string tag)
{
    if (htmlString == null)
    {
        throw new ArgumentNullException("htmlString");
    }

    // Escape the name so characters like '.', '?' or '+' are matched literally
    // instead of being read as regex syntax. A null name is treated as empty,
    // which (as before) matches any tag whose name starts with a word character.
    string escapedTag = Regex.Escape(tag ?? string.Empty);

    // \b keeps "img" from matching "<imgx>"; [^>]* consumes the attributes.
    string pattern = @"<(" + escapedTag + @")\b[^>]*>";

    // One regex replace instead of collecting matches and calling
    // string.Replace per match, which rescanned the whole string each time.
    return Regex.Replace(htmlString, pattern, string.Empty, RegexOptions.IgnoreCase);
}

Popular Posts