27 October 2012

Remove HTML tags from HTML markup string




               
              /// <summary>
              /// Helpers for turning HTML markup into plain text by dropping its tags.
              /// </summary>
              public static class HTMLTextHelper
              {
                  private const string CommentOpen = "<!--";
                  private const string CommentClose = "-->";

                  /// <summary>
                  /// Removes tags and comments from an HTML markup string.
                  /// </summary>
                  /// <param name="html">The markup to process; may be null or empty.</param>
                  /// <returns>
                  /// The text content with every tag or comment replaced by a single space,
                  /// or an empty string when <paramref name="html"/> is null or empty.
                  /// Character entities (such as <c>&amp;amp;</c>) are left as-is.
                  /// </returns>
                  public static string RemoveHtmlTags(string html)
                  {
                      if (string.IsNullOrEmpty(html))
                      {
                          return string.Empty;
                      }

                      int pos = 0;
                      StringBuilder builder = new StringBuilder(html.Length);
                      while (pos < html.Length)
                      {
                          if (html[pos] == '<' && IsMarkupStart(html, pos))
                          {
                              pos = SkipTag(html, pos);
                              // A space keeps words on either side of a tag from fusing together.
                              builder.Append(' ');
                          }
                          else
                          {
                              builder.Append(html[pos++]);
                          }
                      }
                      return builder.ToString();
                  }

                  /// <summary>
                  /// Tells whether the '&lt;' at <paramref name="pos"/> opens markup rather than
                  /// being literal text (as in "1 &lt; 2").
                  /// </summary>
                  /// <param name="html">The markup being scanned.</param>
                  /// <param name="pos">Index of a '&lt;' character in <paramref name="html"/>.</param>
                  /// <returns>
                  /// True when the following character is an ASCII letter, '/', '!' or '?';
                  /// false otherwise, including when '&lt;' is the last character.
                  /// </returns>
                  private static bool IsMarkupStart(string html, int pos)
                  {
                      int next = pos + 1;
                      if (next >= html.Length)
                      {
                          return false;
                      }

                      char c = html[next];
                      return c == '/' || c == '!' || c == '?'
                          || (c >= 'a' && c <= 'z')
                          || (c >= 'A' && c <= 'Z');
                  }

                  /// <summary>
                  /// Skips the tag or comment that starts at <paramref name="pos"/>.
                  /// </summary>
                  /// <param name="html">The markup being scanned.</param>
                  /// <param name="pos">Index of the opening '&lt;'.</param>
                  /// <returns>
                  /// The index just past the closing '&gt;' (or past "--&gt;" for a comment),
                  /// or the length of <paramref name="html"/> when the markup is unterminated.
                  /// </returns>
                  private static int SkipTag(string html, int pos)
                  {
                      // Comments may contain '>' and quote characters, so they cannot be
                      // scanned like ordinary tags; jump straight to the terminator.
                      if (string.CompareOrdinal(html, pos, CommentOpen, 0, CommentOpen.Length) == 0)
                      {
                          // Start two characters in so the degenerate forms "<!-->" and
                          // "<!--->" are recognised as already-closed comments.
                          int close = html.IndexOf(CommentClose, pos + 2, System.StringComparison.Ordinal);
                          return close < 0 ? html.Length : close + CommentClose.Length;
                      }

                      pos++;
                      while (pos < html.Length)
                      {
                          if (html[pos] == '"' || html[pos] == '\'')
                          {
                              // A quoted attribute value may legally contain '>'.
                              pos = SkipString(html, pos);
                          }
                          else if (html[pos++] == '>')
                          {
                              break;
                          }
                      }
                      return pos;
                  }

                  /// <summary>
                  /// Skips a quoted attribute value.
                  /// </summary>
                  /// <param name="html">The markup being scanned.</param>
                  /// <param name="pos">Index of the opening quote character.</param>
                  /// <returns>
                  /// The index just past the matching closing quote, or the length of
                  /// <paramref name="html"/> when the quote is never closed.
                  /// </returns>
                  private static int SkipString(string html, int pos)
                  {
                      char quote = html[pos++];
                      while (pos < html.Length)
                      {
                          if (html[pos++] == quote)
                          {
                              break;
                          }
                      }
                      return pos;
                  }
              }