[转]C# 替换去除HTML标记方法(正则表达式)

using System.Text.RegularExpressions;     //包含正则表达式

 /// <summary>
 /// Strips HTML markup from a string: removes script blocks and tags, decodes a
 /// small set of character entities, drops any leftover angle brackets and line
 /// breaks, and finally HTML-encodes and trims the remaining text.
 /// </summary>
 /// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
 /// <returns>
 /// The HTML-encoded plain text, or an empty string when the input is null or empty.
 /// </returns>
 public static string NoHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     // Remove script blocks. Singleline lets '.' cross line breaks so that
     // multi-line scripts are removed together with their content.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

     // Remove HTML tags.
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

     // Remove comment remnants that the tag pattern did not consume.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

     // Decode the common named / numeric character entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00a1", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00a2", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00a3", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00a9", RegexOptions.IgnoreCase);

     // Any other numeric entity is dropped.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

     // Strings are immutable: the result of Replace must be assigned back,
     // otherwise these calls have no effect.
     Htmlstring = Htmlstring.Replace("<", "");
     Htmlstring = Htmlstring.Replace(">", "");
     Htmlstring = Htmlstring.Replace("\r\n", "");

     // HttpUtility.HtmlEncode is the method Server.HtmlEncode wraps; calling it
     // directly avoids a NullReferenceException when there is no current HttpContext.
     Htmlstring = System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();

     return Htmlstring;
 }

using System.Text.RegularExpressions;     //包含正则表达式

 /// <summary>
 /// Strips HTML markup from a string: removes script blocks and tags, decodes a
 /// small set of character entities, drops any leftover angle brackets and line
 /// breaks, and finally HTML-encodes and trims the remaining text.
 /// </summary>
 /// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
 /// <returns>
 /// The HTML-encoded plain text, or an empty string when the input is null or empty.
 /// </returns>
 public static string NoHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     // Remove script blocks. Singleline lets '.' cross line breaks so that
     // multi-line scripts are removed together with their content.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

     // Remove HTML tags.
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

     // Remove comment remnants that the tag pattern did not consume.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

     // Decode the common named / numeric character entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00a1", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00a2", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00a3", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00a9", RegexOptions.IgnoreCase);

     // Any other numeric entity is dropped.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

     // Strings are immutable: the result of Replace must be assigned back,
     // otherwise these calls have no effect.
     Htmlstring = Htmlstring.Replace("<", "");
     Htmlstring = Htmlstring.Replace(">", "");
     Htmlstring = Htmlstring.Replace("\r\n", "");

     // HttpUtility.HtmlEncode is the method Server.HtmlEncode wraps; calling it
     // directly avoids a NullReferenceException when there is no current HttpContext.
     Htmlstring = System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();

     return Htmlstring;
 }

 示例:

C#代码

/// <summary>
/// Replaces all HTML tags with an empty string and decodes a small set of
/// character entities, returning plain (not HTML-encoded) text.
/// </summary>
/// <param name="strHtml">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The text with script blocks, tags, comment remnants, angle brackets and
/// line breaks removed; an empty string when the input is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Patterns and replacements are parallel arrays: aryRep[i] replaces aryReg[i].
    string[] aryReg =
    {
        // Script blocks; (?s) lets '.' cross line breaks so multi-line scripts go too.
        @"(?s)<script[^>]*?>.*?</script>",
        // Opening / closing tags with optional attributes (\7 is the attribute's quote char).
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // A line break together with the whitespace that follows it.
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // Any other numeric entity is dropped.
        @"&#(\d+);",
        // Comment remnants.
        @"-->",
        @"<!--.*\n"
    };

    string[] aryRep =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00a1", // chr(161)
        "\u00a2", // chr(162)
        "\u00a3", // chr(163)
        "\u00a9", // chr(169)
        "",
        "\r\n",
        ""
    };

    string strOutput = strHtml;

    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: the result of Replace must be assigned back,
    // otherwise these calls have no effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");

    return strOutput;
}