[转]C# 替换去除HTML标记方法(正则表达式)

using System.Text.RegularExpressions;     //包含正则表达式   
 
 /// <summary>
 /// Strips HTML markup from a string and returns the remaining text,
 /// HTML-encoded and trimmed.
 /// </summary>
 /// <param name="Htmlstring">HTML fragment to clean; may be null or empty.</param>
 /// <returns>
 /// The text with script blocks, comments and tags removed, the listed entities
 /// decoded, any remaining angle brackets and CR/LF pairs removed, and the result
 /// HTML-encoded and trimmed. Returns an empty string for null or empty input.
 /// </returns>
 public static string NoHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     RegexOptions ignoreCase = RegexOptions.IgnoreCase;
     // Singleline lets '.' match newlines so multi-line scripts/comments are removed whole.
     RegexOptions multiLineBlock = RegexOptions.IgnoreCase | RegexOptions.Singleline;

     // Remove script blocks (including their content) and complete comments.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", multiLineBlock);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", multiLineBlock);

     // Remove remaining tags.
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", ignoreCase);
     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", ignoreCase);
     // Remove stray comment terminators and unterminated comment openers.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", ignoreCase);

     // Decode the supported character entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", ignoreCase);
     // Drop every other numeric entity.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", ignoreCase);
     // Decode '&' last so that "&amp;lt;" becomes the text "&lt;" rather than "<".
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", ignoreCase);

     // string.Replace returns a new string; the result must be assigned.
     Htmlstring = Htmlstring.Replace("<", "")
                            .Replace(">", "")
                            .Replace("\r\n", "");

     // WebUtility.HtmlEncode needs no active HTTP request, unlike HttpContext.Current.Server.
     return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
 }
 
using System.Text.RegularExpressions;     //包含正则表达式
 
 /// <summary>
 /// Strips HTML markup from a string and returns the remaining text,
 /// HTML-encoded and trimmed.
 /// </summary>
 /// <param name="Htmlstring">HTML fragment to clean; may be null or empty.</param>
 /// <returns>
 /// The text with script blocks, comments and tags removed, the listed entities
 /// decoded, any remaining angle brackets and CR/LF pairs removed, and the result
 /// HTML-encoded and trimmed. Returns an empty string for null or empty input.
 /// </returns>
 public static string NoHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     RegexOptions ignoreCase = RegexOptions.IgnoreCase;
     // Singleline lets '.' match newlines so multi-line scripts/comments are removed whole.
     RegexOptions multiLineBlock = RegexOptions.IgnoreCase | RegexOptions.Singleline;

     // Remove script blocks (including their content) and complete comments.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", multiLineBlock);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", multiLineBlock);

     // Remove remaining tags.
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", ignoreCase);
     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", ignoreCase);
     // Remove stray comment terminators and unterminated comment openers.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", ignoreCase);

     // Decode the supported character entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", ignoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", ignoreCase);
     // Drop every other numeric entity.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", ignoreCase);
     // Decode '&' last so that "&amp;lt;" becomes the text "&lt;" rather than "<".
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", ignoreCase);

     // string.Replace returns a new string; the result must be assigned.
     Htmlstring = Htmlstring.Replace("<", "")
                            .Replace(">", "")
                            .Replace("\r\n", "");

     // WebUtility.HtmlEncode needs no active HTTP request, unlike HttpContext.Current.Server.
     return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
 }
 
 示例:
 
C#代码  
/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <param name="strHtml">HTML fragment to clean; may be null or empty.</param>
/// <returns>
/// The text with script blocks, comments and tags removed, the listed entities
/// decoded, and any remaining angle brackets and CR/LF pairs removed. The result is
/// NOT HTML-encoded. Returns an empty string for null or empty input.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }; rows are applied in order.
    // "(?s)" makes '.' match newlines so multi-line scripts/comments are removed whole.
    string[,] rules =
    {
        { @"(?s)<script[^>]*?>.*?</script>", "" },
        { @"(?s)<!--.*?-->", "" },
        // Opening/closing/declaration tags; quoted attribute values may contain '>'.
        // The alternatives start with distinct characters, which avoids the heavy
        // backtracking of nested-quantifier tag patterns.
        { @"</?!?\w(?:""[^""]*""|'[^']*'|[^'""<>])*>", "" },
        // A line break together with the whitespace that follows it.
        { @"([\r\n])[\s]+", "" },
        // Stray comment terminators and unterminated comment openers.
        { @"-->", "" },
        { @"<!--.*", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Every other numeric entity is dropped.
        { @"&#(\d+);", "" },
        // '&' is decoded last so that "&amp;lt;" becomes the text "&lt;" rather than "<".
        { @"&(amp|#38);", "&" }
    };

    string strOutput = strHtml;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the result must be assigned.
    strOutput = strOutput.Replace("<", "")
                         .Replace(">", "")
                         .Replace("\r\n", "");

    return strOutput;
}

 

[出处:http://jhxk.iteye.com/blog/436867]