`
isiqi
  • 浏览: 16032622 次
  • 性别: Icon_minigender_1
  • 来自: 济南
社区版块
存档分类
最新评论

利用正则表达式过滤掉HTML字符及自己指定的字符

阅读更多

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Text.RegularExpressions;

using System.Web;

namespace Utility

{

public class StringUtil

{

/// <summary>

/// 过滤掉HTML标签

/// </summary>

/// <param name="Htmlstring"></param>

/// <returns></returns>

public static string NoHTML(string Htmlstring)

{

//正则表达式&.*?; 可以去掉所有的&开头;结尾的

//正则表达式 <.*?> 可以去掉所有 <> </>之类的

//正则表达式 >(.*)</li> 获取<li></li>之间的内容

//正则表达式 <[^>]*> 获取网页文本内容

//删除脚本

Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",RegexOptions.IgnoreCase);

//删除HTML

Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",RegexOptions.IgnoreCase);

Htmlstring.Replace("<", "");

Htmlstring.Replace(">", "");

Htmlstring.Replace("\r\n", "");

Htmlstring.Replace("|", "");

Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();

return Htmlstring;

}

}

}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics