HTMLタグを正規表現で除去するフィルタ

2323 ワード

    class Class1
    {
        // All patterns are built once and reused; constructing a Regex per call is needless work.

        // Whole script element. Singleline lets '.' cross line breaks, so a script body
        // spanning several lines is removed instead of leaking into the output as text.
        private static readonly Regex s_scriptBlock =
            new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Complete comment, including ones that span several lines.
        private static readonly Regex s_commentBlock =
            new Regex(@"<!--.*?-->", RegexOptions.Singleline);

        // Any opening, closing, self-closing or declaration tag, with optional
        // (possibly quoted) attributes. \7 refers back to the opening quote character.
        private static readonly Regex s_tag =
            new Regex(
                @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
                RegexOptions.IgnoreCase);

        // A CR or LF followed by at least one more whitespace character.
        private static readonly Regex s_lineBreakRun = new Regex(@"([\r\n])[\s]+");

        // The named / numeric entities this filter knows, plus any other numeric entity.
        private static readonly Regex s_entity =
            new Regex(@"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);", RegexOptions.IgnoreCase);

        // Left-over comment delimiters (e.g. ones produced by entity decoding).
        private static readonly Regex s_commentClose = new Regex(@"-->");
        private static readonly Regex s_commentOpenToLineEnd = new Regex(@"<!--.*\n");

        /// <summary>
        /// Removes HTML markup from a string: script elements, comments and tags are
        /// deleted, a small set of character entities is decoded, and any angle
        /// brackets or CR/LF pairs that remain afterwards are stripped.
        /// </summary>
        /// <param name="strHtml">The HTML text to clean. May be null or empty.</param>
        /// <returns>
        /// The text without markup; an empty string when <paramref name="strHtml"/>
        /// is null or empty.
        /// </returns>
        public static string StripHTML(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            string text = strHtml;

            text = s_scriptBlock.Replace(text, "");
            text = s_commentBlock.Replace(text, "");
            text = s_tag.Replace(text, "");
            text = s_lineBreakRun.Replace(text, "");

            // Decode all entities in ONE pass so already-decoded output is never decoded
            // again ("&amp;lt;" must become the text "&lt;", not "<").
            text = s_entity.Replace(text, new MatchEvaluator(DecodeEntity));

            // Turning "-->" into a line break gives the following rule a line end to
            // stop at, so a stray "<!-- ..." fragment is removed up to that point.
            text = s_commentClose.Replace(text, "\r\n");
            text = s_commentOpenToLineEnd.Replace(text, "");

            // String.Replace returns a new string; the results must be assigned.
            // Stripping the remaining angle brackets also guarantees that decoded
            // entities such as "&lt;script&gt;" cannot re-form markup in the output.
            text = text.Replace("<", "");
            text = text.Replace(">", "");
            text = text.Replace("\r\n", "");

            return text;
        }

        /// <summary>
        /// Maps one matched entity to its replacement text. Numeric entities that are
        /// not listed explicitly are removed.
        /// </summary>
        /// <param name="match">A match of the entity pattern; group 1 holds the entity name or "#number".</param>
        /// <returns>The decoded character, or an empty string for unlisted numeric entities.</returns>
        private static string DecodeEntity(Match match)
        {
            switch (match.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    return string.Empty;
            }
        }
    }

ソース:http://www.cnblogs.com/mishy/archive/2008/01/03/1024953.html