asp.Netニュース収集の簡単な例
4880 ワード
ネットで資料を見て、自分で整理してみましたが、使うならニュースアドレス、RSSでこのように使いやすいと思います.
o(∩∩)oハハハ
o(∩∩)oハハハ
private void init2(string url,string begin,string end)
{
HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);
HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();
Stream stream = webResponse.GetResponseStream();
System.IO.StreamReader streamReader = new StreamReader(stream, Encoding.GetEncoding("gb2312"));
string content = streamReader.ReadToEnd();
streamReader.Close();
webResponse.Close();
if (content.IndexOf(begin) > 0)
content = content.Substring(content.IndexOf(begin));
if (content.IndexOf(end) > 0)
content = content.Substring(0, content.IndexOf(end) + end.Length);
if (content.IndexOf(begin) < 0 || content.IndexOf(end)<0)
{
Response.Write("<script>alert(' !');</script>");
}else
{
content = DelHTML(content);
txtContent.Text = content;
}
}
public static string DelHTML(string Htmlstring)// HTML
{
#region
//
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
// HTML
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"([\r
])[\s]+", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"-->", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<!--.*", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
//Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<A>.*</A>", "");
//Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<[a-zA-Z]*=\.[a-zA-Z]*\?[a-zA-Z]+=\d&\w=%[a-zA-Z]*|[A-Z0-9]", "");
//Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<P>.*</P>", "");
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, " ", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(amp|#38);", "&", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(lt|#60);", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(gt|#62);", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&#(\d+);", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
Htmlstring.Replace("<", "");
Htmlstring.Replace(">", "");
Htmlstring.Replace("\r
", "");
//Htmlstring=HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
#endregion
return Htmlstring;
}