C# HTTPページのソースコードを取得する
3661 ワード
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding to decode with. Pass <c>null</c> or an empty string to use the
/// charset declared in the page's <c>&lt;meta&gt;</c> tag; when none can be found (or the
/// declared name is not a known encoding) the text decoded with
/// <see cref="Encoding.Default"/> is returned.
/// </param>
/// <returns>The decoded page source.</returns>
/// <exception cref="ArgumentException">
/// A non-empty <paramref name="charSet"/> supplied by the caller is not a valid encoding name.
/// </exception>
private string getHtml(string url, string charSet)
{
    byte[] rawBytes;

    // WebClient is IDisposable; release it as soon as the bytes are downloaded.
    using (WebClient client = new WebClient())
    {
        // Authenticate with the credentials of the current security context.
        // To send a cookie instead, add a "Cookie" entry to client.Headers;
        // to use explicit credentials, assign a NetworkCredential here.
        client.Credentials = CredentialCache.DefaultCredentials;
        rawBytes = client.DownloadData(url);
    }

    // First pass: decode with the default encoding. The <meta> tag is plain ASCII,
    // so it can be located even when this is the wrong encoding for the body.
    string html = Encoding.Default.GetString(rawBytes);

    Encoding target;
    if (!string.IsNullOrEmpty(charSet))
    {
        // Caller-supplied name: an unknown name raises ArgumentException, as before.
        target = Encoding.GetEncoding(charSet);
    }
    else
    {
        target = ResolveDeclaredEncoding(html);
    }

    // Second pass: re-decode the original bytes only when a different encoding applies.
    if (target != null && !target.Equals(Encoding.Default))
    {
        html = target.GetString(rawBytes);
    }

    return html;
}

/// <summary>
/// Looks for a charset declaration inside a <c>&lt;meta&gt;</c> tag and resolves it to an
/// <see cref="Encoding"/>. Returns <c>null</c> when no declaration is found or the declared
/// name is not a known encoding.
/// </summary>
private static Encoding ResolveDeclaredEncoding(string html)
{
    // Accepts both <meta charset="utf-8"> and
    // <meta http-equiv="Content-Type" content="text/html; charset=utf-8">,
    // with double-quoted, single-quoted or unquoted values. Only the charset
    // token itself is captured, never the surrounding quotes.
    Match declaration = Regex.Match(
        html,
        "<meta[^>]*?charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9_.:\\-]+)",
        RegexOptions.IgnoreCase);

    if (!declaration.Success)
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(declaration.Groups[1].Value);
    }
    catch (ArgumentException)
    {
        // The page declares a charset this runtime does not know;
        // the caller falls back to the default-decoded text.
        return null;
    }
}