C#でHTTPページのソースコードを取得する

3661 ワード

using System;

using System.IO;

using System.Net;

using System.Text;

using System.Text.RegularExpressions;

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the character set used to decode the downloaded bytes. When null or empty,
/// the charset declared in the page's own meta tag is used if one can be found;
/// otherwise the text is decoded with <see cref="Encoding.Default"/>.
/// </param>
/// <returns>The page source as a string.</returns>
/// <exception cref="ArgumentException">
/// An explicitly supplied <paramref name="charSet"/> is not a valid or supported encoding name.
/// </exception>
private string getHtml(string url, string charSet)
{
    byte[] pageBytes;

    // WebClient is IDisposable; release it as soon as the download has completed.
    using (WebClient client = new WebClient())
    {
        // If the site requires a cookie, it can be sent like this:
        // client.Headers.Add("Cookie", cookie);

        // Authenticate with the default credentials of the current security context.
        client.Credentials = CredentialCache.DefaultCredentials;

        // To authenticate with an explicit user name and password instead:
        // client.Credentials = new NetworkCredential(struser, strpassword);

        pageBytes = client.DownloadData(url);
    }

    // First pass with the default encoding; the result is scanned below for the meta charset declaration.
    string html = Encoding.Default.GetString(pageBytes);

    // Only sniff the page when the caller did not specify a charset.
    bool charSetFromPage = string.IsNullOrEmpty(charSet);
    if (charSetFromPage)
    {
        // Handles both <meta charset="x"> and <meta http-equiv=... content="text/html; charset=x">,
        // with double quotes, single quotes or no quotes around the value. The capture is limited
        // to encoding-name characters so no quote or trailing attribute leaks into it.
        Match charSetMatch = Regex.Match(
            html,
            "<meta[^>]*?charset\\s*=\\s*[\"']?([\\w.:-]+)",
            RegexOptions.IgnoreCase);
        charSet = charSetMatch.Success ? charSetMatch.Groups[1].Value : null;
    }

    if (string.IsNullOrEmpty(charSet))
    {
        return html;
    }

    Encoding pageEncoding;
    try
    {
        pageEncoding = Encoding.GetEncoding(charSet);
    }
    catch (ArgumentException)
    {
        // A charset the caller asked for explicitly must still fail loudly.
        if (!charSetFromPage)
        {
            throw;
        }

        // The page declared a charset this runtime does not know; keep the default decoding.
        return html;
    }

    // Decoding again with the default encoding would produce the same string, so skip it.
    if (!pageEncoding.Equals(Encoding.Default))
    {
        html = pageEncoding.GetString(pageBytes);
    }

    return html;
}