HttpWebRequest で Web ページのソースを取得する際に文字エンコーディングを自動判別する
HttpWebRequest で Web ページのソースを取得する際、ページ内の charset 指定と HTTP ヘッダーのエンコーディング情報を読み取ることで、ページの文字エンコーディングを自動的に判別します。これにより、ほとんどの場合、ページのエンコーディングを正しく取得できます。
/// <summary>
/// Requests <paramref name="url"/> and reports the name of the character encoding
/// that the page declares.
/// </summary>
/// <remarks>
/// Lookup order: a <c>charset=...</c> declaration found in the response body, then
/// <see cref="HttpWebResponse.CharacterSet"/>, then <c>Encoding.Default.BodyName</c>.
/// The body is inspected only when the status is 200 OK and the reported content
/// length is below 1 MiB. Redirects are not followed and the request times out after
/// 20 seconds. This is a best-effort probe: failures are traced and the fallback
/// name is returned instead of an exception.
/// </remarks>
/// <param name="url">Absolute HTTP or HTTPS address of the page to probe.</param>
/// <returns>
/// The declared charset name, or <c>Encoding.Default.BodyName</c> when nothing could
/// be determined.
/// </returns>
static string GetEncoding(string url)
{
    const int TimeoutMilliseconds = 20000;
    const long MaxContentLength = 1024 * 1024;

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = TimeoutMilliseconds;
        request.AllowAutoRedirect = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): per the HttpWebResponse documentation ContentLength is -1
            // when the server sends no Content-Length header, so such responses also
            // pass this size check - confirm that is acceptable.
            if (response.StatusCode == HttpStatusCode.OK
                && response.ContentLength < MaxContentLength)
            {
                Stream body = response.GetResponseStream();
                if ("gzip".Equals(response.ContentEncoding, StringComparison.OrdinalIgnoreCase))
                {
                    body = new GZipStream(body, CompressionMode.Decompress);
                }

                // The charset declaration itself is plain ASCII, so decoding the whole
                // body as ASCII is enough to find it, whatever the real page encoding
                // is. Both the gzip and the plain branch use the same decoder.
                string html;
                using (StreamReader reader = new StreamReader(body, Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }

                // Accepts both unquoted and quoted values (charset=utf-8, charset="utf-8").
                // An optional opening quote is skipped and the capture must be non-empty,
                // so a malformed declaration falls through to the HTTP header instead of
                // returning an empty name.
                Match declared = Regex.Match(
                    html,
                    @"charset\b\s*=\s*[""']?(?<charset>[^""'\s;/>]+)",
                    RegexOptions.IgnoreCase);
                if (declared.Success)
                {
                    return declared.Groups["charset"].Value;
                }

                // CharacterSet may be null as well as empty when the header carries
                // no charset, so guard against both.
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    return response.CharacterSet;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: callers get the fallback name rather than an
        // exception, but the failure is traced instead of vanishing silently.
        System.Diagnostics.Trace.TraceWarning("GetEncoding failed for '{0}': {1}", url, ex.Message);
    }

    return Encoding.Default.BodyName;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its body decoded with
/// <paramref name="encoding"/>.
/// </summary>
/// <remarks>
/// The body is read only when the status is 200 OK and the reported content length
/// is below 1 MiB; a gzip Content-Encoding is decompressed first. Redirects are not
/// followed and the request times out after 20 seconds. This is a best-effort fetch:
/// failures are traced and an empty string is returned instead of an exception.
/// </remarks>
/// <param name="url">Absolute HTTP or HTTPS address of the page to download.</param>
/// <param name="encoding">Encoding used to decode the response body.</param>
/// <returns>
/// The decoded page text, or <c>string.Empty</c> when the page could not be retrieved.
/// </returns>
static string GetHtml(string url, Encoding encoding)
{
    const int TimeoutMilliseconds = 20000;
    const long MaxContentLength = 1024 * 1024;

    // Without an encoding the body cannot be decoded; the result would be empty
    // anyway, so skip the network round-trip entirely.
    if (encoding == null)
    {
        return string.Empty;
    }

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = TimeoutMilliseconds;
        request.AllowAutoRedirect = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): per the HttpWebResponse documentation ContentLength is -1
            // when the server sends no Content-Length header, so such responses also
            // pass this size check - confirm that is acceptable.
            if (response.StatusCode == HttpStatusCode.OK
                && response.ContentLength < MaxContentLength)
            {
                Stream body = response.GetResponseStream();
                if ("gzip".Equals(response.ContentEncoding, StringComparison.OrdinalIgnoreCase))
                {
                    body = new GZipStream(body, CompressionMode.Decompress);
                }

                // Disposing the reader also disposes the (possibly wrapped) response
                // stream, and it happens before the response itself is closed.
                using (StreamReader reader = new StreamReader(body, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: callers get an empty string rather than an
        // exception, but the failure is traced instead of vanishing silently.
        System.Diagnostics.Trace.TraceWarning("GetHtml failed for '{0}': {1}", url, ex.Message);
    }

    return string.Empty;
}