日期:2014-05-17 浏览次数:20638 次
/// <summary>
/// Requests <paramref name="url"/> with browser-like headers and a freshly generated
/// BAIDUID cookie, then returns the response body decoded as gb2312 text.
/// </summary>
/// <param name="url">Absolute URL to fetch. The Host header is always sent as "www.baidu.com".</param>
/// <returns>
/// The decoded page text. If any &lt;title&gt; element contains "您的访问出错了", the fixed
/// string "err:访问出错,需填写验证码" is returned instead. On any exception the result is
/// "err:" followed by the exception message; this method does not throw.
/// </returns>
public static string DownLoadHtml(string url)
{
    try
    {
        // Attach a random BAIDUID cookie, scoped to http://www.baidu.com, to the request.
        CookieContainer cookies = new CookieContainer();
        CookieCollection baiduCookies = new CookieCollection();
        baiduCookies.Add(new Cookie("BAIDUID", System.Guid.NewGuid().ToString("N").ToUpperInvariant() + ":FG=1"));
        cookies.Add(new Uri("http://www.baidu.com"), baiduCookies);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E)";
        request.Accept = @"*/*";
        request.Host = "www.baidu.com";
        request.Headers["Accept-Encoding"] = "gzip, deflate";
        request.Headers["Accept-Language"] = "zh-cn";
        // Use the canonical upper-case verb; HTTP method tokens are case-sensitive on the wire.
        request.Method = WebRequestMethods.Http.Get;
        request.CookieContainer = cookies;
        request.AllowAutoRedirect = true;

        System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding("gb2312");
        string data = string.Empty;

        // Dispose the response and its stream so the underlying connection is released
        // even when decoding fails part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream receiveStream = response.GetResponseStream())
        {
            if (receiveStream != null)
            {
                string contentEncoding = response.Headers["Content-Encoding"] ?? string.Empty;

                if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    data = pageEncoding.GetString(DecompressGzip(receiveStream));
                }
                else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    data = pageEncoding.GetString(DecompressDeflate(receiveStream));
                }
                else
                {
                    // The server is free to ignore Accept-Encoding and answer uncompressed;
                    // previously that case silently produced an empty string.
                    using (StreamReader reader = new StreamReader(receiveStream, pageEncoding))
                    {
                        data = reader.ReadToEnd();
                    }
                }
            }
        }

        // A title containing this phrase marks the "access error / captcha required" page.
        Regex titleRegex = new Regex("<title>(?<title>.*?)</title>", RegexOptions.Singleline);
        foreach (Match m in titleRegex.Matches(data))
        {
            if (m.Groups["title"].Value.IndexOf("您的访问出错了", StringComparison.Ordinal) >= 0)
            {
                return "err:访问出错,需填写验证码";
            }
        }

        return data;
    }
    catch (Exception er)
    {
        // Deliberate best-effort contract: callers detect failure via the "err:" prefix.
        return "err:" + er.Message;
    }
}
private static byte[] DecompressGzip(Stream streamInput)
{
Stream streamOutput = new MemoryStrea