日期:2014-05-19  浏览次数:21274 次

请教C#如何提取网页正文。急!急!急!
C#如何提取网页正文?例如说某网页的新闻标题和新闻正文,标题我会了,关键是正文呐!!
各位高手来帮帮忙!~

------解决方案--------------------
下载源码:
/// <summary>
/// Downloads the resource at <paramref name="a_strUrl"/> with an HTTP GET request
/// and returns the response body as text.
/// </summary>
/// <param name="a_strUrl">Absolute URL of the page to download.</param>
/// <returns>
/// The response body with every line terminated by "\r\n". If sending the request
/// or reading the response throws, the string "错误: " followed by the exception
/// message is returned instead of the page source.
/// </returns>
/// <remarks>
/// Exceptions raised while building the request (for example an invalid URL) are
/// not caught here and propagate to the caller. The body is decoded with
/// <see cref="Encoding.Default"/>; the charset announced by the server is not consulted.
/// </remarks>
public static string GetHttpSourceValue(string a_strUrl)
{
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(new System.Uri(a_strUrl));

    // The literals must not carry trailing blanks: a space is not a legal character
    // in an HTTP method token, and the header values are sent to the server verbatim.
    myReq.Method = "GET";
    myReq.Accept = "*/*";
    myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";

    try
    {
        // using blocks release the connection, stream and reader even when reading fails.
        using (HttpWebResponse response = (HttpWebResponse)myReq.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.Default))
        {
            StringBuilder page = new StringBuilder();

            // Loop on ReadLine() rather than Peek(): on a network stream Peek() can
            // report -1 before the whole body has arrived, which truncates the page.
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                page.Append(line).Append("\r\n");
            }

            return page.ToString();
        }
    }
    catch (Exception exp)
    {
        // Best-effort contract kept from the original: failures are reported in-band.
        return "错误: " + exp.Message;
    }
}