日期:2014-05-16  浏览次数:20309 次

《小程序---利用jsoup解析CSDN博客信息》
package com.fenghuo.html;

import java.io.IOException;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Example program that downloads the list view of a CSDN blog with jsoup and
 * prints, for every matching article title entry, the article URL followed by
 * the title text.
 */
public class AnalyzeHtml {

	/** Address of the blog whose article list is fetched. */
	private static final String BLOG_URL = "http://blog.csdn.net/w695050167";

	/** Query string appended to the blog address to request its list view. */
	private static final String LIST_VIEW_QUERY = "?viewmode=list";

	/** Timeout, in milliseconds, handed to {@link Connection#timeout(int)}. */
	private static final int TIMEOUT_MILLIS = 500;

	/**
	 * Browser-like User-Agent sent with the request. Per the original author's
	 * note, CSDN refuses requests that identify themselves as a Java program.
	 */
	private static final String USER_AGENT =
			"Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)";

	/** CSS selector for the elements that wrap an article title. */
	private static final String TITLE_SELECTOR = ".link_title";

	/**
	 * Fetches the blog's list page and prints one URL line and one title line
	 * per article entry to standard output.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if the page cannot be fetched (connection failure,
	 *                     timeout, or an HTTP error reported by jsoup)
	 */
	public static void main(String[] args) throws IOException {
		Connection connection = Jsoup.connect(BLOG_URL + LIST_VIEW_QUERY);
		connection.timeout(TIMEOUT_MILLIS);
		connection.header("User-Agent", USER_AGENT);

		// Perform the GET request and parse the response into a document.
		Document doc = connection.get();

		for (Element title : doc.select(TITLE_SELECTOR)) {
			// getAllElements() includes the element itself, so a size of 2
			// keeps only title entries with exactly one nested element.
			if (title.getAllElements().size() != 2) {
				continue;
			}

			// The single nested element is not guaranteed to be a link;
			// skip the entry instead of failing with a NullPointerException.
			Element anchor = title.select("a[href]").first();
			if (anchor == null) {
				continue;
			}

			// Resolve against the document's base URI rather than prefixing
			// the site root by hand, so hrefs that are already absolute are
			// not mangled. Fall back to the raw attribute if jsoup cannot
			// build an absolute URL from it.
			String articleUrl = anchor.absUrl("href");
			if (articleUrl.isEmpty()) {
				articleUrl = anchor.attr("href");
			}
			System.out.println(articleUrl);

			System.out.println(title.text());
		}
	}

}