`
isiqi
  • 浏览: 16072614 次
  • 性别: Icon_minigender_1
  • 来自: 济南
社区版块
存档分类
最新评论

利用HttpURLConnection抓取网页取名

阅读更多

闲来无事,利用 Java 的 HttpURLConnection,配合多线程抓取网页,计算名字的评分。

仅供娱乐。

程序如下
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
public class XingMing {
static final Logger log = Logger.getLogger(XingMing.class);
public static String read(String urlStr) {
try {
URL url = new URL(urlStr);
HttpURLConnection connection = (HttpURLConnection) url
.openConnection();
connection.connect();
InputStream in = connection.getInputStream();
BufferedReader read = new BufferedReader(new InputStreamReader(in));
StringBuffer buf = new StringBuffer();
String line = null;
while ((line = read.readLine()) != null) {
buf.append(line);
}
return buf.toString();
} catch (MalformedURLException e) {
return null;
} catch (IOException e) {
return null;
}
}
public static String find(String str, String beginStr, String endStr) {
final int length = beginStr.length();
int index = str.indexOf(beginStr);
String result = null;
if (index != -1) {
int index2 = str.indexOf(endStr, index + length);
if (index2 != -1) {
result = str.substring(index + beginStr.length(), index2);
}
}
return result;
}
public static String findName(String source, String name) {
// value=我的姓名『XX』的分析:
return find(source, "value=我的姓名『", "』的分析");
}
public static String findScore(String source, String name) {
// <font size=3>姓名评分:</font><font color=0000ff size=5FONT-SIZE: 10pt;">
// BT,楷体">99.5</font>
return find(
source,
"<font size=3>姓名评分:</font><font color=0000ff size=5 BT,楷体\">",
"</font>");
}
public static void main(String[] args) throws IOException {
final char firstChar = '';
final char lastChar = '';
// 最大开启100个线程,可以加快查询速度.
int maxThread = 100;
int step = (lastChar - firstChar) / maxThread;
for (int i = 0; i < maxThread; i++) {
char start = (char) (firstChar + i * step);
char end = (char) (firstChar + i * step + step - 1);
System.out.println("开启" + (i + 1) + "处理:" + start + "-" + end
+ (char) (end + 1));
new CallThread(start, end).start();
}
}
static class CallThread extends Thread {
private charstart;
private charend;
private String info;
CallThread(char start, char end) {
this.start = start;
this.end = end;
this.info = this.start + "-" + this.end;
}
public void run() {
//
final char youname1 = '';
final String url = "http://www.xingming.net/cmjg-mz.asp?sex=&youname1="
+ youname1 + "&youname2=";
String youname2;
String webinfo = null;
for (char i = start; i <= end; i++, webinfo = null) {
// 名字规则自己取吧.
// youname2 = "" + i;
// youname2 = "" + i + i;
youname2 = i + "";
for (int j = 0; j < 5 && webinfo == null; j++) {
webinfo = XingMing.read(url + youname2);
}
if (webinfo == null) {
log.warn("获取名字[" + youname1 + youname2 + "]失败");
continue;
}
String webName = XingMing.findName(webinfo, "[" + youname2
+ "]");
String webScore = XingMing.findScore(webinfo, "[" + youname2
+ "]");
try {
if (Float.parseFloat(webScore) >= 90) {
System.out.println(youname2 + ":" + webName + ":"
+ webScore);
}
} catch (Exception e) {
}
log.info(this.info + ":" + webName + ":" + webScore);
if ((i - start) % 100 == 0) {
System.out.println(this.info + "处理了" + (i - start) + "");
}
}
System.out.println(this.info + "结束了.....");
}
}
}
最新程序:
最终版宝宝取名程序(Java 版),我家宝宝的名字已经确定。

http://blog.csdn.net/z3h/archive/2008/01/16/2047420.aspx

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics