【发布时间】:2011-03-28 11:32:59
【问题描述】:
首先修改后的代码抛出javax.swing.text.ChangedCharSetException:
import java.io.*;
import java.net.*;
public class Main
{
public static void main(String[] args) throws IOException, Exception
{
String query = "#pragma";
Socket s = new Socket("google.com",80);
PrintStream p = new PrintStream(s.getOutputStream());
p.print("GET /search?q="+query+" HTTP/1.0\r\n");
p.print("User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)\r\n");
p.print("Connection: close\r\n\r\n");
InputStreamReader in = new InputStreamReader(s.getInputStream());
BufferedReader buffer = new BufferedReader(in);
// String line;
//
// while ((line = buffer.readLine()) != null)
// { System.out.println(line); }
HTMLUtils.ParseLinks (buffer);
in.close();
}
}
import java.io.BufferedReader;
import java.io.IOException;
//import java.io.FileReader;
import java.io.Reader;
import java.util.List;
import java.util.ArrayList;
import javax.swing.text.html.parser.ParserDelegator;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTML.Attribute;
import javax.swing.text.MutableAttributeSet;
public class HTMLUtils
{
private HTMLUtils() {}
public static List<String> extractLinks(Reader reader) throws IOException
{
final ArrayList<String> list = new ArrayList<String>();
ParserDelegator parserDelegator = new ParserDelegator();
ParserCallback parserCallback = new ParserCallback()
{
public void handleText(final char[] data, final int pos) { }
public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos)
{
if (tag == Tag.A) {
String address = (String) attribute.getAttribute(Attribute.HREF);
list.add(address);
}
}
public void handleEndTag(Tag t, final int pos) { }
public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos) { }
public void handleComment(final char[] data, final int pos) { }
public void handleError(final java.lang.String errMsg, final int pos) { }
};
parserDelegator.parse(reader, parserCallback, false);
return list;
}
public static void ParseLinks(BufferedReader buffer) throws Exception{
//FileReader reader = new FileReader("buffer");
List<String> links = HTMLUtils.extractLinks(buffer);
for (String link : links) {
System.out.println(link);
}
}
}
请注意,此示例中的用户代理适用于 IE。
现在我有 3 个问题:
- 我如何/可以将 HTMLUtils.ParseLinks 方法传递给“原始缓冲区”而不是她期望的 HTML 文件(我可以将缓冲区写入文件,但我想这是不必要的)
- 我不知道如何在查询语句中输入引号 (" ") 以获取整个字符串,即:query=" "New York Yankees" "
- 从主机获取User-Agent字符串有这么复杂吗??? link text
我不得不说它是我使用的导入类,我真的不明白那里发生了什么。我会尝试了解它何时会起作用 [-8
THNX
【问题讨论】: