博主在使用Java自带的API实现网络爬虫的时候,在请求方式为“POST”的情况下,运行时出现异常:java.net.SocketException: Unexpected end of file from server,如下:
博主认为,可能是在使用“GET”方式请求之后,再尝试用“POST”方式去请求网站数据时,网站服务器无法继续响应,猜测这可能跟网站的代理有关。
博主爬取的网站为酷狗音乐的官网:https://www.kugou.com/(可能就是该网站的问题)
于是博主换了一个网站后,运行成功!!!
故告诉大家,假如出现相同错误,可以不使用“POST”去请求,直接用“GET”方式,或者换个网站/地址。
最后分享博主的菜鸡爬虫demo:
package test;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import org.junit.Test;
/**
 * Demonstrates a minimal web crawler built only on the JDK's own HTTP API
 * ({@link HttpURLConnection}).
 *
 * <p>Each test sends one request, reads the whole response body as text and prints it to
 * standard output. Created 2020/10/3 21:30.
 *
 * @author 海龟
 */
public class JDKAPITest {

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Mobile Safari/537.36 Edg/85.0.564.68";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches a page with an HTTP GET request and prints the response body.
     *
     * @throws Exception if the connection cannot be opened or the response cannot be read
     */
    @Test
    public void testGet() throws Exception {
        // 1. Decide which URL to crawl.
        URL url = new URL("https://www.kugou.com/?username=xx");
        // 2. Obtain the connection object.
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            // 3. Configure the request: method, headers, timeouts.
            connection.setRequestMethod("GET"); // GET is the default; the name must be upper-case
            configureCommon(connection);
            // 4. Read and print the data.
            System.out.println(readBody(connection));
        } finally {
            // 5. Release the connection even when the request failed.
            connection.disconnect();
        }
    }

    /**
     * Sends a small form body with an HTTP POST request and prints the response body.
     *
     * @throws Exception if the connection cannot be opened, the body cannot be written, or the
     *     response cannot be read
     */
    @Test
    public void testPost() throws Exception {
        // 1. Decide which URL to crawl.
        URL url = new URL("http://www.itcast.cn/");
        // 2. Obtain the connection object.
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            // 3. Configure the request: method, headers, timeouts, body.
            connection.setDoOutput(true); // allow writing a request body
            connection.setRequestMethod("POST");
            configureCommon(connection);
            // Single '=' so the form field matches the "username=xx" query used by testGet.
            byte[] form = "username=xx".getBytes(StandardCharsets.UTF_8);
            // Closing the stream completes the request body before the response is read.
            try (OutputStream output = connection.getOutputStream()) {
                output.write(form);
            }
            // 4. Read and print the data.
            System.out.println(readBody(connection));
        } finally {
            // 5. Release the connection even when the request failed.
            connection.disconnect();
        }
    }

    /** Applies the User-Agent header and the timeouts shared by both requests. */
    private static void configureCommon(HttpURLConnection connection) {
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        // Without a read timeout a server that stops responding would block the test forever.
        connection.setReadTimeout(TIMEOUT_MILLIS);
    }

    /**
     * Reads the whole response body line by line, terminating every line with {@code '\n'}.
     *
     * <p>The body is decoded as UTF-8 instead of the platform default charset.
     * NOTE(review): assumes the crawled pages are served as UTF-8 - confirm against the
     * response Content-Type if other sites are crawled.
     */
    private static String readBody(HttpURLConnection connection) throws IOException {
        // try-with-resources closes the reader (and the wrapped stream) on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
            return body.toString();
        }
    }
}
感谢大家!!