import urllib.request

import chardet

 4 def main():
 5     count = 1
 6     with open('D:\\urls.txt') as f:
 7         while True:    
 8             url = f.readline()
 9             if url == '':
10                 break
11             
12             url_content = urllib.request.urlopen(url).read()
13             
14             #获取网页编码
15             encode = chardet.detect(url_content)['encoding']
16             if encode == 'GB2312':
17                 encode = 'GBK'
18             #解码
19             url_content = url_content.decode(encode)
20 
21             #文件名称
22             file_name = 'D:\\url_%d.txt' % count
23 
24             #写入内容
25             with open(file_name,'a',encoding = encode) as g:                
26                 g.write(url_content)
27                 
28             count += 1
29             
# Run the download only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()

 

相关文章:

  • 2021-08-31
  • 2022-02-13
  • 2022-12-23
  • 2021-05-26
  • 2021-06-23
  • 2021-08-27
  • 2021-12-25
猜你喜欢
  • 2021-09-09
  • 2021-12-25
  • 2021-09-03
  • 2022-02-14
  • 2021-12-25
  • 2022-12-23
相关资源
相似解决方案