luolizhi
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib2

import re

import MySQLdb


class BaiDuNews:
    """Fetch a Baidu News page, extract headline texts, and store them in MySQL.

    Typical use chains the three steps:
    ``saveDB(getContents(getPage()))``.
    """

    # Captures the text of the first <a> that follows each '<li class="hd...'
    # opening; re.S lets '.' span newlines between the <li> and the link.
    _HEADLINE_PATTERN = re.compile('<li class="hd.*?<a.*?>(.*?)</a>', re.S)

    # The value is passed separately to the driver so it is escaped there,
    # instead of being spliced into the SQL text. NULL is inserted for the
    # first column -- presumably an auto-generated id; confirm against the
    # actual `baidunews` table definition.
    _INSERT_SQL = 'INSERT INTO baidunews VALUES (NULL, %s)'

    def __init__(self, baseurl='http://news.baidu.com/'):
        """Remember the page URL to scrape.

        Args:
            baseurl: URL fetched by ``getPage``; defaults to the Baidu News
                front page.
        """
        self.baseurl = baseurl

    def getPage(self):
        """Download ``self.baseurl`` and return its body decoded from GBK.

        Returns:
            The page content as a unicode string.

        Raises:
            Whatever ``urllib2.urlopen`` raises on network/HTTP failure, or
            ``UnicodeDecodeError`` if the body is not valid GBK.
        """
        request = urllib2.Request(self.baseurl)
        response = urllib2.urlopen(request)
        try:
            return response.read().decode('gbk')
        finally:
            # Release the socket even when read/decode fails.
            response.close()

    def getContents(self, page):
        """Extract headline texts from the page markup.

        Each match is echoed to stdout and collected UTF-8 encoded.

        Args:
            page: Page markup as a (unicode) string.

        Returns:
            A list of UTF-8 encoded headline strings, in document order;
            empty when nothing matches.
        """
        contents = []
        for item in self._HEADLINE_PATTERN.findall(page):
            print(item)
            contents.append(item.encode('utf-8'))
        return contents

    def saveDB(self, contents):
        """Insert every headline as one row of the ``baidunews`` table.

        Connects to the local ``test`` database as ``root`` with an empty
        password. Nothing is committed unless all inserts succeed.

        Args:
            contents: Iterable of headline strings to store. When it is
                empty, the method returns without opening a connection.

        Raises:
            Whatever ``MySQLdb`` raises on connection or insert failure; the
            connection is closed before the error propagates.
        """
        if not contents:
            return

        db = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                             db='test', charset='utf8')
        try:
            cur = db.cursor()
            try:
                # One-element tuples: each row supplies the single %s value.
                cur.executemany(self._INSERT_SQL,
                                [(content,) for content in contents])
            finally:
                cur.close()
            db.commit()
        finally:
            db.close()


# Script body: download the page, extract the headline texts, store them.
news = BaiDuNews()
front_page = news.getPage()
headlines = news.getContents(front_page)
news.saveDB(headlines)

分类:

技术点:

相关文章: