# 使用正则表达式 (Using regular expressions)

# Validate the format of an e-mail address.
import re

# Local part: word characters with optional dot-separated segments.
# Domain: word characters followed by one to three dot-separated labels of
# two or three word characters each.
mail = r'^(\w)+(\.\w+)*@(\w)+((\.\w{2,3}){1,3})$'
myMail = '45612563@qq.com'
mailMatch = re.match(mail, myMail)  # match once and reuse the result
if mailMatch:
    print(mailMatch.group(0))
else:
    print('error')

# Find every telephone number in the text with a regular expression.
# (The text is no longer bound to the name ``str``, which shadowed the builtin.)
footerText = '''
版权所有：广州商学院   地址：广州市黄埔区九龙大道206号
学校办公室：020-82876130   招生电话：020-82872773
粤公网安备 44011602000060号    粤ICP备15103669号
'''
# No capture groups: re.findall then returns the whole match for each number
# instead of a tuple of sub-groups.
phones = re.findall(r'\d{3,4}-\d{6,8}', footerText)
print(phones)

# Split English text into words with a regular expression (re.split).
news = '''Let your friends underrate your advantage,while let your enemies overrate your disadvantage..'''
# Split on runs of whitespace and punctuation; drop the empty token that
# re.split yields when the text ends with a separator.
words = [word for word in re.split(r'[\s,.?\-]+', news) if word]
print(words)

import requests
from  bs4 import  BeautifulSoup
from datetime import datetime
import re

# Download the campus-news index page and parse it into a BeautifulSoup tree.
newsUrl = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'
res = requests.get(newsUrl)
res.encoding = 'utf-8'  # decode res.text as UTF-8 instead of the guessed encoding
soup = BeautifulSoup(res.text, 'html.parser')

# Fetch the click count of a news article.
def getClickTime(newsUrl):
    """Return the click count of the article at *newsUrl* as a string.

    The article id is taken from the URL: the text between the first ``_``
    and the trailing ``.html`` is split on ``/`` and its second component is
    used. The id is sent to the counter API, and the count is extracted from
    the ``hits').html('<count>');`` fragment of the response text.

    Raises:
        IndexError: if *newsUrl* does not contain ``_<x>/<id>.html``.
        AttributeError: if the API response does not contain the expected
            ``hits').html('...');`` fragment.
    """
    # Raw strings keep the regex escapes literal; the dot before "html" is
    # escaped so that it only matches a real ".".
    newsId = re.findall(r'_(.*)\.html', newsUrl)[0].split('/')[1]
    clickUrl = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(newsId)
    clickStr = requests.get(clickUrl).text
    count = re.search(r"hits'\)\.html\('(.*)'\);", clickStr).group(1)
    return count


# Fetch and print the details of one news article.
def getNewDetail(url):
    """Download the article page at *url* and print its metadata.

    Prints the link, title, publish time, author, reviewer, source,
    photographer and click count. The fields are extracted from the text of
    the page's first ``.show-info`` element.

    NOTE: the title is not taken from the article page; it is read from the
    module-level name ``title``, which the caller must set before calling.

    Raises:
        IndexError: if the page has no ``.show-info`` element.
        AttributeError: if one of the expected labels is missing from the
            info text.
        ValueError: if the publish time is not ``%Y-%m-%d %H:%M:%S``.
    """
    resd = requests.get(url)
    resd.encoding = 'utf-8'
    soupd = BeautifulSoup(resd.text, 'html.parser')
    info = soupd.select('.show-info')[0].text
    # The publish time sits between its label and the non-breaking-space
    # padding that precedes the author label.
    published = re.search('发布时间:(.*) \xa0\xa0 \xa0\xa0作者：', info).group(1)
    dtime = datetime.strptime(published, '%Y-%m-%d %H:%M:%S')

    print('链接：' + url)
    print('标题：' + title)
    print('发布时间：{}'.format(dtime))
    print('作者：' + re.search('作者：(.*)审核：', info).group(1))
    print('审核：' + re.search('审核：(.*)来源：', info).group(1))
    print('来源：' + re.search('来源：(.*)摄影：', info).group(1))
    print('摄影：' + re.search('摄影：(.*)点击', info).group(1))
    # Use this function's own argument rather than the caller's global ``a``.
    print('点击次数：' + getClickTime(url))


# Walk the <li> elements of the index page and print the details of the first
# one that is a news entry (i.e. contains a ``.news-list-title`` element).
for news in soup.select('li'):
    titleNodes = news.select('.news-list-title')
    if titleNodes:
        # ``title`` and ``a`` stay module-level names: getNewDetail reads
        # ``title`` when printing.
        title = titleNodes[0].text
        # Link to the article's detail page.
        a = news.a.attrs['href']
        # Fetch and print the article's details.
        getNewDetail(a)
        # Only the first news entry is processed.
        break