zhangchen-sx

# 分析肯德基门店信息

import requests,json
# Query the KFC China store-locator endpoint for a city and print how many
# restaurants it reports.
post_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
all_data = []
IDs = []
cname = input('输入城市名: ')
# Only the first page is requested because only the total count is read below.
data = {
    "cname": cname,
    "pid": "",
    "pageIndex": "1",
    "pageSize": "10",
}
json_obj = requests.post(url=post_url, data=data, headers=headers).json()
num = json_obj['Table'][0]['rowcount']  # total number of restaurants (int)
print("餐厅总数", num)

小贴士: 运行后,输入城市或省份地址,得到肯德基门店数量

 # 小猪短租

from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent sent with every request made by this scraper.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
def judgment_sex(class_name):
    """Map the CSS class list of the host's gender icon to a gender label.

    Args:
        class_name: The ``class`` attribute of the icon element, as the list
            of class names that BeautifulSoup returns.

    Returns:
        '女' when the class list is exactly ``['member_ico1']``, otherwise '男'.
    """
    # NOTE(review): both labels were empty strings in the source, which made
    # the branch pointless (most likely non-ASCII text lost in extraction).
    # '女'/'男' are restored on the assumption that `member_ico1` marks a
    # female host -- confirm against the live page markup.
    if class_name == ['member_ico1']:
        return '女'
    return '男'

def get_links(url):
    """Fetch one search-result page and scrape every listing it links to.

    Args:
        url: URL of a search-result page.

    Each anchor matched by the listing selector is handed to ``get_info``,
    which prints the details of that listing.
    """
    wb_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get("href")
        # An anchor without an href cannot be followed; skip it instead of
        # passing None on to requests.get.
        if href:
            get_info(href)

def get_info(url):
    """Fetch one listing page and print its details as a dict.

    Args:
        url: URL of a single listing's detail page.

    The page is queried with six CSS selectors (title, address, price, host
    avatar, host name, host gender icon). The results are zipped together, so
    one dict is printed per complete set and iteration stops at the shortest
    selector result.
    """
    wb_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    tittles = soup.select('div.pho_info > h4')
    addresses = soup.select('span.pr5')
    prices = soup.select('#pricePart > div.day_l > span')
    imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    for tittle, address, price, img, name, sex in zip(tittles, addresses, prices, imgs, names, sexs):
        # Dictionary keys (including the 'tittle' spelling) are kept as-is
        # because they are part of the printed output.
        data = {'tittle': tittle.get_text().strip(),
                'address': address.get_text().strip(),
                'price': price.get_text(),
                'img': img.get("src"),
                'name': name.get_text(),
                'sex': judgment_sex(sex.get("class"))}
        print(data)
if __name__ == '__main__':
    # Search-result pages 1 through 13.
    urls = ['http://ty.xiaozhu.com/search-duanzufang-p{}-0/'.format(number) for number in range(1, 14)]
    for single_url in urls:
        get_links(single_url)
        time.sleep(0.3)  # short pause between result pages

 # 酷狗500歌曲

from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent sent with every request made by this scraper.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
def judgment_sex(class_name):
    """Map the CSS class list of a gender icon to a gender label.

    Args:
        class_name: The ``class`` attribute of the icon element, as the list
            of class names that BeautifulSoup returns.

    Returns:
        '女' when the class list is exactly ``['member_ico1']``, otherwise '男'.
    """
    # NOTE(review): this helper does not appear to be called by the Kugou
    # scraper that follows; it looks carried over from the previous example.
    # Both labels were empty strings in the source (most likely non-ASCII text
    # lost in extraction); '女'/'男' are restored on the assumption that
    # `member_ico1` marks a female user -- confirm before relying on it.
    if class_name == ['member_ico1']:
        return '女'
    return '男'


def get_info(url):
    """Fetch one chart page and print rank, singer, song and duration rows.

    Args:
        url: URL of a single chart page.

    The rank, title and duration elements are zipped together, so one dict is
    printed per complete triple and iteration stops at the shortest selector
    result.
    """
    wb_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    times = soup.select(' span.pc_temp_tips_r > span')

    # The loop variable is named `duration` rather than `time` so it does not
    # shadow the imported `time` module.
    for rank, title, duration in zip(ranks, titles, times):
        # The link text is split at the first '-' into singer and song.
        # partition() yields an empty song instead of raising IndexError when
        # there is no hyphen, and keeps any further hyphens in the song name.
        singer, _, song = title.get_text().partition('-')
        data = {'rank': rank.get_text().strip(),
                'singer': singer,
                'song': song,
                'time': duration.get_text().strip()}
        print(data)
if __name__ == '__main__':
    # Chart pages 1 through 23.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(number) for number in range(1, 24)]
    for single_url in urls:
        get_info(single_url)
        time.sleep(0.3)  # short pause between chart pages
酷狗500
import re
a = 'xxIxxjshdxxlovexxsffaxxpythonxx'
# Non-greedy capture of the text between each pair of "xx" delimiters;
# re.S lets "." also match newlines.
infos = re.findall(r'xx(.*?)xx', a, re.S)
print(infos)
# ['I', 'love', 'python']
re 示例: 用 findall 提取 xx 之间的内容
import re
phone = '''
123.-4,567,1234,jdd,cdc.23,the
wfewf ,fer3t
'''
# Step 1: replace every run of digits with a single space.
new_phone = re.sub(r"\d+", ' ', phone)
# Step 2: collapse every run of non-word characters (punctuation, spaces,
# newlines) into a single space, leaving only the words.
aa = re.sub(r'\W+', ' ', new_phone)
print(aa)
re 示例: 用 sub 将数字和标点符号替换为空格

 

分类:

技术点:

相关文章: