# Analyze KFC store information via the KFC China store-locator endpoint.
import requests
import json  # kept: original file imported it at this point

POST_URL = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}


def get_store_count(cname):
    """Return the total number of KFC stores for *cname* (a city/province).

    POSTs the locator form data and reads ``Table[0]['rowcount']`` from the
    JSON response — the server reports the full row count even though only
    the first page of ten stores is requested.
    """
    data = {
        "cname": cname,
        "pid": "",
        "pageIndex": "1",  # one page suffices: only the total count is used
        "pageSize": "10",
    }
    json_obj = requests.post(url=POST_URL, data=data, headers=HEADERS).json()
    return json_obj['Table'][0]['rowcount']  # total restaurant count (int)


if __name__ == '__main__':
    cname = input('输入城市名: ')
    print("餐厅总数", get_store_count(cname))
# Tip (小贴士): after running, enter a city or province name to get the number of KFC stores there.
# Xiaozhu short-term rental scraper: walks the search-result pages and
# prints one dict of details per listing.
from bs4 import BeautifulSoup
import requests
import time

REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}


def judgment_sex(class_name):
    """Map the host avatar's CSS class list to a gender label."""
    return '女' if class_name == ['member_ico1'] else '男'


def get_links(url):
    """Fetch one search-results page and scrape every listing linked on it."""
    response = requests.get(url, headers=REQUEST_HEADERS)
    page = BeautifulSoup(response.text, 'lxml')
    for anchor in page.select('#page_list > ul > li > a'):
        get_info(anchor.get("href"))


def get_info(url):
    """Fetch one listing page and print its details as a dict."""
    response = requests.get(url, headers=REQUEST_HEADERS)
    page = BeautifulSoup(response.text, 'lxml')
    records = zip(
        page.select('div.pho_info > h4'),
        page.select('span.pr5'),
        page.select('#pricePart > div.day_l > span'),
        page.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img'),
        page.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a'),
        page.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div'),
    )
    for tittle, address, price, img, name, sex in records:
        print({
            'tittle': tittle.get_text().strip(),
            'address': address.get_text().strip(),
            'price': price.get_text(),
            'img': img.get("src"),
            'name': name.get_text(),
            'sex': judgment_sex(sex.get("class")),
        })


if __name__ == '__main__':
    # Pages 1-13 of the Taiyuan search results, throttled between requests.
    for single_url in ('http://ty.xiaozhu.com/search-duanzufang-p{}-0/'.format(page_no)
                       for page_no in range(1, 14)):
        get_links(single_url)
        time.sleep(0.3)
# Kugou Top-500 songs scraper: prints rank/singer/song/duration per entry.
# (The copy-pasted `judgment_sex` helper was unused in this script and has
# been removed; an identical copy lives in the Xiaozhu scraper above.)
from bs4 import BeautifulSoup
import requests
import time

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}


def get_info(url):
    """Scrape one ranking page and print each song as a dict.

    Each list entry's anchor text has the form "singer-song"; it is split on
    the first '-' to separate the two fields.
    """
    wb_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    times = soup.select(' span.pc_temp_tips_r > span')
    # Loop variable renamed from `time` so it no longer shadows the imported
    # `time` module.
    for rank, title, duration in zip(ranks, titles, times):
        data = {
            'rank': rank.get_text().strip(),
            'singer': title.get_text().split('-')[0],
            'song': title.get_text().split('-')[1],
            'time': duration.get_text().strip(),
        }
        print(data)


if __name__ == '__main__':
    # Pages 1-23 of the 8888 (Top 500) chart, throttled between requests.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(number)
            for number in range(1, 24)]
    for single_url in urls:
        get_info(single_url)
        time.sleep(0.3)
# Demo: non-greedy extraction of the text between alternating 'xx' delimiters.
import re

a = 'xxIxxjshdxxlovexxsffaxxpythonxx'
pattern = re.compile('xx(.*?)xx', re.S)  # re.S kept for parity; input has no newlines
infos = pattern.findall(a)
print(infos)  # ['I', 'love', 'python']
# Demo: strip digits and punctuation from a messy string, leaving only words.
import re

phone = ''' 123.-4,567,1234,jdd,cdc.23,the wfewf ,fer3t '''
# Fix: the patterns were plain literals ("\d+", '\W+'), whose "\d"/"\W" are
# invalid escape sequences (a SyntaxWarning on modern CPython). Raw strings
# produce byte-identical patterns without the warning.
new_phone = re.sub(r"\d+", ' ', phone)  # collapse every digit run to a space
aa = re.sub(r'\W+', ' ', new_phone)     # collapse non-word runs to single spaces
print(aa)