【发布时间】:2021-08-28 12:49:39
【问题描述】:
我正在尝试抓取 pickels.com.au。
如果链接相同并且价格不同,我正在尝试更新 pickels_dataset.csv 文件,我将删除列表并将新行插入 CSV 文件,但不会删除CSV 文件中的旧条目。
删除和更新 CSV 文件中的行的最佳方法是什么。
下面是我的代码...
import requests
from scrapy.selector import Selector
import csv
import re
from tqdm import tqdm
from time import sleep
with open('pickels_dataset.csv', 'a+', newline='', encoding='utf-8') as auction_csv_file:
auction_csv_writer = csv.writer(auction_csv_file)
live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
auction_link = auctions.get('viewSaleListingLink')
if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
auction_request = requests.get(url=auction_link)
response = Selector(text=auction_request.text)
sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
if sales_id == []:
continue
auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
auction_sale_link_requests = requests.get(url=auction_sale_link)
auctions_data = auction_sale_link_requests.json().get('SearchResults')
if auctions_data == []:
print("NO RESULTS")
for auction_data in auctions_data:
if int(auction_data.get('MinimumBid')) > 0:
ids = auction_data.get('TargetId')
main_title = auction_data.get('Title')
short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
make = auction_data.get('M ake')
model = auction_data.get('Model')
variant = auction_data.get('Series')
transmission = auction_data.get('Transmission')
odometer = auction_data.get('Odometer')
state = auction_data.get('Location').get('State')
sale_price = auction_data.get('MinimumBid')
link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
sale_date = auction_data.get('SaleEndString')
auction_values = [
main_title, short_title, make,
model, variant, transmission, odometer,
state, "${:,.2f}".format(sale_price).strip() ,
link, sale_date
]
with open('pickels_dataset.csv', 'r+') as csv_read:
auction_reader = list(csv.reader(csv_read))
for each in auction_reader:
if link in each:
each_link, each_price = each[9], each[0]
if (link == each_link) and (sale_price != each_price):
auction_reader.clear()
print('New list found, old list deleted')
auction_csv_writer.writerow(auction_values)
print('New value added')
continue
elif (link == each[9]) and (sale_price == each[0]):
print('Same result already exist in the file')
continue
else:
auction_csv_writer.writerow(auction_values)
print('Unique result found and added.')
break
【问题讨论】:
标签: python csv web-scraping scrapy