[+] Crawl index data
This commit is contained in:
@@ -152,3 +152,4 @@ cython_debug/
|
|||||||
.idea/
|
.idea/
|
||||||
crawler
|
crawler
|
||||||
.config
|
.config
|
||||||
|
index-data/
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# Shared HTTP session. Assigning ``headers`` replaces the session's header
# mapping, so 'accept-language' is the only session-level header configured.
_session_headers = {'accept-language': 'zh-CN'}
ses = requests.Session()
ses.headers = _session_headers


# Output directory for crawled entries; created at import time when missing.
out_path = Path('index-data')
Path.mkdir(out_path, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def write_entry(data):
    """
    Save one crawled entry as a JSON file inside ``out_path``.

    The file name is ``'<YYYY-mm-dd HH-MM> <guid>.json'``, where the timestamp
    is ``data['delivery_time']`` converted with ``datetime.fromtimestamp``
    (i.e. rendered in the machine's local time zone).

    :param data: Entry dict; must contain the keys ``guid`` and
        ``delivery_time`` (a value accepted by ``datetime.fromtimestamp``)
    :return: ``True`` if a new file was written, ``False`` if a file with the
        same name already existed and was left untouched
    """
    guid = data['guid']
    stamp = datetime.datetime.fromtimestamp(data['delivery_time']).strftime('%Y-%m-%d %H-%M')

    p = out_path / f'{stamp} {guid}.json'
    if p.is_file():
        # Already saved by an earlier run; do not overwrite.
        return False

    # ensure_ascii=False keeps non-ASCII characters verbatim, so the encoding
    # is pinned to UTF-8 instead of depending on the platform default.
    p.write_text(json.dumps(data, ensure_ascii=False, indent=1), encoding='utf-8')
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def setup_proxy():
    """
    Check that the local SOCKS5 proxy is working and attach it to ``ses``.

    The public IP is fetched twice from ``ifconfig.me``: once directly and
    once through the proxy. If both answers are identical the proxy is not
    in effect and the function aborts. On success the proxy settings are
    stored on the shared session ``ses`` so that requests made through that
    session use the proxy.

    :raises AssertionError: if the proxied request reports the same IP as the
        direct request
    """
    # NOTE(review): 9050 is presumably a local Tor SOCKS port - confirm.
    proxies = {
        'http': 'socks5://localhost:9050',
        'https': 'socks5://localhost:9050',
    }

    url = 'http://ifconfig.me/ip'

    # Timeouts keep a dead endpoint or proxy from hanging the script forever.
    ip = requests.get(url, timeout=30).text.strip()
    print(f'Raw ip: {ip}')

    proxy_ip = requests.get(url, proxies=proxies, timeout=30).text.strip()
    print(f'Proxy ip: {proxy_ip}')

    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if ip == proxy_ip:
        raise AssertionError('Proxy did not start correctly.')

    # Previously the verified settings were thrown away; keep them on the
    # shared session so they are actually used.
    ses.proxies.update(proxies)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    setup_proxy()

    # Checkpoint file holding the ``logisticMinDeliveryTime`` value to send
    # with the next request, so an interrupted crawl can resume.
    prev_date_file = out_path / '0-prev-date.txt'

    def send_req(prev_date: int | None) -> bool:
        """
        Fetch one page of index data, store its entries and update the checkpoint.

        :param prev_date: Value for the ``logisticMinDeliveryTime`` query
            parameter, or ``None`` to omit the parameter (first request)
        :return: ``True`` if the page contained entries, ``False`` if it was
            empty (nothing left to fetch)
        :raises AssertionError: if the response's ``state`` field is not ``0``
        """
        params = {'onlyPackage': 1}
        if prev_date is not None:
            params['logisticMinDeliveryTime'] = prev_date

        # Go through the shared session so its headers (and any proxy settings
        # configured on it) apply; the timeout prevents an indefinite hang.
        r = ses.get('https://front.superbuy.com/logistic/get-index-pull-data',
                    params=params, timeout=60).json()

        # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
        if r['state'] != 0:
            raise AssertionError('Request failed.')

        data = r['data']
        if not data:
            # An empty page has no last entry to take the next cursor from.
            return False

        successes = [write_entry(i) for i in data]

        print(f'Out of {len(data)} entries, successfully wrote {sum(successes)} entries.')
        # Next cursor: one less than the last entry's delivery_time.
        prev_date_file.write_text(str(data[-1]['delivery_time'] - 1))
        return True

    has_more = True
    if not prev_date_file.is_file():
        print('Getting first request...')
        has_more = send_req(None)

    # Keep paging until a request comes back empty.
    while has_more:
        prev_date = int(prev_date_file.read_text())
        print(f'Getting entries before {prev_date}')
        has_more = send_req(prev_date)
|
||||||
@@ -104,6 +104,9 @@ def create_diy_order(create: list[TaobaoOrder]):
|
|||||||
|
|
||||||
:param create: List of taobao urls and details
|
:param create: List of taobao urls and details
|
||||||
"""
|
"""
|
||||||
|
orders = gateway_order_list()
|
||||||
|
ids = [get_url_param(i.GoodsLink, 'id') for o in orders for i in o.Items if 'id=' in i.GoodsLink]
|
||||||
|
create = [o for o in create if o.date >= '2022-08-10' and not any(get_url_param(i.url, 'id') in ids for i in o.items)]
|
||||||
for c in create:
|
for c in create:
|
||||||
shop_name = c.store.name
|
shop_name = c.store.name
|
||||||
shop_id = crawl(c.items[0].url)['data']['shop']['shopId']
|
shop_id = crawl(c.items[0].url)['data']['shop']['shopId']
|
||||||
@@ -127,7 +130,6 @@ def create_diy_order(create: list[TaobaoOrder]):
|
|||||||
resp = r.post('https://front.superbuy.com/order/transport/create-diy-order', data=j.encode('utf-8'),
|
resp = r.post('https://front.superbuy.com/order/transport/create-diy-order', data=j.encode('utf-8'),
|
||||||
headers={'content-type': 'application/json; charset=UTF-8'}).json()
|
headers={'content-type': 'application/json; charset=UTF-8'}).json()
|
||||||
print(resp)
|
print(resp)
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
|
|
||||||
def fill_express_no(taobao_data: list[TaobaoOrder]):
|
def fill_express_no(taobao_data: list[TaobaoOrder]):
|
||||||
@@ -177,9 +179,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True)
|
|||||||
@app.post('/taobao/fill_items')
|
@app.post('/taobao/fill_items')
|
||||||
def taobao_fill_items(body: Any = Body):
|
def taobao_fill_items(body: Any = Body):
|
||||||
taobao_data: list[TaobaoOrder] = js(body)
|
taobao_data: list[TaobaoOrder] = js(body)
|
||||||
orders = gateway_order_list()
|
create_diy_order(taobao_data)
|
||||||
ids = [get_url_param(i.GoodsLink, 'id') for o in orders for i in o.Items if 'id=' in i.GoodsLink]
|
|
||||||
create_diy_order([o for o in taobao_data if o.date >= '2022-08-10' and not any(get_url_param(i.url, 'id') in ids for i in o.items)])
|
|
||||||
|
|
||||||
|
|
||||||
@app.post('/taobao/fill_delivery')
|
@app.post('/taobao/fill_delivery')
|
||||||
@@ -190,6 +190,7 @@ def taobao_fill_delivery(body: Any = Body):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(login_cached(os.environ['user'], os.environ['pass']))
|
print(login(os.environ['user'], os.environ['pass']))
|
||||||
# print(r.get(f'https://api.superbuy.com/gateway/oauth2/personalcenter/{USERID}').json())
|
# print(r.get(f'https://api.superbuy.com/gateway/oauth2/personalcenter/{USERID}').json())
|
||||||
|
create_diy_order(load_taobao())
|
||||||
fill_express_no(load_taobao())
|
fill_express_no(load_taobao())
|
||||||
|
|||||||
Reference in New Issue
Block a user