From 55a3fef84cce6538ac4a005e960bafd1f71e6146 Mon Sep 17 00:00:00 2001
From: Hykilpikonna
Date: Sat, 13 Aug 2022 17:26:51 -0400
Subject: [PATCH] [+] Crawl index data

---
 .gitignore       |   1 +
 index_crawler.py | 119 +++++++++++++++++++++++++++++++++++++++++++++++
 mobile.py        |  11 +++--
 3 files changed, 126 insertions(+), 5 deletions(-)
 create mode 100644 index_crawler.py

diff --git a/.gitignore b/.gitignore
index 229efb3..4b29fe0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,4 @@ cython_debug/
 .idea/
 crawler
 .config
+index-data/
diff --git a/index_crawler.py b/index_crawler.py
new file mode 100644
index 0000000..659c404
--- /dev/null
+++ b/index_crawler.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import datetime
+import json
+from pathlib import Path
+
+import requests
+
+
+# Seconds to wait on any single HTTP request, so a stalled connection cannot
+# hang the crawl forever.
+REQUEST_TIMEOUT = 30
+
+# Shared session for the crawl requests: it carries the language header and
+# the proxy that setup_proxy() installs once the proxy is verified.
+ses = requests.Session()
+ses.headers.update({'accept-language': 'zh-CN'})
+
+
+out_path = Path('index-data')
+out_path.mkdir(exist_ok=True)
+
+
+def write_entry(data):
+    """
+    Save one index entry as a JSON file in the output directory.
+
+    The file name combines the entry's delivery time (local time) and guid.
+
+    :param data: Entry dict containing 'guid' and 'delivery_time' (POSIX timestamp)
+    :return: True if a new file was written, False if the file already existed
+    """
+    guid = data['guid']
+    stamp = datetime.datetime.fromtimestamp(data['delivery_time']).strftime('%Y-%m-%d %H-%M')
+
+    p = out_path / f'{stamp} {guid}.json'
+    if p.is_file():
+        return False
+
+    # ensure_ascii=False keeps non-ASCII text verbatim, so pin the encoding
+    # instead of relying on the platform default.
+    p.write_text(json.dumps(data, ensure_ascii=False, indent=1), encoding='utf-8')
+    return True
+
+
+def setup_proxy():
+    """
+    Verify the local SOCKS5 proxy and route the shared session through it.
+
+    Fetches the public IP once directly and once via the proxy.
+
+    :raises RuntimeError: If both requests report the same IP
+    """
+    proxies = {
+        'http': 'socks5://localhost:9050',
+        'https': 'socks5://localhost:9050'
+    }
+
+    url = 'http://ifconfig.me/ip'
+
+    ip = requests.get(url, timeout=REQUEST_TIMEOUT).text.strip()
+    print(f'Raw ip: {ip}')
+
+    proxy_ip = requests.get(url, proxies=proxies, timeout=REQUEST_TIMEOUT).text.strip()
+    print(f'Proxy ip: {proxy_ip}')
+
+    # Raise instead of assert: asserts are stripped when running with -O.
+    if ip == proxy_ip:
+        raise RuntimeError('Proxy did not start correctly.')
+
+    # Without this the crawl requests would bypass the proxy just verified.
+    ses.proxies.update(proxies)
+
+
+if __name__ == '__main__':
+    setup_proxy()
+
+    prev_date_file = out_path / '0-prev-date.txt'
+
+    def send_req(prev_date: int | None) -> bool:
+        """
+        Fetch one batch of index data and save its entries.
+
+        :param prev_date: Value sent as logisticMinDeliveryTime, or None to omit it
+        :return: False if the response held no entries, True otherwise
+        :raises RuntimeError: If the response state is not 0
+        """
+        add_param = {'logisticMinDeliveryTime': prev_date} if prev_date is not None else {}
+
+        r = ses.get('https://front.superbuy.com/logistic/get-index-pull-data',
+                    params={'onlyPackage': 1, **add_param},
+                    timeout=REQUEST_TIMEOUT).json()
+
+        if r['state'] != 0:
+            raise RuntimeError('Request failed.')
+
+        data = r['data']
+        if not data:
+            # Nothing left to crawl; data[-1] below would raise IndexError.
+            return False
+
+        successes = [write_entry(i) for i in data]
+
+        print(f'Out of {len(data)} entries, successfully wrote {sum(successes)} entries.')
+        # Checkpoint for the next request: just below the last entry's delivery time.
+        prev_date_file.write_text(str(data[-1]['delivery_time'] - 1))
+        return True
+
+    has_more = True
+    if not prev_date_file.is_file():
+        print('Getting first request...')
+        has_more = send_req(None)
+
+    while has_more:
+        prev_date = int(prev_date_file.read_text())
+        print(f'Getting entries before {prev_date}')
+        has_more = send_req(prev_date)
+
+    print('No more entries.')
diff --git a/mobile.py b/mobile.py
index fe66df8..bceb3de 100644
--- a/mobile.py
+++ b/mobile.py
@@ -104,6 +104,9 @@ def create_diy_order(create: list[TaobaoOrder]):
 
     :param create: List of taobao urls and details
     """
+    orders = gateway_order_list()
+    ids = [get_url_param(i.GoodsLink, 'id') for o in orders for i in o.Items if 'id=' in i.GoodsLink]
+    create = [o for o in create if o.date >= '2022-08-10' and not any(get_url_param(i.url, 'id') in ids for i in o.items)]
     for c in create:
         shop_name = c.store.name
         shop_id = crawl(c.items[0].url)['data']['shop']['shopId']
@@ -127,7 +130,6 @@ def create_diy_order(create: list[TaobaoOrder]):
         resp = r.post('https://front.superbuy.com/order/transport/create-diy-order',
                       data=j.encode('utf-8'), headers={'content-type': 'application/json; charset=UTF-8'}).json()
         print(resp)
-        time.sleep(10)
 
 
 def fill_express_no(taobao_data: list[TaobaoOrder]):
@@ -177,9 +179,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True)
 @app.post('/taobao/fill_items')
 def taobao_fill_items(body: Any = Body):
     taobao_data: list[TaobaoOrder] = js(body)
-    orders = gateway_order_list()
-    ids = [get_url_param(i.GoodsLink, 'id') for o in orders for i in o.Items if 'id=' in i.GoodsLink]
-    create_diy_order([o for o in taobao_data if o.date >= '2022-08-10' and not any(get_url_param(i.url, 'id') in ids for i in o.items)])
+    create_diy_order(taobao_data)
 
 
 @app.post('/taobao/fill_delivery')
@@ -190,6 +190,7 @@ def taobao_fill_delivery(body: Any = Body):
 
 
 if __name__ == '__main__':
-    print(login_cached(os.environ['user'], os.environ['pass']))
+    print(login(os.environ['user'], os.environ['pass']))
     # print(r.get(f'https://api.superbuy.com/gateway/oauth2/personalcenter/{USERID}').json())
+    create_diy_order(load_taobao())
     fill_express_no(load_taobao())