[O] Configurable index path
This commit is contained in:
+3
-54
@@ -1,57 +1,5 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Collecting seaborn\r\n",
|
|
||||||
" Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)\r\n",
|
|
||||||
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m293.3/293.3 kB\u001B[0m \u001B[31m7.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m\r\n",
|
|
||||||
"\u001B[?25hCollecting matplotlib\r\n",
|
|
||||||
" Downloading matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\r\n",
|
|
||||||
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m11.6/11.6 MB\u001B[0m \u001B[31m60.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\r\n",
|
|
||||||
"\u001B[?25hRequirement already satisfied: pandas in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (1.5.3)\r\n",
|
|
||||||
"Requirement already satisfied: numpy in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (1.24.2)\r\n",
|
|
||||||
"Requirement already satisfied: tqdm in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (4.65.0)\r\n",
|
|
||||||
"Collecting rapidjson\r\n",
|
|
||||||
" Downloading rapidjson-1.0.0-py3-none-any.whl (1.2 kB)\r\n",
|
|
||||||
"Collecting contourpy>=1.0.1\r\n",
|
|
||||||
" Downloading contourpy-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\r\n",
|
|
||||||
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m300.0/300.0 kB\u001B[0m \u001B[31m42.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
|
|
||||||
"\u001B[?25hCollecting cycler>=0.10\r\n",
|
|
||||||
" Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\r\n",
|
|
||||||
"Collecting fonttools>=4.22.0\r\n",
|
|
||||||
" Downloading fonttools-4.39.3-py3-none-any.whl (1.0 MB)\r\n",
|
|
||||||
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.0/1.0 MB\u001B[0m \u001B[31m69.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
|
|
||||||
"\u001B[?25hCollecting kiwisolver>=1.0.1\r\n",
|
|
||||||
" Downloading kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\r\n",
|
|
||||||
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.4/1.4 MB\u001B[0m \u001B[31m63.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
|
|
||||||
"\u001B[?25hRequirement already satisfied: packaging>=20.0 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (23.0)\r\n",
|
|
||||||
"Requirement already satisfied: pillow>=6.2.0 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (9.4.0)\r\n",
|
|
||||||
"Requirement already satisfied: pyparsing>=2.3.1 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (3.0.9)\r\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.7 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (2.8.2)\r\n",
|
|
||||||
"Requirement already satisfied: pytz>=2020.1 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from pandas) (2022.7.1)\r\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\r\n",
|
|
||||||
"Installing collected packages: rapidjson, kiwisolver, fonttools, cycler, contourpy, matplotlib, seaborn\r\n",
|
|
||||||
"Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.3 kiwisolver-1.4.4 matplotlib-3.7.1 rapidjson-1.0.0 seaborn-0.12.2\r\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"!pip install seaborn matplotlib pandas numpy tqdm rapidjson"
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"ExecuteTime": {
|
|
||||||
"start_time": "2023-04-03T09:42:13.735175Z",
|
|
||||||
"end_time": "2023-04-03T09:42:28.796233Z"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 3,
|
||||||
@@ -62,6 +10,7 @@
|
|||||||
"import matplotlib\n",
|
"import matplotlib\n",
|
||||||
"import rapidjson as json\n",
|
"import rapidjson as json\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
|
"import toml\n",
|
||||||
"from collections import Counter\n",
|
"from collections import Counter\n",
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -140,8 +89,8 @@
|
|||||||
" return json.loads(p.read_text())\n",
|
" return json.loads(p.read_text())\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def load_data():\n",
|
"def load_data():\n",
|
||||||
" dir = Path('index-data')\n",
|
" dir = Path(toml.loads(Path('config.toml').read_text())['index_path'])\n",
|
||||||
" files = [(dir / f) for f in tq(os.listdir(dir), 'Loading data') if f.endswith('.json')]\n",
|
" files = [(dir / f) for f in tq(os.listdir(dir), 'Loading file list') if f.endswith('.json')]\n",
|
||||||
" return pmap(_helper, files, desc='Loading json')\n",
|
" return pmap(_helper, files, desc='Loading json')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# The index data lives outside the project directory (under ..) so that IntelliJ does not get stuck indexing it and processing its file system changes
|
||||||
|
index_path = "../SuperbuyData/index-data"
|
||||||
+2
-2
@@ -5,13 +5,13 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import toml
|
||||||
|
|
||||||
ses = requests.Session()
|
ses = requests.Session()
|
||||||
ses.headers = {'accept-language': 'zh-CN'}
|
ses.headers = {'accept-language': 'zh-CN'}
|
||||||
|
|
||||||
|
|
||||||
out_path = Path('index-data')
|
# Output directory for the index data, read from config.toml ('index_path').
# Wrapped in Path because toml returns a plain str and the code below calls Path methods on it.
out_path = Path(toml.loads(Path('config.toml').read_text())['index_path'])
|
||||||
out_path.mkdir(exist_ok=True)
|
# The configured index path may be nested (e.g. ../SuperbuyData/index-data), so create
# missing parent directories too; Path(...) also accepts a plain str from the config.
Path(out_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,3 +4,11 @@ hypy_utils
|
|||||||
uvicorn
|
uvicorn
|
||||||
pysocks
|
pysocks
|
||||||
toml
|
toml
|
||||||
|
|
||||||
|
# Analysis
|
||||||
|
seaborn
|
||||||
|
matplotlib
|
||||||
|
pandas
|
||||||
|
numpy
|
||||||
|
tqdm
|
||||||
|
rapidjson
|
||||||
|
|||||||
Reference in New Issue
Block a user