[O] Configurable index path

This commit is contained in:
Azalea Gui
2023-04-03 10:15:17 -04:00
parent 2db202d27d
commit 08f5ad9021
4 changed files with 15 additions and 56 deletions
+3 -54
View File
@@ -1,57 +1,5 @@
{ {
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting seaborn\r\n",
" Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)\r\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m293.3/293.3 kB\u001B[0m \u001B[31m7.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m\r\n",
"\u001B[?25hCollecting matplotlib\r\n",
" Downloading matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\r\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m11.6/11.6 MB\u001B[0m \u001B[31m60.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\r\n",
"\u001B[?25hRequirement already satisfied: pandas in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (1.5.3)\r\n",
"Requirement already satisfied: numpy in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (1.24.2)\r\n",
"Requirement already satisfied: tqdm in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (4.65.0)\r\n",
"Collecting rapidjson\r\n",
" Downloading rapidjson-1.0.0-py3-none-any.whl (1.2 kB)\r\n",
"Collecting contourpy>=1.0.1\r\n",
" Downloading contourpy-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\r\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m300.0/300.0 kB\u001B[0m \u001B[31m42.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
"\u001B[?25hCollecting cycler>=0.10\r\n",
" Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\r\n",
"Collecting fonttools>=4.22.0\r\n",
" Downloading fonttools-4.39.3-py3-none-any.whl (1.0 MB)\r\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.0/1.0 MB\u001B[0m \u001B[31m69.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
"\u001B[?25hCollecting kiwisolver>=1.0.1\r\n",
" Downloading kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\r\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.4/1.4 MB\u001B[0m \u001B[31m63.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n",
"\u001B[?25hRequirement already satisfied: packaging>=20.0 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (23.0)\r\n",
"Requirement already satisfied: pillow>=6.2.0 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (9.4.0)\r\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (3.0.9)\r\n",
"Requirement already satisfied: python-dateutil>=2.7 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from matplotlib) (2.8.2)\r\n",
"Requirement already satisfied: pytz>=2020.1 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from pandas) (2022.7.1)\r\n",
"Requirement already satisfied: six>=1.5 in /home/azalea/.conda/envs/311/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\r\n",
"Installing collected packages: rapidjson, kiwisolver, fonttools, cycler, contourpy, matplotlib, seaborn\r\n",
"Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.3 kiwisolver-1.4.4 matplotlib-3.7.1 rapidjson-1.0.0 seaborn-0.12.2\r\n"
]
}
],
"source": [
"!pip install seaborn matplotlib pandas numpy tqdm rapidjson"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-04-03T09:42:13.735175Z",
"end_time": "2023-04-03T09:42:28.796233Z"
}
}
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
@@ -62,6 +10,7 @@
"import matplotlib\n", "import matplotlib\n",
"import rapidjson as json\n", "import rapidjson as json\n",
"import os\n", "import os\n",
"import toml\n",
"from collections import Counter\n", "from collections import Counter\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"\n", "\n",
@@ -140,8 +89,8 @@
" return json.loads(p.read_text())\n", " return json.loads(p.read_text())\n",
"\n", "\n",
"def load_data():\n", "def load_data():\n",
" dir = Path('index-data')\n", " dir = Path(toml.loads(Path('config.toml').read_text())['index_path'])\n",
" files = [(dir / f) for f in tq(os.listdir(dir), 'Loading data') if f.endswith('.json')]\n", " files = [(dir / f) for f in tq(os.listdir(dir), 'Loading file list') if f.endswith('.json')]\n",
" return pmap(_helper, files, desc='Loading json')\n", " return pmap(_helper, files, desc='Loading json')\n",
"\n", "\n",
"\n", "\n",
+2
View File
@@ -0,0 +1,2 @@
# Moved the index data to the parent directory (..) so that IntelliJ does not get stuck indexing it and processing its file system changes
index_path = "../SuperbuyData/index-data"
+2 -2
View File
@@ -5,13 +5,13 @@ import json
from pathlib import Path from pathlib import Path
import requests import requests
import toml
ses = requests.Session() ses = requests.Session()
ses.headers = {'accept-language': 'zh-CN'} ses.headers = {'accept-language': 'zh-CN'}
out_path = Path('index-data') out_path = toml.loads(Path('config.toml').read_text())['index_path']
out_path.mkdir(exist_ok=True) out_path.mkdir(exist_ok=True)
+8
View File
@@ -4,3 +4,11 @@ hypy_utils
uvicorn uvicorn
pysocks pysocks
toml toml
# Analysis
seaborn
matplotlib
pandas
numpy
tqdm
rapidjson