38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import os
|
|
import random
|
|
import shutil
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from google.colab import files
|
|
|
|
# Colab step: ask the user to upload the speaker/link list and store it under
# the fixed name ./speaker_links.txt in the working directory.
basepath = os.getcwd()
uploaded = files.upload()  # upload file
for filename in uploaded:
    # Only plain-text lists are accepted.
    assert filename.endswith(".txt"), "speaker-videolink info could only be .txt file!"
    # Every accepted upload is moved to the same destination path.
    shutil.move(os.path.join(basepath, filename), "./speaker_links.txt")
|
|
|
|
|
|
def generate_infos():
    """Parse ``./speaker_links.txt`` into a list of download jobs.

    Every non-blank line must have the form ``speaker|link``. All whitespace
    is removed from a line before it is split, and lines that are empty
    afterwards are skipped.

    Returns:
        A list of dicts, one per line, with two keys: ``"link"`` (the text
        after the bar) and ``"filename"`` (``<speaker>_<n>`` where ``n`` is a
        random integer between 0 and 1000000 inclusive).

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``|``.
    """
    infos = []
    # "utf-8-sig" drops a leading byte-order mark (written by some Windows
    # editors) and otherwise decodes exactly like "utf-8"; with plain "utf-8"
    # the BOM would become part of the first speaker name.
    with open("./speaker_links.txt", "r", encoding="utf-8-sig") as f:
        for lineno, raw_line in enumerate(f, start=1):
            # Remove every whitespace character: spaces, tabs, line endings.
            line = "".join(raw_line.split())
            if not line:
                continue
            fields = line.split("|")
            if len(fields) != 2:
                raise ValueError(
                    f"speaker_links.txt line {lineno}: "
                    f"expected 'speaker|link', got {line!r}"
                )
            speaker, link = fields
            # Random suffix so several links for one speaker usually get
            # distinct file names (collisions are unlikely, not impossible).
            filename = speaker + "_" + str(random.randint(0, 1000000))
            infos.append({"link": link, "filename": filename})
    return infos
|
|
|
|
|
|
def download_video(info):
    """Download one video with ``youtube-dl`` into ``./video_data``.

    Args:
        info: Mapping with ``"link"`` (the URL handed to youtube-dl) and
            ``"filename"`` (output name without the ``.mp4`` extension).

    Returns:
        None. The exit status of youtube-dl is ignored, and a failure to
        start the program is reported on stderr rather than raised.

    Raises:
        KeyError: If ``info`` lacks the ``"link"`` or ``"filename"`` key.
    """
    # Imported locally so this function carries its own dependencies.
    import subprocess
    import sys

    link = info["link"]
    filename = info["filename"]
    command = [
        "youtube-dl",
        "-f", "0",
        link,
        "-o", f"./video_data/{filename}.mp4",
        "--no-check-certificate",
    ]
    try:
        # An argument list (no shell) hands the link and file name to
        # youtube-dl verbatim, so characters such as "&", ";" or "|" in them
        # cannot truncate the URL or run extra shell commands.
        subprocess.run(command, check=False)
    except OSError as err:
        # For example youtube-dl is not installed; report it and carry on so
        # the remaining downloads are still attempted.
        print(f"Could not run youtube-dl for {link}: {err}", file=sys.stderr)
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the job list first, then hand each job to a thread pool whose
    # size is taken from os.cpu_count().
    infos = generate_infos()
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        for job in infos:
            # The returned future is not inspected, so download results and
            # errors are not collected here.
            pool.submit(download_video, job)
    # Leaving the ``with`` block waits for all submitted downloads.
|