[F] Fix error when a user's lang is None while filtering users by language

This commit is contained in:
Hykilpikonna
2021-11-23 12:11:58 -05:00
parent b6d0cda387
commit 1cd7a5ffd0
+2 -1
View File
@@ -121,7 +121,8 @@ def select_user_sample(user_dir: str = './data/twitter/user/') -> None:
users = load_users(user_dir)
# Filter by language first
- users = [u for u in users if any(lang in u.lang for lang in {'en', 'zh', 'ja'})]
+ users = [u for u in users if u.lang is not None and
+ any(lang in u.lang for lang in {'en', 'zh', 'ja'})]
# Find most popular, and exclude them from the random sample
most_popular = users[:500]