[+] Add news channels from memeburn

This commit is contained in:
Hykilpikonna
2021-11-24 15:22:54 -05:00
parent f9370aedb5
commit 275efda0fe
3 changed files with 21 additions and 4 deletions
+15 -1
View File
@@ -5,6 +5,8 @@ import random
from typing import NamedTuple
from dataclasses import dataclass
import requests
from bs4 import BeautifulSoup
from py7zr import SevenZipFile
from constants import DATA_DIR, TWEETS_DIR, USER_DIR
@@ -149,7 +151,8 @@ def select_user_sample() -> None:
def get_english_news_channels() -> list[str]:
"""
Find news channels that post in English
Find news channels that post in English from retweets of TwitterNews, combined with an
established list of the 100 most influential news channels reported by Nur Bremmen on memeburn.com
Run this after download_all_tweets(api, 'TwitterNews')
@@ -166,6 +169,17 @@ def get_english_news_channels() -> list[str]:
user = text[4:].split(':')[0]
news_channels.add(user)
# Find news channels from top 100 list on memeburn.com
url = 'https://memeburn.com/2010/09/the-100-most-influential-news-media-twitter-accounts/'
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
users = {h.text[1:] for h in soup.select('table tr td:nth-child(2) > a')}
# Combine two sets, ignoring case (since the ids in the 100 list are all lowercased)
news_channels_lower = {n.lower() for n in news_channels}
for u in users:
if u not in news_channels_lower:
news_channels.add(u)
return list(news_channels)
+4 -1
View File
@@ -7,7 +7,7 @@ import time
from typing import List
import tweepy
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized, NotFound
from constants import TWEETS_DIR, USER_DIR
from utils import *
@@ -86,6 +86,9 @@ def download_all_tweets(api: API, screen_name: str,
except Unauthorized:
debug(f'- {screen_name}: Unauthorized. Probably a private account, ignoring.')
return
except NotFound:
debug(f'- {screen_name}: Not found. Probably a deleted account, writing []')
tweets = []
# This person has no tweets, done. (By the way, we discovered that @lorde has no tweets but has
# 7 million followers... wow!)
+2 -2
View File
@@ -73,7 +73,7 @@ def write(file: str, text: str) -> None:
"""
Write text to a file
:param file: File path
:param file: File path (will be converted to lowercase)
:param text: Text
:return: None
"""
@@ -91,7 +91,7 @@ def read(file: str) -> str:
"""
Read file content
:param file: File path
:param file: File path (will be converted to lowercase)
:return: File content as a string
"""
with open(file.lower(), 'r', encoding='utf-8') as f: