[+] Add news channels from memeburn
This commit is contained in:
@@ -5,6 +5,8 @@ import random
|
||||
from typing import NamedTuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from py7zr import SevenZipFile
|
||||
|
||||
from constants import DATA_DIR, TWEETS_DIR, USER_DIR
|
||||
@@ -149,7 +151,8 @@ def select_user_sample() -> None:
|
||||
|
||||
def get_english_news_channels() -> list[str]:
|
||||
"""
|
||||
Find news channels that post in English
|
||||
Find news channels that post in English from retweets of TwitterNews, combined with an
|
||||
established list of 100 most influential news channels reported by Nur Bermmen from memeburn.com
|
||||
|
||||
Run this after download_all_tweets(api, 'TwitterNews')
|
||||
|
||||
@@ -166,6 +169,17 @@ def get_english_news_channels() -> list[str]:
|
||||
user = text[4:].split(':')[0]
|
||||
news_channels.add(user)
|
||||
|
||||
# Find news channels from top 100 list on memeburn.com
|
||||
url = 'https://memeburn.com/2010/09/the-100-most-influential-news-media-twitter-accounts/'
|
||||
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
|
||||
users = {h.text[1:] for h in soup.select('table tr td:nth-child(2) > a')}
|
||||
|
||||
# Combine two sets, ignoring case (since the ids in the 100 list are all lowercased)
|
||||
news_channels_lower = {n.lower() for n in news_channels}
|
||||
for u in users:
|
||||
if u not in news_channels_lower:
|
||||
news_channels.add(u)
|
||||
|
||||
return list(news_channels)
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import time
|
||||
from typing import List
|
||||
|
||||
import tweepy
|
||||
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized
|
||||
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized, NotFound
|
||||
|
||||
from constants import TWEETS_DIR, USER_DIR
|
||||
from utils import *
|
||||
@@ -86,6 +86,9 @@ def download_all_tweets(api: API, screen_name: str,
|
||||
except Unauthorized:
|
||||
debug(f'- {screen_name}: Unauthorized. Probably a private account, ignoring.')
|
||||
return
|
||||
except NotFound:
|
||||
debug(f'- {screen_name}: Not found. Probably a deleted account, writing []')
|
||||
tweets = []
|
||||
|
||||
# This person has no tweets, done. (By the way, we discovered that @lorde has no tweets but has
|
||||
# 7 million followers... wow!)
|
||||
|
||||
+2
-2
@@ -73,7 +73,7 @@ def write(file: str, text: str) -> None:
|
||||
"""
|
||||
Write text to a file
|
||||
|
||||
:param file: File path
|
||||
:param file: File path (will be converted to lowercase)
|
||||
:param text: Text
|
||||
:return: None
|
||||
"""
|
||||
@@ -91,7 +91,7 @@ def read(file: str) -> str:
|
||||
"""
|
||||
Read file content
|
||||
|
||||
:param file: File path
|
||||
:param file: File path (will be converted to lowercase)
|
||||
:return: File content as a string
|
||||
"""
|
||||
with open(file.lower(), 'r', encoding='utf-8') as f:
|
||||
|
||||
Reference in New Issue
Block a user