diff --git a/raw_collect/twitter.py b/src/raw_collect/twitter.py similarity index 96% rename from raw_collect/twitter.py rename to src/raw_collect/twitter.py index 02b899f..1da532e 100644 --- a/raw_collect/twitter.py +++ b/src/raw_collect/twitter.py @@ -9,7 +9,7 @@ import tweepy from tweepy import API from tweepy.models import Status -from collect.utils import Config, debug, Posting, json_stringify +from collect.utils import Config, debug, Posting, json_stringify, load_config @dataclass @@ -117,6 +117,9 @@ def download_user_tweets(api: API, screen_name: str) -> None: f.write(json_stringify(postings)) +def download_followings_chain(start_point: str, n: int): + """ + This function downloads n Twitter users by using a followings-chain. diff --git a/raw_collect/twitter_individual.py b/src/raw_collect/twitter_individual.py similarity index 100% rename from raw_collect/twitter_individual.py rename to src/raw_collect/twitter_individual.py diff --git a/src/raw_collect/twitter_random_individuals.py b/src/raw_collect/twitter_random_individuals.py new file mode 100644 index 0000000..ef13339 --- /dev/null +++ b/src/raw_collect/twitter_random_individuals.py @@ -0,0 +1,32 @@ + + +def get_followings_chain(start_point: str, n: int) -> tuple[list[str], list[str]]: + """ + This function obtains Twitter users through a followings chain. + + 1. This function tries to obtain a semi-random list of accounts from Twitter. + + Since there isn't a database with all Twitter users, we can't obtain a strictly random list of + Twitter users. Therefore, we use the method of followings chaining: we start from a specific + individual, obtain their followings, and pick 6 random individuals from the followings list. + Then, we repeat the process for the selected followings: we pick 6 random followings of the 6 + random followings that we picked. + + In reality, this method will be biased toward individuals that are worthy of following since + we are picking random followings. + + 2. 
This function tries to obtain a list of the most popular accounts from Twitter. + + Again, since there isn't a database with all Twitter users and their popularity, we can't + obtain a definite list of the most popular accounts. So, we obtain our best approximation of a + list of the most popular accounts by obtaining (TODO: finish describing the method) + + :param start_point: The starting user of the search + :param n: The total number of random individuals to obtain + :return: 1. A list of semi-random individuals, and 2. a list of the most popular individuals. + """ + + + +if __name__ == '__main__': + pass diff --git a/raw_collect/utils.py b/src/raw_collect/utils.py similarity index 100% rename from raw_collect/utils.py rename to src/raw_collect/utils.py