Twitter Posts, Likes, Friends, and Timeline Pull

This Python code extracts each user's home timeline, likes, posts, and friends (i.e., accounts the user follows) from raw Twitter data obtained from the Twitter API v2. For each language folder it then saves the results to four CSV files (timeline, posts, likes, and friends). The code works with Twitter data in multiple languages.

"""
@author: rb5286
"""
import json
import pandas as pd
import glob
from tqdm import tqdm


def readFiles(lang, base_dir='/Users/rb5286/Downloads/twitter3/'):
    """Flatten per-user Twitter JSON dumps for one language into four CSVs.

    Every file matching ``<base_dir>/<lang>/*json*`` is parsed as JSON and its
    'home-timeline', 'user-timelines', 'favorites' and 'friends' sections are
    flattened with ``pd.json_normalize``. The user id is the part of the file
    name before the first ``'__'`` and is stored in a ``userid`` column.

    A user is kept only when all four sections are non-empty; otherwise a
    message naming the first empty section is printed and the whole file is
    skipped. A section that is absent from the JSON is treated as empty, and
    an expected column that is absent from a section is written as NaN.

    Args:
        lang: Name of the language sub-folder (also used in output names).
        base_dir: Folder holding the language sub-folders; the CSVs
            ``Twitter_<lang>_{timeline,posts,likes,friends}.csv`` are written
            directly into it.

    Returns:
        None. Results are written to disk and the four row counts
        (timeline, posts, likes, friends) are printed.
    """
    import os  # local import: keeps this block self-contained

    tweet_cols = ['text', 'is_quote_status', 'retweet_count', 'favorite_count',
                  'lang', 'entities.hashtags', 'entities.urls',
                  'user.screen_name', 'user.location',
                  'user.followers_count']  # 'place'
    friends_col = ['id', 'screen_name', 'location']

    # (JSON key, label for the "empty" message, output suffix, kept columns);
    # the order matters: it is the order in which emptiness is reported.
    sections = [
        ('home-timeline', 'HOME TIMELINE is', 'timeline', tweet_cols),
        ('user-timelines', 'POSTS are', 'posts', tweet_cols),
        ('favorites', 'LIKES are', 'likes', tweet_cols),
        ('friends', 'FRIENDS are', 'friends', friends_col),
    ]

    # Collect per-user frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all rows every time.
    collected = {key: [] for key, _, _, _ in sections}

    # Sorted so the row order of the output CSVs is reproducible.
    files = sorted(glob.glob(os.path.join(base_dir, lang, '*json*')))

    for file in tqdm(files):
        # JSON is UTF-8; be explicit so non-English text decodes everywhere.
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        user_id = os.path.basename(file).split('__')[0]

        user_frames = {}
        for key, label, _, cols in sections:
            try:
                frame = pd.json_normalize(data, record_path=[key])
            except KeyError:
                # Section missing from this dump: same as an empty section.
                frame = pd.DataFrame()
            if frame.empty:
                print(f'{user_id} {label} empty')
                break
            # reindex returns a new frame (so adding a column is safe) and
            # fills columns missing from this user's data with NaN.
            frame = frame.reindex(columns=cols)
            frame['userid'] = user_id
            user_frames[key] = frame
        else:
            # Only reached when no section was empty: keep this user.
            for key, frame in user_frames.items():
                collected[key].append(frame)

    row_counts = []
    for key, _, suffix, _ in sections:
        parts = collected[key]
        # pd.concat rejects an empty list; fall back to an empty frame.
        combined = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
        combined.to_csv(os.path.join(base_dir, f'Twitter_{lang}_{suffix}.csv'),
                        index=False)
        row_counts.append(len(combined))
    print(*row_counts)
    
### For printing colnames
# `timeline`, `posts` and `likes` are local variables of readFiles, so they
# only exist at module level if the loop body was run by hand (e.g. in an
# interactive session). Inspect whichever of them is available instead of
# raising NameError when none is.
inspected_columns = {
    name: list(globals()[name].columns.values)
    for name in ('timeline', 'posts', 'likes')
    if name in globals()
}
for name, columns in inspected_columns.items():
    print(name, columns)

Run the function defined above once for each language folder.

# Process each language folder in turn; every call writes that language's
# four CSV files ('sp' is presumably the Spanish folder - confirm).
languages = ['en', 'sp']
for lan in languages:
    readFiles(lan)

Twitter Posts, Likes, Friends, and Timeline Pull

Robert Vidigal, PhD