library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.1
## ✔ tidyr   0.8.3     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(tibble)
library(tidytext)
# Load the raw tweet exports for both Coachella 2019 weekends.
# NOTE(review): setwd() pins the script to one machine's directory layout; it
# is kept so any later relative paths still resolve, but a project-relative
# path would be more portable.
setwd("/Users/robertvargas/Documents/Projects/Data")
# stringsAsFactors = FALSE is set explicitly so text columns (tweet text,
# timestamps, ids) load as character vectors on every R version instead of
# factors with hundreds of thousands of levels.
# NOTE(review): the weekend 1 file triggers an "embedded nul(s)" warning;
# check whether any rows are truncated (read.csv accepts skipNul = TRUE).
wk1 <- read.csv(
  "Coachella_2019_Tweets_Weekend_1_2019-04-07_to_2019-04-16.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : embedded nul(s) found in input
wk2 <- read.csv(
  "Coachella_2019_Tweets_Weekend_2_2019-04-14_to_2019-04-23.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Keep the same eight columns, chosen by position, from each export. The later
# steps read reply_to_tweet_id, created_at, full_tweet_text, retweet_count and
# favorite_count from this selection.
wk1 <- wk1 %>% select(1, 2, 3, 5, 9, 10, 23, 24)
wk2 <- wk2 %>% select(1, 2, 3, 5, 9, 10, 23, 24)

Feature Engineering

In order to better understand the data, I will manually engineer certain columns to learn more about the Twitter activity surrounding the festival. After these updates I should be able to tell which tweets are organic and the date and time each tweet occurred, and I will have a copy of the contents of each tweet in another column.

#organic
# Label every tweet by its reply_to_tweet_id: the literal string "None" gives
# "organic", any other value gives "non-organic". Rows where the comparison is
# NA keep an NA label.
wk1$organic <- ifelse(wk1$reply_to_tweet_id == "None", "organic", "non-organic")
wk2$organic <- ifelse(wk2$reply_to_tweet_id == "None", "organic", "non-organic")
#date formatting
# Build a Date for each tweet: start from the fixed template "2019-4-  " and
# overwrite its trailing characters with characters 9-10 of created_at, which
# are used as the day of the month.
# NOTE(review): the year and month are hard-coded, so this assumes every tweet
# in both files was posted in April 2019 — confirm against created_at.
wk1$clean_date <- as.Date(
  `substr<-`(
    rep("2019-4-  ", nrow(wk1)), 8, 10,
    substring(wk1$created_at, 9, 10)
  )
)
wk2$clean_date <- as.Date(
  `substr<-`(
    rep("2019-4-  ", nrow(wk2)), 8, 10,
    substring(wk2$created_at, 9, 10)
  )
)
#time
# clean_time holds characters 12-19 of created_at; the next step parses this
# slice as "%H:%M:%S".
wk1$clean_time <- substring(wk1$created_at, first = 12, last = 19)
wk2$clean_time <- substring(wk2$created_at, first = 12, last = 19)
#transform both data
# Combine clean_date and clean_time into one POSIXct timestamp per tweet.
# NOTE(review): no tz is given, so the text is interpreted in the session's
# local time zone — confirm that matches the zone created_at was recorded in.
wk1$clean_datetime <- with(
  wk1,
  as.POSIXct(paste(clean_date, clean_time), format = "%Y-%m-%d %H:%M:%S")
)
wk2$clean_datetime <- with(
  wk2,
  as.POSIXct(paste(clean_date, clean_time), format = "%Y-%m-%d %H:%M:%S")
)
##copy tweets
# Keep an untouched copy of each tweet's text: full_tweet_text is rewritten in
# place by the cleaning step further down, while the "top tweets" listings
# print original_tweet.
wk1[["original_tweet"]] <- wk1[["full_tweet_text"]]
wk2[["original_tweet"]] <- wk2[["full_tweet_text"]]

Analyzing the Data

Approx. 80% of the tweets were original, organic tweets. That is a lot of Twitter users, considering the number of tweets for each weekend.

## [1] "There are 454551.2 tweets for weekend 1."
## [1] "There are 188923.76 tweets for weekend 2."

I removed stop words and certain word patterns that aren’t helpful. In computing, stop words are words that are filtered out before or after processing natural-language data. Doing so required further feature engineering. The removed patterns included links, general descriptions of Coachella, and other odd text.

# Clean the weekend 1 tweet text before it is tokenised.
#
# Links are removed first: "http://" or "https://" plus everything up to the
# next whitespace. Only the link itself is dropped, so any text that follows a
# link in the tweet is kept.
wk1$full_tweet_text <- gsub(
  "https?://[^[:space:]]*", "", wk1$full_tweet_text
)

# Uninformative words are then removed as WHOLE words, ignoring case.
# Deleting them as raw, case-sensitive substrings corrupts unrelated words
# ("la" turns "blackpink" into "bckpink", "en" turns "weekend" into "weekd")
# and reduces capitalised "Coachella" to "Coach"; those fragments then show
# up as top words.
wk1_noise_words <- c(
  "de", "en", "la", "el", "se",
  "i’m",
  "coach", "coachella", "coachella2019", "2019",
  "festival", "music", "live", "performance", "weekend", "youtube"
)
# \\b anchors each alternative at word boundaries so only full words match.
wk1_noise_pattern <- paste0(
  "\\b(", paste(wk1_noise_words, collapse = "|"), ")\\b"
)
wk1$full_tweet_text <- gsub(
  wk1_noise_pattern, "", wk1$full_tweet_text,
  ignore.case = TRUE
)

# Clean the weekend 2 tweet text before it is tokenised.
#
# Links are removed whole: "http://" or "https://" plus everything up to the
# next whitespace. Patterns such as "https*" and "t.co*" strip only part of a
# link and also match inside ordinary words ("t.c" matches the "tic" in
# "fantastic").
wk2$full_tweet_text <- gsub(
  "https?://[^[:space:]]*", "", wk2$full_tweet_text
)

# The Japanese particle is deleted wherever it occurs; fixed = TRUE makes this
# a literal, non-regex match.
wk2$full_tweet_text <- gsub("の", "", wk2$full_tweet_text, fixed = TRUE)

# Uninformative words are removed as WHOLE words, ignoring case. Raw substring
# deletion damages other words ("la" inside "blackpink", "en" inside
# "weekend"), and a pattern like "2*" deletes every digit 2 rather than the
# year "2019".
wk2_noise_words <- c(
  "de", "en", "la",
  "i’m",
  "coachella", "2019",
  "live", "love", "performance", "weekend"
)
# \\b anchors each alternative at word boundaries so only full words match.
wk2_noise_pattern <- paste0(
  "\\b(", paste(wk2_noise_words, collapse = "|"), ")\\b"
)
wk2$full_tweet_text <- gsub(
  wk2_noise_pattern, "", wk2$full_tweet_text,
  ignore.case = TRUE
)
##Removing the stop words
# Split the cleaned tweet text into one word per row (column `word`), then
# drop every row whose word appears in tidytext's stop_words table.
# z holds the weekend 1 words, zz the weekend 2 words.
z <- wk1 %>%
  select(full_tweet_text) %>%
  unnest_tokens(word, full_tweet_text) %>%
  anti_join(stop_words)
## Joining, by = "word"
zz <- wk2 %>%
  select(full_tweet_text) %>%
  unnest_tokens(word, full_tweet_text) %>%
  anti_join(stop_words)
## Joining, by = "word"

Drawing a conclusion

Weekend 1

After formatting the data, I pulled the 10 most used words in tweets. In addition, I pulled the 10 most retweeted tweets and the 10 most liked tweets.

# Weekend 1 word frequencies, most frequent first.
count(z, word, sort = TRUE)
## # A tibble: 247,964 x 2
##    word                 n
##    <chr>            <int>
##  1 coach           316319
##  2 coachella        27223
##  3 i’m              20522
##  4 blackpink        19590
##  5 ariana           19211
##  6 bckpink          19002
##  7 ygofficialblink  17337
##  8 day              15724
##  9 youtube          14899
## 10 people           14363
## # … with 247,954 more rows
# Ten weekend 1 tweets with the highest retweet_count, printed with their
# original (uncleaned) text. x holds the row names of those ten rows.
x <- rownames(wk1)[head(order(wk1$retweet_count, decreasing = TRUE), 10)]
wk1$original_tweet[as.integer(x)]
##  [1] james charles in 2050 dancing at coachella after the nuclear fallout from world war 3 bleaches his skin \nhttps://t.co/0jVLW3nvsi                                                                                                                                                                              
##  [2] Khalid and Billie Eilish performing lovely at Coachella that’s it that’s the tweet https://t.co/nx3Qoo32MJ                                                                                                                                                                                                     
##  [3] “what the fuck are the words tho”\n\nBillie Eilish, Coachella 2019 https://t.co/Mt7G9pmt10                                                                                                                                                                                                                     
##  [4] childish gambino airdropped a picture at coachella of his adidas collab and everyone who accepted got a pair of his unreleased shoes...                                                                                                                                                                        
##  [5] James Charles absolutely lost it at Coachella https://t.co/xXep8qTyWA                                                                                                                                                                                                                                          
##  [6] .@ygofficialblink in your area https://t.co/YTOtbdArIS                                                                                                                                                                                                                                                         
##  [7] An in-depth look at Beyoncé's celebrated 2018 Coachella performance from creative concept to cultural movement. #beyoncehomecoming https://t.co/DfLlBGkCHL                                                                                                                                                     
##  [8] \U0001f4cdLGBTQ+ influencers   |   |   | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _      Knowing full well the owner of    | \U0001f4cdCoachella uses the profit to fund|      anti-lgbt &amp; gun pro-gun orgs      |      _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _     |           |   |   | \U0001f4cdCoachella
##  [9] J Balvin es un fucking duro por vivir su mejor momento en #Coachella y rendirle homenaje a las leyendas del reggaetón, mis respetos   https://t.co/vHM4uUjzAH                                                                                                                                                  
## [10] Lorde also performed at coachella at 17. https://t.co/LGPWsJsScy                                                                                                                                                                                                                                               
## 557743 Levels: ­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\nreminder we start getting coachella pics today ...
# Ten weekend 1 tweets with the highest favorite_count, printed with their
# original (uncleaned) text. x holds the row names of those ten rows.
x <- rownames(wk1)[head(order(wk1$favorite_count, decreasing = TRUE), 10)]
wk1$original_tweet[as.integer(x)]
##  [1] james charles in 2050 dancing at coachella after the nuclear fallout from world war 3 bleaches his skin \nhttps://t.co/0jVLW3nvsi      
##  [2] childish gambino airdropped a picture at coachella of his adidas collab and everyone who accepted got a pair of his unreleased shoes...
##  [3] these coachella bitches WISH they fuckin could \U0001f525\U0001f451\U0001f33c https://t.co/T4Y4E8bdae                                  
##  [4] “what the fuck are the words tho”\n\nBillie Eilish, Coachella 2019 https://t.co/Mt7G9pmt10                                             
##  [5] Lorde also performed at coachella at 17. https://t.co/LGPWsJsScy                                                                       
##  [6] James Charles absolutely lost it at Coachella https://t.co/xXep8qTyWA                                                                  
##  [7] ☀️\U0001f334@coachella https://t.co/BESi2qjs2B                                                                                          
##  [8] My chicken’s first Coachella I’m sooooo proud of him :) https://t.co/KNcplJegnl                                                        
##  [9] i know some ppl think imma pop out at coachella but i have to wash the dishes this weekend or dad’s gonna be mad                       
## [10] Khalid and Billie Eilish performing lovely at Coachella that’s it that’s the tweet https://t.co/nx3Qoo32MJ                             
## 557743 Levels: ­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\n\n­\nreminder we start getting coachella pics today ...

By looking at the top 10 words for weekend 1, it appears the most talked-about performers were Blackpink, Ariana (Ariana Grande), and YG. Though it doesn’t give us much context about what specifically made them stand out, we can agree that they got people talking. Looking at the top tweets, I see that some popular performances were those by Khalid and Billie Eilish, YG, Beyonce, and J. Balvin.

Weekend 2

# Horizontal bar chart of the ten most frequent weekend 2 words (stop words
# already removed), ordered by frequency.
zz %>%
  count(word, sort = TRUE) %>%
  # top_n() ranks by the last column, n, which produces the
  # "Selecting by n" message.
  top_n(10) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_classic() +
  # Axis titles follow the aesthetics, not the flipped display: x is the word
  # and y is its count, so the words get the "Unique Word" title and the bar
  # lengths get "Count".
  labs(
    x = "Unique Word in Tweets",
    y = "Count",
    title = "Coachella 2019 Weekend 2",
    subtitle = "Stop words removed from the list"
  )
## Selecting by n

##top 10 most retweeted tweets

# Ten weekend 2 tweets with the highest retweet_count, printed with their
# original (uncleaned) text. y holds the row names of those ten rows.
y <- rownames(wk2)[head(order(wk2$retweet_count, decreasing = TRUE), 10)]
wk2$original_tweet[as.integer(y)]
##  [1] เมื่อใส่ผ้าไทยไป coachella สาบานว่ามีแต่คนชมผ้าไทยบ้านเรา ทั้งชม ทั้งขอจับ ทั้งถามว่าหาซื้อยังไง ภูมิใจมากกกกกกกกกก ดีใจที่ได้มีส่วนร่วมกันชุดนี้ตั้งแต่เลือกผ้า                                                                                                                                                       
##  [2] Thank you @ArianaGrande . Thank you @coachella. That felt right. Love you.                                                                                                                                                                                                       
##  [3] Beyoncé rehearsed for 8 months for Coachella. I never want to hear anybody call her overrated ever again. She puts her blood sweat and tears into everything while y’all favs put on one flower crown &amp; sing off-key with technical difficulties. She’s above the standard.  
##  [4] \U0001f5a4\U0001f496 @ygofficialblink https://t.co/uIZqPYs6e0                                                                                                                                                                                                                    
##  [5] เมื่อใส่ผ้าไทยไป coachella EP.2 เสนอตอนผ้าแพรวา เช่นเคยว่าฝรั่งมองผ้าไม่มองหน้าเลยจ้า 5555 สมเป็นราชินีผ้าไทย ปลื้มปริ่ม                                                                                                                                                                              
##  [6] ชอบดูแฟชั่นสาว ๆ ในงาน Coachella เป็น a.s style ปัง ๆ อยากแต่งแบบนี้ไปเที่ยวเทศกาลดนตรีในบ้านเรามาก เขาใหญ่อาจไม่ไหว เอาอีสานเขียวปีหน้าดีไหม 5555555                                                                                                                                               
##  [7] Blessing your Monday with these iconic queens \U0001f451 \n\n@ArianaGrande x #BLACKPINK take #Coachella! https://t.co/dKR08xFpgQ                                                                                                                                                 
##  [8] Me at Coachella 2007 20 mins before i smoked Cali weed for the first time and forgot EVERY SINGLE word of my set. Still killed it.                                                                                                                                               
##  [9] So she took that $4M from Coachella and flipped it into $60M. Queen behavior. https://t.co/3kuZNjQlXE                                                                                                                                                                            
## [10] So Beyonce had a strict diet prepping for Coachella..no dairy, no carbs, no alcohol, no meat, no fish, and no sugar. I know some women with that same diet prepping for a wedding. You are marrying a guy name Dan who eats chicken pot pie at diners, ya’ll can have some bread.
## 235956 Levels: ̑̑̑ 。 。chela's upchaȓ \u2e19 : [IG] 190416 westbrook instagram updates with #BLACKPINK!  "Lucky me, lucky you. #BLACKPINK #블랙핑크 #Coachella \U0001f4f7: @.tonypillow" \U0001f517https://t.co/ElPFUqamqk  #BLACKPINK #블랙핑크 #JENNIE #JISOO #LISA #ROSÉ ...
# Ten weekend 2 tweets with the highest favorite_count, printed with their
# original (uncleaned) text.
# The ordering and the indexed data frame must both be wk2: taking row names
# from wk1 gives the right rows only while wk1 has at least as many rows as
# wk2 and default row names.
y <- rownames(head(wk2[order(wk2$favorite_count, decreasing = TRUE), ], 10))
wk2$original_tweet[as.integer(y)]
##  [1] Thank you @ArianaGrande . Thank you @coachella. That felt right. Love you.                                                                                                                                                                                                       
##  [2] So Beyonce had a strict diet prepping for Coachella..no dairy, no carbs, no alcohol, no meat, no fish, and no sugar. I know some women with that same diet prepping for a wedding. You are marrying a guy name Dan who eats chicken pot pie at diners, ya’ll can have some bread.
##  [3] Me at Coachella 2007 20 mins before i smoked Cali weed for the first time and forgot EVERY SINGLE word of my set. Still killed it.                                                                                                                                               
##  [4] Beyoncé rehearsed for 8 months for Coachella. I never want to hear anybody call her overrated ever again. She puts her blood sweat and tears into everything while y’all favs put on one flower crown &amp; sing off-key with technical difficulties. She’s above the standard.  
##  [5] my friends and I deadass JUST decided to go to Coachella weekend 2 \U0001f480 here we go again                                                                                                                                                                                   
##  [6] Coachella is officially a wrap lol see y’all in 356 days                                                                                                                                                                                                                         
##  [7] So she took that $4M from Coachella and flipped it into $60M. Queen behavior. https://t.co/3kuZNjQlXE                                                                                                                                                                            
##  [8] i was just talking to my mom about coachella and the first thing she said was “i heard james charles was there with his ass out again” hahahahaha                                                                                                                                
##  [9] just heard from coachella. they're replacing ariana grande with me playing harmonica. see you all there                                                                                                                                                                          
## [10] I forgot to upload Sunday Service Videos this week. So here’s one that I love. I just don’t know what’s gonna happen at Coachella https://t.co/ebY8gJdkgQ                                                                                                                        
## 235956 Levels: ̑̑̑ 。 。chela's upchaȓ \u2e19 : [IG] 190416 westbrook instagram updates with #BLACKPINK!  "Lucky me, lucky you. #BLACKPINK #블랙핑크 #Coachella \U0001f4f7: @.tonypillow" \U0001f517https://t.co/ElPFUqamqk  #BLACKPINK #블랙핑크 #JENNIE #JISOO #LISA #ROSÉ ...

Twitter activity for weekend 2 closely resembled weekend 1 but also had its own talking points. For starters, the top 10 most used words were still related to Ariana Grande and Blackpink. The 4th most used word is Beyonce, who was one of the headlines for both weekends. One key difference between the two weekends was Kanye West's Sunday Service performance, which he performed exclusively for the weekend 2 audience. As we see, words related to the Sunday Service were consistently used in tweets.

So, by looking at the most retweeted and most liked tweets for weekend 2, it's fairly obvious that Beyonce stole the show. Ariana Grande and Blackpink were still among the most popular performers.