Jeff Bezos VS AMZN

The purpose is to look for a correlation between Twitter sentiment about Jeff Bezos and the Amazon (AMZN) stock price. Amazon (AMZN) price data can be downloaded from Yahoo Finance. Tweets are scraped using the search terms "#jeffbezos" and "@jeffBezos".

1.Upload library

1.1 Setup Twitter

# Twitter API credentials: replace every placeholder below with your own
# value before running the script.
api_key <- "use your api_key"
api_secret <- "use your api_secret"
access_token <- "use your access_token "
access_token_secret <- "use your access_token_secret"

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(
  consumer_key = api_key,
  consumer_secret = api_secret,
  access_token = access_token,
  access_secret = access_token_secret
)
## [1] "Using direct authentication"

2. Set criteria and get tweets

# Upper bound on the number of tweets requested for each search term.
numberOfTweets <- 3000

# Search English-language tweets containing "#jeffbezos" and "@jeffBezos".
tweets <- searchTwitter(
  searchString = "#jeffbezos", n = numberOfTweets, lang = "en"
)
tweets2 <- searchTwitter(
  searchString = "@jeffBezos", n = numberOfTweets, lang = "en"
)

# Convert both result lists to data frames and stack them.
tweetsDF <- twListToDF(tweets)
tweetsDF2 <- twListToDF(tweets2)
tweetsFullDF <- rbind(tweetsDF, tweetsDF2)

# A tweet that contains both the hashtag and the mention is returned by both
# searches. Keep only its first occurrence so it is not counted twice in the
# sentiment statistics.
tweetsFullDF <- tweetsFullDF[!duplicated(tweetsFullDF$id), , drop = FALSE]

# Save the raw download to disk.
write.csv(tweetsFullDF, "d:/twitter-sentiment/tweetsFullDF-N.csv")

3. Scrub Data amazone stock

Create subset of data
amzn <- amzn[, c("Date", "Close")]
Convert factors to dates
amzn[["Date"]] <- as.Date(amzn[["Date"]])

4.Clean text from tweets

Remove white spaces
Replace apostrophes with %% (for later replacement)
Remove emojis and other Unicode characters
Remove additional Unicode parts that may have remained
Remove orphaned full-stops
Reduce double spaces to single spaces
Change %% back to apostrophes
Remove URL from tweet
Replace any line breaks with “-”
Remove double hyphens where there were two line breaks
Fix ampersand
Add string to empty values (when only a URL was posted)
Look for truncated tweets (the API only retrieves 140 characters) 
and add ellipses
Write new data frame for cleaned tweets

4.1 Convert to dataframe

# Work on a copy of the downloaded tweets and convert the tweet text to the
# native encoding before cleaning it.
x <- tweetsFullDF
x[["text"]] <- enc2native(x[["text"]])

4.2 Clean text

# Clean raw tweet text.
#
# text: character vector of tweet texts.
#
# Returns a character vector of the same length in which whitespace is
# normalised, non-ASCII characters, <...> escapes (e.g. <U+A>) and URLs are
# removed, line breaks become "-", "&amp;" becomes "&", and texts that end up
# empty are replaced by "<no text>". NA entries stay NA.
clean_tweet_text <- function(text) {
  text <- gsub("^[[:space:]]*", "", text) # Remove leading whitespace
  text <- gsub("[[:space:]]*$", "", text) # Remove trailing whitespace
  text <- gsub(" +", " ", text) # Collapse runs of spaces
  text <- gsub("'", "%%", text, fixed = TRUE) # Replace apostrophes with %%
  text <- iconv(text, "latin1", "ASCII", sub = "") # Remove emojis
  # Remove each <...> escape on its own. A greedy "<(.*)>" would also delete
  # all the text between the first "<" and the last ">" of a tweet.
  text <- gsub("<[^>]*>", "", text)
  text <- gsub(" \\. ", " ", text) # Replace orphaned full stops with a space
  text <- gsub("%%", "'", text, fixed = TRUE) # Change %% back to apostrophes
  # Remove only the URL itself, not everything that follows it.
  text <- gsub("https?://[^[:space:]]*", "", text)
  text <- gsub("\\n", "-", text) # Replace line breaks with "-"
  text <- gsub("--", "-", text, fixed = TRUE) # Double "-" from double breaks
  text <- gsub("&amp;", "&", text, fixed = TRUE) # Fix ampersand &
  # Removing escapes and URLs can leave new gaps or edge whitespace, so
  # normalise the spacing once more before testing for empty texts.
  text <- gsub(" +", " ", text)
  text <- gsub("^[[:space:]]+|[[:space:]]+$", "", text)
  # Tweets that consisted only of removed content (e.g. a bare URL) are now
  # empty strings; label them explicitly.
  text[!is.na(text) & !nzchar(text)] <- "<no text>"
  text
}

x$text <- clean_tweet_text(x$text)

# Append "..." to every tweet flagged as truncated, replacing any trailing
# whitespace. The vectorised form also works when x has zero rows, and
# `%in% TRUE` treats a missing flag as "not truncated" instead of raising an
# error inside if().
is_truncated <- x$truncated %in% TRUE
x$text[is_truncated] <- sub("[[:space:]]*$", "...", x$text[is_truncated])

# Keep only the cleaned text, as a one-column data frame
cleanTweets <- x["text"]

5. Sentiment analysis

# Run the sentiment analysis on the cleaned tweet text
sentiment <- analyzeSentiment(cleanTweets)
# Numeric score from the QDAP dictionary column of the result
sentiment2 <- sentiment[["SentimentQDAP"]]
# Same score expressed as a direction (positive, neutral or negative)
sentiment3 <- convertToDirection(sentiment2)

# Extract the calendar date (YYYY-MM-DD) from each tweet's creation timestamp
# and convert it to class Date. The matched strings are already in ISO
# format, so one as.Date() call is sufficient; a second call with a
# "%m/%d/%y" format would be a no-op on a Date and does not describe the data.
date <- as.Date(str_extract(x$created, "\\d{4}-\\d{2}-\\d{2}"))

# Combine tweet text, numeric score, sentiment direction and date column-wise
df <- cbind(cleanTweets, sentiment2, sentiment3, date)
# Keep only the rows that have no missing value in any column
df <- df[which(complete.cases(df)), ]
# Optional: uncomment to save the scored tweets to disk
#write.csv(df,"d:/data/JEFF-df-01.csv")

5.1 Sentiment score

# Load the scored tweets from disk and drop the first column (presumably the
# row names added by write.csv -- confirm against the saved file).
df <- read.csv("d:/data/JEFF-df-01.csv")
df <- df[, -1]

# Attach the average sentiment score of its day to every tweet. mutate()
# keeps one row per tweet; ungroup() removes the grouping afterwards so that
# later operations on df2 are not silently performed per date.
df2 <- df %>%
  group_by(date) %>%
  mutate(meanSentiment = mean(sentiment2, na.rm = TRUE)) %>%
  ungroup()

# Count the tweets for each combination of date and sentiment direction
# (positive, neutral, negative)
freq <- dplyr::summarise(
  group_by(df, date, sentiment3),
  Freq = n()
)

## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.

# Reshape from long to wide: one row per date and one count column per
# sentiment direction. pivot_wider() replaces the superseded spread();
# names_sort = TRUE keeps the new columns in sorted order, as spread() did.
freq2 <- freq %>%
  pivot_wider(names_from = sentiment3, values_from = Freq, names_sort = TRUE)

# Show freq2 as a DT table widget with cell editing enabled.
DT::datatable(freq2, editable = TRUE)

# Print the first rows of the wide table.
head(freq2)

## # A tibble: 6 x 4
## # Groups:   date [6]
##   date       negative neutral positive
##   <chr>         <int>   <int>    <int>
## 1 2021-08-20        1       9        4
## 2 2021-08-21       15      41       61
## 3 2021-08-22        5      52       79
## 4 2021-08-23        8      58       74
## 5 2021-08-24       11      34       37
## 6 2021-08-25       12      33       39

6. Plot Sentiment Frequency

# Stacked bar chart of the daily tweet counts per sentiment direction.
# The data frame is passed to ggplot() and the aesthetics use bare column
# names, so the mapping stays tied to the plot data and the legend is titled
# "sentiment3" rather than "freq$sentiment3".
p1 <- ggplot(freq, aes(x = date, y = Freq, fill = sentiment3)) +
  geom_col() +
  ylab("Sentiment Frequency") +
  xlab("Date") +
  theme(axis.text.x = element_text(angle = 90))
p1

7. Calculate z-Scores FC

# Load the prepared AMZN prices and drop the first column.
AMZ <- read.csv("d:/twitter-sentiment/AMZ_CLEAN.csv")
AMZ <- AMZ[, -1]

# Mean and standard deviation of the closing price over the whole file.
mu <- mean(AMZ[["close"]])
sd <- sd(AMZ[["close"]])

# z-score of each closing price: (close - mean) / standard deviation.
AMZ2 <- mutate(AMZ, AMZScore = (close - mu) / sd)
head(AMZ2)

##         date  close   AMZScore
## 1 2011-01-03 184.22 -0.9109478
## 2 2011-01-04 185.01 -0.9101420
## 3 2011-01-05 187.42 -0.9076837
## 4 2011-01-06 185.86 -0.9092749
## 5 2011-01-07 185.49 -0.9096523
## 6 2011-01-10 184.68 -0.9104786

8. Plot

8.1 Plot twitter sentiment AMZN stock

# NOTE(review): rows 2676-2682 are hard-coded; presumably they are the
# trading days that overlap the collected tweets -- confirm against the data.
AMZ02 <- AMZ2[2676:2682, ]

# Line-and-point chart of the AMZN z-score for the selected days.
p02 <- ggplot(AMZ02, aes(x = date, y = AMZScore, group = 1)) +
  geom_line() +
  geom_point() +
  labs(x = "date", y = "AMZScore") +
  ggtitle("Twitter sentiment AMZN stock") +
  theme(axis.text.x = element_text(angle = 90))
p02

8.2 Plot sentiment Jeff Bezos

# Line-and-point chart of the mean tweet sentiment per date.
p01 <- ggplot(df2, aes(x = date, y = meanSentiment, group = 1)) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "meanSentiment") +
  ggtitle("Twitter sentiment Jeff Bezos") +
  theme(axis.text.x = element_text(angle = 90))
p01

8.3 Plot both AMZN and Jeff Bezos

# Stack the AMZN chart above the sentiment chart in a single column.
grid.arrange(p02, p01, ncol = 1)

9. Conclusion

Based on the data collected and the analysis of Twitter sentiment regarding Jeff Bezos and the 
closing price of Amazon stock (AMZN) from Yahoo Finance, the two series do not follow the same trend, and 
the sentiment does not have predictive properties for the price. 
When the average Twitter sentiment score for Jeff Bezos decreases (2021-08-27), the AMZN closing price increases. 
Likewise, when the Twitter sentiment score for Jeff Bezos decreases (2021-08-24), the AMZN closing price increases. 
The scraped Twitter data is general in nature, while fluctuations in Amazon shares 
on Yahoo Finance are more influenced by economic fundamentals.
Further investigation should include collecting more data over a larger timeframe.

Jeff Bezos VS AMZN

Crafted by Bambangpe

1.Upload library

1.1 Setup Twitter

2. Set criteria and get tweets

3. Scrub Data amazone stock

4.Clean text from tweets

4.1 Convert to dataframe

4.2 Clean text

5. Sentiment analysis

5.1 Sentiment score

6. Plot Sentiment Frequency

7. Calculate z-Scores FC

8. Plot

8.1 Plot twitter sentiment AMZN stock

8.2 Plot sentiment Jeff Bezos

8.3 Plot both AMZN and Jeff Bezos

9. Conclusion