Analyze the impact of Elon Musk's flight into space aboard the SpaceX rocket: does it affect Tesla's stock price? The analysis combines sentiment scores from a scraped Twitter data set with TESLA (TSLA) stock price fluctuations; the TSLA data can be downloaded from Yahoo Finance. Tweets are scraped using the search terms "@elonmusk" and "@Elonjet".
# Twitter API credentials.
# Each value is read from an environment variable when that variable is set,
# so real secrets never have to be written into this file. When a variable is
# not set, the original placeholder text is used and must be replaced by hand.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "use your api_key")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "use your api_secret")
access_token <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN",
  unset = "use your access_token "
)
access_token_secret <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN_SECRET",
  unset = "use your access_token_secret"
)
# Register the four credentials for the Twitter calls made later in the script.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
## [1] "Using direct authentication"
# Maximum number of tweets requested from each of the two searches.
numberOfTweets <- 3000
# Search English-language tweets mentioning "@elonmusk" and "@Elonjet".
tweets <- searchTwitter(
  searchString = "@elonmusk", n = numberOfTweets, lang = "en"
)
tweets2 <- searchTwitter(
  searchString = "@Elonjet", n = numberOfTweets, lang = "en"
)
# Convert both result lists to data frames and stack them.
tweetsDF <- twListToDF(tweets)
tweetsDF2 <- twListToDF(tweets2)
tweetsFullDF <- rbind(tweetsDF, tweetsDF2)
# A tweet that mentions both handles is returned by both searches; keep only
# its first copy so it is not counted twice in the sentiment statistics.
tweetsFullDF <- tweetsFullDF[!duplicated(tweetsFullDF$id), ]
rownames(tweetsFullDF) <- NULL
#write.csv(tweetsFullDF,"d:/twitter-sentiment/tweetsFull-elon.csv")#ok
Create subset of data
# Keep only the trading date and the closing price of the TSLA table.
tsla <- TSLA[, c("Date", "Close"), drop = FALSE]
Convert factors to dates
# Convert the Date column of the TSLA subset to class Date. The original line
# referred to `amzn`, an object that is not defined in this analysis.
tsla$Date <- as.Date(tsla$Date)
Remove white spaces
Replace apostrophes with %% (for later replacement)
Remove emojis and other Unicode characters
Remove additional Unicode parts that may have remained
Remove orphaned full-stops
Reduce double spaces to single spaces
Change %% back to apostrophes
Remove URL from tweet
Replace any line breaks with “-”
Remove double hyphens where there were two line breaks
Fix ampersand
Add string to empty values (when only a URL was posted)
Look for truncated tweets (the API only retrieves 140 characters)
and add ellipses
Write new data frame for cleaned tweets
# Clean the tweet text in `x`, a working copy of the scraped tweets. Only
# x$text is modified; x$truncated is read to mark shortened tweets.
x <- tweetsFullDF
x$text <- enc2native(x$text)
# Strip leading and trailing whitespace, then collapse runs of spaces.
x$text <- gsub("^[[:space:]]+|[[:space:]]+$", "", x$text)
x$text <- gsub(" +", " ", x$text)
# Protect apostrophes with a %% marker while non-ASCII content is stripped.
x$text <- gsub("'", "%%", x$text)
x$text <- iconv(x$text, "latin1", "ASCII", sub = "") # Remove emojis
# Remove leftover escapes such as <U+A> or <ed>. Only these escape forms are
# matched; the former greedy "<(.*)>" deleted everything between the first
# "<" and the last ">" of a tweet.
x$text <- gsub("<(U\\+[0-9A-Fa-f]+|[0-9A-Fa-f]{2})>", "", x$text)
# Replace orphaned full stops with a space.
x$text <- gsub(" \\. ", " ", x$text)
# Reduce runs of spaces to one space (the former pattern was a single space,
# which changed nothing).
x$text <- gsub(" {2,}", " ", x$text)
# Change %% back to apostrophes.
x$text <- gsub("%%", "'", x$text, fixed = TRUE)
# Remove URLs only. The former "https(.*)*$" also deleted all text that
# followed the first URL.
x$text <- gsub("https?://[^[:space:]]+", "", x$text)
# Removing a URL can leave whitespace at either end of the text.
x$text <- gsub("^[[:space:]]+|[[:space:]]+$", "", x$text)
# Replace line breaks with "-" and merge the "--" left by double line breaks.
x$text <- gsub("\n", "-", x$text, fixed = TRUE)
x$text <- gsub("--", "-", x$text, fixed = TRUE)
# Decode the HTML entity for the ampersand (the former call replaced "&"
# with "&", which changed nothing).
x$text <- gsub("&amp;", "&", x$text, fixed = TRUE)
# Tweets left without any text (for example when only a URL was posted) get a
# placeholder; "" is covered as well as whitespace-only strings.
x$text[!nzchar(trimws(x$text))] <- "<no text>"
# Append an ellipsis to tweets flagged as truncated. The vectorised form also
# works for zero rows and treats an NA flag as "not truncated".
is_truncated <- x$truncated %in% TRUE
x$text[is_truncated] <- sub("[[:space:]]*$", "...", x$text[is_truncated])
# Keep only the cleaned tweet text as a one-column data frame.
cleanTweets <- select(x, "text")
# Score every tweet.
sentiment <- analyzeSentiment(cleanTweets)
# Dictionary-based sentiment score according to the QDAP dictionary.
sentiment2 <- sentiment[["SentimentQDAP"]]
# Direction of that score (positive, neutral or negative).
sentiment3 <- convertToDirection(sentiment2)
# Extract the calendar date (YYYY-MM-DD) of each tweet from its creation
# timestamp and convert it to class Date.
date <- x$created
date <- str_extract(date, "\\d{4}-\\d{2}-\\d{2}")
# The extracted text is already in ISO layout, so a single as.Date() call is
# enough. The former second call with format "%m/%d/%y" had no effect on an
# existing Date and named a layout the data never has.
date <- as.Date(date)
# Combine tweet text, sentiment score, sentiment direction and date into one
# data frame, one row per tweet.
df <- cbind(cleanTweets, sentiment2, sentiment3, date)
# Keep only the rows in which every column has a value.
has_all_values <- complete.cases(df)
df <- df[has_all_values, ]
# Optional export of the result:
#write.csv(df,"d:/data/JEFF-df-01.csv")
# Calculate the average daily sentiment score.
# `df` is reloaded from a previously saved CSV file.
df <- read.csv("d:/data/dftsla-01.csv")
# Drop the first column.
# NOTE(review): presumably the row-name column added by write.csv - confirm.
df <- df[, -1]
df2 <- df %>%
  dplyr::group_by(date) %>%
  # mutate() keeps one row per tweet and repeats the daily mean on each row;
  # summarise(meanSentiment = mean(sentiment2, na.rm = TRUE)) would instead
  # return one row per date.
  dplyr::mutate(meanSentiment = mean(sentiment2, na.rm = TRUE)) %>%
  # Drop the grouping so that it does not leak into later operations on df2.
  dplyr::ungroup()
# Count the tweets per date and sentiment direction (positive, neutral,
# negative); the count is stored in column Freq.
freq <- dplyr::summarise(group_by(df, date, sentiment3), Freq = n())
## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
# Convert the counts from long to wide: one row per date and one column per
# sentiment direction. pivot_wider() replaces the superseded spread();
# names_sort = TRUE keeps the new columns in sorted order, as spread() did.
freq2 <- freq %>%
  pivot_wider(names_from = sentiment3, values_from = Freq, names_sort = TRUE)
# Show the wide table as an interactive widget and print its first rows.
DT::datatable(freq2, editable = TRUE)
head(freq2)
## # A tibble: 6 x 4
## # Groups: date [6]
## date negative neutral positive
## <chr> <int> <int> <int>
## 1 2021-09-08 1 2 8
## 2 2021-09-09 13 22 12
## 3 2021-09-10 NA 6 6
## 4 2021-09-11 6 16 13
## 5 2021-09-12 6 29 9
## 6 2021-09-13 1 12 7
# Stacked bar chart of the number of tweets per date, filled by sentiment
# direction. The data frame is passed to ggplot() and the columns are named
# directly in aes(); referring to freq$... inside aes() bypasses the data
# argument and produced the legend title "freq$sentiment3".
p1 <- ggplot(freq, aes(x = date, y = Freq, fill = sentiment3)) +
  geom_col() +
  ylab("Sentiment Frequency") +
  xlab("Date") +
  theme(axis.text.x = element_text(angle = 90))
p1
# Earlier input of this script, kept for reference:
#AMZ <- read.csv("d:/twitter-sentiment/AMZ_CLEAN.csv")
# Load the saved price table and drop its first column.
TSL <- read.csv("d:/data/Ft-1.csv")
TSL <- TSL[, -1]
# Mean and standard deviation of the closing price over the whole table.
mu <- mean(TSL$close)
sd <- sd(TSL$close)
# TSLScore is the standardised closing price: (close - mean) / sd.
TSL2 <- mutate(TSL, TSLScore = (close - mu) / sd)
head(TSL2)
## date close TSLScore
## 1 2011-01-03 5.324 -0.5464368
## 2 2011-01-04 5.334 -0.5463823
## 3 2011-01-05 5.366 -0.5462082
## 4 2011-01-06 5.576 -0.5450655
## 5 2011-01-07 5.648 -0.5446737
## 6 2011-01-10 5.690 -0.5444452
# Take rows 2689 to 2695 of the standardised price table.
# NOTE(review): presumably the trading days that overlap the tweet collection
# window - confirm; these hard-coded row numbers stop matching as soon as the
# input file gains or loses rows.
TSL02 <- TSL2[2689:2695, ]
# Line chart of the standardised TSLA closing price for those rows. The y-axis
# label previously read 'TSA Score'.
p02 <- ggplot(data = TSL02, aes(x = date, y = TSLScore, group = 1)) +
  geom_line() +
  geom_point() +
  ylab("TSLA Score") +
  xlab("date") +
  ggtitle("TSLA stock") +
  theme(axis.text.x = element_text(angle = 90))
p02
# Line chart of the mean daily Twitter sentiment.
p01 <- ggplot(df2, aes(date, meanSentiment, group = 1)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Twitter sentiment Elon Musk",
    x = "Date",
    y = "meanSentiment"
  ) +
  theme(axis.text.x = element_text(angle = 90))
p01
# Draw the stock chart above the sentiment chart for comparison.
grid.arrange(p02, p01)
Based on the data collected, the Twitter sentiment regarding Elon Musk and the
closing price of TESLA (TSLA) from Yahoo Finance follow the same trend.
The graph of the Twitter sentiment score shows a significant increase on September 19, 2021, which is naturally the result of tweets posted after Elon Musk flew with the SpaceX rocket.
As the Twitter sentiment score rises, the TESLA stock closing price increases as well.
It can be concluded that the SpaceX launch has a direct effect on the rise of TESLA shares, since both companies are owned by billionaire Elon Musk.
```