# Hashtag co-occurrence network ----
# Packages: readxl supplies read_excel(); tidyverse supplies the stringr
# helpers and the %>% pipe used further down.
library(readxl)
library(tidyverse)

# NOTE(review): machine-specific absolute path; the script only runs as-is on a
# machine where this directory exists.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/SafeD_ADV")

# Load the workbook into `dat`; the file name is resolved relative to the
# working directory set above.
dat <- read_excel("autonomouscar.xlsx")
# Normalise a character vector of tweet text.
#
# x: character vector (one element per tweet or field).
# Returns a character vector of the same length with the substitutions below
# applied in order, lower-cased and trimmed on both sides.
clean_tweets <- function(x) {
  # Strip ftp/http(s)-style links; the (.*) groups are greedy.
  txt <- str_remove_all(x, " ?(f|ht)(tp)(s?)(://)(.*)[.|/](.*)")
  # Spell out ampersands.
  txt <- str_replace_all(txt, "&", "and")
  # Drop punctuation characters.
  txt <- str_remove_all(txt, "[[:punct:]]")
  # Drop a leading retweet marker.
  txt <- str_remove_all(txt, "^RT:? ")
  # Drop @mentions and #hashtags.
  # NOTE(review): punctuation is removed one step earlier, which presumably
  # strips "@" and "#" already, so these two patterns may never match. Confirm
  # before reordering -- downstream columns may rely on the current order.
  txt <- str_remove_all(txt, "@[[:alnum:]]+")
  txt <- str_remove_all(txt, "#[[:alnum:]]+")
  # Presumably the residue of "pic.twitter.com" once its dots are gone.
  txt <- str_remove_all(txt, "pictwittercom")
  # Turn line breaks into single spaces.
  txt <- str_replace_all(txt, "\\\n", " ")
  txt <- str_to_lower(txt)
  str_trim(txt, "both")
}
library(DT)
# List the columns available in the loaded workbook.
names(dat)
## [1] "id" "conversation_id" "created_at" "date"
## [5] "time" "timezone" "user_id" "username"
## [9] "name" "place" "tweet" "language"
## [13] "mentions" "urls" "photos" "replies_count"
## [17] "retweets_count" "likes_count" "hashtags" "cashtags"
## [21] "link" "retweet" "quote_url" "video"
## [25] "thumbnail" "near" "geo" "source"
## [29] "user_rt_id" "user_rt" "retweet_id" "reply_to"
## [33] "retweet_date" "translate" "trans_src" "trans_dest"
# Add a cleaned copy of the hashtag field as a new column.
dat$hashtags1 <- clean_tweets(dat$hashtags)
# Keep only the author and the cleaned hashtags. Columns are selected by name
# instead of by position (previously 8 and 37) so the selection does not
# silently pick the wrong fields if the workbook's column layout changes.
dat1 <- dat[, c("username", "hashtags1")]
head(dat1)
## # A tibble: 6 x 2
## username hashtags1
## <chr> <chr>
## 1 selfdrivingfeed autonomouscar selfdrivingcar
## 2 selfdrivingfeed autonomouscar selfdrivingcar
## 3 selfdrivingfeed autonomouscar selfdrivingcar
## 4 selfdrivingfeed autonomouscar selfdrivingcar
## 5 selfdrivingfeed autonomouscar selfdrivingcar
## 6 selfdrivingfeed autonomouscar selfdrivingcar
## tibble [107,315 x 2] (S3: tbl_df/tbl/data.frame)
## $ username : chr [1:107315] "selfdrivingfeed" "selfdrivingfeed" "selfdrivingfeed" "selfdrivingfeed" ...
## $ hashtags1: chr [1:107315] "autonomouscar selfdrivingcar" "autonomouscar selfdrivingcar" "autonomouscar selfdrivingcar" "autonomouscar selfdrivingcar" ...
## [1] "username" "hashtags1"
## [1] "Year" "Title"
library(tm)
library(quanteda)
# One corpus document per row of cleaned hashtags.
corp_tweets <- corpus(dat1$hashtags1)
# Tokenise explicitly before building the document-feature matrix: passing a
# corpus (and tokens() options such as remove_punct) straight to dfm() is
# deprecated in quanteda 3 and no longer accepted in quanteda 4, while the
# tokens() -> dfm() form works on older and newer releases alike.
tweet_dfm <- dfm(tokens(corp_tweets, remove_punct = TRUE))
head(tweet_dfm)
## Document-feature matrix of: 6 documents, 9,804 features (100.0% sparse).
## features
## docs autonomouscar selfdrivingcar 5g uber av aurora avs autonomousvehicles
## text1 1 1 0 0 0 0 0 0
## text2 1 1 0 0 0 0 0 0
## text3 1 1 0 0 0 0 0 0
## text4 1 1 0 0 0 0 0 0
## text5 1 1 0 0 0 0 0 0
## text6 1 1 0 0 0 0 0 0
## features
## docs drone virtualreality
## text1 0 0
## text2 0 0
## text3 0 0
## text4 0 0
## text5 0 0
## text6 0 0
## [ reached max_nfeat ... 9,794 more features ]
library(quanteda)
# Left disabled, as before.
# NOTE(review): on quanteda >= 3 textplot_network() is provided by
# quanteda.textplots; enable the next line if the plot call cannot be found.
###library("quanteda.textplots")
# dfm_select() is called without a pattern here.
# NOTE(review): presumably this keeps every feature -- confirm.
tag_dfm <- tweet_dfm %>% dfm_select()
# Feature co-occurrence counts across documents.
tag_fcm <- fcm(tag_dfm)
# Names of the 40 most frequent features.
toptag <- tag_dfm %>%
  topfeatures(40) %>%
  names()
head(tag_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## features
## features autonomouscar selfdrivingcar 5g uber av aurora
## autonomouscar 22 71368 1062 633 155 11
## selfdrivingcar 0 7 18 51 12 0
## 5g 0 0 11 1 4 0
## uber 0 0 0 2 7 2
## av 0 0 0 0 1 1
## aurora 0 0 0 0 0 0
# Restrict the co-occurrence matrix to the features named in `toptag`.
topgat_fcm <- tag_fcm %>% fcm_select(pattern = toptag)
# Draw the co-occurrence network of those features.
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_color = "orange",
  edge_alpha = 0.8,
  edge_size = 2
)

# Mention (@user) co-occurrence network ----
# One corpus document per raw tweet text (this overwrites the earlier
# `corp_tweets` / `tweet_dfm` objects built from the hashtag column).
corp_tweets <- corpus(dat$tweet)
# Tokenise explicitly before building the document-feature matrix: passing a
# corpus (and tokens() options such as remove_punct) straight to dfm() is
# deprecated in quanteda 3 and no longer accepted in quanteda 4, while the
# tokens() -> dfm() form works on older and newer releases alike.
tweet_dfm <- dfm(tokens(corp_tweets, remove_punct = TRUE))
head(tweet_dfm)
## Document-feature matrix of: 6 documents, 185,794 features (>99.99% sparse).
## features
## docs micron technology are we there yet today's adas technologies may
## text1 1 1 1 1 1 1 1 1 1 1
## text2 0 0 0 0 0 0 0 0 0 0
## text3 0 0 0 0 0 0 0 0 0 1
## text4 0 0 0 0 0 0 0 0 0 0
## text5 0 0 0 0 0 0 0 0 0 0
## text6 0 0 0 0 0 0 0 0 0 0
## [ reached max_nfeat ... 185,784 more features ]
# Keep only features matching the glob "@*", i.e. tokens that start with "@".
tag_dfm <- tweet_dfm %>% dfm_select(pattern = "@*")
# Feature co-occurrence counts among the retained features.
user_fcm <- fcm(tag_dfm)
# Names of the 50 most frequent retained features.
topuser <- tag_dfm %>%
  topfeatures(50) %>%
  names()
head(user_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## features
## features @kmmdisc @counterpointtr @neiltwitz @faddy0015 @ingliguori
## @kmmdisc 0 0 0 0 0
## @counterpointtr 0 0 1 1 0
## @neiltwitz 0 0 0 1 0
## @faddy0015 0 0 0 0 0
## @ingliguori 0 0 0 0 0
## @chairmanmdec 0 0 0 0 0
## features
## features @chairmanmdec
## @kmmdisc 0
## @counterpointtr 0
## @neiltwitz 0
## @faddy0015 0
## @ingliguori 1
## @chairmanmdec 0
# Restrict the co-occurrence matrix to the features named in `topuser`.
user_fcm <- user_fcm %>% fcm_select(pattern = topuser)
# Draw the co-occurrence network of those features.
textplot_network(
  user_fcm,
  min_freq = 0.1,
  edge_alpha = 0.8,
  edge_color = "orange",
  edge_size = 3
)
