# Packages used throughout this analysis
library(rtweet)
library(dplyr)
library(ggplot2)
library(tidytext)

# Pull up to 1,000 recent English-language tweets, excluding retweets
edgecompute <- search_tweets("Edge Computing", n = 1000, include_rts = FALSE, lang = "en")
# Keep only tweets that carry no hashtags
eh <- subset(edgecompute, is.na(edgecompute$hashtags))
Then, create a separate data frame containing the number of organic tweets, retweets, and replies. These numbers are easy to find: they are simply the row counts of the three respective subsets, as in the sketch below.
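Those subsets are not constructed elsewhere in this walkthrough, so here is a minimal sketch, assuming the classic (pre-1.0) rtweet columns is_retweet and reply_to_status_id. Note that because the search above used include_rts = FALSE, the retweets subset will stay empty unless you re-run the search with include_rts = TRUE.

# Organic tweets: neither retweets nor replies
edge_organic <- subset(edgecompute, is_retweet == FALSE & is.na(reply_to_status_id))
# Retweets (empty here, since the search excluded them)
edge_retweets <- subset(edgecompute, is_retweet == TRUE)
# Replies to another tweet
edge_replies <- subset(edgecompute, !is.na(reply_to_status_id))
tweet_types <- data.frame(
  category = c("Organic", "Retweets", "Replies"),
  count = c(nrow(edge_organic), nrow(edge_retweets), nrow(edge_replies))
)

The donut-chart steps that follow work the same way on tweet_types. Next, build a similar summary by source, i.e. the Twitter client each tweet was posted from: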
# Count tweets per source (the client used to post)
edge_sources <- edgecompute %>%
  select(source) %>%
  group_by(source) %>%
  summarize(count = n())
# Drop rarely used clients to keep the donut chart readable
edge_sources <- subset(edge_sources, count > 11)

data <- data.frame(
  category = edge_sources$source,
  count = edge_sources$count
)
# Compute each slice's share and its cumulative bounds for the donut chart
data$fraction <- data$count / sum(data$count)
data$percentage <- data$count / sum(data$count) * 100
data$ymax <- cumsum(data$fraction)
data$ymin <- c(0, head(data$ymax, n = -1))
# Round the percentages and build the legend labels
data$percentage <- round(data$percentage, 2)
data$Source <- paste(data$category, data$percentage, "%")
ggplot(data, aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = Source)) +
  geom_rect() +
  coord_polar(theta = "y") + # Remove this line to see the stacked bar the donut is built from
  xlim(c(2, 4)) + # The empty space from 2 to 3 creates the hole in the middle
  theme_void() +
  theme(legend.position = "right")
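To keep the chart, ggplot2's ggsave() writes the most recently displayed plot to disk; the file name below is just an example:

ggsave("edge_tweet_sources.png", width = 7, height = 5) # saves the last plot shown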
## Most frequent words found in edge computing tweets
edgecompute$text <- gsub("https\\S*", "", edgecompute$text)
edgecompute$text <- gsub("@\\S*", "", edgecompute$text)
# Tokenize the cleaned text into one word per row
tweets <- edgecompute %>%
  select(text) %>%
  unnest_tokens(word, text)
# Remove common stop words using tidytext's built-in list
tweets <- tweets %>%
  anti_join(stop_words)
## Joining, by = "word"
tweets %>% # Bar chart of the most frequent words found in the tweets
  count(word, sort = TRUE) %>%
  top_n(15) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(y = "Count",
       x = "Unique words",
       title = "Most frequent words found in the tweets related to edge computing",
       subtitle = "Stop words removed from the list")
## Selecting by n