This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(httpuv)
library(tidyverse)
## -- Attaching packages ------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
# Twitter app name plus consumer key/secret (presumably for an rtweet token).
# The key and secret are read from environment variables (e.g. set in
# ~/.Renviron) so they never appear in the document or its knitted output.
# Sys.getenv() returns "" when a variable is unset.
# NOTE(review): the key/secret that used to be hard-coded here were published
# with this file and should be regenerated.
appname <- "XavierBAIS3"
key <- Sys.getenv("TWITTER_CONSUMER_KEY")
secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
# Load the saved tweet data set; readr guesses the column types on read.
Zoo <- read_csv(file = "zoo.csv")
## Parsed with column specification:
## cols(
## .default = col_character(),
## created_at = col_datetime(format = ""),
## display_text_width = col_integer(),
## is_quote = col_logical(),
## is_retweet = col_logical(),
## favorite_count = col_integer(),
## retweet_count = col_integer(),
## quoted_created_at = col_datetime(format = ""),
## quoted_favorite_count = col_integer(),
## quoted_retweet_count = col_integer(),
## quoted_followers_count = col_integer(),
## quoted_friends_count = col_integer(),
## quoted_statuses_count = col_integer(),
## quoted_verified = col_logical(),
## protected = col_logical(),
## followers_count = col_integer(),
## friends_count = col_integer(),
## listed_count = col_integer(),
## statuses_count = col_integer(),
## favourites_count = col_integer(),
## account_created_at = col_datetime(format = "")
## # ... with 1 more columns
## )
## See spec(...) for full column specifications.
# Persist the data set back to zoo.csv. The ID columns loaded by read_csv()
# already carry the "x" prefix, so prepend_ids is switched off here.
# NOTE(review): with the default prepend_ids = TRUE each knit would presumably
# add a further "x" to every ID in the file -- confirm against the rtweet
# version in use.
write_as_csv(Zoo, "zoo.csv", prepend_ids = FALSE)
# Hashtags for every row of Zoo, paired with the id of the posting user.
Hashtags <- select(Zoo, user_id, hashtags)
# Mean displayed text width of the tweets sent from each client (`source`).
# No na.rm is given, so a source with any missing width gets an NA mean.
TweetLengths <- summarise(
  group_by(Zoo, source),
  len = mean(display_text_width)
)
# Keep only the tweets whose place_name is "Cincinnati", trimmed to the
# columns that identify the user, client and place. The filter has to sit
# inside the pipeline; as a separate statement its result is never stored.
Cincinnatians <- Zoo %>%
  filter(place_name == "Cincinnati") %>%
  select(user_id, screen_name, source, name, place_name)
# Print the matching rows with all of their columns for inspection.
filter(Zoo, place_name == "Cincinnati")
## # A tibble: 4 x 88
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 x28276~ x1046205~ 2018-09-30 01:10:16 McBeast44 Much~ Insta~
## 2 x26902~ x1046198~ 2018-09-30 00:40:31 mrscazad Zoo ~ Insta~
## 3 x33520~ x1046140~ 2018-09-29 20:52:49 KarolineBa~ Just~ Insta~
## 4 x17935~ x1045703~ 2018-09-28 15:56:58 picklesnpo~ "i m~ Insta~
## # ... with 82 more variables: display_text_width <int>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, hashtags <chr>,
## # symbols <chr>, urls_url <chr>, urls_t.co <chr>,
## # urls_expanded_url <chr>, media_url <chr>, media_t.co <chr>,
## # media_expanded_url <chr>, media_type <chr>, ext_media_url <chr>,
## # ext_media_t.co <chr>, ext_media_expanded_url <chr>,
## # ext_media_type <chr>, mentions_user_id <chr>,
## # mentions_screen_name <chr>, lang <chr>, quoted_status_id <chr>,
## # quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## # quoted_favorite_count <int>, quoted_retweet_count <int>,
## # quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## # quoted_followers_count <int>, quoted_friends_count <int>,
## # quoted_statuses_count <int>, quoted_location <chr>,
## # quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <chr>,
## # retweet_source <chr>, retweet_favorite_count <chr>,
## # retweet_retweet_count <chr>, retweet_user_id <chr>,
## # retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <chr>, retweet_friends_count <chr>,
## # retweet_statuses_count <chr>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <chr>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>,
## # country <chr>, country_code <chr>, geo_coords <chr>,
## # coords_coords <chr>, bbox_coords <chr>, status_url <chr>, name <chr>,
## # location <chr>, description <chr>, url <chr>, protected <lgl>,
## # followers_count <int>, friends_count <int>, listed_count <int>,
## # statuses_count <int>, favourites_count <int>,
## # account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## # profile_expanded_url <chr>, account_lang <chr>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# Shows tweets only from the Cincinnati area, NEEDS HELP
# Tweets sent from the "Twitter for iPhone" client only, trimmed to the
# columns that identify the user plus their location. Without the filter
# step the table would contain users of every client.
IphoneFans <- Zoo %>%
  filter(source == "Twitter for iPhone") %>%
  select(user_id, screen_name, source, name, location)
# Media type recorded for each row of Zoo, alongside the posting user.
# Meant for looking at which media types accompany tweets about the
# Cincinnati Zoo -- probably most useful to check during potential media
# uproars, such as Kendi biting a guest.
Attached <- select(Zoo, user_id, screen_name, media_type)
# Rough audience size per row: the posting user's followers plus friends,
# intended as the number of users likely to be exposed to that user's
# tweet(s).
# NOTE(review): friends_count is presumably the accounts the user follows,
# who would not necessarily see the tweets -- confirm this sum is intended.
OnlinePopularity <- mutate(
  select(Zoo, followers_count, friends_count, user_id),
  popularity = followers_count + friends_count
)
# Scatter plot of favorite_count against followers_count. Per the author,
# it shows no truly predictable pattern between how many followers a user
# has and the number of favorites their post will get.
ggplot(Zoo, aes(x = followers_count, y = favorite_count)) +
  geom_point() +
  ggtitle("Unpredictability between Followers and Favoriting")
# Density plot of user activity, where activity is defined per row as
# statuses_count + favourites_count.
ggplot(
  transmute(Zoo, activity = statuses_count + favourites_count),
  aes(x = activity)
) +
  geom_density() +
  ggtitle(
    "Overall Activity of Users Tweeting about the Zoo",
    subtitle = "By Posts and Favorites"
  )
# Bar chart of the number of rows per client, restricted to the five
# Twitter clients listed below.
ggplot(
  filter(
    Zoo,
    source %in% c(
      "Twitter Lite", "Twitter for iPhone", "Twitter for Android",
      "TweetDeck", "Twitter Web Client"
    )
  ),
  aes(x = source)
) +
  geom_bar()
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.