Import data
# Make the folder that holds the input file the working directory, so the
# relative path passed to read_csv() further down resolves.
# NOTE(review): this absolute path is specific to one machine; setwd() errors
# wherever that directory does not exist -- consider a project-relative path.
setwd("/Users/allisontewksbury/Downloads")
library(formattable) #loading packages
library(readr)
library(psych)
# tidyverse is attached after psych, so ggplot2's %+%() and alpha() mask the
# psych functions of the same name (see the conflicts report printed below).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(ggplot2)
# Load the CSV into a tibble; the path is resolved against the working directory.
coviddata <- read_csv(file = "figure-data.csv")
## Rows: 221 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): Social Media, Twitter, Big Data, Google Trend, Facebook, Cell Phon...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first five rows to check that the columns were parsed as expected.
coviddata %>% head(n = 5)
## # A tibble: 5 × 8
## date `Social Media` Twitter `Big Data` `Google Trend` Facebook
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-29 0 0 0 1 0
## 2 2020-01-30 0 0 0 1 0
## 3 2020-01-31 0 0 0 1 0
## 4 2020-02-01 0 0 0 1 0
## 5 2020-02-02 0 0 0 1 0
## # ℹ 2 more variables: `Cell Phone` <dbl>, `Call Detail Records` <dbl>
# Derive the calendar month (1-12) of every observation in one vectorised call.
# lubridate::month() accepts the whole date column, so no row-by-row loop is
# needed, and assigning the complete column at once avoids writing into a
# column that does not exist yet.
coviddata$month <- month(coviddata$date)
How much discussion of Covid-19 happened before March of 2020?
# Number of rows recorded for each month value.
monthly_breakdown <- count(coviddata, month)
# Rows whose month is January or February, i.e. before March.
# filter() keeps exactly the matching rows and returns an ungrouped tibble, so
# no helper grouping column has to be created and then subset by name.
# NOTE(review): `month` carries no year, so this is only unambiguous while all
# dates fall in a single calendar year -- confirm.
early_discussion <- coviddata %>%
  filter(month < 3)

# Value held by the last pre-March row.
# NOTE(review): reporting the last row as the period's count presumes
# `Social Media` is a running total -- confirm against the data source.
before <- tail(early_discussion$`Social Media`, n = 1)

paste(
  "There was", before,
  "instance(s) of Covid-19 in article headlines in Social Media before March 2020"
)
## [1] "There was 1 instance(s) of Covid-19 in article headlines in Social Media before March 2020"
# Rows whose month is March.
# filter() keeps exactly the matching rows and returns an ungrouped tibble, so
# no helper grouping column has to be created and then subset by name.
discussion <- coviddata %>%
  filter(month == 3)

# Value held by the last March row.
# NOTE(review): if `Social Media` is a running total, this figure also contains
# the mentions recorded before March, so it is the total up to the end of
# March rather than the number added during March -- confirm.
during <- tail(discussion$`Social Media`, n = 1)

paste(
  "There was", during,
  "instance(s) of Covid-19 in article headlines in Social Media during March 2020"
)
## [1] "There was 18 instance(s) of Covid-19 in article headlines in Social Media during March 2020"
# Rows whose month is later than March.
# filter() keeps exactly the matching rows and returns an ungrouped tibble, so
# no helper grouping column has to be created and then subset by name.
# NOTE(review): `month` carries no year, so this is only unambiguous while all
# dates fall in a single calendar year -- confirm.
discussion <- coviddata %>%
  filter(month > 3)

# Value held by the last row of the collection period.
# NOTE(review): if `Social Media` is a running total, this is the overall total
# rather than the number of mentions added after March -- confirm.
after <- tail(discussion$`Social Media`, n = 1)

paste(
  "There was", after,
  "instance(s) of Covid-19 in article headlines in Social Media after March 2020 through the rest of the data collection period"
)
## [1] "There was 530 instance(s) of Covid-19 in article headlines in Social Media after March 2020 through the rest of the data collection period"
Plots
# Two-column table for plotting: month number and the Social Media value.
# tibble() replaces the deprecated data_frame(), and the explicit column names
# let aes() refer to columns of `time` instead of reaching back into coviddata.
time <- tibble(
  month = coviddata$month,
  social_media = coviddata$`Social Media`
)

# One bar per month. geom_col() is the same as geom_bar(stat = "identity"):
# it stacks every row of a month, so bar height is the sum of that month's
# values.
# NOTE(review): if `Social Media` is a running total, summing the rows
# over-counts -- confirm that stacked sums are what the figure should show.
ggplot(time, aes(x = month, y = social_media)) +
  geom_col() +
  labs(x = "Month", y = "Number of Mentions on Social Media")
