Covid Data

Import data

# Make relative file paths (such as the CSV read further down) resolve against
# this folder.
# NOTE(review): the path is absolute and user-specific, so the script runs
# unchanged only on a machine where this directory exists.
setwd("/Users/allisontewksbury/Downloads")
# Attach the packages for the session; tidyverse is attached after psych, so
# its exports take precedence where the names collide.
library(formattable)
library(readr)
library(psych)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(lubridate)
library(ggplot2)
# Read the figure data from the working directory; column types are guessed
# by readr.
coviddata <- read_csv(file = "figure-data.csv")

## Rows: 221 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (7): Social Media, Twitter, Big Data, Google Trend, Facebook, Cell Phon...
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the first five rows as a quick sanity check of the import.
head(coviddata, n = 5)

## # A tibble: 5 × 8
##   date       `Social Media` Twitter `Big Data` `Google Trend` Facebook
##   <date>              <dbl>   <dbl>      <dbl>          <dbl>    <dbl>
## 1 2020-01-29              0       0          0              1        0
## 2 2020-01-30              0       0          0              1        0
## 3 2020-01-31              0       0          0              1        0
## 4 2020-02-01              0       0          0              1        0
## 5 2020-02-02              0       0          0              1        0
## # ℹ 2 more variables: `Cell Phone` <dbl>, `Call Detail Records` <dbl>

# Add a `month` column holding the calendar month number of each row's date.
# lubridate::month() is vectorised, so a single assignment fills the whole
# column. This avoids the row-by-row loop, which wrote into a column that did
# not exist yet (triggering an "Unknown or uninitialised column" warning) and
# which would iterate over c(1, 0) if the data had zero rows.
coviddata$month <- month(coviddata$date)

## Warning: Unknown or uninitialised column: `month`.

How much discussion of Covid-19 happened before March of 2020?

# Number of rows recorded for each month (how much data each month holds).
monthly_breakdown <- count(coviddata, month)

# Group on the logical expression `month < 3`; this adds a column of that name,
# which is then used to keep only the rows where it is TRUE.
before_march <- group_by(coviddata, month < 3)
early_discussion <- subset(before_march, `month < 3` == TRUE)

# Report the `Social Media` value from the last retained row.
# NOTE(review): this is the final row's value, not a sum over the period, so it
# is only a period total if the column is a running count -- confirm.
paste(
  "There was",
  tail(early_discussion[["Social Media"]], 1),
  "instance(s) of Covid-19 in article headlines in Social Media before March 2020"
)

## [1] "There was 1 instance(s) of Covid-19 in article headlines in Social Media before March 2020"

# Keep the last pre-March `Social Media` value for later use.
before <- tail(early_discussion[["Social Media"]], 1)

# Group on the logical expression `month == 3` and keep only the rows where it
# is TRUE.
during_march <- group_by(coviddata, month == 3)
discussion <- subset(during_march, `month == 3` == TRUE)

# Report the `Social Media` value from the last March row.
# NOTE(review): this is the final row's value, not a sum or a difference from
# the pre-March figure -- confirm that is the intended measure.
paste(
  "There was",
  tail(discussion[["Social Media"]], 1),
  "instance(s) of Covid-19 in article headlines in Social Media during March 2020"
)

## [1] "There was 18 instance(s) of Covid-19 in article headlines in Social Media during March 2020"

# Keep the last `Social Media` value of whatever `discussion` currently holds
# (the March rows at this point in the script).
during <- tail(discussion[["Social Media"]], 1)

# Group on the logical expression `month > 3` and keep only the rows where it
# is TRUE. This overwrites `discussion`, which previously held the March rows.
after_march <- group_by(coviddata, month > 3)
discussion <- subset(after_march, `month > 3` == TRUE)

# Report the `Social Media` value from the last retained row.
# NOTE(review): this is the final row's value, not a sum over the period --
# confirm that is the intended measure.
paste(
  "There was",
  tail(discussion[["Social Media"]], 1),
  "instance(s) of Covid-19 in article headlines in Social Media after March 2020 through the rest of the data collection period"
)

## [1] "There was 530 instance(s) of Covid-19 in article headlines in Social Media after March 2020 through the rest of the data collection period"

# Keep the last `Social Media` value of whatever `discussion` currently holds
# (the post-March rows at this point in the script).
after <- tail(discussion[["Social Media"]], 1)

Plots

# Two-column table of month number and Social Media value. Built with tibble()
# because data_frame() is deprecated and warns on use. The unnamed arguments
# keep the same auto-generated column names as before.
time <- tibble(coviddata$month, coviddata$`Social Media`)

## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Bar chart of Social Media values by month number. The aesthetics use bare
# column names so they are looked up in the data frame given to ggplot(),
# rather than reaching into the global `coviddata` with `$` and ignoring the
# `data` argument. The plotted values are the same as before.
# With stat = "identity", every row's value is stacked within its month's bar.
ggplot(coviddata, aes(x = month, y = `Social Media`)) +
  geom_bar(stat = "identity") +
  labs(x = "Month", y = "Number of Mentions on Social Media")

Covid Data

Allison Tewksbury

2024-09-17

Import data

How much discussion of Covid-19 happened before March of 2020?

Plots