library(foreign)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(boot)
# Read the SPSS file into a data frame (rather than read.spss()'s default list).
SCS <- read.spss(
  "SCS_QE.sav",
  to.data.frame = TRUE
)
## re-encoding from CP1252
## Warning in read.spss("SCS_QE.sav", to.data.frame = TRUE): Undeclared level(s) 0
## added in variable: married
# Bootstrap statistic for boot(): refit the regression of `mars` on `mathpre`
# on one resample and return the fitted line evaluated at every `mathpre`
# value of the full data set.
#
# data: data frame containing the columns `mars` and `mathpre`.
# i:    row indices selecting the resample (boot() supplies these).
# Returns a numeric vector with one prediction per row of `data`.
b.stat <- function(data, i) {
  b.dat <- data[i, , drop = FALSE]
  out.lm <- lm(mars ~ mathpre, data = b.dat)
  # Predict at the `mathpre` values of the `data` argument rather than the
  # global `SCS`, so the function does not depend on a variable defined
  # elsewhere and works for whatever data set it is handed.
  predict(out.lm, newdata = data.frame(mathpre = data[["mathpre"]]))
}
# Bootstrap the fitted regression line: each of the R = 2000 replicates refits
# the model on a resample and predicts at every observed `mathpre` value.
# No seed is set here, so the interval limits vary slightly between runs.
b.out <- boot(SCS, b.stat, R = 2000)

# Pointwise percentile intervals (boot.ci()'s default 95% level), one per row
# of SCS. Elements 4 and 5 of the `percent` component are the lower and upper
# limits. seq_len() avoids the 1:0 trap of 1:nrow(), and vapply() fixes the
# result to a 2 x n numeric matrix, which t() turns into n x 2.
b.ci <- t(vapply(
  seq_len(nrow(SCS)),
  function(x) boot.ci(b.out, index = x, type = "perc")$percent[4:5],
  numeric(2)
))
dimnames(b.ci) <- list(rownames(SCS), c("lower", "upper"))

# Attach the interval limits to the data so ggplot can map them.
combined <- cbind(SCS, b.ci)

# Jittered scatterplot with the least-squares line and the bootstrap
# confidence band drawn as a ribbon.
ggplot(combined, aes(x = mathpre, y = mars)) +
  geom_jitter() +
  theme_bw() +
  labs(
    x = "Mathematics Achievement Score",
    y = "Mathematics Anxiety Score",
    title = "Scatterplot of Mathematics Achievement and Mathematics Anxiety Scores"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, fill = "#69b3a2")
# Read the WHO COVID-19 global table export (CSV) into a tibble.
data <- read_csv(
  "WHO COVID-19 global table data October 28th 2021 at 2.03.36 PM.csv"
)
## New names:
## * `` -> ...13
## Rows: 238 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Name, WHO Region
## dbl (10): Cases - cumulative total, Cases - cumulative total per 100000 popu...
## lgl (1): ...13
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Countries to compare.
focus <- c(
  "Brazil", "United States of America", "Canada",
  "South Africa", "Ethiopia", "Kenya",
  "Iran (Islamic Republic of)", "Iraq", "Pakistan",
  "The United Kingdom", "Russian Federation", "Italy",
  "Philippines", "Japan", "Malaysia",
  "India", "Indonesia", "Thailand"
)

# Mortality rate = cumulative reported deaths / cumulative reported cases per
# country, with SE = sqrt(rate * (1 - rate) / cases). Bars show the rate and
# error bars show rate +/- 1.96 * SE, with one facet per WHO region.
data %>%
  filter(Name %in% focus) %>%
  mutate(
    rate = `Deaths - cumulative total` / `Cases - cumulative total`,
    SE = sqrt(rate * (1 - rate) / `Cases - cumulative total`)
  ) %>%
  ggplot(aes(x = Name, y = rate)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = rate - 1.96 * SE, ymax = rate + 1.96 * SE),
    width = 0.2
  ) +
  # Argument spelled out in full; `scale =` only worked via partial matching.
  facet_grid(~ `WHO Region`, scales = "free") +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    strip.text = element_text(size = 7)
  ) +
  # An earlier xlab("WHO: # Reported Deaths / # Reported Cases") was removed:
  # the later x label replaced it, so it was never displayed.
  labs(
    x = "Countries",
    y = "Mortality Rate",
    title = "WHO Reporting Barplots by Region"
  )
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the vaccine-effectiveness table (estimate plus lower/upper CI columns).
vaccine <- read_csv(
  "covid_vaccines.csv"
)
## Rows: 8 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Vaccine, Period
## dbl (3): VE against COVID-19 hospitalization (95% CI), Lower CI, Upper CI
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the table to inspect the columns used in the plot below.
vaccine
## # A tibble: 8 × 5
## Vaccine Period `VE against COVID… `Lower CI` `Upper CI`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Moderna Full sur… 93 91 95
## 2 Moderna 14–120 d… 93 90 95
## 3 Moderna >120 day… 92 87 96
## 4 Pfizer-BioNTech Full sur… 88 85 91
## 5 Pfizer-BioNTech 14–120 d… 91 88 93
## 6 Pfizer-BioNTech >120 day… 77 67 84
## 7 Janssen (Johnson & Johnson) Full sur… 71 56 81
## 8 Janssen (Johnson & Johnson) >28 days… 68 49 80
# Bar chart of vaccine effectiveness against hospitalization by surveillance
# period, one facet per vaccine, with error bars spanning the `Lower CI` and
# `Upper CI` columns.
plot <- ggplot(
  vaccine,
  aes(x = Period, y = `VE against COVID-19 hospitalization (95% CI)`)
) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(aes(ymin = `Lower CI`, ymax = `Upper CI`), width = 0.2) +
  # Argument spelled out in full; `scale =` only worked via partial matching.
  facet_grid(~ `Vaccine`, scales = "free") +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so it must come first. Previously the axis-title margin was
  # set before theme_bw() and was therefore discarded.
  theme_bw() +
  theme(
    axis.title.x = element_text(margin = margin(t = 70)),
    axis.text.x = element_text(size = 8, angle = 15),
    strip.text = element_text(size = 8)
  ) +
  ylab("Vaccine Effectiveness") +
  ggtitle("COVID-19 Vaccine Effectiveness Against Hospitalization")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot)
Overall, the Moderna vaccine shows the highest effectiveness against hospitalization in every period. Its error bars are also the shortest, meaning its 95% confidence intervals are the narrowest and its estimates the most precise. The Pfizer-BioNTech vaccine is the second most effective, with shorter error bars for the ‘14–120 days after full vaccination’ and ‘Full surveillance period’ periods than for the period more than 120 days after full vaccination, where its estimated effectiveness is also lower. The Janssen (Johnson & Johnson) vaccine is the least effective overall. It also has the longest error bars, indicating the widest confidence intervals and therefore the least precise estimates.