1 Honor Pledge

Honor Pledge: I have recreated my group submission using the tools I have installed on my own computer.

2 Part 1

# Read the SPSS data file "SCS_QE.sav" and return it as a data frame.
df <- read.spss("SCS_QE.sav", to.data.frame = TRUE)
## re-encoding from CP1252
## Warning in read.spss("SCS_QE.sav", to.data.frame = TRUE): Undeclared level(s) 0
## added in variable: married
#' Bootstrap statistic: fitted values from a regression on a resample.
#'
#' Fits `mars ~ mathpre` on the rows of `data` selected by `i`, then predicts
#' at every `mathpre` value of the full `data`, so each replicate returns one
#' prediction per original row.
#'
#' @param data Data frame containing the columns `mars` and `mathpre`.
#' @param i Row indices defining the bootstrap resample.
#' @return Vector of predicted `mars` values, one per row of `data`.
b1 <- function(data, i){
  b.data <- data[i, ]
  output <- lm(mars ~ mathpre, data = b.data)
  # Predict from the `data` argument rather than a global `df`, so the
  # function does not depend on a variable defined outside of it.
  predict(output, newdata = data.frame(mathpre = data$mathpre))
}

# Bootstrap the fitted regression line: 2000 resamples of `df`, each returning
# one prediction per original row via `b1`.
# NOTE(review): no seed is set in the visible code, so the band may differ
# between runs - confirm whether that is intended.
b1o <- boot(df, b1, R = 2000)

# Percentile interval for every row. Each row of `$percent` holds the level,
# two order-statistic indices, then the lower and upper limits, so entries
# 4:5 are the limits. `vapply` fixes the result shape, and `seq_len` is safe
# for an empty data frame.
bconfidence_interval <- t(vapply(
  seq_len(nrow(df)),
  function(x) boot.ci(b1o, index = x, type = "perc")$percent[4:5],
  numeric(2)
))
dimnames(bconfidence_interval) <- list(rownames(df), c("lower", "upper"))

# Attach the limits to the data so the ribbon can use them as aesthetics.
com <- cbind(df, bconfidence_interval)

# Jittered scatterplot with the least-squares line and the bootstrap band.
ggplot(com, aes(x = mathpre, y = mars)) +
  geom_jitter() +
  theme_bw() +
  labs(
    x = "mathematics achievement score",
    y = "mathematics anxiety score",
    title = "scatterplot of mathpre vs mars"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = .3, fill = "red")

3 Part 2

# Load the COVID summary table from "covid.csv".
df2 <- read_csv("covid.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Name = col_character(),
##   `WHO Region` = col_character(),
##   `Cases - cumulative total` = col_double(),
##   `Cases - cumulative total per 100000 population` = col_double(),
##   `Cases - newly reported in last 7 days` = col_double(),
##   `Cases - newly reported in last 7 days per 100000 population` = col_double(),
##   `Cases - newly reported in last 24 hours` = col_double(),
##   `Deaths - cumulative total` = col_double(),
##   `Deaths - cumulative total per 100000 population` = col_double(),
##   `Deaths - newly reported in last 7 days` = col_double(),
##   `Deaths - newly reported in last 7 days per 100000 population` = col_double(),
##   `Deaths - newly reported in last 24 hours` = col_double()
## )
## Warning: 1 parsing failure.
## row col   expected     actual        file
##   1  -- 12 columns 13 columns 'covid.csv'
# Countries to display. Names must match the `Name` column exactly: a stray
# leading space (as in " Ethiopia") makes `%in%` silently drop that country.
regions <- c(
  "Brazil", "United States of America", "Canada",
  "South Africa", "Ethiopia", "Kenya",
  "Iran (Islamic Republic of)", "Iraq", "Pakistan",
  "The United Kingdom", "Russian Federation", "Italy",
  "Philippines", "Japan", "Malaysia",
  "India", "Indonesia", "Thailand"
)

# Deaths per reported case for each selected country, with a normal-
# approximation 95% interval for a proportion (rate +/- 1.96 * SE), shown as
# bars faceted by WHO region.
df2 %>%
  filter(Name %in% regions) %>%
  mutate(
    rate = `Deaths - cumulative total` / `Cases - cumulative total`,
    SE = sqrt(rate * (1 - rate) / `Cases - cumulative total`)
  ) %>%
  ggplot(aes(x = Name, y = rate)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  theme_bw() +
  xlab("WHO: # Reported Deaths / # Reported Cases") +
  # `scales` is spelled out in full instead of relying on partial matching.
  facet_grid(~`WHO Region`, scales = "free") +
  geom_errorbar(
    aes(ymin = rate - 1.96 * SE, ymax = rate + 1.96 * SE),
    width = .2
  ) +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    strip.text = element_text(size = 7)
  )

4 Part 3

# Read the vaccine-effectiveness table; `check.names = FALSE` keeps the
# original column names (with spaces and symbols) so they can be referenced
# with backticks below.
df3 <- read.csv("covid_vaccines.csv", check.names = FALSE)

# Bar chart of effectiveness by period, one facet per vaccine, with error bars
# taken from the `Lower CI` / `Upper CI` columns.
plot <- ggplot(
  df3,
  aes(x = Period, y = `VE against COVID-19 hospitalization (95% CI)`)
) +
  geom_bar(position = position_dodge(), stat = "identity") +
  # `scales` is spelled out in full instead of relying on partial matching.
  facet_grid(~ `Vaccine`, scales = "free") +
  geom_errorbar(aes(ymin = `Lower CI`, ymax = `Upper CI`), width = .2) +
  theme_bw() +
  # Element tweaks must come after theme_bw(): a complete theme replaces any
  # theme() settings added before it, which discarded the x-title margin.
  theme(
    axis.title.x = element_text(margin = margin(t = 70)),
    axis.text.x = element_text(size = 8, angle = 15),
    strip.text = element_text(size = 8)
  ) +
  ylab("Vaccine Effectiveness") +
  ggtitle("COVID-19 Vaccine Effectiveness Against Hospitalization")
ggplotly(plot)
#plot