Honor Pledge: I have recreated my group submission using the tools I have installed on my own computer.

Part 1

# Import the SPSS data set for Part 1 as a data frame.
SCS_data <- read.spss(
  file = '/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/SCS_QE.sav',
  to.data.frame = TRUE
)
## re-encoding from CP1252
## Warning in read.spss("/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10
## - error bars/SCS_QE.sav", : Undeclared level(s) 0 added in variable: married
# Bootstrap statistic for boot(): refit the regression of mars on mathpre on
# one resample and return the fitted values at the original mathpre scores.
#
# data: data frame with columns `mars` and `mathpre`; boot() passes the full
#       original data set here on every call.
# i:    row indices of the current bootstrap resample, supplied by boot().
#
# Returns a numeric vector with one prediction per row of `data`.
b.stat <- function(data, i) {
  b.dat <- data[i, ]
  out.lm <- lm(mars ~ mathpre, data = b.dat)
  # Predict at the mathpre values of `data` itself instead of reaching for the
  # global SCS_data2, so the function works for whatever data boot() is given.
  predict(out.lm, newdata = data.frame(mathpre = data$mathpre))
}

# Subset of the first 100 cases. head() returns at most 100 rows and, unlike
# SCS_data[1:100, ], never pads with all-NA rows when the data set is shorter.
SCS_data2 <- head(SCS_data, 100)

# Fix the RNG seed so the resampling, and every interval derived from it,
# can be reproduced from run to run.
set.seed(3003)
b.out <- boot(SCS_data2, b.stat, R = 2000)

boot.ci(b.out, index = 1, type = "perc") # 95% CI for the first observation
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = b.out, type = "perc", index = 1)
## 
## Intervals : 
## Level     Percentile     
## 95%   (52.49, 60.23 )  
## Calculations and Intervals on Original Scale
# Percentile bootstrap interval for every fitted value: one row per
# observation, columns are the lower and upper limits.
# vapply() + seq_len() keep the result a two-column matrix for any number of
# rows (sapply() over 1:nrow() collapses or misbehaves for 0 or 1 rows).
b.ci <- t(vapply(
  seq_len(nrow(SCS_data2)),
  # elements 4 and 5 of the `percent` component are the interval end points
  function(x) boot.ci(b.out, index = x, type = 'perc')$percent[4:5],
  numeric(2)
))
dimnames(b.ci) <- list(rownames(SCS_data2), c('lower', 'upper'))
#kable(head(b.ci, 4))
# Plot with bootstrap confidence intervals
SCS_data3 <- cbind(SCS_data2, b.ci) # combine two datasets
# Scatter plot with the fitted regression line and the bootstrap band.
ggplot(SCS_data3, aes(x = mathpre, y = mars)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "Math Proxy Pre-Test Score",
    y = "Math Anxiety Score",
    title = "Math Pre-Test Score vs. Anxiety Score for Math"
  ) +
  theme_bw() +
  geom_smooth(method = 'lm', formula = y ~ x, se = FALSE) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, fill = "#69b3a2")

Part 2

# data cleaning
covid <- read.csv("C:/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/WHO COVID-19 global table data November 1st 2021 at 4.44.37 PM.csv")
# Turn the row names into an ordinary first column so they can be used as data.
covid <- tibble::rownames_to_column(covid, "Names")
# Relabel the columns from left to right; the first column is now the
# former row names.
names(covid) <- c(
  'Name',
  'WHO.Region',
  'Cases...cumulative.total',
  'Cases...cumulative.total.per.100000.population',
  'Cases...newly.reported.in.last.7.days',
  'Cases...newly.reported.in.last.7.days.per.100000.population',
  'Cases...newly.reported.in.last.24.hours',
  'Deaths...cumulative.total',
  'Deaths...cumulative.total.per.100000.population',
  'Deaths...newly.reported.in.last.7.days',
  'Deaths...newly.reported.in.last.7.days.per.100000.population',
  'Deaths...newly.reported.in.last.24.hours'
)
# Drop the last column.
# NOTE(review): presumably a surplus column caused by the file's header being
# one field shorter than its rows; confirm against the CSV.
covid <- covid[seq_len(ncol(covid) - 1L)]

# creating subset 
# Countries to compare across WHO regions.
countries <- c(
  'France', 'Italy', 'Spain',
  'South Africa', 'Nigeria', 'Ethiopia',
  'United States of America', 'Brazil', 'Mexico',
  'Iraq', 'Pakistan', 'Saudi Arabia',
  'India', 'Thailand', 'Indonesia',
  'Philippines', 'Japan', 'Singapore'
)
sub_covid <- covid[covid$Name %in% countries, ]

# adding mortality rate and SE columns
# Columns are referenced by bare name so mutate() reads them from the data it
# is transforming rather than from a second copy looked up via sub_covid$...
sub_covid <- sub_covid %>%
  mutate(
    # share of reported cases that ended in a reported death
    MortalityRate = Deaths...cumulative.total / Cases...cumulative.total,
    # standard error of a proportion: sqrt(p * (1 - p) / n)
    SE = sqrt(MortalityRate * (1 - MortalityRate) / Cases...cumulative.total)
  )
# Shorten the longest label so it fits on the axis.
sub_covid$Name[sub_covid$Name == "United States of America"] <- "USA"

# plotting graph
# Bar chart of mortality rate per country, one panel per WHO region, with
# error bars of +/- 1.96 standard errors.
ggplot(sub_covid, aes(x = Name, y = MortalityRate)) +
  geom_bar(stat = "identity") +
  # `scales` is spelled out in full; the previous `scale=` only worked through
  # partial argument matching.
  facet_grid(~WHO.Region, scales = "free", labeller = label_wrap_gen(width = 10)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 40, vjust = 1, hjust = 1)) +
  geom_errorbar(
    aes(ymin = MortalityRate - 1.96 * SE, ymax = MortalityRate + 1.96 * SE),
    width = .2
  ) +
  labs(x = 'Country', y = 'Mortality Rate', title = 'WHO: # Reported Deaths / # Reported Cases')

Part 3

  • See TABLE 2. COVID-19 vaccine effectiveness against COVID-19–associated hospitalization among adults without immunocompromising conditions, by vaccine product — 21 hospitals in 18 U.S. states, March–August 2021 from a recent study on Comparative Effectiveness of Moderna, Pfizer-BioNTech, and Janssen (Johnson & Johnson) Vaccines.

  • Draw your best plot to visualize results of VE against COVID-19 hospitalization (95% CI), i.e., the third column of TABLE 2.

  • First, save data about Vaccine/Period and VE against COVID-19 hospitalization (95% CI).

    • You could create a .csv file including VE against COVID-19 hospitalization (95% CI) and Vaccine/Period.
    • Or, you could save data in R directly, say using data.frame() or tibble().
  • Second, draw bar plots with error bars, using the data you saved in the first step.

    • You could use the facets function as in Part 2.

    • Or, you could draw separate bar plots for each vaccine and collect three bar plots into a single figure using something like, e.g., gridExtra::grid.arrange(). Here are some helpful notes and explanations.

#Read in the CSV I filled out using the table as a reference
vax.data <- read.csv(
  file = "C:/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/Vaccine Efficacy.csv"
)

# Bar plot of the point estimates with error bars from the lower and upper
# bound columns, one panel per vaccine.
ggplot(vax.data, aes(x = Period, fill = Vaccine)) +
  geom_col(aes(y = VE.point.estimate)) +
  geom_errorbar(aes(ymin = VE.lower.bound, ymax = VE.upper.bound), width = .2) +
  facet_wrap(~Vaccine, scales = "free") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = .5)
  ) +
  labs(
    x = "Observation Period",
    y = "Efficacy",
    title = "Comparing Vaccine Efficacy Against Hospitalizations"
  )