Honor Pledge: I have recreated my group submission using the tools I have installed on my own computer.
# Load the SPSS data set and have it returned as a data frame.
SCS_data <- read.spss(
  file = "/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/SCS_QE.sav",
  to.data.frame = TRUE
)
## re-encoding from CP1252
## Warning in read.spss("/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10
## - error bars/SCS_QE.sav", : Undeclared level(s) 0 added in variable: married
# Bootstrap statistic for boot(): refit the simple linear regression of `mars`
# on `mathpre` to one resample and return its predictions.
#
# data: data frame containing the columns `mars` and `mathpre`.
# i:    row indices selecting the resample from `data`.
#
# Returns a numeric vector with one predicted `mars` value per row of `data`.
b.stat <- function(data, i) {
  b.dat <- data[i, ]
  out.lm <- lm(mars ~ mathpre, data = b.dat)
  # Predict at the x-values of the full data set passed in as `data`, not at
  # those of the resample, so every replicate is evaluated at the same points.
  # Using `data` here keeps the function independent of any global object.
  predict(out.lm, newdata = data.frame(mathpre = data$mathpre))
}
# Restrict the analysis to the first 100 cases.
SCS_data2 <- SCS_data[seq_len(100), ]

# Draw 2000 bootstrap resamples of the rows; b.stat() supplies the statistic
# computed on each resample.
b.out <- boot(data = SCS_data2, statistic = b.stat, R = 2000)

# 95% percentile interval for the first element of the statistic.
boot.ci(boot.out = b.out, index = 1, type = "perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = b.out, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% (52.49, 60.23 )
## Calculations and Intervals on Original Scale
# Percentile interval bounds for every element of the bootstrap statistic.
# Entries 4 and 5 of boot.ci()$percent are taken as the lower and upper end
# points. vapply() fixes the shape of each result to two numbers, so the
# outcome is always a matrix with one row per case and two columns.
b.ci <- t(vapply(
  seq_len(nrow(SCS_data2)),
  function(x) boot.ci(b.out, index = x, type = "perc")$percent[4:5],
  numeric(2)
))
dimnames(b.ci) <- list(rownames(SCS_data2), c("lower", "upper"))
#kable(head(b.ci, 4))
# Plot with bootstrap confidence intervals
# Append the interval bounds (columns `lower` and `upper`) to the analysis data.
SCS_data3 <- cbind(SCS_data2, b.ci)

# Scatter plot with the fitted regression line; the ribbon is drawn between the
# `lower` and `upper` columns.
ggplot(data = SCS_data3, mapping = aes(x = mathpre, y = mars)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "Math Proxy Pre-Test Score",
    y = "Math Anxiety Score",
    title = "Math Pre-Test Score vs. Anxiety Score for Math"
  ) +
  theme_bw() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_ribbon(
    mapping = aes(ymin = lower, ymax = upper),
    alpha = 0.3,
    fill = "#69b3a2"
  )
# data cleaning
covid <- read.csv("C:/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/WHO COVID-19 global table data November 1st 2021 at 4.44.37 PM.csv")

# Turn the row names into a regular first column, then relabel the columns.
covid <- tibble::rownames_to_column(covid, "Names")
names(covid) <- c(
  "Name",
  "WHO.Region",
  "Cases...cumulative.total",
  "Cases...cumulative.total.per.100000.population",
  "Cases...newly.reported.in.last.7.days",
  "Cases...newly.reported.in.last.7.days.per.100000.population",
  "Cases...newly.reported.in.last.24.hours",
  "Deaths...cumulative.total",
  "Deaths...cumulative.total.per.100000.population",
  "Deaths...newly.reported.in.last.7.days",
  "Deaths...newly.reported.in.last.7.days.per.100000.population",
  "Deaths...newly.reported.in.last.24.hours"
)

# Drop the last column.
# NOTE(review): presumably a surplus trailing column left over from how the
# CSV was parsed -- confirm against the file.
covid <- covid[seq_len(ncol(covid) - 1L)]
# creating subset
countries <- c(
  "France", "Italy", "Spain",
  "South Africa", "Nigeria", "Ethiopia",
  "United States of America", "Brazil", "Mexico",
  "Iraq", "Pakistan", "Saudi Arabia",
  "India", "Thailand", "Indonesia",
  "Philippines", "Japan", "Singapore"
)
sub_covid <- covid[covid$Name %in% countries, ]

# adding mortality rate and SE columns
# MortalityRate = cumulative deaths / cumulative cases.
# SE = sqrt(p * (1 - p) / n), with p the mortality rate and n the cumulative
# case count. Columns are referenced by bare name so mutate() resolves them
# from the data frame it is operating on.
sub_covid <- sub_covid %>%
  mutate(
    MortalityRate = Deaths...cumulative.total / Cases...cumulative.total,
    SE = sqrt(MortalityRate * (1 - MortalityRate) / Cases...cumulative.total)
  )

# Shorten the longest country label.
sub_covid$Name[sub_covid$Name == "United States of America"] <- "USA"
# plotting graph
# One bar per country showing its mortality rate, faceted by WHO region with
# free scales. Error bars span MortalityRate -/+ 1.96 * SE.
ggplot(sub_covid, aes(x = Name, y = MortalityRate)) +
  # geom_col() plots the y values as given (same as geom_bar(stat = "identity")).
  geom_col() +
  # `scales` is spelled out in full rather than relying on partial matching.
  facet_grid(
    ~WHO.Region,
    scales = "free",
    labeller = label_wrap_gen(width = 10)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 40, vjust = 1, hjust = 1)) +
  geom_errorbar(
    aes(
      ymin = MortalityRate - 1.96 * SE,
      ymax = MortalityRate + 1.96 * SE
    ),
    width = .2
  ) +
  labs(
    x = "Country",
    y = "Mortality Rate",
    title = "WHO: # Reported Deaths / # Reported Cases"
  )
See TABLE 2. COVID-19 vaccine effectiveness against COVID-19–associated hospitalization among adults without immunocompromising conditions, by vaccine product — 21 hospitals in 18 U.S. states, March–August 2021 from a recent study on Comparative Effectiveness of Moderna, Pfizer-BioNTech, and Janssen (Johnson & Johnson) Vaccines.
Draw your best plot to visualize results of VE against COVID-19 hospitalization (95% CI), i.e., the third column of TABLE 2.
First, save data about Vaccine/Period and VE against COVID-19 hospitalization (95% CI).
You can either create a .csv file including VE against COVID-19 hospitalization (95% CI) and Vaccine/Period, or build the data directly with data.frame() or tibble().
Second, draw bar plots with error bars, using the data you saved in the first step.
You could use the facets function as in Part 2.
Or, you could draw separate bar plots for each vaccine and collect three bar plots into a single figure using something like, e.g., gridExtra::grid.arrange(). Here are some helpful notes and explanations.
# Read in the CSV I filled out using the table as a reference
vax.data <- read.csv(
  file = "C:/Users/student/Desktop/Fourth Year/DS 3003 R Codes/Week 10 - error bars/Vaccine Efficacy.csv"
)

# Creating my plot
# One panel per vaccine: bars give the point estimate for each period and the
# error bars run from the lower to the upper bound columns.
ggplot(data = vax.data, mapping = aes(x = Period, fill = Vaccine)) +
  geom_col(mapping = aes(y = VE.point.estimate)) +
  geom_errorbar(
    mapping = aes(ymin = VE.lower.bound, ymax = VE.upper.bound),
    width = .2
  ) +
  facet_wrap(~Vaccine, scales = "free") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = .5)
  ) +
  labs(
    x = "Observation Period",
    y = "Efficacy",
    title = "Comparing Vaccine Efficacy Against Hospitalizations"
  )