data).
You can check the definition of each variable here.
# Read the CSV file from the working directory into a data frame.
covid_data <- read.csv(file = "covid_data_states.csv")
# Open the data for inspection.
# NOTE(review): lowercase view() is presumably tibble's wrapper around
# utils::View() -- confirm tibble/tidyverse is attached before this line.
view(covid_data)
Describe the dataset: The data set contains detailed information on the Covid-19 virus, with variables such as state, country, population, positivity ratio, case density, infection rate, and more.
# Number of columns in the raw data (before the derived columns are added).
ncol(covid_data)
## [1] 52
# Add per-capita measures by dividing each raw count by the population.
# NOTE(review): tests_per_capita is built from actuals.positiveTests, while
# the assignment text speaks of "new cases per capita" -- confirm the
# intended source column and name.
# NOTE(review): vaccinations_per_capita uses vaccines *distributed*; confirm
# that this, rather than an administered/completed count, is intended.
covid_data <- covid_data %>%
  mutate(
    tests_per_capita = actuals.positiveTests / population,
    deaths_per_capita = actuals.deaths / population,
    vaccinations_per_capita = actuals.vaccinesDistributed / population
  )
Use the skim function from the skimr
package to obtain common summary statistics for the variables new cases
per capita, deaths per capita, and vaccinations per capita. (Hint: Use
dplyr to select the variables and then simply pipe (%>%) to
skim().)
# Keep only the three per-capita measures and summarise them with skim().
summary_stats <- covid_data %>%
  select(tests_per_capita, deaths_per_capita, vaccinations_per_capita) %>%
  skim()
# Print the result: assigning it alone produces no output, so the summary
# statistics would otherwise never appear in the report.
summary_stats
I do have priors: I would assume that more vaccinations per capita would decrease deaths per capita. To get a first insight, I would use common sense to form a general understanding of what the data should look like and do some research on the topic.
# Scatter plot of deaths per capita (y) against vaccinations per capita (x),
# one point per row of covid_data.
ggplot(
  data = covid_data,
  mapping = aes(x = vaccinations_per_capita, y = deaths_per_capita)
) +
  geom_point() +
  labs(x = "Vaccinations per capita", y = "Deaths per capita")
# Same scatter plot using the raw counts instead of the per-capita values:
# vaccines distributed on the x axis, deaths on the y axis.
ggplot(
  data = covid_data,
  mapping = aes(x = actuals.vaccinesDistributed, y = actuals.deaths)
) +
  geom_point() +
  labs(x = "Vaccinations", y = "Deaths")
# Simple linear regression of deaths per capita on vaccinations per capita.
# The formula is written inline so the "Call:" line printed by summary()
# shows the model specification.
regression <- lm(
  formula = deaths_per_capita ~ vaccinations_per_capita,
  data = covid_data
)
# Coefficient table, residual quantiles, R-squared and F-test.
summary(regression)
##
## Call:
## lm(formula = deaths_per_capita ~ vaccinations_per_capita, data = covid_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0015679 -0.0004479 0.0001212 0.0003763 0.0014222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0031076 0.0008504 3.654 0.000609 ***
## vaccinations_per_capita -0.0010184 0.0006382 -1.596 0.116727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0006721 on 51 degrees of freedom
## Multiple R-squared: 0.04755, Adjusted R-squared: 0.02888
## F-statistic: 2.546 on 1 and 51 DF, p-value: 0.1167
Are coefficients similar to those found in class? Do they differ in a way that makes sense? The coefficients are similar to those that were shown in class.
What is the predicted change in deaths when moving from 0.3 vaccination rate to 0.8?
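The fitted model predicts that increasing the vaccination rate decreases deaths: moving from a vaccination rate of 0.3 to 0.8 changes predicted deaths per capita by 0.5 × (−0.0010184) ≈ −0.00051. Note, however, that the slope is not statistically significant at the 5% level (p ≈ 0.117).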
# Predicted change in deaths per capita when the vaccination rate moves
# from 0.3 to 0.8, based on the regression of deaths per capita on
# vaccinations per capita.
regression_model <- lm(deaths_per_capita ~ vaccinations_per_capita, data = covid_data)
# Extract the coefficients by name with [[ so they are plain, unnamed
# scalars. With coef(...)[1] the "(Intercept)" name is carried through the
# arithmetic and the final difference is misleadingly printed under the
# label "(Intercept)".
b0 <- coef(regression_model)[["(Intercept)"]]
b1 <- coef(regression_model)[["vaccinations_per_capita"]]
# Fitted values at the two vaccination rates.
deaths_per_capita_0_3 <- b0 + b1 * 0.3
deaths_per_capita_0_8 <- b0 + b1 * 0.8
# The intercept cancels, so this equals b1 * (0.8 - 0.3).
delta_deaths_per_capita <- deaths_per_capita_0_8 - deaths_per_capita_0_3
delta_deaths_per_capita
## [1] -0.0005092128
residuals. You can take a very small number as
a zero.
# Residuals of the fitted model; resid() is the stats alias for residuals().
# NOTE(review): the variable name `residuals` shadows the function of the
# same name; it is kept because later code may refer to it.
residuals <- resid(regression_model)
# Add the residuals up and show the total.
sum_of_residuals <- sum(residuals)
sum_of_residuals
## [1] -5.854692e-18