This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the COVID-19 dataset and prepare it for analysis.
#
# The project directory below is specific to the original author's machine.
# Only switch to it when it exists, so the script can also be run from
# whichever directory already holds the CSV instead of failing at setwd().
project_dir <- "C:/Users/sampa/OneDrive/Desktop/Sampan/Masters/RR"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Fail early with a clear message when the input file is not reachable.
data_file <- "CovidProj.csv"
if (!file.exists(data_file)) {
  stop(
    "Cannot find '", data_file, "' in '", getwd(),
    "'; run the script from the directory that contains it.",
    call. = FALSE
  )
}

# A lone "." in the raw file is read as a missing value.
df_data <- read.csv(file = data_file, header = TRUE, na.strings = ".")

# Convert to numeric BEFORE dropping NAs. Entries that cannot be parsed as
# numbers become NA; as.numeric() reports this with a
# "NAs introduced by coercion" warning.
numeric_cols <- c(
  "TotalDeathCount",
  "population",
  "life_expectancy",
  "deaths_per_million",
  "gdp_per_capita"
)
missing_cols <- setdiff(numeric_cols, names(df_data))
if (length(missing_cols) > 0) {
  stop(
    "Missing expected column(s) in '", data_file, "': ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
df_data[numeric_cols] <- lapply(df_data[numeric_cols], as.numeric)

# Now drop rows with NA *after* conversion. na.omit() removes every row that
# has an NA in any column of the data frame.
df_data_clean <- na.omit(df_data)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Scatter plot of COVID-19 deaths per million against GDP per capita,
# overlaid with a linear-model trend line (se = TRUE draws its confidence band).
plot_gdp <- ggplot(
  data = df_data_clean,
  mapping = aes(x = gdp_per_capita, y = deaths_per_million)
) +
  geom_point(size = 3, alpha = 0.6, color = "blue") +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(
    x = "GDP per Capita (USD)",
    y = "Deaths per Million",
    title = "COVID-19 Deaths per Million vs GDP per Capita"
  ) +
  theme_minimal()
# Scatter plot of COVID-19 deaths per million against life expectancy,
# overlaid with a linear-model trend line (se = TRUE draws its confidence band).
plot_life <- ggplot(
  data = df_data_clean,
  mapping = aes(x = life_expectancy, y = deaths_per_million)
) +
  geom_point(size = 3, alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", color = "orange", se = TRUE) +
  labs(
    x = "Life Expectancy (Years)",
    y = "Deaths per Million",
    title = "COVID-19 Deaths per Million vs Life Expectancy"
  ) +
  theme_minimal()
# Write the two scatter plots to image files in the working directory
# (width 8, height 6, 300 dpi).
ggsave(
  filename = "gdp_vs_deaths.png",
  plot = plot_gdp,
  width = 8,
  height = 6,
  dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
  filename = "life_expectancy_vs_deaths.png",
  plot = plot_life,
  width = 8,
  height = 6,
  dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
# Fit a multiple linear regression of deaths per million on GDP per capita
# and life expectancy, using the NA-free data.
model <- lm(
  formula = deaths_per_million ~ gdp_per_capita + life_expectancy,
  data = df_data_clean
)

# Print coefficient estimates, significance tests and fit statistics.
summary(model)
##
## Call:
## lm(formula = deaths_per_million ~ gdp_per_capita + life_expectancy,
## data = df_data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1066.9 -398.2 -110.2 221.7 2081.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.300e+03 5.632e+02 -5.859 2.33e-08 ***
## gdp_per_capita -3.327e-03 3.175e-03 -1.048 0.296
## life_expectancy 5.409e+01 8.256e+00 6.551 6.45e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 607.9 on 171 degrees of freedom
## Multiple R-squared: 0.2716, Adjusted R-squared: 0.263
## F-statistic: 31.87 on 2 and 171 DF, p-value: 1.716e-12
# Bubble chart of life expectancy against GDP per capita, with point size
# mapped to COVID-19 deaths per million.
plot_deathlife <- ggplot(
  df_data_clean,
  aes(x = gdp_per_capita, y = life_expectancy, size = deaths_per_million)
) +
  geom_point(alpha = 0.7) +
  # Constrain the rendered point sizes to the 2-12 range.
  scale_size(range = c(2, 12)) +
  # No trailing comma after the last label: a dangling comma passes an empty
  # argument, which only works where the callee explicitly tolerates it.
  labs(
    title = "COVID-19 Deaths per Million by GDP and Life Expectancy",
    x = "GDP per Capita",
    y = "Life Expectancy",
    size = "Deaths per Million"
  ) +
  theme_minimal()
# Write the bubble chart to an image file (width 8, height 6, 300 dpi).
ggsave(
  filename = "gdp_life_expectancy_vs_deaths.png",
  plot = plot_deathlife,
  width = 8,
  height = 6,
  dpi = 300
)

# Display all three plots; a bare object name at top level auto-prints it.
plot_deathlife
plot_life
## `geom_smooth()` using formula = 'y ~ x'
plot_gdp
## `geom_smooth()` using formula = 'y ~ x'