R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the COVID project data and prepare it for plotting and modelling.
#
# Switch to the author's project folder only when it exists, so that the
# relative paths used in this script ("CovidProj.csv" and the saved figures)
# resolve there on the original machine. On any other machine the working
# directory is left untouched, so the script runs from whatever folder holds
# the CSV instead of stopping at setwd().
project_dir <- "C:/Users/sampa/OneDrive/Desktop/Sampan/Masters/RR"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# A field that is exactly "." is read as NA.
df_data <- read.csv(file = "CovidProj.csv", header = TRUE, na.strings = ".")

# Convert the analysis columns to numeric BEFORE dropping NAs, so that values
# which cannot be parsed as numbers become NA and are removed below.
# When this document was knit, each of the five conversions reported
# "Warning: NAs introduced by coercion".
# NOTE(review): those warnings suggest the columns contain non-numeric text
# other than "." -- confirm which missing-value marker the CSV really uses.
numeric_cols <- c(
  "TotalDeathCount",
  "population",
  "life_expectancy",
  "deaths_per_million",
  "gdp_per_capita"
)
df_data[numeric_cols] <- lapply(df_data[numeric_cols], as.numeric)

# Now drop rows with NA *after* conversion. na.omit() removes a row when ANY
# column is NA, not only the five columns converted above.
df_data_clean <- na.omit(df_data)

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Scatter plot of COVID-19 deaths per million (y) against GDP per capita (x),
# overlaid with a fitted linear trend line and its confidence band.

plot_gdp <- ggplot(
  data = df_data_clean,
  mapping = aes(x = gdp_per_capita, y = deaths_per_million)
)

# Points are added first so the regression line is drawn on top of them.
plot_gdp <- plot_gdp +
  geom_point(size = 3, alpha = 0.6, colour = "blue") +
  geom_smooth(method = "lm", se = TRUE, colour = "red")

# Title, axis labels and theme.
plot_gdp <- plot_gdp +
  labs(
    title = "COVID-19 Deaths per Million vs GDP per Capita",
    x = "GDP per Capita (USD)",
    y = "Deaths per Million"
  ) +
  theme_minimal()

# Scatter plot of COVID-19 deaths per million (y) against life expectancy (x),
# overlaid with a fitted linear trend line and its confidence band.

plot_life <- ggplot(
  data = df_data_clean,
  mapping = aes(x = life_expectancy, y = deaths_per_million)
)

# Points are added first so the regression line is drawn on top of them.
plot_life <- plot_life +
  geom_point(size = 3, alpha = 0.6, colour = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, colour = "orange")

# Title, axis labels and theme.
plot_life <- plot_life +
  labs(
    title = "COVID-19 Deaths per Million vs Life Expectancy",
    x = "Life Expectancy (Years)",
    y = "Deaths per Million"
  ) +
  theme_minimal()

# Save both scatter plots as PNG files (width 8, height 6, 300 dpi). The file
# names are relative, so the images land in the current working directory.
ggsave(
  filename = "gdp_vs_deaths.png",
  plot = plot_gdp,
  width = 8, height = 6, dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
  filename = "life_expectancy_vs_deaths.png",
  plot = plot_life,
  width = 8, height = 6, dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
# Fit a linear model of deaths per million on GDP per capita and life
# expectancy, then print the coefficient table and fit statistics.
model <- lm(
  formula = deaths_per_million ~ gdp_per_capita + life_expectancy,
  data = df_data_clean
)
summary(model)
## 
## Call:
## lm(formula = deaths_per_million ~ gdp_per_capita + life_expectancy, 
##     data = df_data_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1066.9  -398.2  -110.2   221.7  2081.8 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -3.300e+03  5.632e+02  -5.859 2.33e-08 ***
## gdp_per_capita  -3.327e-03  3.175e-03  -1.048    0.296    
## life_expectancy  5.409e+01  8.256e+00   6.551 6.45e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 607.9 on 171 degrees of freedom
## Multiple R-squared:  0.2716, Adjusted R-squared:  0.263 
## F-statistic: 31.87 on 2 and 171 DF,  p-value: 1.716e-12
# Bubble chart: GDP per capita (x) against life expectancy (y), with the size
# of each point driven by COVID-19 deaths per million.

plot_deathlife <- ggplot(
  df_data_clean,
  aes(x = gdp_per_capita, y = life_expectancy, size = deaths_per_million)
) +
  # Semi-transparent points keep overlapping bubbles distinguishable.
  geom_point(alpha = 0.7) +
  # Constrain the drawn point sizes to the range 2-12.
  scale_size(range = c(2, 12)) +
  # No trailing comma after the last label: an empty final argument only
  # works when the callee explicitly tolerates it.
  labs(
    title = "COVID-19 Deaths per Million by GDP and Life Expectancy",
    x = "GDP per Capita",
    y = "Life Expectancy",
    size = "Deaths per Million"
  ) +
  theme_minimal()

# Save the bubble chart as a PNG file (width 8, height 6, 300 dpi).
ggsave(
  filename = "gdp_life_expectancy_vs_deaths.png",
  plot = plot_deathlife,
  width = 8, height = 6, dpi = 300
)

# Render the three plots, bubble chart first, then the two scatter plots.
print(plot_deathlife)

print(plot_life)
## `geom_smooth()` using formula = 'y ~ x'

print(plot_gdp)
## `geom_smooth()` using formula = 'y ~ x'