# Convert the six GDP columns (V2-V7) from comma-formatted text to numbers.
# Entries that still are not numeric after the commas are removed become NA,
# and as.numeric() emits one coercion warning per affected column.
gdp_data[, c("V2", "V3", "V4", "V5", "V6", "V7")] <- lapply(
  gdp_data[, c("V2", "V3", "V4", "V5", "V6", "V7")],
  function(raw_values) {
    without_commas <- gsub(",", "", raw_values)
    as.numeric(without_commas)
  }
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Row-wise mean of the GDP columns V2-V7, skipping missing values.
# A row whose six values are all NA yields NaN.
gdp_data[["Average_GDP"]] <- rowMeans(
  gdp_data[, paste0("V", 2:7)],
  na.rm = TRUE
)
# Attach each country's average GDP to the cleaned data, matching
# `countries_and_areas` in clean_data against `V1` in gdp_data.
# NOTE(review): assumes V1 holds one row per country; duplicates would
# multiply rows in the join -- confirm.
joined_data <- left_join(
  clean_data,
  select(gdp_data, V1, Average_GDP),
  by = c("countries_and_areas" = "V1")
)

# Keep only rows that are complete in every column of the joined table.
clean_joined_data <- na.omit(joined_data)
# I used ChatGPT to help me join my two data sets.
# Multiple regression: maternal mortality ratio (2017) explained by the
# institutional delivery care variable and average GDP.
model1 <- lm(
  formula = maternal_mortality_ratio_2017 ~
    delivery_care_institutional + Average_GDP,
  data = clean_joined_data
)

# Coefficient table, residual quantiles, R-squared and overall F-statistic.
summary(model1)
##
## Call:
## lm(formula = maternal_mortality_ratio_2017 ~ delivery_care_institutional +
## Average_GDP, data = clean_joined_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -357.32 -141.66 -10.05 102.60 774.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 771.20416 108.76237 7.091 3.91e-09 ***
## delivery_care_institutional -5.29167 1.64038 -3.226 0.00219 **
## Average_GDP -0.03090 0.01356 -2.278 0.02692 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 211.3 on 51 degrees of freedom
## Multiple R-squared: 0.4064, Adjusted R-squared: 0.3831
## F-statistic: 17.46 on 2 and 51 DF, p-value: 1.676e-06
#I will now summarize the data using stargazer:
# Regression table for model1.
# stargazer prints the intercept as the LAST row, so the labels must follow
# that order: the two slopes first, then the intercept. Listing "Intercept"
# first shifts every label by one row and mislabels all three coefficients.
stargazer(
  model1,
  type = "text",
  title = "Regression Results",
  dep.var.labels = "Maternal Mortality Ratio 2017",
  covariate.labels = c(
    "Delivery Care Institutional",
    "Countries GDP",
    "Intercept"
  ),
  omit.stat = c("f", "ser"),
  report = "vc*st", # variable, coefficient with stars, std. error, t-stat
  star.cutoffs = c(0.05, 0.01, 0.001)
)
##
## Regression Results
## =========================================================
## Dependent variable:
## -----------------------------
## Maternal Mortality Ratio 2017
## ---------------------------------------------------------
## Delivery Care Institutional -5.292**
## (1.640)
## t = -3.226
##
## Countries GDP -0.031*
## (0.014)
## t = -2.278
##
## Intercept 771.204***
## (108.762)
## t = 7.091
##
## ---------------------------------------------------------
## Observations 54
## R2 0.406
## Adjusted R2 0.383
## =========================================================
## Note: *p<0.05; **p<0.01; ***p<0.001
# 95% confidence intervals for the intercept and both slope coefficients.
confint(object = model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 552.8545765 989.553733887
## delivery_care_institutional -8.5848691 -1.998471210
## Average_GDP -0.0581205 -0.003672194
# Install 'car' only when it is missing, so re-running or knitting this
# analysis does not download and reinstall the package every time.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:openintro':
##
## densityPlot
## The following object is masked from 'package:dplyr':
##
## recode
# Joint F-test: are both slope coefficients simultaneously equal to zero?
car::linearHypothesis(
  model = model1,
  hypothesis.matrix = c("delivery_care_institutional=0", "Average_GDP=0")
)
##
## Linear hypothesis test:
## delivery_care_institutional = 0
## Average_GDP = 0
##
## Model 1: restricted model
## Model 2: maternal_mortality_ratio_2017 ~ delivery_care_institutional +
## Average_GDP
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 53 3834602
## 2 51 2276231 2 1558371 17.458 1.676e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I had trouble performing the F-test. I looked at the textbook reference you provided and copied the code (I think?), but I kept getting an error. It may be a mistake on my part, though.
# Residual plot ----
# Reuse the already-fitted `model1` instead of fitting the identical
# regression again, and build the plotting data explicitly rather than
# relying on ggplot2 implicitly converting an lm object via fortify().
data.frame(
  .fitted = fitted(model1),
  .resid = residuals(model1)
) %>%
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0) + # reference line: zero prediction error
  labs(
    title = "Residual Plot",
    x = "Predicted Maternal Mortality Ratio",
    y = "Residuals (Errors)"
  )
# Density plot of residuals ----
# `model1` is already fitted above, so it is not refitted here. The residuals
# go into their own data frame so the aesthetic refers to a column of the
# plotted data instead of an external vector that merely happens to have the
# same length as `clean_joined_data`.
data.frame(residual = residuals(model1)) %>%
  ggplot(aes(x = residual)) +
  geom_density(fill = "slateblue3", alpha = 0.5) +
  labs(
    title = "Density Plot of Residuals",
    x = "Residuals",
    y = "Density"
  ) +
  theme_minimal()
# Question 8 cont: The error terms in the density distribution obtained
from my residuals do not appear to be normally distributed, but rather
a bit skewed to the left. This contrasts with the shape of the error-term
distribution in my simple regression analysis, whose error terms appear
to be approximately normally distributed (although also a bit skewed to
the left).