** Please submit both .Rmd file and the compiled .html file.

Check your work directory.

# Print the current working directory to check where relative paths resolve.
getwd()
## [1] "C:/Users/zahir/OneDrive/Desktop/Rutgers Semester Folders/Spring 2025/Regression Methods/RMD HW/HW 1"

1. Consider the example using the data file kidiq.dta. [Note: You need to install the package foreign first.]

(a) Make boxplots of kid score grouped by mom hs. (1 pts)

# Load the Stata-file reader and import the kid IQ data set.
library(foreign)
kidiq <- read.dta("kidiq.dta")

# Horizontal boxplots of kid_score, one box per mom_hs value. The variables are
# named bare in the formula so that `data = kidiq` is what supplies them.
# With horizontal = TRUE the scores run along the x-axis, hence the labels.
boxplot(
  kid_score ~ mom_hs,
  data = kidiq,
  xlab = "Kid Score",
  ylab = "Mom HS",
  main = "Boxplots of Kid Score Grouped by Mom HS",
  horizontal = TRUE
)

(b) Fit a simple linear regression model with kid score as the response and mom hs as the predictor using the R function lm() for each subgroup. Report the estimated coefficients.(1 pts)

# Regress kid_score on the logical indicator (mom_hs == 1). The coefficient
# printed as "...TRUE" is the shift relative to the FALSE rows, so the
# intercept is the fitted score for rows where (mom_hs == 1) is FALSE.
# NOTE(review): the formula repeats `kidiq$` although `data = kidiq` is given;
# left untouched because the call is echoed in the recorded output.
model_1 <- lm(kidiq$kid_score ~ kidiq$mom_hs == 1, data = kidiq)
model_1
## 
## Call:
## lm(formula = kidiq$kid_score ~ kidiq$mom_hs == 1, data = kidiq)
## 
## Coefficients:
##           (Intercept)  kidiq$mom_hs == 1TRUE  
##                 77.55                  11.77
# Same data with the indicator flipped to (mom_hs == 0): the intercept is now
# the fitted score for rows where (mom_hs == 0) is FALSE, and the "...TRUE"
# coefficient carries the opposite sign.
model_2 <- lm(kidiq$kid_score ~ kidiq$mom_hs == 0, data = kidiq)
model_2
## 
## Call:
## lm(formula = kidiq$kid_score ~ kidiq$mom_hs == 0, data = kidiq)
## 
## Coefficients:
##           (Intercept)  kidiq$mom_hs == 0TRUE  
##                 89.32                 -11.77

(c) Interpret the slope and the intercept of the regression line? (1 pts)

#Estimated coefficients are: β₀ = 77.55, β1 = 11.77

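#Meaning the intercept (77.55) is the predicted score for a child whose mother did not graduate from high school, and a mother graduating from high school is associated with an estimated 11.77-point increase in the child's predicted score.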

#Estimated coefficients are: β₀ = 89.32, β1 = -11.77

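#Meaning the intercept (89.32) is the predicted score for a child whose mother graduated from high school, and a mother not graduating from high school is associated with an estimated 11.77-point decrease in the child's predicted score.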

2. (continued from Q1) Split the data into two subgroups, corresponding to mom_hs=0 and mom_hs=1 respectively. For each subgroup, use the Kid’s Score (kid_score) and Mom’s Score (mom_iq) as the response and predictor respectively, and do the following.

# Split kidiq by mom_hs. which() keeps only rows where the comparison is TRUE
# (NA comparisons are dropped); drop = FALSE keeps the result a data frame.
kidiq_mom_hs_0 <- kidiq[which(kidiq$mom_hs == 0), , drop = FALSE]
kidiq_mom_hs_1 <- kidiq[which(kidiq$mom_hs == 1), , drop = FALSE]

(a) Give a scatter plot of the pair of variables. Use lightblue and lightgreen for mom_hs=0 and mom_hs=1 respectively. (1 pts)

# Three panels side by side: both groups overlaid, then each group on its own.
# par() returns the previous settings, which are restored at the end.
old_par <- par(mfrow = c(1, 3))

# Axis limits spanning both groups. Without them the overlay panel is scaled
# to the mom_hs = 0 data alone, and mom_hs = 1 points outside that range are
# not drawn.
iq_range <- range(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_1$mom_iq, na.rm = TRUE)
score_range <- range(kidiq_mom_hs_0$kid_score, kidiq_mom_hs_1$kid_score,
                     na.rm = TRUE)

# Panel 1: mom_hs = 0 in light blue, then mom_hs = 1 added in light green.
plot(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_0$kid_score,
     col = "lightblue", pch = 16,
     xlim = iq_range, ylim = score_range,
     xlab = "Mother's IQ", ylab = "Kid's Score",
     main = "Kid's Score vs. Mother's IQ")
points(kidiq_mom_hs_1$mom_iq, kidiq_mom_hs_1$kid_score,
       col = "lightgreen", pch = 16)
legend("topleft", legend = c("mom_hs = 0", "mom_hs = 1"),
       col = c("lightblue", "lightgreen"), pch = 16, bty = "n")

# Panel 2: mom_hs = 0 only.
plot(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_0$kid_score,
     col = "lightblue", pch = 16,
     xlab = "Mother's IQ", ylab = "Kid's Score", main = "mom_hs = 0")

# Panel 3: mom_hs = 1 only (colour name spelled like the other panels).
plot(kidiq_mom_hs_1$mom_iq, kidiq_mom_hs_1$kid_score,
     col = "lightgreen", pch = 16,
     xlab = "Mother's IQ", ylab = "Kid's Score", main = "mom_hs = 1")

# Put the graphics settings back to what they were before this chunk.
par(old_par)

(b) Fit a simple linear regression model using R function lm(). Report the estimated coefficients. Add the regression line with black color to the scatter plot in blue color. (2 pts)

#fitting simple linear regression model
# NOTE(review): this fit uses the full kidiq data, i.e. both mom_hs groups
# pooled; the question asks for a fit within each subgroup — confirm intent.
model_3 <- lm(kid_score ~ mom_iq, data = kidiq)
model_3
## 
## Call:
## lm(formula = kid_score ~ mom_iq, data = kidiq)
## 
## Coefficients:
## (Intercept)       mom_iq  
##       25.80         0.61
#creating plot again, now with the fitted regression lines
# Fit kid_score ~ mom_iq within each mom_hs subgroup so that each single-group
# panel shows its own line instead of the pooled model_3 line.
model_3_hs_0 <- lm(kid_score ~ mom_iq, data = kidiq_mom_hs_0)
model_3_hs_1 <- lm(kid_score ~ mom_iq, data = kidiq_mom_hs_1)

# Report the estimated intercept and slope for each subgroup.
coef(model_3_hs_0)
coef(model_3_hs_1)

# par() returns the previous settings, which are restored at the end.
old_par <- par(mfrow = c(1, 3))

# Axis limits spanning both groups, so the overlay panel does not clip
# mom_hs = 1 points that fall outside the mom_hs = 0 range.
iq_range <- range(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_1$mom_iq, na.rm = TRUE)
score_range <- range(kidiq_mom_hs_0$kid_score, kidiq_mom_hs_1$kid_score,
                     na.rm = TRUE)

# Panel 1: both groups overlaid with the pooled fit (model_3).
plot(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_0$kid_score,
     col = "lightblue", pch = 16,
     xlim = iq_range, ylim = score_range,
     xlab = "Mother's IQ", ylab = "Kid's Score",
     main = "Kid's Score vs. Mother's IQ")
points(kidiq_mom_hs_1$mom_iq, kidiq_mom_hs_1$kid_score,
       col = "lightgreen", pch = 16)
# Regression lines are drawn in black, as the question requests.
abline(model_3, col = "black", lwd = 2)
legend("topleft", legend = c("mom_hs = 0", "mom_hs = 1"),
       col = c("lightblue", "lightgreen"), pch = 16, bty = "n")

# Panel 2: mom_hs = 0 with the line fitted to that subgroup.
plot(kidiq_mom_hs_0$mom_iq, kidiq_mom_hs_0$kid_score,
     col = "lightblue", pch = 16,
     xlab = "Mother's IQ", ylab = "Kid's Score", main = "mom_hs = 0")
abline(model_3_hs_0, col = "black", lwd = 2)

# Panel 3: mom_hs = 1 with the line fitted to that subgroup.
plot(kidiq_mom_hs_1$mom_iq, kidiq_mom_hs_1$kid_score,
     col = "lightgreen", pch = 16,
     xlab = "Mother's IQ", ylab = "Kid's Score", main = "mom_hs = 1")
abline(model_3_hs_1, col = "black", lwd = 2)

# Put the graphics settings back to what they were before this chunk.
par(old_par)

#The fitted slope is positive: a kid's score tends to rise with the mother's IQ, whether or not the mother completed high school.

3.

(reading in data)

# Read hw1.txt, taking the column names from its first line, then print it.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
hw1 <- read.table("hw1.txt", header = TRUE)
hw1
##         Country       GDP Satisfaction
## 1     Australia 27.055725     7.894780
## 2       Finland 25.860430     7.905812
## 3         Japan 25.592535     6.579316
## 4         Korea  7.351448     5.334750
## 5        Mexico 13.613936     7.964578
## 6        Sweden 29.394784     8.010560
## 7 United States 33.824743     7.658895

(a) Give the scatter plot with black color of the Satisfaction against the GDP. Please label the x and y axes and give a main title of the plot. Use R function lm() to fit a linear regression model with satisfaction as the response and GDP as the predictor. Add the line with blue color to the scatter plot. (2 pts)

(b) Generate the same scatter plot of the Satisfaction against the GDP as previous question and use R function lm() to fit a linear regression model with satisfaction as the response and GDP as the predictor without intercept. Add the line with red color to the scatter plot. (1 pts)

(c)Compare these two models and explain the differences along with your observations. (1 pts)

# Summarise both fits once and reuse the summaries for every statistic below.
# NOTE(review): model_4 / model_5 are not defined in this part of the file;
# presumably the with-intercept and no-intercept fits from (a) and (b).
summ_mod_4 <- summary(model_4)
summ_mod_5 <- summary(model_5)

r_squared_w_inter <- summ_mod_4$r.squared
r_squared_wo_inter <- summ_mod_5$r.squared
# Caution: for a fit without an intercept, summary.lm() measures R-squared
# against zero rather than against the mean of the response, so the two
# values are not directly comparable.
paste("R-squared (with intercept):", r_squared_w_inter)
## [1] "R-squared (with intercept): 0.383932560371797"
cat("\n")
paste("R-squared (no intercept):", r_squared_wo_inter)
## [1] "R-squared (no intercept): 0.916234287601187"
# Adjusted R-squared comes from the same summary objects; no need to call
# summary() a second time.
adj_r_squared_w_inter <- summ_mod_4$adj.r.squared
adj_r_squared_wo_inter <- summ_mod_5$adj.r.squared

cat("\n")
paste("Adjusted R-squared (With intercept):", adj_r_squared_w_inter)
## [1] "Adjusted R-squared (With intercept): 0.260719072446156"
cat("\n")
paste("Adjusted R-squared (no intercept):", adj_r_squared_wo_inter)
## [1] "Adjusted R-squared (no intercept): 0.902273335534718"