Code for Problem Set 3

# Read the problem-set CSV file into the data frame `pset3_data`.
pset3_data <- read.csv(file = "/Users/ericsretavan/Downloads/EAWE01.csv")

Mean and summary statistics of age

# Print the arithmetic mean of the AGE column.
mean(x = pset3_data$AGE)
[1] 28.908
# Print the summary (min, quartiles, median, mean, max) of AGE.
summary(object = pset3_data$AGE)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  27.00   28.00   29.00   28.91   30.00   31.00 

Histogram time (age)!

# Draw a histogram of AGE with an explicit axis label and title.
hist(
  x = pset3_data$AGE,
  main = "Histogram of Age",
  xlab = "Age"
)

Plots household income in 1997 against the mother's years of schooling

# Scatter plot with SM on the horizontal axis and HHINC97 on the vertical axis.
plot(
  x = pset3_data[["SM"]],
  y = pset3_data[["HHINC97"]],
  main = "Scatter Plot",
  xlab = "Years of Schooling of Mother",
  ylab = "Household Income 1997"
)

Estimate linear regression using variables from the scatter plot

# Fit the simple linear regression of HHINC97 on SM and print its summary.
reg1 <- lm(formula = HHINC97 ~ SM, data = pset3_data)
summary(reg1)

Call:
lm(formula = HHINC97 ~ SM, data = pset3_data)

Residuals:
   Min     1Q Median     3Q    Max 
-69436 -24975  -6289  14573 197476 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -17717.6    11016.7  -1.608    0.109    
SM            5559.6      813.4   6.835 3.06e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 42030 on 401 degrees of freedom
  (97 observations deleted due to missingness)
Multiple R-squared:  0.1044,    Adjusted R-squared:  0.1021 
F-statistic: 46.72 on 1 and 401 DF,  p-value: 3.057e-11

Finding sample mean and SD of variable “S” and “EARNINGS”

# Print the sample mean of S.
mean(x = pset3_data[["S"]])
[1] 14.548
# Print the sample mean of EARNINGS.
mean(x = pset3_data[["EARNINGS"]])
[1] 18.34576
# Print the sample standard deviation of S.
sd(x = pset3_data[["S"]])
[1] 2.779775
# Print the sample standard deviation of EARNINGS.
sd(x = pset3_data[["EARNINGS"]])
[1] 10.72343

Minimum and maximum of variable S

# Print the smallest observed value of S.
min(pset3_data[["S"]])
[1] 6
# Print the largest observed value of S.
max(pset3_data[["S"]])
[1] 20

Average of variable MARRIED

# Print the mean of MARRIED.
# NOTE(review): presumably a 0/1 indicator, so this is a sample share; confirm.
mean(x = pset3_data$MARRIED)
[1] 0.404

Average age

# Print the average of the AGE column.
mean(x = pset3_data$AGE)
[1] 28.908

Histogram of Earnings

# Draw a histogram of EARNINGS with an explicit axis label and title.
hist(
  x = pset3_data$EARNINGS,
  main = "Histogram of Earnings",
  xlab = "Dollars Per Hour"
)

Finding the R^2 of the regression of S on ASVABC and the estimated parameter of ASVABC

# Fit the simple linear regression of S on ASVABC and print its summary.
model <- lm(formula = S ~ ASVABC, data = pset3_data)
summary(model)

Call:
lm(formula = S ~ ASVABC, data = pset3_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.3713 -1.7366 -0.1125  1.8222  6.7436 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  14.1838     0.1093  129.78   <2e-16 ***
ASVABC        1.6165     0.1178   13.72   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.371 on 498 degrees of freedom
Multiple R-squared:  0.2742,    Adjusted R-squared:  0.2728 
F-statistic: 188.2 on 1 and 498 DF,  p-value: < 2.2e-16
# Store the R-squared of `model` and the estimated coefficient on ASVABC.
rsq <- summary(model)[["r.squared"]]
beta2 <- summary(model)[["coefficients"]]["ASVABC", "Estimate"]

The R^2 is 0.2742218. The estimated parameter of ASVABC is 1.6164539.

Question 7: Producing a Scatterplot

# Scatter plot of S against ASVABC, then overlay the fitted line from the
# regression of S on ASVABC.
plot(
  x = pset3_data[["ASVABC"]],
  y = pset3_data[["S"]],
  main = "Scatter Plot",
  xlab = "Intellectual Ability",
  ylab = "Years of Schooling"
)
abline(reg = lm(formula = S ~ ASVABC, data = pset3_data))

Question 8: Linear model with EARNINGS as the y and S as the x

# Fit the simple linear regression of EARNINGS on S and print its summary.
model8 <- lm(formula = EARNINGS ~ S, data = pset3_data)
summary(model8)

Call:
lm(formula = EARNINGS ~ S, data = pset3_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-17.177  -6.588  -2.147   3.532  86.424 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   3.0897     2.4638   1.254     0.21    
S             1.0487     0.1664   6.304 6.41e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.33 on 498 degrees of freedom
Multiple R-squared:  0.0739,    Adjusted R-squared:  0.07204 
F-statistic: 39.74 on 1 and 498 DF,  p-value: 6.407e-10
# Store the R-squared of `model8`, then print the estimated coefficient on S.
rsq8 <- summary(model8)[["r.squared"]]
summary(model8)[["coefficients"]]["S", "Estimate"]
[1] 1.048667

Question 9

# Fit the simple linear regression of EARNINGS on HEIGHT and print its summary.
model9 <- lm(formula = EARNINGS ~ HEIGHT, data = pset3_data)
summary(model9)

Call:
lm(formula = EARNINGS ~ HEIGHT, data = pset3_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-17.316  -6.892  -2.783   4.070  80.276 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  -0.5133     7.9723  -0.064   0.9487  
HEIGHT        0.2772     0.1170   2.370   0.0182 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.67 on 498 degrees of freedom
Multiple R-squared:  0.01115,   Adjusted R-squared:  0.009166 
F-statistic: 5.616 on 1 and 498 DF,  p-value: 0.01818
# Store the R-squared of `model9`, then print the estimated coefficient on HEIGHT.
rsq9 <- summary(model9)[["r.squared"]]
summary(model9)[["coefficients"]]["HEIGHT", "Estimate"]
[1] 0.2772173