# Load the problem-set data into a data frame.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the CSV exists at exactly this location.
pset3_data <- read.csv("/Users/ericsretavan/Downloads/EAWE01.csv")

# Code for Problem Set 3

# Average of AGE in this data set
mean(pset3_data$AGE)
#> [1] 28.908

# Five-number summary (plus mean) of AGE
summary(pset3_data$AGE)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>   27.00   28.00   29.00   28.91   30.00   31.00
# Histogram time (age)!
hist(
  pset3_data$AGE,
  xlab = "Age",
  main = "Histogram of Age"
)

# Plots years of schooling of mother against household income in 1997
# Scatter plot of SM (x axis) against HHINC97 (y axis).
plot(
  x = pset3_data$SM,
  y = pset3_data$HHINC97,
  xlab = "Years of Schooling of Mother",
  ylab = "Household Income 1997",
  main = "Scatter Plot"
)

# Estimate linear regression using variables from the scatter plot
# Simple linear regression of HHINC97 on SM; summary() prints the fit.
reg1 <- lm(formula = HHINC97 ~ SM, data = pset3_data)
summary(reg1)
#> Call:
#> lm(formula = HHINC97 ~ SM, data = pset3_data)
#>
#> Residuals:
#>    Min     1Q Median     3Q    Max
#> -69436 -24975  -6289  14573 197476
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -17717.6    11016.7  -1.608    0.109
#> SM            5559.6      813.4   6.835 3.06e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 42030 on 401 degrees of freedom
#>   (97 observations deleted due to missingness)
#> Multiple R-squared:  0.1044, Adjusted R-squared:  0.1021
#> F-statistic: 46.72 on 1 and 401 DF,  p-value: 3.057e-11
# Finding the sample mean and SD of the variables "S" and "EARNINGS"
mean(pset3_data$S)
#> [1] 14.548
mean(pset3_data$EARNINGS)
#> [1] 18.34576
sd(pset3_data$S)
#> [1] 2.779775
sd(pset3_data$EARNINGS)
#> [1] 10.72343

# Minimum and maximum of S
min(pset3_data$S)
#> [1] 6
max(pset3_data$S)
#> [1] 20

# Average of MARRIED
mean(pset3_data$MARRIED)
#> [1] 0.404

# Average age
mean(pset3_data$AGE)
#> [1] 28.908
# Histogram of Earnings
hist(
  pset3_data$EARNINGS,
  xlab = "Dollars Per Hour",
  main = "Histogram of Earnings"
)

# Finding R^2 of the regression and estimated parameter of ASVABC
# Simple linear regression of S on ASVABC; summary() prints the fit.
model <- lm(S ~ ASVABC, data = pset3_data)
summary(model)
#> Call:
#> lm(formula = S ~ ASVABC, data = pset3_data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -6.3713 -1.7366 -0.1125  1.8222  6.7436
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  14.1838     0.1093  129.78   <2e-16 ***
#> ASVABC        1.6165     0.1178   13.72   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 2.371 on 498 degrees of freedom
#> Multiple R-squared:  0.2742, Adjusted R-squared:  0.2728
#> F-statistic: 188.2 on 1 and 498 DF,  p-value: < 2.2e-16

# Pull the R^2 and the ASVABC slope out of the summary object.
rsq <- summary(model)$r.squared
beta2 <- summary(model)$coefficients["ASVABC", "Estimate"]
# The R^2 is 0.2742218. The estimated parameter of ASVABC is 1.6164539.
# Question 7: Producing a Scatterplot
plot(
  x = pset3_data$ASVABC,
  y = pset3_data$S,
  xlab = "Intellectual Ability",
  ylab = "Years of Schooling",
  main = "Scatter Plot"
)
# Overlay the fitted line from the regression of S on ASVABC.
abline(lm(S ~ ASVABC, data = pset3_data))

# Question 8: LM with earnings as the y and S as the x
# Simple linear regression of EARNINGS on S; summary() prints the fit.
model8 <- lm(EARNINGS ~ S, data = pset3_data)
summary(model8)
#> Call:
#> lm(formula = EARNINGS ~ S, data = pset3_data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -17.177  -6.588  -2.147   3.532  86.424
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   3.0897     2.4638   1.254     0.21
#> S             1.0487     0.1664   6.304 6.41e-10 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 10.33 on 498 degrees of freedom
#> Multiple R-squared:  0.0739, Adjusted R-squared:  0.07204
#> F-statistic: 39.74 on 1 and 498 DF,  p-value: 6.407e-10

# Store the R^2 and print the estimated slope on S.
rsq8 <- summary(model8)$r.squared
summary(model8)$coefficients["S", "Estimate"]
#> [1] 1.048667
# Question 9
# Simple linear regression of EARNINGS on HEIGHT; summary() prints the fit.
model9 <- lm(EARNINGS ~ HEIGHT, data = pset3_data)
summary(model9)
#> Call:
#> lm(formula = EARNINGS ~ HEIGHT, data = pset3_data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -17.316  -6.892  -2.783   4.070  80.276
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  -0.5133     7.9723  -0.064   0.9487
#> HEIGHT        0.2772     0.1170   2.370   0.0182 *
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 10.67 on 498 degrees of freedom
#> Multiple R-squared:  0.01115, Adjusted R-squared:  0.009166
#> F-statistic: 5.616 on 1 and 498 DF,  p-value: 0.01818

# Store the R^2 and print the estimated slope on HEIGHT.
rsq9 <- summary(model9)$r.squared
summary(model9)$coefficients["HEIGHT", "Estimate"]
#> [1] 0.2772173