#1#
data(teengamb,package = "faraway")
my_data <- teengamb
head(my_data)
## sex status income verbal gamble
## 1 1 51 2.00 8 0.0
## 2 1 28 2.50 8 0.0
## 3 1 37 2.00 6 0.0
## 4 1 28 7.00 4 7.3
## 5 1 65 2.00 8 19.6
## 6 1 61 3.47 6 0.1
l_model <- lm(gamble~ sex+ status +income +verbal , my_data)
summary(l_model)
##
## Call:
## lm(formula = gamble ~ sex + status + income + verbal, data = my_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.082 -11.320 -1.451 9.452 94.252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.55565 17.19680 1.312 0.1968
## sex -22.11833 8.21111 -2.694 0.0101 *
## status 0.05223 0.28111 0.186 0.8535
## income 4.96198 1.02539 4.839 1.79e-05 ***
## verbal -2.95949 2.17215 -1.362 0.1803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.69 on 42 degrees of freedom
## Multiple R-squared: 0.5267, Adjusted R-squared: 0.4816
## F-statistic: 11.69 on 4 and 42 DF, p-value: 1.815e-06
#which variables are statistically significant at the 5% level
data.frame(summary(l_model)$coef[summary(l_model)$coef[,4]<=0.05,4])
## summary.l_model..coef.summary.l_model..coef...4.....0.05..4.
## sex 1.011184e-02
## income 1.791882e-05
#2#
#what interpretation should be given to the coefficient for sex.
#If all the predictors except sex are held constant, the difference in predicted expenditure on gambling between
#male (sex=0) and female (sex=1) will be equal to the regression coefficient of sex, i.e., −22.11833. Therefore
#whenever sex changes from male (sex=0) to female (sex=1), the value of gamble decreases by 22.11833. In
#other words, according to the current regression model, a female spends $22.11833 less than a comparable
#(i.e., other predictors being held constant) male on gambling.
#3#
#fit a model with just income, .......
l_model_two <- lm(gamble~income,my_data)
summary(l_model_two)
##
## Call:
## lm(formula = gamble ~ income, data = my_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.020 -11.874 -3.757 11.934 107.120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.325 6.030 -1.049 0.3
## income 5.520 1.036 5.330 3.05e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.95 on 45 degrees of freedom
## Multiple R-squared: 0.387, Adjusted R-squared: 0.3734
## F-statistic: 28.41 on 1 and 45 DF, p-value: 3.045e-06
#use F test to compare.
#Two linear models are Nested if one (the restricted model) is
#obtained from the other (the full model) by setting some
#parameters to zero (i.e. removing terms from the model), or
#some other constraint on the parameters.
#We can compare nested models fit to the same dataset with the
#F test.
anova(l_model_two,l_model)
## Analysis of Variance Table
##
## Model 1: gamble ~ income
## Model 2: gamble ~ sex + status + income + verbal
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 45 28009
## 2 42 21624 3 6384.8 4.1338 0.01177 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#null hypo is Beta_sex = Beta_status = Beta_verbal= 0
# we can see the P-value is 0.01177,
#it indicate that null can be rejected here and the proposed
#simplification of model is NOT justifiable.
#.2.3
###(a)####
x <- c(6,6.3,6.5,6.8,7.0,7.1,7.5,7.5,7.6,7.8,8.0,8.2,8.4,8.4,8.9)
x
## [1] 6.0 6.3 6.5 6.8 7.0 7.1 7.5 7.5 7.6 7.8 8.0 8.2 8.4 8.4 8.9
y <- c(39,58,49,53,80,86,115,124,104,131,147,160,156,172,180)
y
## [1] 39 58 49 53 80 86 115 124 104 131 147 160 156 172 180
#l_model <- lm(y~x)
#summary(l_model)
plot(x, y, pch = 16, cex = 1.3, col = "blue", main = "(a) linear", xlab = "X", ylab = "Y")
abline(lm(y~x),col="red")

###(b)###
m_parabola <- lm(y ~ poly(x,2))
m_linear <- lm(y~x)
summary(m_parabola)
##
## Call:
## lm(formula = y ~ poly(x, 2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.209 -7.213 3.230 8.746 12.921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 110.267 2.836 38.884 5.40e-14 ***
## poly(x, 2)1 173.646 10.983 15.811 2.13e-09 ***
## poly(x, 2)2 4.070 10.983 0.371 0.717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.98 on 12 degrees of freedom
## Multiple R-squared: 0.9542, Adjusted R-squared: 0.9466
## F-statistic: 125.1 on 2 and 12 DF, p-value: 9.208e-09
summary(m_linear)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.647 -6.851 2.423 9.374 11.902
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -299.877 25.215 -11.89 2.33e-08 ***
## x 54.930 3.357 16.36 4.70e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.61 on 13 degrees of freedom
## Multiple R-squared: 0.9537, Adjusted R-squared: 0.9501
## F-statistic: 267.7 on 1 and 13 DF, p-value: 4.701e-10
#fitted parabola
xx <- seq(from=6,to=9,by=0.05)
yy <- 110.267+173.646*xx +4.070*(xx^2)
plot(x, y, pch = 16, cex = 1.3, col = "blue", main = "(b) non linear", xlab = "X", ylab = "Y")
lines(x, predict(m_parabola), col = "red")

###2.14####
#input the points
#(a)
x <- c(0,0,0,0,20,20,20,20,40,40,40,40,60,60,60,60,80,80,80,80,100,100,100,100)
y <- c(0.727,0.721,0.742,0.746,0.884,0.880,0.885,0.890,1.073,1.050,1.045,1.033,1.194,
1.184,1.205,1.180,1.350,1.291,1.291,1.323,1.442,1.369,1.458,1.459)
x
## [1] 0 0 0 0 20 20 20 20 40 40 40 40 60 60 60 60 80
## [18] 80 80 80 100 100 100 100
y
## [1] 0.727 0.721 0.742 0.746 0.884 0.880 0.885 0.890 1.073 1.050 1.045
## [12] 1.033 1.194 1.184 1.205 1.180 1.350 1.291 1.291 1.323 1.442 1.369
## [23] 1.458 1.459
plot(x, y, pch = "*", cex = 1.3, col = "black", main = "#2.14(a)# linear", xlab = "X", ylab = "Y")
matrix_y <- matrix(y,nrow=4)
matrix_y
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.727 0.884 1.073 1.194 1.350 1.442
## [2,] 0.721 0.880 1.050 1.184 1.291 1.369
## [3,] 0.742 0.885 1.045 1.205 1.291 1.458
## [4,] 0.746 0.890 1.033 1.180 1.323 1.459
y_model <- apply(X = matrix_y,MARGIN = 2,FUN=mean)
x_model <- seq(0,100,20)
y_model
## [1] 0.73400 0.88475 1.05025 1.19075 1.31375 1.43200
x_model
## [1] 0 20 40 60 80 100
points(x_model,y_model,col="red",pch="o")
lin_model <- lm(y_model~x_model)
abline(lin_model,col='blue')

summary(lin_model)
##
## Call:
## lm(formula = y_model ~ x_model)
##
## Residuals:
## 1 2 3 4 5 6
## -0.015667 -0.005417 0.019583 0.019583 0.002083 -0.020167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7496667 0.0137932 54.35 6.86e-07 ***
## x_model 0.0070250 0.0002278 30.84 6.59e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01906 on 4 degrees of freedom
## Multiple R-squared: 0.9958, Adjusted R-squared: 0.9948
## F-statistic: 951.1 on 1 and 4 DF, p-value: 6.586e-06
lin_model$coefficients
## (Intercept) x_model
## 0.7496667 0.0070250
###c###
parabola_model <- lm(y_model ~ poly(x_model,2))
plot(x, y, pch = "*", cex = 1.3, col = "black", main = "#2.14(c)# parabola", xlab = "X", ylab = "Y")
points(x_model,y_model,col="red",pch="o")
lines(x_model, predict(parabola_model), col = "blue")

summary(parabola_model)
##
## Call:
## lm(formula = y_model ~ poly(x_model, 2))
##
## Residuals:
## 1 2 3 4 5 6
## 0.004125 -0.009375 0.003750 0.003750 -0.001875 -0.000375
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.100917 0.002756 399.509 3.46e-08 ***
## poly(x_model, 2)1 0.587754 0.006750 87.075 3.34e-06 ***
## poly(x_model, 2)2 -0.036279 0.006750 -5.375 0.0126 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00675 on 3 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9993
## F-statistic: 3805 on 2 and 3 DF, p-value: 7.821e-06
parabola_model$coefficients
## (Intercept) poly(x_model, 2)1 poly(x_model, 2)2
## 1.10091667 0.58775367 -0.03627872
###e###
#get x square standard error
x_suqare_SE <- summary(parabola_model)$coefficients[,2][3]
x_square_coefficient <- summary(parabola_model)$coefficients[,1][3]
t_statistic <- (x_square_coefficient-0)/x_suqare_SE
2 * pt(abs(t_statistic), parabola_model$df.residual, lower.tail = FALSE)
## poly(x_model, 2)2
## 0.01261207