This is an R Markdown document. Markdown is a simple form atting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
\[\hat y = \beta_0 + \beta_1 x_1 + \epsilon_i\] \[RSS(\beta_0, \beta_1) = \sum\limits_{i=1}^n [y_i -(\beta_0+\beta_1 x_i)]^2\] \[\hat\beta_0\prime= \sum\limits_{i=1}^n [y_i -(\beta_0+\beta_1 x_i)]^2 * \frac{d}{\partial \beta_0}\] \[= -2*\sum\limits_{i=1}^n [y_i -(\beta_0+\beta_1 x_i)]\] \[= 2(\sum\limits_{i=1}^n y_i - \sum\limits_{i=1}^n(\beta_0+\beta_1 x_i)) =0\] \[ \sum\limits_{i=1}^n y_i = \sum\limits_{i=1}^n (\beta_0+\beta_1x_i)\] \[\bar y = \hat \beta_0 + \beta_1 \bar x \] \[\hat \beta_0 = \bar y - \beta_1 \bar x \]
\[\hat \beta_1 \prime= -2 \sum\limits_{i=1}^n (y_i -[\beta_0 +\beta_1 x_1])*x_i =0\] \[= -2 \sum\limits_{i=1}^n(y_ix_i - \beta_0x_i -\beta_1x_i^2) =0 \] \[ \sum\limits_{i=1}^n y_ix_i - \beta_0\sum\limits_{i=1}^n x_i = \beta_1\sum\limits_{i=1}^n x_i^2\] \[ \sum\limits_{i=1}^n y_ix_i- (\bar y-\beta_1\bar x)\sum\limits_{i=1}^n x_i = \beta_1\sum\limits_{i=1}^nx_i^2\] \[\sum\limits_{i=1}^n y_ix_i - \sum\limits_{i=1}^ny_i \sum\limits_{i=1}^nx_i - \beta_1 \sum\limits_{i=1}^nx_i * \sum\limits_{i=1}^n x_i = \beta_1 \sum\limits_{i=1}^n x_i^2 \] \[\hat \beta_1\prime = \frac {\sum\limits_{i=1}^ny_ix_i - \bar y\bar xn}{ \sum\limits_{i=1}^n x_i^2-n\bar x^2} \]
\[L(\beta_0,\beta_1, \sigma^2)= \prod\limits_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}}e^{-(y_i-(\beta_0 +\beta_1x_i)^2/2\sigma^2} \] \[= [\frac{1}{\sqrt{2\pi\sigma^2}}]^n \prod\limits_{i=1}^n e^{-(y_i-(\beta_0 +\beta_1x_i)^2/2\sigma^2} \] ] \[ln(L(\beta_0,\beta_1, \sigma^2)=n ln(\frac{1}{\sqrt{2\pi\sigma^2}})- \frac{1}{2\sigma^2} \sum\limits_{i=1}^n (y_i - (\beta_0 + \beta_1x_i))^2\] \[\frac{\partial L(\beta_0,\beta_1,\sigma^2)}{\partial \beta_0}= \frac{1}{\sigma^2}\sum\limits_{i=1}^n (y_i -(\beta_0 +\beta_1x_i ))=0 \] \[\sum\limits_{i=1}^n \beta_0 = \sum\limits_{i=1}^n y_i - \sum\limits_{i=1}^n \beta_1x_i\] \[\hat \beta_0 = \bar y - \beta_1\bar x \]
\[\frac{\partial L(\beta_0,\beta_1,\sigma^2)}{\partial \beta_1}= \frac{-1}{2\sigma^2} \sum\limits_{i=1}^n (y_i -(\beta_0 +\beta_1x_i))^2\] \[= \frac{1}{\sigma^2} \sum\limits_{i=1}^n (y_ix_i -\beta_0x_i - \beta_1x_i^2) =0\] \[\sum\limits_{i=1}^n x_iy_i - (\bar y-\beta_1 \bar x) \sum\limits_{i=1}^n x_i = \beta_1 \sum\limits_{i=1}^n x_i^2\] \[\hat \beta_1 =\frac{\sum\limits_{i=1}^n y_ix_i - n\bar x \bar y}{\sum\limits_{i=1}^n x_i^2 - n\bar x^2 }\]
\[y = \beta_1x_i + \epsilon_i \] \[RSS = \sum\limits_{i=1}^n (y_i-\beta x_i)^2 \] \[\frac {RSS}{\partial \beta}= -2 \sum\limits_{i=1}^n (yi-\beta x_i)*x_i \] \[0 = -2 \sum\limits_{i=1}^n (y_ix_i - \beta x_i^2) \] \[\sum\limits_{i=1}^n \beta x_i^2= \sum\limits_{i=1}^n y_ix_i\] \[\hat \beta_1 = \frac{\sum\limits_{i=1}^n y_ix_i}{\sum\limits_{i=1}^n x_i^2}\] \[Var(\hat \beta_1)= \frac {\sigma^2}{\sum\limits_{i=1}^n x_i^2} \]
library(faraway)
data(teengamb)
attach(teengamb)
teengamb_model = lm(gamble~sex + status + income + verbal)
summary(teengamb_model)
##
## Call:
## lm(formula = gamble ~ sex + status + income + verbal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.082 -11.320 -1.451 9.452 94.252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.55565 17.19680 1.312 0.1968
## sex -22.11833 8.21111 -2.694 0.0101 *
## status 0.05223 0.28111 0.186 0.8535
## income 4.96198 1.02539 4.839 1.79e-05 ***
## verbal -2.95949 2.17215 -1.362 0.1803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.69 on 42 degrees of freedom
## Multiple R-squared: 0.5267, Adjusted R-squared: 0.4816
## F-statistic: 11.69 on 4 and 42 DF, p-value: 1.815e-06
teengamb.res = resid(teengamb_model)
teengamb.res
## 1 2 3 4 5 6
## 10.6507430 9.3711318 5.4630298 -17.4957487 29.5194692 -2.9846919
## 7 8 9 10 11 12
## -7.0242994 -12.3060734 6.8496267 -10.3329505 1.5934936 -3.0958161
## 13 14 15 16 17 18
## 0.1172839 9.5331344 2.8488167 17.2107726 -25.2627227 -27.7998544
## 19 20 21 22 23 24
## 13.1446553 -15.9510624 -16.0041386 -9.5801478 -27.2711657 94.2522174
## 25 26 27 28 29 30
## 0.6993361 -9.1670510 -25.8747696 -8.7455549 -6.8803097 -19.8090866
## 31 32 33 34 35 36
## 10.8793766 15.0599340 11.7462296 -3.5932770 -14.4016736 45.6051264
## 37 38 39 40 41 42
## 20.5472529 11.2429290 -51.0824078 8.8669438 -1.4513921 -3.8361619
## 43 44 45 46 47
## -4.3831786 -14.8940753 5.4506347 1.4092321 7.1662399
mean(teengamb.res)
## [1] -3.065293e-17
median(teengamb.res)
## [1] -1.451392
cor(teengamb_model$fitted.values, teengamb_model$residuals)
## [1] -1.070659e-16
As shown by the output, the correlation between the fitted values and the residuals is -1.071 e-16
The estimate given by the summary output for the sex predictor is -22.11, with this variable being included for female participants. This means that a man is predicted to gamble 22.11 more pounds per year than a female.
At the .05 level of significance, sex and income are significant.
teengamb_males = teengamb[teengamb$sex == 0,]
teengamb_males_mean = apply(teengamb_males[,2:5],2,mean)
teengamb_males_mean
## status income verbal gamble
## 52.000000 4.976071 6.821429 29.775000
x = data.frame(sex =0, status = 52, income= 4.976, verbal = 6.821)
predict.lm(teengamb_model, x, interval = "confidence")
## fit lwr upr
## 1 29.77591 21.12224 38.42959
predict.lm(teengamb_model, x, interval = "prediction")
## fit lwr upr
## 1 29.77591 -16.82557 76.3774
The amount gambled by an average male is 29.775 pounds per year.
max_male_status = 75
max_male_income = 15.00
max_male_verbal = 10
y = data.frame(sex=0, status = max_male_status, income = max_male_income, verbal = max_male_verbal)
predict.lm(teengamb_model, y, interval = "confidence")
## fit lwr upr
## 1 71.30794 42.23237 100.3835
predict.lm(teengamb_model, y, interval = "prediction")
## fit lwr upr
## 1 71.30794 17.06588 125.55
The prediction interval for a male with maximized values of status, income, and verbal is larger than the prediction interval for an average male. This is because the standard error of the prediction increases.
teengamb_newmodel = lm(gamble ~ income)
anova(teengamb_newmodel, teengamb_model)
## Analysis of Variance Table
##
## Model 1: gamble ~ income
## Model 2: gamble ~ sex + status + income + verbal
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 45 28009
## 2 42 21624 3 6384.8 4.1338 0.01177 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(faraway)
data(sat)
attach(sat)
## The following object is masked from teengamb:
##
## verbal
sat_model = lm(total ~ expend+ratio+salary)
summary(sat_model)
##
## Call:
## lm(formula = total ~ expend + ratio + salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -140.911 -46.740 -7.535 47.966 123.329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1069.234 110.925 9.639 1.29e-12 ***
## expend 16.469 22.050 0.747 0.4589
## ratio 6.330 6.542 0.968 0.3383
## salary -8.823 4.697 -1.878 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68.65 on 46 degrees of freedom
## Multiple R-squared: 0.2096, Adjusted R-squared: 0.1581
## F-statistic: 4.066 on 3 and 46 DF, p-value: 0.01209
sat_nosalary_model = lm(total~expend+ratio)
sat_nopredictors_model = lm(total~0)
anova(sat_nosalary_model, sat_model)
## Analysis of Variance Table
##
## Model 1: total ~ expend + ratio
## Model 2: total ~ expend + ratio + salary
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 47 233443
## 2 46 216812 1 16631 3.5285 0.06667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(sat_nopredictors_model, sat_model)
## Analysis of Variance Table
##
## Model 1: total ~ 0
## Model 2: total ~ expend + ratio + salary
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 50 46924380
## 2 46 216812 4 46707568 2477.4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
new_sat_model = lm(total~expend+ratio+salary+takers)
anova(sat_model, new_sat_model)
## Analysis of Variance Table
##
## Model 1: total ~ expend + ratio + salary
## Model 2: total ~ expend + ratio + salary + takers
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 216812
## 2 45 48124 1 168688 157.74 2.607e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(new_sat_model)
##
## Call:
## lm(formula = total ~ expend + ratio + salary + takers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90.531 -20.855 -1.746 15.979 66.571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1045.9715 52.8698 19.784 < 2e-16 ***
## expend 4.4626 10.5465 0.423 0.674
## ratio -3.6242 3.2154 -1.127 0.266
## salary 1.6379 2.3872 0.686 0.496
## takers -2.9045 0.2313 -12.559 2.61e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.7 on 45 degrees of freedom
## Multiple R-squared: 0.8246, Adjusted R-squared: 0.809
## F-statistic: 52.88 on 4 and 45 DF, p-value: < 2.2e-16