library('ggplot2')
library('faraway')
data(gala)
head(gala[,-2])
## Species Area Elevation Nearest Scruz Adjacent
## Baltra 58 25.09 346 0.6 0.6 1.84
## Bartolome 31 1.24 109 0.6 26.3 572.33
## Caldwell 3 0.21 114 2.8 58.7 0.78
## Champion 25 0.10 46 1.9 47.4 0.18
## Coamano 2 0.05 77 1.9 1.9 903.82
## Daphne.Major 18 0.34 119 8.0 8.0 1.84
str(gala)
## 'data.frame': 30 obs. of 7 variables:
## $ Species : num 58 31 3 25 2 18 24 10 8 2 ...
## $ Endemics : num 23 21 3 9 1 11 0 7 4 2 ...
## $ Area : num 25.09 1.24 0.21 0.1 0.05 ...
## $ Elevation: num 346 109 114 46 77 119 93 168 71 112 ...
## $ Nearest : num 0.6 0.6 2.8 1.9 1.9 8 6 34.1 0.4 2.6 ...
## $ Scruz : num 0.6 26.3 58.7 47.4 1.9 ...
## $ Adjacent : num 1.84 572.33 0.78 0.18 903.82 ...
lmod <- lm(Species ~ Area + Elevation + Nearest+ Scruz + Adjacent,data=gala)
summary(lmod)
##
## Call:
## lm(formula = Species ~ Area + Elevation + Nearest + Scruz + Adjacent,
## data = gala)
##
## Residuals:
## Min 1Q Median 3Q Max
## -111.679 -34.898 -7.862 33.460 182.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.068221 19.154198 0.369 0.715351
## Area -0.023938 0.022422 -1.068 0.296318
## Elevation 0.319465 0.053663 5.953 3.82e-06 ***
## Nearest 0.009144 1.054136 0.009 0.993151
## Scruz -0.240524 0.215402 -1.117 0.275208
## Adjacent -0.074805 0.017700 -4.226 0.000297 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared: 0.7658, Adjusted R-squared: 0.7171
## F-statistic: 15.7 on 5 and 24 DF, p-value: 6.838e-07
simplefit=lm(Species~Nearest, data=gala)
summary(simplefit)
##
## Call:
## lm(formula = Species ~ Nearest, data = gala)
##
## Residuals:
## Min 1Q Median 3Q Max
## -84.16 -71.78 -41.66 9.84 357.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.3720 26.2035 3.296 0.00267 **
## Nearest -0.1132 1.5175 -0.075 0.94107
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 116.7 on 28 degrees of freedom
## Multiple R-squared: 0.0001986, Adjusted R-squared: -0.03551
## F-statistic: 0.005563 on 1 and 28 DF, p-value: 0.9411
par(mfrow=c(2,2))
plot(simplefit)
multifit<-lm(Species~Area+Elevation, data=gala)
multifit
##
## Call:
## lm(formula = Species ~ Area + Elevation, data = gala)
##
## Coefficients:
## (Intercept) Area Elevation
## 17.1052 0.0188 0.1717
x<- model.matrix( ~ Area + Elevation + Nearest + Scruz + Adjacent, gala)
x
## (Intercept) Area Elevation Nearest Scruz Adjacent
## Baltra 1 25.09 346 0.6 0.6 1.84
## Bartolome 1 1.24 109 0.6 26.3 572.33
## Caldwell 1 0.21 114 2.8 58.7 0.78
## Champion 1 0.10 46 1.9 47.4 0.18
## Coamano 1 0.05 77 1.9 1.9 903.82
## Daphne.Major 1 0.34 119 8.0 8.0 1.84
## Daphne.Minor 1 0.08 93 6.0 12.0 0.34
## Darwin 1 2.33 168 34.1 290.2 2.85
## Eden 1 0.03 71 0.4 0.4 17.95
## Enderby 1 0.18 112 2.6 50.2 0.10
## Espanola 1 58.27 198 1.1 88.3 0.57
## Fernandina 1 634.49 1494 4.3 95.3 4669.32
## Gardner1 1 0.57 49 1.1 93.1 58.27
## Gardner2 1 0.78 227 4.6 62.2 0.21
## Genovesa 1 17.35 76 47.4 92.2 129.49
## Isabela 1 4669.32 1707 0.7 28.1 634.49
## Marchena 1 129.49 343 29.1 85.9 59.56
## Onslow 1 0.01 25 3.3 45.9 0.10
## Pinta 1 59.56 777 29.1 119.6 129.49
## Pinzon 1 17.95 458 10.7 10.7 0.03
## Las.Plazas 1 0.23 94 0.5 0.6 25.09
## Rabida 1 4.89 367 4.4 24.4 572.33
## SanCristobal 1 551.62 716 45.2 66.6 0.57
## SanSalvador 1 572.33 906 0.2 19.8 4.89
## SantaCruz 1 903.82 864 0.6 0.0 0.52
## SantaFe 1 24.08 259 16.5 16.5 0.52
## SantaMaria 1 170.92 640 2.6 49.2 0.10
## Seymour 1 1.84 147 0.6 9.6 25.09
## Tortuga 1 1.24 186 6.8 50.9 17.95
## Wolf 1 2.85 253 34.1 254.7 2.33
## attr(,"assign")
## [1] 0 1 2 3 4 5
y<-gala$Species
y
## [1] 58 31 3 25 2 18 24 10 8 2 97 93 58 5 40 347 51 2 104
## [20] 108 12 70 280 237 444 62 285 44 16 21
hist(gala$Endemics)
plot(Species ~ Area, data = gala)
When X is a real matrix, the elements of (XTX)−1 also provide a
measure of the extent of linear dependence among the columns of X
If XTX is invertible then the columns of X have to be independent *
SOlve A xomputes A-1 while solve(A,b)solves Ax=b
xtxt<-solve(t(x)%*%x)
xtxt
## (Intercept) Area Elevation Nearest
## (Intercept) 9.867829e-02 3.778242e-05 -1.561976e-04 -2.339027e-04
## Area 3.778242e-05 1.352247e-07 -2.593617e-07 1.294003e-06
## Elevation -1.561976e-04 -2.593617e-07 7.745339e-07 -3.549366e-06
## Nearest -2.339027e-04 1.294003e-06 -3.549366e-06 2.988732e-04
## Scruz -3.760293e-04 -4.913149e-08 3.080831e-07 -3.821077e-05
## Adjacent 2.309832e-05 4.620303e-08 -1.640241e-07 1.424729e-06
## Scruz Adjacent
## (Intercept) -3.760293e-04 2.309832e-05
## Area -4.913149e-08 4.620303e-08
## Elevation 3.080831e-07 -1.640241e-07
## Nearest -3.821077e-05 1.424729e-06
## Scruz 1.247941e-05 -1.958356e-07
## Adjacent -1.958356e-07 8.426543e-08
Now we can hat Beta directly, using (XtX)-Ty
xtxt %*% t(x) %*% y
## [,1]
## (Intercept) 7.068220709
## Area -0.023938338
## Elevation 0.319464761
## Nearest 0.009143961
## Scruz -0.240524230
## Adjacent -0.074804832
xtxt
## (Intercept) Area Elevation Nearest
## (Intercept) 9.867829e-02 3.778242e-05 -1.561976e-04 -2.339027e-04
## Area 3.778242e-05 1.352247e-07 -2.593617e-07 1.294003e-06
## Elevation -1.561976e-04 -2.593617e-07 7.745339e-07 -3.549366e-06
## Nearest -2.339027e-04 1.294003e-06 -3.549366e-06 2.988732e-04
## Scruz -3.760293e-04 -4.913149e-08 3.080831e-07 -3.821077e-05
## Adjacent 2.309832e-05 4.620303e-08 -1.640241e-07 1.424729e-06
## Scruz Adjacent
## (Intercept) -3.760293e-04 2.309832e-05
## Area -4.913149e-08 4.620303e-08
## Elevation 3.080831e-07 -1.640241e-07
## Nearest -3.821077e-05 1.424729e-06
## Scruz 1.247941e-05 -1.958356e-07
## Adjacent -1.958356e-07 8.426543e-08
names(lmod)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
lmodsum<-summary(lmod)
names(lmodsum)
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
# we can estimate the sigma
sqrt(deviance(lmod)/df.residual(lmod))
## [1] 60.97519
lmodsum$sigma
## [1] 60.97519
hist(lmodsum$sigma)
* exatract(Xtx)-1 and use it to compute the se for the
coeffieients(diag) * Coefficients: For each variable and the intercept,
a weight is produced and that weight has other attributes like the
standard error, a t-test value and significance.
xtxt<-lmodsum$cov.unscaled
sqrt(diag(xtxt))*60.975
## (Intercept) Area Elevation Nearest Scruz Adjacent
## 19.15413865 0.02242228 0.05366264 1.05413269 0.21540158 0.01770013
# or get them from the summary objects
lmodsum$coef[,2]
## (Intercept) Area Elevation Nearest Scruz Adjacent
## 19.15419782 0.02242235 0.05366280 1.05413595 0.21540225 0.01770019
hist(lmodsum$coef[,2])