7 variables in the dataset. The variables are Species and founf in the island

library('ggplot2')
library('faraway')
data(gala)
head(gala[,-2])
##              Species  Area Elevation Nearest Scruz Adjacent
## Baltra            58 25.09       346     0.6   0.6     1.84
## Bartolome         31  1.24       109     0.6  26.3   572.33
## Caldwell           3  0.21       114     2.8  58.7     0.78
## Champion          25  0.10        46     1.9  47.4     0.18
## Coamano            2  0.05        77     1.9   1.9   903.82
## Daphne.Major      18  0.34       119     8.0   8.0     1.84
str(gala)
## 'data.frame':    30 obs. of  7 variables:
##  $ Species  : num  58 31 3 25 2 18 24 10 8 2 ...
##  $ Endemics : num  23 21 3 9 1 11 0 7 4 2 ...
##  $ Area     : num  25.09 1.24 0.21 0.1 0.05 ...
##  $ Elevation: num  346 109 114 46 77 119 93 168 71 112 ...
##  $ Nearest  : num  0.6 0.6 2.8 1.9 1.9 8 6 34.1 0.4 2.6 ...
##  $ Scruz    : num  0.6 26.3 58.7 47.4 1.9 ...
##  $ Adjacent : num  1.84 572.33 0.78 0.18 903.82 ...

Fiting linear model in R is done using lm()

lmod <- lm(Species ~ Area + Elevation + Nearest+ Scruz + Adjacent,data=gala)
summary(lmod)
## 
## Call:
## lm(formula = Species ~ Area + Elevation + Nearest + Scruz + Adjacent, 
##     data = gala)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -111.679  -34.898   -7.862   33.460  182.584 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.068221  19.154198   0.369 0.715351    
## Area        -0.023938   0.022422  -1.068 0.296318    
## Elevation    0.319465   0.053663   5.953 3.82e-06 ***
## Nearest      0.009144   1.054136   0.009 0.993151    
## Scruz       -0.240524   0.215402  -1.117 0.275208    
## Adjacent    -0.074805   0.017700  -4.226 0.000297 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared:  0.7658, Adjusted R-squared:  0.7171 
## F-statistic:  15.7 on 5 and 24 DF,  p-value: 6.838e-07
simplefit=lm(Species~Nearest, data=gala)
summary(simplefit)
## 
## Call:
## lm(formula = Species ~ Nearest, data = gala)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84.16 -71.78 -41.66   9.84 357.70 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  86.3720    26.2035   3.296  0.00267 **
## Nearest      -0.1132     1.5175  -0.075  0.94107   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 116.7 on 28 degrees of freedom
## Multiple R-squared:  0.0001986,  Adjusted R-squared:  -0.03551 
## F-statistic: 0.005563 on 1 and 28 DF,  p-value: 0.9411
par(mfrow=c(2,2))
plot(simplefit)

multifit<-lm(Species~Area+Elevation, data=gala)
multifit
## 
## Call:
## lm(formula = Species ~ Area + Elevation, data = gala)
## 
## Coefficients:
## (Intercept)         Area    Elevation  
##     17.1052       0.0188       0.1717
x<- model.matrix( ~ Area + Elevation + Nearest + Scruz + Adjacent, gala)
x
##              (Intercept)    Area Elevation Nearest Scruz Adjacent
## Baltra                 1   25.09       346     0.6   0.6     1.84
## Bartolome              1    1.24       109     0.6  26.3   572.33
## Caldwell               1    0.21       114     2.8  58.7     0.78
## Champion               1    0.10        46     1.9  47.4     0.18
## Coamano                1    0.05        77     1.9   1.9   903.82
## Daphne.Major           1    0.34       119     8.0   8.0     1.84
## Daphne.Minor           1    0.08        93     6.0  12.0     0.34
## Darwin                 1    2.33       168    34.1 290.2     2.85
## Eden                   1    0.03        71     0.4   0.4    17.95
## Enderby                1    0.18       112     2.6  50.2     0.10
## Espanola               1   58.27       198     1.1  88.3     0.57
## Fernandina             1  634.49      1494     4.3  95.3  4669.32
## Gardner1               1    0.57        49     1.1  93.1    58.27
## Gardner2               1    0.78       227     4.6  62.2     0.21
## Genovesa               1   17.35        76    47.4  92.2   129.49
## Isabela                1 4669.32      1707     0.7  28.1   634.49
## Marchena               1  129.49       343    29.1  85.9    59.56
## Onslow                 1    0.01        25     3.3  45.9     0.10
## Pinta                  1   59.56       777    29.1 119.6   129.49
## Pinzon                 1   17.95       458    10.7  10.7     0.03
## Las.Plazas             1    0.23        94     0.5   0.6    25.09
## Rabida                 1    4.89       367     4.4  24.4   572.33
## SanCristobal           1  551.62       716    45.2  66.6     0.57
## SanSalvador            1  572.33       906     0.2  19.8     4.89
## SantaCruz              1  903.82       864     0.6   0.0     0.52
## SantaFe                1   24.08       259    16.5  16.5     0.52
## SantaMaria             1  170.92       640     2.6  49.2     0.10
## Seymour                1    1.84       147     0.6   9.6    25.09
## Tortuga                1    1.24       186     6.8  50.9    17.95
## Wolf                   1    2.85       253    34.1 254.7     2.33
## attr(,"assign")
## [1] 0 1 2 3 4 5
y<-gala$Species
y
##  [1]  58  31   3  25   2  18  24  10   8   2  97  93  58   5  40 347  51   2 104
## [20] 108  12  70 280 237 444  62 285  44  16  21
hist(gala$Endemics)

plot(Species ~ Area, data = gala)

When X is a real matrix, the elements of (XTX)−1 also provide a measure of the extent of linear dependence among the columns of X If XTX is invertible then the columns of X have to be independent * SOlve A xomputes A-1 while solve(A,b)solves Ax=b

xtxt<-solve(t(x)%*%x)
xtxt
##               (Intercept)          Area     Elevation       Nearest
## (Intercept)  9.867829e-02  3.778242e-05 -1.561976e-04 -2.339027e-04
## Area         3.778242e-05  1.352247e-07 -2.593617e-07  1.294003e-06
## Elevation   -1.561976e-04 -2.593617e-07  7.745339e-07 -3.549366e-06
## Nearest     -2.339027e-04  1.294003e-06 -3.549366e-06  2.988732e-04
## Scruz       -3.760293e-04 -4.913149e-08  3.080831e-07 -3.821077e-05
## Adjacent     2.309832e-05  4.620303e-08 -1.640241e-07  1.424729e-06
##                     Scruz      Adjacent
## (Intercept) -3.760293e-04  2.309832e-05
## Area        -4.913149e-08  4.620303e-08
## Elevation    3.080831e-07 -1.640241e-07
## Nearest     -3.821077e-05  1.424729e-06
## Scruz        1.247941e-05 -1.958356e-07
## Adjacent    -1.958356e-07  8.426543e-08

Now we can hat Beta directly, using (XtX)-Ty

xtxt %*% t(x) %*% y
##                     [,1]
## (Intercept)  7.068220709
## Area        -0.023938338
## Elevation    0.319464761
## Nearest      0.009143961
## Scruz       -0.240524230
## Adjacent    -0.074804832
xtxt
##               (Intercept)          Area     Elevation       Nearest
## (Intercept)  9.867829e-02  3.778242e-05 -1.561976e-04 -2.339027e-04
## Area         3.778242e-05  1.352247e-07 -2.593617e-07  1.294003e-06
## Elevation   -1.561976e-04 -2.593617e-07  7.745339e-07 -3.549366e-06
## Nearest     -2.339027e-04  1.294003e-06 -3.549366e-06  2.988732e-04
## Scruz       -3.760293e-04 -4.913149e-08  3.080831e-07 -3.821077e-05
## Adjacent     2.309832e-05  4.620303e-08 -1.640241e-07  1.424729e-06
##                     Scruz      Adjacent
## (Intercept) -3.760293e-04  2.309832e-05
## Area        -4.913149e-08  4.620303e-08
## Elevation    3.080831e-07 -1.640241e-07
## Nearest     -3.821077e-05  1.424729e-06
## Scruz        1.247941e-05 -1.958356e-07
## Adjacent    -1.958356e-07  8.426543e-08
names(lmod)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
lmodsum<-summary(lmod)
names(lmodsum)
##  [1] "call"          "terms"         "residuals"     "coefficients" 
##  [5] "aliased"       "sigma"         "df"            "r.squared"    
##  [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
# we can estimate the sigma
sqrt(deviance(lmod)/df.residual(lmod))
## [1] 60.97519
lmodsum$sigma
## [1] 60.97519
hist(lmodsum$sigma)

* exatract(Xtx)-1 and use it to compute the se for the coeffieients(diag) * Coefficients: For each variable and the intercept, a weight is produced and that weight has other attributes like the standard error, a t-test value and significance.

xtxt<-lmodsum$cov.unscaled
sqrt(diag(xtxt))*60.975
## (Intercept)        Area   Elevation     Nearest       Scruz    Adjacent 
## 19.15413865  0.02242228  0.05366264  1.05413269  0.21540158  0.01770013
# or get them from the summary objects
lmodsum$coef[,2]
## (Intercept)        Area   Elevation     Nearest       Scruz    Adjacent 
## 19.15419782  0.02242235  0.05366280  1.05413595  0.21540225  0.01770019
hist(lmodsum$coef[,2])