Homework Sheet No. 2
#install.packages("faraway")
library(faraway)
#data(package="faraway")
data("fruitfly")
#head(fruitfly)
modelmultivar<-lm(longevity~activity+thorax, data = fruitfly)
summary(modelmultivar)
##
## Call:
## lm(formula = longevity ~ activity + thorax, data = fruitfly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.108 -7.014 -1.101 6.234 30.265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -48.749 10.850 -4.493 1.65e-05 ***
## activityone 2.637 2.984 0.884 0.3786
## activitylow -7.015 2.981 -2.353 0.0203 *
## activitymany 4.139 3.027 1.367 0.1741
## activityhigh -20.004 3.016 -6.632 1.05e-09 ***
## thorax 134.341 12.731 10.552 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.54 on 118 degrees of freedom
## Multiple R-squared: 0.6527, Adjusted R-squared: 0.638
## F-statistic: 44.36 on 5 and 118 DF, p-value: < 2.2e-16
modelunivar<-lm(longevity~activity, data=fruitfly)
summary(modelunivar)
##
## Call:
## lm(formula = longevity ~ activity, data = fruitfly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.760 -8.760 0.329 11.200 32.440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.5600 2.9263 21.720 < 2e-16 ***
## activityone 1.2400 4.1384 0.300 0.765
## activitylow -6.8000 4.1384 -1.643 0.103
## activitymany 0.9817 4.1813 0.235 0.815
## activityhigh -24.8400 4.1384 -6.002 2.16e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.63 on 119 degrees of freedom
## Multiple R-squared: 0.3251, Adjusted R-squared: 0.3024
## F-statistic: 14.33 on 4 and 119 DF, p-value: 1.411e-09
library(MASS)
data("Animals")
head(Animals,7)
## body brain
## Mountain beaver 1.35 8.1
## Cow 465.00 423.0
## Grey wolf 36.33 119.5
## Goat 27.66 115.0
## Guinea pig 1.04 5.5
## Dipliodocus 11700.00 50.0
## Asian elephant 2547.00 4603.0
dim(Animals)
## [1] 28 2
plot(log(Animals$body), log(Animals$brain), xlab="Log(Body Weight)", ylab="Log(Brain Weight)", main="Plot of Body Wt. to Brain Wt.")
###b. Identification of some animals.
plot(log(Animals$body), log(Animals$brain), xlab="Log(Body Weight)", ylab="Log(Brain Weight)", main="Plot of Body Wt. to Brain Wt.")
identify(log(Animals$body), log(Animals$brain), labels=row.names(Animals))
## integer(0)
anim_reg<-lm(log(brain)~log(body), data=Animals)
summary(anim_reg)
##
## Call:
## lm(formula = log(brain) ~ log(body), data = Animals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2890 -0.6763 0.3316 0.8646 2.5835
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.55490 0.41314 6.184 1.53e-06 ***
## log(body) 0.49599 0.07817 6.345 1.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.532 on 26 degrees of freedom
## Multiple R-squared: 0.6076, Adjusted R-squared: 0.5925
## F-statistic: 40.26 on 1 and 26 DF, p-value: 1.017e-06
plot(log(Animals$body), log(Animals$brain), xlab="Log(Body Weight)", ylab="Log(Brain Weight)", main="Plot of Body Wt. to Brain Wt.")
abline(anim_reg, lwd=2, col="red")
subsetAnimals<-subset(Animals, !(row.names(Animals)== "Dipliodocus" |
row.names(Animals)=="Triceratops" | row.names(Animals)== "Brachiosaurus"))
dim(subsetAnimals)
## [1] 25 2
head(subsetAnimals, 15)
## body brain
## Mountain beaver 1.35 8.1
## Cow 465.00 423.0
## Grey wolf 36.33 119.5
## Goat 27.66 115.0
## Guinea pig 1.04 5.5
## Asian elephant 2547.00 4603.0
## Donkey 187.10 419.0
## Horse 521.00 655.0
## Potar monkey 10.00 115.0
## Cat 3.30 25.6
## Giraffe 529.00 680.0
## Gorilla 207.00 406.0
## Human 62.00 1320.0
## African elephant 6654.00 5712.0
## Rhesus monkey 6.80 179.0
plot(log(subsetAnimals$body), log(subsetAnimals$brain),
xlab="Log(Body Weight)", ylab="Log(Brain Weight)",
main="Body Wt. vs Brain Wt.:Data subset", cex.main=.8)
####Comment The plot of Log(Body Weight) vs. Log(Brain Weight) after removal of rows for Dinosaurs is shown above.
plot(log(subsetAnimals$body), log(subsetAnimals$brain),
xlab="Log(Body Weight)", ylab="Log(Brain Weight)",
main="Body Wt. vs Brain Wt.:Data subset", cex.main=.8)
identify(log(subsetAnimals$body), log(subsetAnimals$brain), labels=row.names(subsetAnimals))
## integer(0)
subanim_reg<-lm(log(brain)~log(body), data=subsetAnimals)
summary(subanim_reg)
##
## Call:
## lm(formula = log(brain) ~ log(body), data = subsetAnimals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9125 -0.4752 -0.1557 0.1940 1.9303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.15041 0.20060 10.72 2.03e-10 ***
## log(body) 0.75226 0.04572 16.45 3.24e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7258 on 23 degrees of freedom
## Multiple R-squared: 0.9217, Adjusted R-squared: 0.9183
## F-statistic: 270.7 on 1 and 23 DF, p-value: 3.243e-14
plot(log(subsetAnimals$body), log(subsetAnimals$brain),
xlab="Log(Body Weight)", ylab="Log(Brain Weight)",
main="Body Wt. vs Brain Wt. in Data subset after removal of Dinosaurs", cex.main=.8)
abline(subanim_reg, lwd=2, col="blue")
#data(package="MASS")
data(mammals)
#head(mammals)
mamm_reg<-lm(log(mammals$brain)~log(mammals$body))
summary(mamm_reg)
##
## Call:
## lm(formula = log(mammals$brain) ~ log(mammals$body))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.71550 -0.49228 -0.06162 0.43597 1.94829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.13479 0.09604 22.23 <2e-16 ***
## log(mammals$body) 0.75169 0.02846 26.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6943 on 60 degrees of freedom
## Multiple R-squared: 0.9208, Adjusted R-squared: 0.9195
## F-statistic: 697.4 on 1 and 60 DF, p-value: < 2.2e-16
ratio=(subsetAnimals$brain)/((subsetAnimals$body)^(0.75226))
ratio
## [1] 6.463089 4.166028 8.010187 9.463464 5.340098 12.613094 8.185096
## [8] 5.922062 20.344070 10.427559 6.078020 7.350454 59.187413 7.600391
## [15] 42.324309 3.860534 4.928277 6.830722 6.073388 8.528570 4.913372
## [22] 22.468201 4.950334 14.602130 3.448548
subsetAnimals1<-cbind(subsetAnimals, ratio)
subsetAnimals1<-subsetAnimals1[order(ratio),]
head(subsetAnimals1)
## body brain ratio
## Pig 192.00 180.0 3.448548
## Kangaroo 35.00 56.0 3.860534
## Cow 465.00 423.0 4.166028
## Jaguar 100.00 157.0 4.913372
## Golden hamster 0.12 1.0 4.928277
## Rat 0.28 1.9 4.950334
tail(subsetAnimals1)
## body brain ratio
## Asian elephant 2547.000 4603 12.61309
## Mole 0.122 3 14.60213
## Potar monkey 10.000 115 20.34407
## Chimpanzee 52.160 440 22.46820
## Rhesus monkey 6.800 179 42.32431
## Human 62.000 1320 59.18741
subsetAnimals1
## body brain ratio
## Pig 192.000 180.0 3.448548
## Kangaroo 35.000 56.0 3.860534
## Cow 465.000 423.0 4.166028
## Jaguar 100.000 157.0 4.913372
## Golden hamster 0.120 1.0 4.928277
## Rat 0.280 1.9 4.950334
## Guinea pig 1.040 5.5 5.340098
## Horse 521.000 655.0 5.922062
## Rabbit 2.500 12.1 6.073388
## Giraffe 529.000 680.0 6.078020
## Mountain beaver 1.350 8.1 6.463089
## Mouse 0.023 0.4 6.830722
## Gorilla 207.000 406.0 7.350454
## African elephant 6654.000 5712.0 7.600391
## Grey wolf 36.330 119.5 8.010187
## Donkey 187.100 419.0 8.185096
## Sheep 55.500 175.0 8.528570
## Goat 27.660 115.0 9.463464
## Cat 3.300 25.6 10.427559
## Asian elephant 2547.000 4603.0 12.613094
## Mole 0.122 3.0 14.602130
## Potar monkey 10.000 115.0 20.344070
## Chimpanzee 52.160 440.0 22.468201
## Rhesus monkey 6.800 179.0 42.324309
## Human 62.000 1320.0 59.187413
#install.packages("car")
library(car)
##
## Attaching package: 'car'
## The following objects are masked from 'package:faraway':
##
## logit, vif
outlierTest(anim_reg)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## Dipliodocus -2.507366 0.019026 0.53272
outlierTest(subanim_reg)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## Human 3.231759 0.0038339 0.095847
Comments: 65% of the variability in longevity of the fruitfly can be explained by the length of the thorax and by extent of sexual activity.