#Question 1
#A) Null hypothesis: grazing has no effect, i.e. mean fruit production is
# the same in the grazed and ungrazed groups.
# Alternative hypothesis: the mean differs in at least one group.
library(asbio)
## Warning: package 'asbio' was built under R version 3.3.3
## Loading required package: tcltk
# Load the ipomopsis data frame into the workspace and inspect it.
# load() already creates the object, so the .RData file is not attach()ed as
# well: attaching only adds a second copy to the search path that is masked
# by the workspace object.
load("ipomopsis.RData")
head(ipomopsis)
## root fruit grazing
## 1 6.225 59.77 Ungrazed
## 2 6.487 60.98 Ungrazed
## 3 4.919 14.73 Ungrazed
## 4 5.130 19.28 Ungrazed
## 5 5.417 34.25 Ungrazed
## 6 5.359 35.53 Ungrazed
str(ipomopsis)
## 'data.frame': 40 obs. of 3 variables:
## $ root : num 6.22 6.49 4.92 5.13 5.42 ...
## $ fruit : num 59.8 61 14.7 19.3 34.2 ...
## $ grazing: Factor w/ 2 levels "Grazed","Ungrazed": 2 2 2 2 2 2 2 2 2 2 ...
# Fruit (numeric, x-axis) plotted against the grazing factor (y-axis).
plot(ipomopsis$fruit, ipomopsis$grazing)

# One strip of fruit values per grazing level. The jitter amount is only
# applied when method = "jitter"; with the default method the points are
# simply overplotted and jitter = 0.5 is ignored.
stripchart(ipomopsis$fruit ~ ipomopsis$grazing,
           method = "jitter", jitter = 0.5)

# B)
# Plotting a numeric variable against the factor levels alone is not very
# informative: no clusters can be identified. The picture improves once a
# second numeric variable, such as root, is plotted as well.
# Fruit by grazing level, with the factor on the x-axis.
plot(ipomopsis$grazing, ipomopsis$fruit, las = 1)

# Vertical strip chart of fruit for each grazing level.
stripchart(ipomopsis$fruit ~ ipomopsis$grazing, vertical = TRUE, las = 1)

# Root against fruit, coloured by grazing level (green = Grazed,
# red = Ungrazed). The axis limits are taken from the whole data set so that
# the Ungrazed points added afterwards cannot fall outside the plot region.
is_grazed <- ipomopsis$grazing == "Grazed"
is_ungrazed <- ipomopsis$grazing == "Ungrazed"
plot(ipomopsis$fruit[is_grazed], ipomopsis$root[is_grazed],
     xlim = range(ipomopsis$fruit), ylim = range(ipomopsis$root),
     xlab = "Fruit", ylab = "root", pch = 16, col = "green")
# points() adds to the existing plot; axis labels are set by plot() above.
points(ipomopsis$fruit[is_ungrazed], ipomopsis$root[is_ungrazed],
       pch = 16, col = "red")

# C) Centering the concomitant variable
# Linear model of fruit production with grazing as the only predictor,
# followed by its coefficient table and fit statistics.
fg1 <- lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
summary(fg1)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.991 -18.028 2.915 14.049 48.109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.941 5.236 12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed -17.060 7.404 -2.304 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared: 0.1226, Adjusted R-squared: 0.09949
## F-statistic: 5.309 on 1 and 38 DF, p-value: 0.02678
# Diagnostic plots for the grazing-only linear model fg1.
plot(fg1)




# Same fruit ~ grazing model fitted with glm() (no family given, so the
# default family is used), then summarised.
fglm1 <- glm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
summary(fglm1)
##
## Call:
## glm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -52.990 -18.028 2.915 14.049 48.109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.941 5.236 12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed -17.060 7.404 -2.304 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 548.2474)
##
## Null deviance: 23744 on 39 degrees of freedom
## Residual deviance: 20833 on 38 degrees of freedom
## AIC: 369.73
##
## Number of Fisher Scoring iterations: 2
# Diagnostic plots for the grazing-only model fitted with glm().
plot(fglm1)




# Intercept-only (null) linear model for fruit; its single coefficient is the
# fitted intercept.
fg1_1 <- lm(ipomopsis$fruit ~ 1, data = ipomopsis)
# Use the coef() accessor rather than `$coef`, which only works through
# partial matching of the `coefficients` component name.
coef(fg1_1)
## (Intercept)
## 59.4105
# Diagnostic plots for the intercept-only linear model fg1_1.
plot(fg1_1)


## hat values (leverages) are all = 0.025
## and there are no factor predictors; no plot no. 5

# Intercept-only model for fruit fitted with glm() and an explicit gaussian
# family, then summarised.
fglm1_1 <- glm(
  formula = ipomopsis$fruit ~ 1,
  family = gaussian,
  data = ipomopsis
)
summary(fglm1_1)
##
## Call:
## glm(formula = ipomopsis$fruit ~ 1, family = gaussian, data = ipomopsis)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -44.681 -18.263 1.464 16.777 56.639
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.411 3.901 15.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 608.8163)
##
## Null deviance: 23744 on 39 degrees of freedom
## Residual deviance: 23744 on 39 degrees of freedom
## AIC: 372.96
##
## Number of Fisher Scoring iterations: 2
# Diagnostic plots for the intercept-only model fitted with glm().
plot(fglm1_1)


## hat values (leverages) are all = 0.025
## and there are no factor predictors; no plot no. 5

# D using ancova
# Refit the grazing-only model and print its analysis-of-variance table.
fg1 <- lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
anova(object = fg1)
## Analysis of Variance Table
##
## Response: ipomopsis$fruit
## Df Sum Sq Mean Sq F value Pr(>F)
## ipomopsis$grazing 1 2910.4 2910.44 5.3086 0.02678 *
## Residuals 38 20833.4 548.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table and fit statistics for the grazing-only model.
summary(fg1)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.991 -18.028 2.915 14.049 48.109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.941 5.236 12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed -17.060 7.404 -2.304 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared: 0.1226, Adjusted R-squared: 0.09949
## F-statistic: 5.309 on 1 and 38 DF, p-value: 0.02678
# Diagnostic plots for the grazing-only linear model fg1.
plot(fg1)




# Additive ANCOVA model: grazing effect on fruit with root as a covariate
# (no interaction term).
fg2 <- lm(formula = ipomopsis$fruit ~ ipomopsis$grazing + ipomopsis$root)
summary(fg2)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing + ipomopsis$root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1920 -2.8224 0.3223 3.9144 17.3290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -127.829 9.664 -13.23 1.35e-15 ***
## ipomopsis$grazingUngrazed 36.103 3.357 10.75 6.11e-13 ***
## ipomopsis$root 23.560 1.149 20.51 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared: 0.9291, Adjusted R-squared: 0.9252
## F-statistic: 242.3 on 2 and 37 DF, p-value: < 2.2e-16
# Diagnostic plots for the additive grazing + root model fg2.
plot(fg2)




# Scatter plot with fruit on the x-axis and root on the y-axis.
plot(x = ipomopsis$fruit, y = ipomopsis$root)

# Simple regression of fruit on root, ignoring grazing.
fg3 <- lm(formula = ipomopsis$fruit ~ ipomopsis$root)
summary(fg3)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.3844 -10.4447 -0.7574 10.7606 23.7556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -41.286 10.723 -3.850 0.000439 ***
## ipomopsis$root 14.022 1.463 9.584 1.1e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.52 on 38 degrees of freedom
## Multiple R-squared: 0.7073, Adjusted R-squared: 0.6996
## F-statistic: 91.84 on 1 and 38 DF, p-value: 1.099e-11
# Put the diagnostic plots for the root-only model on one 2 x 2 page.
par(mfrow = c(2, 2))
plot(fg3)

# Model with root, grazing and their interaction (root listed first).
fg4 <- lm(formula = ipomopsis$fruit ~ ipomopsis$root * ipomopsis$grazing)
summary(fg4)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$root * ipomopsis$grazing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3177 -2.8320 0.1247 3.8511 17.1313
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -125.173 12.811 -9.771
## ipomopsis$root 23.240 1.531 15.182
## ipomopsis$grazingUngrazed 30.806 16.842 1.829
## ipomopsis$root:ipomopsis$grazingUngrazed 0.756 2.354 0.321
## Pr(>|t|)
## (Intercept) 1.15e-11 ***
## ipomopsis$root < 2e-16 ***
## ipomopsis$grazingUngrazed 0.0757 .
## ipomopsis$root:ipomopsis$grazingUngrazed 0.7500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared: 0.9293, Adjusted R-squared: 0.9234
## F-statistic: 157.6 on 3 and 36 DF, p-value: < 2.2e-16
# Diagnostic plots for the root * grazing model on a 2 x 2 page.
par(mfrow = c(2, 2))
plot(fg4)

# In the root * grazing model the interaction term is not significant
# (p = 0.75); root is the highly significant term.
# Same model with the two terms written in the opposite order.
fg5 <- lm(formula = ipomopsis$fruit ~ ipomopsis$grazing * ipomopsis$root)
summary(fg5)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing * ipomopsis$root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3177 -2.8320 0.1247 3.8511 17.1313
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -125.173 12.811 -9.771
## ipomopsis$grazingUngrazed 30.806 16.842 1.829
## ipomopsis$root 23.240 1.531 15.182
## ipomopsis$grazingUngrazed:ipomopsis$root 0.756 2.354 0.321
## Pr(>|t|)
## (Intercept) 1.15e-11 ***
## ipomopsis$grazingUngrazed 0.0757 .
## ipomopsis$root < 2e-16 ***
## ipomopsis$grazingUngrazed:ipomopsis$root 0.7500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared: 0.9293, Adjusted R-squared: 0.9234
## F-statistic: 157.6 on 3 and 36 DF, p-value: < 2.2e-16
# Diagnostic plots for the grazing * root model.
plot(fg5)

# Interaction-only model: a shared intercept and one root slope per grazing
# level, with no separate grazing main effect.
fg6 <- lm(formula = ipomopsis$fruit ~ ipomopsis$grazing:ipomopsis$root)
summary(fg6)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing:ipomopsis$root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.4030 -3.0094 0.1571 4.5053 15.5703
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -107.348 8.576 -12.52
## ipomopsis$grazingGrazed:ipomopsis$root 21.126 1.035 20.42
## ipomopsis$grazingUngrazed:ipomopsis$root 26.099 1.413 18.47
## Pr(>|t|)
## (Intercept) 7.22e-15 ***
## ipomopsis$grazingGrazed:ipomopsis$root < 2e-16 ***
## ipomopsis$grazingUngrazed:ipomopsis$root < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.044 on 37 degrees of freedom
## Multiple R-squared: 0.9227, Adjusted R-squared: 0.9185
## F-statistic: 220.8 on 2 and 37 DF, p-value: < 2.2e-16
# Diagnostic plots for the grazing:root interaction-only model.
plot(fg6)

# The order in which the terms of an interaction are written does not matter:
# fg4 and fg5 give identical estimates, and root stays the most significant term.
# Close the current graphics device.
dev.off()
## null device
## 1
# Analysis-of-variance table for the grazing-only model fg1.
anova(fg1)
## Analysis of Variance Table
##
## Response: ipomopsis$fruit
## Df Sum Sq Mean Sq F value Pr(>F)
## ipomopsis$grazing 1 2910.4 2910.44 5.3086 0.02678 *
## Residuals 38 20833.4 548.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): plot() on an anova table plots the columns of the table
# itself, not the fitted model -- confirm these three plots are wanted.
plot(anova(fg1))
plot(anova(fg2))
plot(anova(fg3))
# Diagnostic plots for the additive grazing + root model.
plot(fg2)
# Scatter of root against fruit with its least-squares line. plot() returns
# NULL, so abline(plot(...)) has nothing to draw; the line must come from a
# model fitted with the same y ~ x orientation as the scatter.
plot(ipomopsis$fruit, ipomopsis$root)
abline(lm(ipomopsis$root ~ ipomopsis$fruit))
# Null hypothesis: grazing has no effect on fruit production.
# Once root is included in the model together with grazing, the p-values
# become highly significant.
# fg6 is re-bound here to the grazing-only model (it previously held the
# grazing:root model); the summary reuses that fit instead of refitting it.
fg6 <- lm(ipomopsis$fruit ~ ipomopsis$grazing)
summary(fg6)
##
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.991 -18.028 2.915 14.049 48.109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.941 5.236 12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed -17.060 7.404 -2.304 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared: 0.1226, Adjusted R-squared: 0.09949
## F-statistic: 5.309 on 1 and 38 DF, p-value: 0.02678
# Arrange any following plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
# Model root size as a function of grazing treatment and summarise it.
fg7 <- lm(formula = ipomopsis$root ~ ipomopsis$grazing)
summary(fg7)
##
## Call:
## lm(formula = ipomopsis$root ~ ipomopsis$grazing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2034 -0.6504 0.1836 0.6688 1.9436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.3094 0.2131 39.001 < 2e-16 ***
## ipomopsis$grazingUngrazed -2.2565 0.3013 -7.489 5.41e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9528 on 38 degrees of freedom
## Multiple R-squared: 0.5961, Adjusted R-squared: 0.5855
## F-statistic: 56.09 on 1 and 38 DF, p-value: 5.411e-09
# When root is modelled against grazing, the grazing effect is far more
# significant (p = 5.41e-09) than it was for fruit (p = 0.0268).
#QUESTION 2
library (MASS)
library(asbio)
# Read the whitespace-separated BCI plant table, plot the whole data frame
# and show its first rows.
my.data <- read.table(file = "BCI.plant.txt", sep = "")
plot(x = my.data)
head(x = my.data)
## site.no. UTM.E UTM.N precip elev age geology
## 1 p1 614856.9 1031786 2993.2 20 2 Tct
## 2 p2 613985.4 1030725 3072.0 100 3 Tc
## 3 p3 614674.3 1023802 3007.4 180 1 Tc
## 4 p4 615018.6 1023548 2999.8 180 1 Tc
## 5 p5 637157.8 1012428 2414.3 40 2 Tgo
## 6 p6 637983.7 1012395 2393.7 30 2 Tgo
## Alchornea.costaricensis Anacardium.excelsum
## 1 0 1
## 2 0 1
## 3 1 1
## 4 1 1
## 5 0 1
## 6 0 0