#Question 1  

#A) Null hypothesis: there is no difference between grazed and ungrazed plants,
# i.e. the mean is the same in all groups.
# Alternative hypothesis: the mean of at least one group differs.

library(asbio)
## Warning: package 'asbio' was built under R version 3.3.3
## Loading required package: tcltk
# Load the ipomopsis data frame into the global environment. The file is not
# attach()ed as well: that only put a second copy of the same object on the
# search path, masked by the one load() already created.
load("ipomopsis.RData")

# First rows and structure of the data.
head(ipomopsis)
##    root fruit  grazing
## 1 6.225 59.77 Ungrazed
## 2 6.487 60.98 Ungrazed
## 3 4.919 14.73 Ungrazed
## 4 5.130 19.28 Ungrazed
## 5 5.417 34.25 Ungrazed
## 6 5.359 35.53 Ungrazed
str(ipomopsis)
## 'data.frame':    40 obs. of  3 variables:
##  $ root   : num  6.22 6.49 4.92 5.13 5.42 ...
##  $ fruit  : num  59.8 61 14.7 19.3 34.2 ...
##  $ grazing: Factor w/ 2 levels "Grazed","Ungrazed": 2 2 2 2 2 2 2 2 2 2 ...
# Scatter of the numeric response against the grazing factor.
plot(ipomopsis$fruit, ipomopsis$grazing)

# Strip chart of fruit by grazing group. The `jitter` amount is only used when
# method = "jitter"; with the default method it was silently ignored.
stripchart(ipomopsis$fruit ~ ipomopsis$grazing, method = "jitter", jitter = 0.5)

# B)
# A scatter plot of a numeric variable against factor levels is not an
# informative visualization: clusters cannot be identified. The plot improves
# when another numeric variable, such as root, is included.


# Fruit by grazing group (factor on the x axis).
plot(ipomopsis$grazing, ipomopsis$fruit, las = 1)

# Vertical strip chart of the same comparison.
stripchart(ipomopsis$fruit ~ ipomopsis$grazing, vertical = TRUE, las = 1)

# Root against fruit, coloured by grazing group. All points are drawn in one
# call so the axis limits cover both groups (previously they were taken from
# the Grazed subset only), and no axis-label arguments are passed to points().
plot(ipomopsis$fruit, ipomopsis$root, xlab = 'Fruit', ylab = 'root', pch = 16,
     col = ifelse(ipomopsis$grazing == 'Grazed', 'green', 'red'))
legend('topleft', legend = c('Grazed', 'Ungrazed'), pch = 16,
       col = c('green', 'red'))

# C) Centering the concomitant variable

# Linear model of fruit explained by the grazing factor alone.
fg1 <- lm(ipomopsis$fruit ~ ipomopsis$grazing)
summary(fg1)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.991 -18.028   2.915  14.049  48.109 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 67.941      5.236  12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed  -17.060      7.404  -2.304   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared:  0.1226, Adjusted R-squared:  0.09949 
## F-statistic: 5.309 on 1 and 38 DF,  p-value: 0.02678
# Diagnostic plots for the grazing-only linear model.
plot(fg1)

# The same model fitted through glm() with its default family.
fglm1 <- glm(ipomopsis$fruit ~ ipomopsis$grazing)
summary(fglm1)
## 
## Call:
## glm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -52.990  -18.028    2.915   14.049   48.109  
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 67.941      5.236  12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed  -17.060      7.404  -2.304   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 548.2474)
## 
##     Null deviance: 23744  on 39  degrees of freedom
## Residual deviance: 20833  on 38  degrees of freedom
## AIC: 369.73
## 
## Number of Fisher Scoring iterations: 2
# Diagnostic plots for the grazing-only glm.
plot(fglm1)

# Intercept-only (null) linear model; its single coefficient is printed.
fg1_1 <- lm(ipomopsis$fruit ~ 1, data = ipomopsis)
coef(fg1_1)
## (Intercept) 
##     59.4105
# Diagnostic plots for the intercept-only linear model.
plot(fg1_1)

## hat values (leverages) are all = 0.025
##  and there are no factor predictors; no plot no. 5

# Intercept-only model fitted as a gaussian glm.
fglm1_1 <- glm(ipomopsis$fruit ~ 1, family = gaussian, data = ipomopsis)
summary(fglm1_1)
## 
## Call:
## glm(formula = ipomopsis$fruit ~ 1, family = gaussian, data = ipomopsis)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -44.681  -18.263    1.464   16.777   56.639  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   59.411      3.901   15.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 608.8163)
## 
##     Null deviance: 23744  on 39  degrees of freedom
## Residual deviance: 23744  on 39  degrees of freedom
## AIC: 372.96
## 
## Number of Fisher Scoring iterations: 2
# Diagnostic plots for the intercept-only glm.
plot(fglm1_1)

## hat values (leverages) are all = 0.025
##  and there are no factor predictors; no plot no. 5

# D using ancova

# Grazing-only model again, this time with its ANOVA table.
fg1 <- lm(ipomopsis$fruit ~ ipomopsis$grazing)
anova(fg1)
## Analysis of Variance Table
## 
## Response: ipomopsis$fruit
##                   Df  Sum Sq Mean Sq F value  Pr(>F)  
## ipomopsis$grazing  1  2910.4 2910.44  5.3086 0.02678 *
## Residuals         38 20833.4  548.25                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fg1)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.991 -18.028   2.915  14.049  48.109 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 67.941      5.236  12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed  -17.060      7.404  -2.304   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared:  0.1226, Adjusted R-squared:  0.09949 
## F-statistic: 5.309 on 1 and 38 DF,  p-value: 0.02678
# Diagnostic plots for the grazing-only model.
plot(fg1)

# Additive model: grazing plus root as a covariate.
fg2 <- lm(ipomopsis$fruit ~ ipomopsis$grazing + ipomopsis$root)
summary(fg2)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing + ipomopsis$root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.1920  -2.8224   0.3223   3.9144  17.3290 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -127.829      9.664  -13.23 1.35e-15 ***
## ipomopsis$grazingUngrazed   36.103      3.357   10.75 6.11e-13 ***
## ipomopsis$root              23.560      1.149   20.51  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared:  0.9291, Adjusted R-squared:  0.9252 
## F-statistic: 242.3 on 2 and 37 DF,  p-value: < 2.2e-16
# Diagnostic plots for the additive model.
plot(fg2)

# Raw relationship between the two numeric variables.
plot(ipomopsis$fruit, ipomopsis$root)

# Fruit explained by root alone.
fg3 <- lm(ipomopsis$fruit ~ ipomopsis$root)
summary(fg3)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.3844 -10.4447  -0.7574  10.7606  23.7556 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -41.286     10.723  -3.850 0.000439 ***
## ipomopsis$root   14.022      1.463   9.584  1.1e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.52 on 38 degrees of freedom
## Multiple R-squared:  0.7073, Adjusted R-squared:  0.6996 
## F-statistic: 91.84 on 1 and 38 DF,  p-value: 1.099e-11
# Show the four diagnostic plots of the root-only model on one page.
par(mfrow = c(2, 2))
plot(fg3)

# Full model: root, grazing and their interaction.
fg4 <- lm(ipomopsis$fruit ~ ipomopsis$root * ipomopsis$grazing)
summary(fg4)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$root * ipomopsis$grazing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3177  -2.8320   0.1247   3.8511  17.1313 
## 
## Coefficients:
##                                          Estimate Std. Error t value
## (Intercept)                              -125.173     12.811  -9.771
## ipomopsis$root                             23.240      1.531  15.182
## ipomopsis$grazingUngrazed                  30.806     16.842   1.829
## ipomopsis$root:ipomopsis$grazingUngrazed    0.756      2.354   0.321
##                                          Pr(>|t|)    
## (Intercept)                              1.15e-11 ***
## ipomopsis$root                            < 2e-16 ***
## ipomopsis$grazingUngrazed                  0.0757 .  
## ipomopsis$root:ipomopsis$grazingUngrazed   0.7500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared:  0.9293, Adjusted R-squared:  0.9234 
## F-statistic: 157.6 on 3 and 36 DF,  p-value: < 2.2e-16
# Four diagnostic plots of the interaction model on one page.
par(mfrow = c(2, 2))
plot(fg4)

# In the root * grazing model the root term is highly significant
# (p < 2e-16), while the root:grazing interaction is not (p = 0.75).

# Same model with the two terms written in the opposite order.
fg5 <- lm(ipomopsis$fruit ~ ipomopsis$grazing * ipomopsis$root)
summary(fg5)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing * ipomopsis$root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3177  -2.8320   0.1247   3.8511  17.1313 
## 
## Coefficients:
##                                          Estimate Std. Error t value
## (Intercept)                              -125.173     12.811  -9.771
## ipomopsis$grazingUngrazed                  30.806     16.842   1.829
## ipomopsis$root                             23.240      1.531  15.182
## ipomopsis$grazingUngrazed:ipomopsis$root    0.756      2.354   0.321
##                                          Pr(>|t|)    
## (Intercept)                              1.15e-11 ***
## ipomopsis$grazingUngrazed                  0.0757 .  
## ipomopsis$root                            < 2e-16 ***
## ipomopsis$grazingUngrazed:ipomopsis$root   0.7500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared:  0.9293, Adjusted R-squared:  0.9234 
## F-statistic: 157.6 on 3 and 36 DF,  p-value: < 2.2e-16
# Diagnostic plots for the reordered interaction model.
plot(fg5)

# Interaction-only model: a separate root slope per grazing level, with no
# main-effect terms.
fg6 <- lm(ipomopsis$fruit ~ ipomopsis$grazing:ipomopsis$root)
summary(fg6)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing:ipomopsis$root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.4030  -3.0094   0.1571   4.5053  15.5703 
## 
## Coefficients:
##                                          Estimate Std. Error t value
## (Intercept)                              -107.348      8.576  -12.52
## ipomopsis$grazingGrazed:ipomopsis$root     21.126      1.035   20.42
## ipomopsis$grazingUngrazed:ipomopsis$root   26.099      1.413   18.47
##                                          Pr(>|t|)    
## (Intercept)                              7.22e-15 ***
## ipomopsis$grazingGrazed:ipomopsis$root    < 2e-16 ***
## ipomopsis$grazingUngrazed:ipomopsis$root  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.044 on 37 degrees of freedom
## Multiple R-squared:  0.9227, Adjusted R-squared:  0.9185 
## F-statistic: 220.8 on 2 and 37 DF,  p-value: < 2.2e-16
# Diagnostic plots for the interaction-only model.
plot(fg6)

# Root stays the most significant term whichever way the interaction is
# written: swapping the order of the terms (fg4 vs fg5) gives the same
# estimates and p-values, only the row order of the table changes.

# Close the graphics device; the next plot opens a fresh one with default
# settings.
dev.off()
## null device 
##           1
# ANOVA table of the grazing-only model.
anova(fg1)
## Analysis of Variance Table
## 
## Response: ipomopsis$fruit
##                   Df  Sum Sq Mean Sq F value  Pr(>F)  
## ipomopsis$grazing  1  2910.4 2910.44  5.3086 0.02678 *
## Residuals         38 20833.4  548.25                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): these plot the ANOVA tables themselves; as far as I can tell
# there is no anova-specific plot method in base R, so this presumably falls
# through to the generic data-frame plot. Confirm this is the intended figure.
plot(anova(fg1))

plot(anova(fg2))
plot(anova(fg3))

# Diagnostic plots for the additive model (grazing + root).
plot(fg2)

# plot() returns NULL, so wrapping it in abline() never drew a line (and
# abline(plot(fg2)) only re-drew the diagnostics a second time). Draw the
# scatter first, then add the least-squares line that matches its axes
# (root on y, fruit on x).
plot(ipomopsis$fruit, ipomopsis$root)
abline(lm(ipomopsis$root ~ ipomopsis$fruit))

# The null hypothesis is that grazing has no significant effect on fruit
# production (mg).

# Once root is included in the model alongside grazing, both terms have
# highly significant p-values.

# Grazing-only model once more. Note that this reassigns fg6, replacing the
# interaction-only fit stored under that name earlier in the script.
fg6 <- lm(ipomopsis$fruit ~ ipomopsis$grazing)
summary(fg6)
## 
## Call:
## lm(formula = ipomopsis$fruit ~ ipomopsis$grazing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.991 -18.028   2.915  14.049  48.109 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 67.941      5.236  12.976 1.54e-15 ***
## ipomopsis$grazingUngrazed  -17.060      7.404  -2.304   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.41 on 38 degrees of freedom
## Multiple R-squared:  0.1226, Adjusted R-squared:  0.09949 
## F-statistic: 5.309 on 1 and 38 DF,  p-value: 0.02678
# NOTE(review): this switches to a 2 x 2 plot layout, but no plot call follows
# before the next model. Presumably a diagnostic plot was intended; confirm.
par(mfrow = c(2, 2))

# Root explained by the grazing factor.
fg7 <- lm(ipomopsis$root ~ ipomopsis$grazing)
summary(fg7)
## 
## Call:
## lm(formula = ipomopsis$root ~ ipomopsis$grazing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2034 -0.6504  0.1836  0.6688  1.9436 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 8.3094     0.2131  39.001  < 2e-16 ***
## ipomopsis$grazingUngrazed  -2.2565     0.3013  -7.489 5.41e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9528 on 38 degrees of freedom
## Multiple R-squared:  0.5961, Adjusted R-squared:  0.5855 
## F-statistic: 56.09 on 1 and 38 DF,  p-value: 5.411e-09
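# When root is modelled against grazing alone, the grazing effect is far more
# significant (p = 5.41e-09) than in the fruit ~ grazing model (p = 0.0268),
# i.e. the p-value is much smaller.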


#QUESTION 2

library (MASS)
library(asbio)
# Read the whitespace-separated BCI plant table, then take a first look at it:
# a plot of the whole data frame and its first rows.
my.data <- read.table("BCI.plant.txt", sep = "")
plot(my.data)
head(my.data)
##   site.no.    UTM.E   UTM.N precip elev age geology
## 1       p1 614856.9 1031786 2993.2   20   2     Tct
## 2       p2 613985.4 1030725 3072.0  100   3      Tc
## 3       p3 614674.3 1023802 3007.4  180   1      Tc
## 4       p4 615018.6 1023548 2999.8  180   1      Tc
## 5       p5 637157.8 1012428 2414.3   40   2     Tgo
## 6       p6 637983.7 1012395 2393.7   30   2     Tgo
##   Alchornea.costaricensis Anacardium.excelsum
## 1                       0                   1
## 2                       0                   1
## 3                       1                   1
## 4                       1                   1
## 5                       0                   1
## 6                       0                   0