# HENV 665Q - Quantitative Research Methods Assignment 3

# Juan Buzzetti ID 9485651

# Part 1

# Importing dataset:
wce <- read.csv("/Users/Buzz/Desktop/HENV665Q/Lab8/wced.csv", header = T)
attach(wce)
wce3r <- subset(wce, Region != "S. America" & Region != "Africa")
names(wce3r)

## [1] "City"                "Year"                "Population"         
## [4] "Emission_total"      "Emissions_percapita" "Region"

attach(wce3r)

## The following object(s) are masked from 'wce':
## 
##     City, Emission_total, Emissions_percapita, Population, Region,
##     Year

View(wce3r)

# * * * QUESTION 1 * * *

kruskal.test(Emission_total ~ Region)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Emission_total by Region 
## Kruskal-Wallis chi-squared = 12.11, df = 2, p-value = 0.002343


# Comparing the Kruskal-Wallis test to ANOVA it can be seen that the
# values of P differ.  ANOVA yielded a P-Value of 0,0000942, while
# Krusal's was 0.0061.  Both values of tests indicate that Region has a
# signficant effect on CO2 total emissions, however, the non-parametric
# test shows a less significant impact of regions over CO2 emissions.


# * * * QUESTION 2 * * *

logET <- log(Emission_total)
logmodel <- lm(logET ~ Population + Region + Population:Region)
summary.lm(logmodel)

## 
## Call:
## lm(formula = logET ~ Population + Region + Population:Region)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0960 -0.3292  0.0576  0.3284  1.0199 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  3.19e+00   7.52e-01    4.25  0.00023 ***
## Population                   1.09e-07   5.84e-08    1.87  0.07260 .  
## RegionEurope                -1.20e+00   7.74e-01   -1.55  0.13209    
## RegionN. America            -8.83e-01   8.63e-01   -1.02  0.31502    
## Population:RegionEurope      1.68e-07   7.48e-08    2.24  0.03347 *  
## Population:RegionN. America  1.65e-07   9.03e-08    1.83  0.07877 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.576 on 27 degrees of freedom
## Multiple R-squared: 0.788,   Adjusted R-squared: 0.749 
## F-statistic: 20.1 on 5 and 27 DF,  p-value: 2.39e-08


# The previous model had shown that there were some signficant effects
# between CO2 emissions and population (P-value = 6.13e-11) with the
# interaction term showing a marginal effect of European population over
# C0 total emissions.  The new model ratifies both indicators, showing
# again that population has a significant effect over total CO2 emissions,
# and with a marginal interaction between European population and total
# emissions.  If in this model we consider a level of significance of 0.1
# we will find that populations of North America and Asia have more
# significant effects over total emissions with Europe as the exception to
# the rule.  This differs from what ANOVA yielded if we set the level of
# significance at 0.1 in the previous model, for all regions but N.America
# and interaction terms have effects on C02 total emissions.


# * * * QUESTION 3 * * *

SqET <- sqrt(Emission_total)
Sqmodel <- lm(SqET ~ Population + Region + Population:Region)
summary.lm(Sqmodel)

## 
## Call:
## lm(formula = SqET ~ Population + Region + Population:Region)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4667 -0.7872 -0.0558  1.0616  2.1633 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  3.06e+00   1.76e+00    1.74  0.09365 .  
## Population                   5.70e-07   1.37e-07    4.16  0.00029 ***
## RegionEurope                -4.13e-01   1.81e+00   -0.23  0.82154    
## RegionN. America            -1.69e-01   2.02e+00   -0.08  0.93419    
## Population:RegionEurope      7.10e-08   1.75e-07    0.40  0.68885    
## Population:RegionN. America  2.74e-07   2.12e-07    1.29  0.20682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 1.35 on 27 degrees of freedom
## Multiple R-squared: 0.858,   Adjusted R-squared: 0.831 
## F-statistic: 32.5 on 5 and 27 DF,  p-value: 1.28e-10


# This transformation produced a different set of values. Under the Log
# model we found that population and the interaction term
# Population:RegionEurope had significant effects on CO2 total emissions,
# Square-root tranformation, though, lowered the significance of
# population and eliminated that of the interaction term.  Also, in this
# model the R-Square increased from 0.793 to 0.861 denoting that it
# explains 86% of the variance in the data.


# * * * QUESTION 4 * * *

par(mfrow = c(2, 2))

original <- lm(Emission_total ~ Region + Population + Region + Population:Region)
hist(original$residuals, xlab = "Residuals", main = "Original Model")
hist(logmodel$residuals, xlab = "Residuals", main = "Log-transformed Model")
hist(Sqmodel$residuals, xlab = "Residuals", main = "Square-root-transformed Model")

# I think that the model that best satisfies the assumptions is the
# logarithmic model. The residuals histogram of the logarithmic model
# corresponds clearly to a bell-shaped curve and also the residuals range
# is smaller, going -1.5 to +1.5 which would indicate more constancy.


# * * * QUESTION 5 * * *

glmmodel <- glm(Emission_total ~ Population + Region + Population:Region, family = Gamma)
summary(glmmodel)

## 
## Call:
## glm(formula = Emission_total ~ Population + Region + Population:Region, 
##     family = Gamma)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.253  -0.530  -0.192   0.390   0.998  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  2.47e-02   1.10e-02    2.24   0.0333 * 
## Population                  -1.11e-09   6.87e-10   -1.61   0.1191   
## RegionEurope                 3.89e-02   1.49e-02    2.62   0.0143 * 
## RegionN. America             3.29e-02   2.52e-02    1.30   0.2037   
## Population:RegionEurope     -3.66e-09   1.27e-09   -2.88   0.0077 **
## Population:RegionN. America -4.25e-09   2.60e-09   -1.63   0.1139   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for Gamma family taken to be 0.4569)
## 
##     Null deviance: 37.854  on 32  degrees of freedom
## Residual deviance: 13.434  on 27  degrees of freedom
## AIC: 293.7
## 
## Number of Fisher Scoring iterations: 6

hist(glmmodel$residuals, xlab = "Residuals", main = "GLM (Gamma) Transformation Model")

plot of chunk unnamed-chunk-1


# The gamma-transformed model shows standard errors significanly smaller,
# therefore, this model would be the best.


# * * * QUESTION 6 * * *

# All models show that in different degrees, either tranforming the data
# or not, population have a significant impact on CO2 total emissions.
# Furthermore, both original model and log-tranformed models yielded that
# in population in Europe has a greter impact on CO2 total emissions than
# population in other regions.


# * * * QUESTION 7 * * *

tiger <- read.table("/Users/Buzz/Desktop/HENV665Q/Lab8/tiger.txt", header = TRUE)
names(tiger)

## [1] "MUM_ID"     "Birth_year" "LRS"        "Birth_loc"  "longevity"

attach(tiger)
View(tiger)

tigerMR <- lm(LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity)
summary(tigerMR)

## 
## Call:
## lm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11.17  -4.31  -1.67   2.60  19.82 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -809.8521  1073.8280   -0.75     0.45
## Birth_year              0.4107     0.5427    0.76     0.45
## Birth_loczoo_2         -0.1527     2.9816   -0.05     0.96
## Birth_loczoo_3          0.6771     3.0690    0.22     0.83
## Birth_loczoo_4         -2.2137     3.0516   -0.73     0.47
## longevity             104.3113    80.3638    1.30     0.20
## Birth_year:longevity   -0.0525     0.0406   -1.29     0.20
## 
## Residual standard error: 7.41 on 47 degrees of freedom
## Multiple R-squared: 0.181,   Adjusted R-squared: 0.0764 
## F-statistic: 1.73 on 6 and 47 DF,  p-value: 0.135


tigerGLM <- glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity, 
    family = poisson)
summary(tigerGLM)

## 
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity, 
##     family = poisson)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -4.069  -1.479  -0.716   0.846   4.680  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -54.43941   49.31873   -1.10    0.270  
## Birth_year             0.02830    0.02493    1.14    0.256  
## Birth_loczoo_2        -0.01853    0.12093   -0.15    0.878  
## Birth_loczoo_3         0.06924    0.12297    0.56    0.573  
## Birth_loczoo_4        -0.20098    0.12649   -1.59    0.112  
## longevity              8.30399    3.57549    2.32    0.020 *
## Birth_year:longevity  -0.00417    0.00181   -2.31    0.021 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 283.32  on 53  degrees of freedom
## Residual deviance: 229.68  on 47  degrees of freedom
## AIC: 456.2
## 
## Number of Fisher Scoring iterations: 5


par(mfrow = c(1, 2))
hist(tigerMR$residuals, xlab = "Residuals", main = "Linear Regression Model")
hist(tigerGLM$residuals, xlab = "Residuals", main = "GLM with Poisson Error")

plot of chunk unnamed-chunk-1


# When comparing the histograms it is evident that in the GLM model,
# values are skwed to the left, responding to a poisson distribution.
# Model summary for shows that residual deviance is greater than degrees
# of freedon, indicating that in the model there is overdispersion in
# values.


# * * * QUESTION 8 * * *

tigerQP <- glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity, 
    family = quasipoisson)
summary(tigerQP)

## 
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity, 
##     family = quasipoisson)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -4.069  -1.479  -0.716   0.846   4.680  
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -54.43941  114.24897   -0.48     0.64
## Birth_year             0.02830    0.05775    0.49     0.63
## Birth_loczoo_2        -0.01853    0.28013   -0.07     0.95
## Birth_loczoo_3         0.06924    0.28486    0.24     0.81
## Birth_loczoo_4        -0.20098    0.29302   -0.69     0.50
## longevity              8.30399    8.28279    1.00     0.32
## Birth_year:longevity  -0.00417    0.00419   -1.00     0.32
## 
## (Dispersion parameter for quasipoisson family taken to be 5.366)
## 
##     Null deviance: 283.32  on 53  degrees of freedom
## Residual deviance: 229.68  on 47  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5


# Removing interaction term
upd_minusINT <- update(tigerQP, . ~ . - Birth_year:longevity)
summary(upd_minusINT)

## 
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity, family = quasipoisson)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -3.920  -1.579  -0.413   0.642   4.786  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     56.3914    28.4045    1.99    0.053 .
## Birth_year      -0.0277     0.0144   -1.93    0.060 .
## Birth_loczoo_2   0.0391     0.2783    0.14    0.889  
## Birth_loczoo_3   0.0890     0.2874    0.31    0.758  
## Birth_loczoo_4  -0.1786     0.2971   -0.60    0.551  
## longevity        0.0532     0.0270    1.97    0.054 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for quasipoisson family taken to be 5.486)
## 
##     Null deviance: 283.32  on 53  degrees of freedom
## Residual deviance: 235.00  on 48  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

anova(tigerQP, upd_minusINT, test = "F")

## Analysis of Deviance Table
## 
## Model 1: LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity
## Model 2: LRS ~ Birth_year + Birth_loc + longevity
##   Resid. Df Resid. Dev Df Deviance    F Pr(>F)
## 1        47        230                        
## 2        48        235 -1    -5.33 0.99   0.32


# Removing variable Birth_loc
upd_minusBL <- update(upd_minusINT, . ~ . - Birth_loc)
summary(upd_minusBL)

## 
## Call:
## glm(formula = LRS ~ Birth_year + longevity, family = quasipoisson)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -4.340  -1.521  -0.464   0.774   4.917  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  55.0850    26.6036    2.07    0.043 *
## Birth_year   -0.0270     0.0135   -2.01    0.050 *
## longevity     0.0494     0.0256    1.93    0.060 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for quasipoisson family taken to be 5.121)
## 
##     Null deviance: 283.32  on 53  degrees of freedom
## Residual deviance: 240.36  on 51  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

anova(upd_minusINT, upd_minusBL, test = "F")

## Analysis of Deviance Table
## 
## Model 1: LRS ~ Birth_year + Birth_loc + longevity
## Model 2: LRS ~ Birth_year + longevity
##   Resid. Df Resid. Dev Df Deviance    F Pr(>F)
## 1        48        235                        
## 2        51        240 -3    -5.35 0.33   0.81


# Removing variable Longevity
upd_minusLong <- update(upd_minusBL, . ~ . - longevity)
summary(upd_minusLong)

## 
## Call:
## glm(formula = LRS ~ Birth_year, family = quasipoisson)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -3.982  -1.951  -0.926   1.223   5.381  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  56.3054    25.9617    2.17    0.035 *
## Birth_year   -0.0273     0.0131   -2.08    0.043 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for quasipoisson family taken to be 5.293)
## 
##     Null deviance: 283.32  on 53  degrees of freedom
## Residual deviance: 260.19  on 52  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

anova(upd_minusBL, upd_minusLong, test = "F")

## Analysis of Deviance Table
## 
## Model 1: LRS ~ Birth_year + longevity
## Model 2: LRS ~ Birth_year
##   Resid. Df Resid. Dev Df Deviance    F Pr(>F)  
## 1        51        240                          
## 2        52        260 -1    -19.8 3.87  0.054 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


# According to GLM, quasipoisson family, the best model is upd_minusBL
# (LRS ~ Birth_year) since it provides the lowest p-value, although
# marginally below the 0.05 confidence level.


# * * * QUESTION 9 * * *

# The result of our analysis show that depending on the analysis method
# chosen, factors influencing LRS vary greatly. In the linear model the
# analysis showed no significant effects of year of birth, location, or
# longevity over life reproductive success.  However, in the linear model
# analysis, we did not take into account that the data was not normally
# distributed; therefore, other analysis methods needed to be implemented
# in order to ratify or rectify our assumptions.  Our first attempt was to
# evaluate the model using a generalized linear model with the family
# Poisson. Still, this test yielded marginal lower p-values, indicating no
# significant effects of explanatory variables variables over response
# variable.  But Poisson demonstrated that the were some overdispersion in
# the data; in consequence, it was necessary to deal with tha
# overdispersion using the Quasipoisson family in GLM. After comparing the
# results of different models and comparing the effects of interacion term
# and explanatory variables, we arrived to the conclusion that tigers' LRS
# is only influenced by year of birth.


# * * * QUESTION 10 * * *

par(mfrow = c(1, 1))
plot(LRS ~ Birth_year, pch = 18, xlab = "Birth of year", ylab = " Life Reproductive Success", 
    main = "Sumatra Female Tigers LRS")
d <- seq(1960, 2000)
pred <- predict(upd_minusLong, list(Birth_year = d), type = "response")
lines(d, pred)

plot of chunk unnamed-chunk-1



# * # * # * # * # * # * # * # * # * # * # * # * # * # * # * # * #


# Part 2

sheep <- read.table("/Users/Buzz/Desktop/HENV665Q/Lab8/sheep.txt", header = T)
attach(sheep)
names(sheep)

##  [1] "ID"      "Age"     "sex"     "yr"      "date"    "yrbirth" "wt"     
##  [8] "rtHl"    "rtHb"    "lHl"     "lHb"

View(sheep)

# * * * QUESTION 1 * * *

# The independent experimental unit are samples taken on sheeps at a
# different times over their lifetime, including different samples on a
# same sheep.


# * * * QUESTION 2 * * *

# The problem we had with the treatemend with made on this dataset was
# that we treated values as if they were independent of each other.  As we
# were not dealing with a set of different individuals, with the linear
# model we did not take into consideration that some female sheeps might
# have been pregnant at the moment of the sampling, influencing our
# results considerably.  The linear regression model assumed significant
# effects of explanatory variables over response variables that might have
# been less meaningfull. This makes the linear regression model more
# powerful than it is. This is called pseude-replication.


# * * * QUESTION 3 * * *

# There are two problems with averaging. First, averaging reduces the
# number of observations and, therefore, the degrees of freedom of our
# model.  Second, when averaging, much of the variation among values is
# removed, which may conduct to incorrect assumptions on the results.


# * * * QUESTION 4 * * *

osheep <- lm(wt ~ sex + date, na.action = na.exclude)
summary(osheep)

## 
## Call:
## lm(formula = wt ~ sex + date, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.831  -2.812   0.032   2.537  11.852 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   20.095      1.189   16.89  < 2e-16 ***
## sex            5.069      0.691    7.33  7.3e-12 ***
## date           0.146      0.010   14.56  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 4.49 on 180 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared: 0.584,   Adjusted R-squared: 0.58 
## F-statistic:  126 on 2 and 180 DF,  p-value: <2e-16


library(nlme)
library(lattice)

mxsheep1 <- lme(wt ~ sex + date, random = ~1 | ID, na.action = na.exclude, method = "ML")
summary(mxsheep1)

## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##     AIC   BIC logLik
##   945.7 961.8 -467.9
## 
## Random effects:
##  Formula: ~1 | ID
##         (Intercept) Residual
## StdDev:       4.034    1.961
## 
## Fixed effects: wt ~ sex + date 
##              Value Std.Error  DF t-value p-value
## (Intercept) 19.041    1.5710 112   12.12       0
## sex          5.359    1.0512  68    5.10       0
## date         0.153    0.0047 112   32.45       0
##  Correlation: 
##      (Intr) sex   
## sex  -0.930       
## date -0.192  0.019
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -3.03418 -0.55096  0.02805  0.47680  2.22493 
## 
## Number of Observations: 183
## Number of Groups: 70


mxsheep2 <- update(mxsheep1, random = ~1 | ID/yrbirth, na.action = na.exclude, 
    method = "ML")
summary(mxsheep2)

## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##     AIC BIC logLik
##   947.7 967 -467.9
## 
## Random effects:
##  Formula: ~1 | ID
##         (Intercept)
## StdDev:       2.853
## 
##  Formula: ~1 | yrbirth %in% ID
##         (Intercept) Residual
## StdDev:       2.853    1.961
## 
## Fixed effects: wt ~ sex + date 
##              Value Std.Error  DF t-value p-value
## (Intercept) 19.041    1.5710 112   12.12       0
## sex          5.359    1.0512  68    5.10       0
## date         0.153    0.0047 112   32.45       0
##  Correlation: 
##      (Intr) sex   
## sex  -0.930       
## date -0.192  0.019
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -3.03418 -0.55096  0.02805  0.47680  2.22493 
## 
## Number of Observations: 183
## Number of Groups: 
##              ID yrbirth %in% ID 
##              70              70

anova(mxsheep1, mxsheep2)

##          Model df   AIC   BIC logLik   Test   L.Ratio p-value
## mxsheep1     1  5 945.7 961.8 -467.9                         
## mxsheep2     2  6 947.7 967.0 -467.9 1 vs 2 2.274e-13       1


mxsheep3 <- update(mxsheep1, random = ~date | ID/sex, na.action = na.exclude, 
    method = "ML")
summary(mxsheep3)

## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##     AIC   BIC logLik
##   932.4 964.5 -456.2
## 
## Random effects:
##  Formula: ~date | ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev  Corr  
## (Intercept) 2.94543 (Intr)
## date        0.02808 -0.257
## 
##  Formula: ~date | sex %in% ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev  Corr  
## (Intercept) 2.94528 (Intr)
## date        0.02809 -0.257
## Residual    1.26222       
## 
## Fixed effects: wt ~ sex + date 
##              Value Std.Error  DF t-value p-value
## (Intercept) 19.784    1.5418 112  12.832       0
## sex          4.956    1.0357  68   4.785       0
## date         0.150    0.0061 112  24.689       0
##  Correlation: 
##      (Intr) sex   
## sex  -0.933       
## date -0.153  0.007
## 
## Standardized Within-Group Residuals:
##     Min      Q1     Med      Q3     Max 
## -1.9923 -0.4458 -0.0717  0.4054  1.9445 
## 
## Number of Observations: 183
## Number of Groups: 
##          ID sex %in% ID 
##          70          70

anova(mxsheep2, mxsheep3)

##          Model df   AIC   BIC logLik   Test L.Ratio p-value
## mxsheep2     1  6 947.7 967.0 -467.9                       
## mxsheep3     2 10 932.4 964.5 -456.2 1 vs 2   23.32   1e-04


mxsheep4 <- update(mxsheep3, . ~ . - date, na.action = na.exclude, method = "ML")
summary(mxsheep4)

## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##    AIC  BIC logLik
##   1071 1100 -526.3
## 
## Random effects:
##  Formula: ~date | ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 4.5543 (Intr)
## date        0.1092 -0.783
## 
##  Formula: ~date | sex %in% ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 4.5580 (Intr)
## date        0.1093 -0.783
## Residual    1.1813       
## 
## Fixed effects: wt ~ sex 
##              Value Std.Error  DF t-value p-value
## (Intercept) 25.311     1.533 113  16.513       0
## sex          4.779     1.039  68   4.599       0
##  Correlation: 
##     (Intr)
## sex -0.944
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -1.52048 -0.38578 -0.03433  0.29185  2.09477 
## 
## Number of Observations: 183
## Number of Groups: 
##          ID sex %in% ID 
##          70          70

anova(mxsheep3, mxsheep4)

##          Model df    AIC    BIC logLik   Test L.Ratio p-value
## mxsheep3     1 10  932.4  964.5 -456.2                       
## mxsheep4     2  9 1070.7 1099.6 -526.3 1 vs 2   140.3  <.0001


mxsheep5 <- update(mxsheep4, . ~ . - sex, na.action = na.exclude, method = "ML")
summary(mxsheep5)

## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##    AIC  BIC logLik
##   1087 1113 -535.5
## 
## Random effects:
##  Formula: ~date | ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 4.313  (Intr)
## date        0.109  -0.658
## 
##  Formula: ~date | sex %in% ID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 4.313  (Intr)
## date        0.109  -0.658
## Residual    1.177        
## 
## Fixed effects: wt ~ 1 
##             Value Std.Error  DF t-value p-value
## (Intercept) 31.07    0.5788 113   53.68       0
## 
## Standardized Within-Group Residuals:
##       Min        Q1       Med        Q3       Max 
## -1.467692 -0.327116  0.001173  0.304720  2.135781 
## 
## Number of Observations: 183
## Number of Groups: 
##          ID sex %in% ID 
##          70          70

anova(mxsheep4, mxsheep5)

##          Model df  AIC  BIC logLik   Test L.Ratio p-value
## mxsheep4     1  9 1071 1100 -526.3                       
## mxsheep5     2  8 1087 1113 -535.5 1 vs 2   18.29  <.0001


anova(mxsheep1, mxsheep2, mxsheep3, mxsheep4, mxsheep5)

##          Model df    AIC    BIC logLik   Test L.Ratio p-value
## mxsheep1     1  5  945.7  961.8 -467.9                       
## mxsheep2     2  6  947.7  967.0 -467.9 1 vs 2    0.00   1e+00
## mxsheep3     3 10  932.4  964.5 -456.2 2 vs 3   23.32   1e-04
## mxsheep4     4  9 1070.7 1099.6 -526.3 3 vs 4  140.29  <.0001
## mxsheep5     5  8 1087.0 1112.6 -535.5 4 vs 5   18.29  <.0001


# * * * QUESTION 5 * * *

# Parameter estimates

# # # mxmodel3 (mixed model)

# Intercept 19.8

# Date Slope 0.15

# Sex Slope 4.96

# # # osheep (linear regression model)

# Intercept 20.1

# Date Slope 0.145

# Sex Slope 5.07

# Data shows that the parameters of the linear regression model are
# marginally higher than those of the mixed model.  Also, in the mixed
# mode p-values = 0, which indicates that explanatory variables have
# significant effects on sheep's weight, While in the linear regression
# model values tended to zero , indicating a strong significance, in the
# mixed model effect model the effect of the explanatory variables on the
# response variable is absolutely significant.


# * * * QUESTION 6 * * *

plot(date, wt, col = c("orange", "green")[sex], main = "Effects of Date on Sheep's Weight", 
    xlab = "Date", ylab = "Weight")
legend(90, 25, c("Female", "Male"), pch = c(1), cex = 1, col = c("orange", "green"))

rgl <- seq(0, 120, 1)
male <- rep(2, 121)
datem <- 0.15 * (rgl) + 4.96 * (male) + 19.8
lines(rgl, datem, col = "green")
female <- rep(1, 121)
datef <- 0.15 * (rgl) + 4.96 * (female) + 19.8
lines(rgl, datef, col = "orange")

plot of chunk unnamed-chunk-1