# HENV 665Q - Quantitative Research Methods Assignment 3
# Juan Buzzetti ID 9485651
# Part 1
# Importing dataset:
wce <- read.csv("/Users/Buzz/Desktop/HENV665Q/Lab8/wced.csv", header = T)
attach(wce)
wce3r <- subset(wce, Region != "S. America" & Region != "Africa")
names(wce3r)
## [1] "City" "Year" "Population"
## [4] "Emission_total" "Emissions_percapita" "Region"
attach(wce3r)
## The following object(s) are masked from 'wce':
##
## City, Emission_total, Emissions_percapita, Population, Region,
## Year
View(wce3r)
# * * * QUESTION 1 * * *
kruskal.test(Emission_total ~ Region)
##
## Kruskal-Wallis rank sum test
##
## data: Emission_total by Region
## Kruskal-Wallis chi-squared = 12.11, df = 2, p-value = 0.002343
# Comparing the Kruskal-Wallis test to ANOVA it can be seen that the
# values of P differ. ANOVA yielded a P-Value of 0,0000942, while
# Krusal's was 0.0061. Both values of tests indicate that Region has a
# signficant effect on CO2 total emissions, however, the non-parametric
# test shows a less significant impact of regions over CO2 emissions.
# * * * QUESTION 2 * * *
logET <- log(Emission_total)
logmodel <- lm(logET ~ Population + Region + Population:Region)
summary.lm(logmodel)
##
## Call:
## lm(formula = logET ~ Population + Region + Population:Region)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0960 -0.3292 0.0576 0.3284 1.0199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.19e+00 7.52e-01 4.25 0.00023 ***
## Population 1.09e-07 5.84e-08 1.87 0.07260 .
## RegionEurope -1.20e+00 7.74e-01 -1.55 0.13209
## RegionN. America -8.83e-01 8.63e-01 -1.02 0.31502
## Population:RegionEurope 1.68e-07 7.48e-08 2.24 0.03347 *
## Population:RegionN. America 1.65e-07 9.03e-08 1.83 0.07877 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.576 on 27 degrees of freedom
## Multiple R-squared: 0.788, Adjusted R-squared: 0.749
## F-statistic: 20.1 on 5 and 27 DF, p-value: 2.39e-08
# The previous model had shown that there were some signficant effects
# between CO2 emissions and population (P-value = 6.13e-11) with the
# interaction term showing a marginal effect of European population over
# C0 total emissions. The new model ratifies both indicators, showing
# again that population has a significant effect over total CO2 emissions,
# and with a marginal interaction between European population and total
# emissions. If in this model we consider a level of significance of 0.1
# we will find that populations of North America and Asia have more
# significant effects over total emissions with Europe as the exception to
# the rule. This differs from what ANOVA yielded if we set the level of
# significance at 0.1 in the previous model, for all regions but N.America
# and interaction terms have effects on C02 total emissions.
# * * * QUESTION 3 * * *
SqET <- sqrt(Emission_total)
Sqmodel <- lm(SqET ~ Population + Region + Population:Region)
summary.lm(Sqmodel)
##
## Call:
## lm(formula = SqET ~ Population + Region + Population:Region)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4667 -0.7872 -0.0558 1.0616 2.1633
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.06e+00 1.76e+00 1.74 0.09365 .
## Population 5.70e-07 1.37e-07 4.16 0.00029 ***
## RegionEurope -4.13e-01 1.81e+00 -0.23 0.82154
## RegionN. America -1.69e-01 2.02e+00 -0.08 0.93419
## Population:RegionEurope 7.10e-08 1.75e-07 0.40 0.68885
## Population:RegionN. America 2.74e-07 2.12e-07 1.29 0.20682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.35 on 27 degrees of freedom
## Multiple R-squared: 0.858, Adjusted R-squared: 0.831
## F-statistic: 32.5 on 5 and 27 DF, p-value: 1.28e-10
# This transformation produced a different set of values. Under the Log
# model we found that population and the interaction term
# Population:RegionEurope had significant effects on CO2 total emissions,
# Square-root tranformation, though, lowered the significance of
# population and eliminated that of the interaction term. Also, in this
# model the R-Square increased from 0.793 to 0.861 denoting that it
# explains 86% of the variance in the data.
# * * * QUESTION 4 * * *
par(mfrow = c(2, 2))
original <- lm(Emission_total ~ Region + Population + Region + Population:Region)
hist(original$residuals, xlab = "Residuals", main = "Original Model")
hist(logmodel$residuals, xlab = "Residuals", main = "Log-transformed Model")
hist(Sqmodel$residuals, xlab = "Residuals", main = "Square-root-transformed Model")
# I think that the model that best satisfies the assumptions is the
# logarithmic model. The residuals histogram of the logarithmic model
# corresponds clearly to a bell-shaped curve and also the residuals range
# is smaller, going -1.5 to +1.5 which would indicate more constancy.
# * * * QUESTION 5 * * *
glmmodel <- glm(Emission_total ~ Population + Region + Population:Region, family = Gamma)
summary(glmmodel)
##
## Call:
## glm(formula = Emission_total ~ Population + Region + Population:Region,
## family = Gamma)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.253 -0.530 -0.192 0.390 0.998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.47e-02 1.10e-02 2.24 0.0333 *
## Population -1.11e-09 6.87e-10 -1.61 0.1191
## RegionEurope 3.89e-02 1.49e-02 2.62 0.0143 *
## RegionN. America 3.29e-02 2.52e-02 1.30 0.2037
## Population:RegionEurope -3.66e-09 1.27e-09 -2.88 0.0077 **
## Population:RegionN. America -4.25e-09 2.60e-09 -1.63 0.1139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.4569)
##
## Null deviance: 37.854 on 32 degrees of freedom
## Residual deviance: 13.434 on 27 degrees of freedom
## AIC: 293.7
##
## Number of Fisher Scoring iterations: 6
hist(glmmodel$residuals, xlab = "Residuals", main = "GLM (Gamma) Transformation Model")
# The gamma-transformed model shows standard errors significanly smaller,
# therefore, this model would be the best.
# * * * QUESTION 6 * * *
# All models show that in different degrees, either tranforming the data
# or not, population have a significant impact on CO2 total emissions.
# Furthermore, both original model and log-tranformed models yielded that
# in population in Europe has a greter impact on CO2 total emissions than
# population in other regions.
# * * * QUESTION 7 * * *
tiger <- read.table("/Users/Buzz/Desktop/HENV665Q/Lab8/tiger.txt", header = TRUE)
names(tiger)
## [1] "MUM_ID" "Birth_year" "LRS" "Birth_loc" "longevity"
attach(tiger)
View(tiger)
tigerMR <- lm(LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity)
summary(tigerMR)
##
## Call:
## lm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.17 -4.31 -1.67 2.60 19.82
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -809.8521 1073.8280 -0.75 0.45
## Birth_year 0.4107 0.5427 0.76 0.45
## Birth_loczoo_2 -0.1527 2.9816 -0.05 0.96
## Birth_loczoo_3 0.6771 3.0690 0.22 0.83
## Birth_loczoo_4 -2.2137 3.0516 -0.73 0.47
## longevity 104.3113 80.3638 1.30 0.20
## Birth_year:longevity -0.0525 0.0406 -1.29 0.20
##
## Residual standard error: 7.41 on 47 degrees of freedom
## Multiple R-squared: 0.181, Adjusted R-squared: 0.0764
## F-statistic: 1.73 on 6 and 47 DF, p-value: 0.135
tigerGLM <- glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity,
family = poisson)
summary(tigerGLM)
##
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity,
## family = poisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.069 -1.479 -0.716 0.846 4.680
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -54.43941 49.31873 -1.10 0.270
## Birth_year 0.02830 0.02493 1.14 0.256
## Birth_loczoo_2 -0.01853 0.12093 -0.15 0.878
## Birth_loczoo_3 0.06924 0.12297 0.56 0.573
## Birth_loczoo_4 -0.20098 0.12649 -1.59 0.112
## longevity 8.30399 3.57549 2.32 0.020 *
## Birth_year:longevity -0.00417 0.00181 -2.31 0.021 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 283.32 on 53 degrees of freedom
## Residual deviance: 229.68 on 47 degrees of freedom
## AIC: 456.2
##
## Number of Fisher Scoring iterations: 5
par(mfrow = c(1, 2))
hist(tigerMR$residuals, xlab = "Residuals", main = "Linear Regression Model")
hist(tigerGLM$residuals, xlab = "Residuals", main = "GLM with Poisson Error")
# When comparing the histograms it is evident that in the GLM model,
# values are skwed to the left, responding to a poisson distribution.
# Model summary for shows that residual deviance is greater than degrees
# of freedon, indicating that in the model there is overdispersion in
# values.
# * * * QUESTION 8 * * *
tigerQP <- glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity,
family = quasipoisson)
summary(tigerQP)
##
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity,
## family = quasipoisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.069 -1.479 -0.716 0.846 4.680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -54.43941 114.24897 -0.48 0.64
## Birth_year 0.02830 0.05775 0.49 0.63
## Birth_loczoo_2 -0.01853 0.28013 -0.07 0.95
## Birth_loczoo_3 0.06924 0.28486 0.24 0.81
## Birth_loczoo_4 -0.20098 0.29302 -0.69 0.50
## longevity 8.30399 8.28279 1.00 0.32
## Birth_year:longevity -0.00417 0.00419 -1.00 0.32
##
## (Dispersion parameter for quasipoisson family taken to be 5.366)
##
## Null deviance: 283.32 on 53 degrees of freedom
## Residual deviance: 229.68 on 47 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
# Removing interaction term
upd_minusINT <- update(tigerQP, . ~ . - Birth_year:longevity)
summary(upd_minusINT)
##
## Call:
## glm(formula = LRS ~ Birth_year + Birth_loc + longevity, family = quasipoisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.920 -1.579 -0.413 0.642 4.786
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.3914 28.4045 1.99 0.053 .
## Birth_year -0.0277 0.0144 -1.93 0.060 .
## Birth_loczoo_2 0.0391 0.2783 0.14 0.889
## Birth_loczoo_3 0.0890 0.2874 0.31 0.758
## Birth_loczoo_4 -0.1786 0.2971 -0.60 0.551
## longevity 0.0532 0.0270 1.97 0.054 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 5.486)
##
## Null deviance: 283.32 on 53 degrees of freedom
## Residual deviance: 235.00 on 48 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
anova(tigerQP, upd_minusINT, test = "F")
## Analysis of Deviance Table
##
## Model 1: LRS ~ Birth_year + Birth_loc + longevity + Birth_year:longevity
## Model 2: LRS ~ Birth_year + Birth_loc + longevity
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 47 230
## 2 48 235 -1 -5.33 0.99 0.32
# Removing variable Birth_loc
upd_minusBL <- update(upd_minusINT, . ~ . - Birth_loc)
summary(upd_minusBL)
##
## Call:
## glm(formula = LRS ~ Birth_year + longevity, family = quasipoisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.340 -1.521 -0.464 0.774 4.917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.0850 26.6036 2.07 0.043 *
## Birth_year -0.0270 0.0135 -2.01 0.050 *
## longevity 0.0494 0.0256 1.93 0.060 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 5.121)
##
## Null deviance: 283.32 on 53 degrees of freedom
## Residual deviance: 240.36 on 51 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
anova(upd_minusINT, upd_minusBL, test = "F")
## Analysis of Deviance Table
##
## Model 1: LRS ~ Birth_year + Birth_loc + longevity
## Model 2: LRS ~ Birth_year + longevity
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 48 235
## 2 51 240 -3 -5.35 0.33 0.81
# Removing variable Longevity
upd_minusLong <- update(upd_minusBL, . ~ . - longevity)
summary(upd_minusLong)
##
## Call:
## glm(formula = LRS ~ Birth_year, family = quasipoisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.982 -1.951 -0.926 1.223 5.381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.3054 25.9617 2.17 0.035 *
## Birth_year -0.0273 0.0131 -2.08 0.043 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 5.293)
##
## Null deviance: 283.32 on 53 degrees of freedom
## Residual deviance: 260.19 on 52 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
anova(upd_minusBL, upd_minusLong, test = "F")
## Analysis of Deviance Table
##
## Model 1: LRS ~ Birth_year + longevity
## Model 2: LRS ~ Birth_year
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 51 240
## 2 52 260 -1 -19.8 3.87 0.054 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# According to GLM, quasipoisson family, the best model is upd_minusBL
# (LRS ~ Birth_year) since it provides the lowest p-value, although
# marginally below the 0.05 confidence level.
# * * * QUESTION 9 * * *
# The result of our analysis show that depending on the analysis method
# chosen, factors influencing LRS vary greatly. In the linear model the
# analysis showed no significant effects of year of birth, location, or
# longevity over life reproductive success. However, in the linear model
# analysis, we did not take into account that the data was not normally
# distributed; therefore, other analysis methods needed to be implemented
# in order to ratify or rectify our assumptions. Our first attempt was to
# evaluate the model using a generalized linear model with the family
# Poisson. Still, this test yielded marginal lower p-values, indicating no
# significant effects of explanatory variables variables over response
# variable. But Poisson demonstrated that the were some overdispersion in
# the data; in consequence, it was necessary to deal with tha
# overdispersion using the Quasipoisson family in GLM. After comparing the
# results of different models and comparing the effects of interacion term
# and explanatory variables, we arrived to the conclusion that tigers' LRS
# is only influenced by year of birth.
# * * * QUESTION 10 * * *
par(mfrow = c(1, 1))
plot(LRS ~ Birth_year, pch = 18, xlab = "Birth of year", ylab = " Life Reproductive Success",
main = "Sumatra Female Tigers LRS")
d <- seq(1960, 2000)
pred <- predict(upd_minusLong, list(Birth_year = d), type = "response")
lines(d, pred)
# * # * # * # * # * # * # * # * # * # * # * # * # * # * # * # * #
# Part 2
sheep <- read.table("/Users/Buzz/Desktop/HENV665Q/Lab8/sheep.txt", header = T)
attach(sheep)
names(sheep)
## [1] "ID" "Age" "sex" "yr" "date" "yrbirth" "wt"
## [8] "rtHl" "rtHb" "lHl" "lHb"
View(sheep)
# * * * QUESTION 1 * * *
# The independent experimental unit are samples taken on sheeps at a
# different times over their lifetime, including different samples on a
# same sheep.
# * * * QUESTION 2 * * *
# The problem we had with the treatemend with made on this dataset was
# that we treated values as if they were independent of each other. As we
# were not dealing with a set of different individuals, with the linear
# model we did not take into consideration that some female sheeps might
# have been pregnant at the moment of the sampling, influencing our
# results considerably. The linear regression model assumed significant
# effects of explanatory variables over response variables that might have
# been less meaningfull. This makes the linear regression model more
# powerful than it is. This is called pseude-replication.
# * * * QUESTION 3 * * *
# There are two problems with averaging. First, averaging reduces the
# number of observations and, therefore, the degrees of freedom of our
# model. Second, when averaging, much of the variation among values is
# removed, which may conduct to incorrect assumptions on the results.
# * * * QUESTION 4 * * *
osheep <- lm(wt ~ sex + date, na.action = na.exclude)
summary(osheep)
##
## Call:
## lm(formula = wt ~ sex + date, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.831 -2.812 0.032 2.537 11.852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.095 1.189 16.89 < 2e-16 ***
## sex 5.069 0.691 7.33 7.3e-12 ***
## date 0.146 0.010 14.56 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.49 on 180 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.584, Adjusted R-squared: 0.58
## F-statistic: 126 on 2 and 180 DF, p-value: <2e-16
library(nlme)
library(lattice)
mxsheep1 <- lme(wt ~ sex + date, random = ~1 | ID, na.action = na.exclude, method = "ML")
summary(mxsheep1)
## Linear mixed-effects model fit by maximum likelihood
## Data: NULL
## AIC BIC logLik
## 945.7 961.8 -467.9
##
## Random effects:
## Formula: ~1 | ID
## (Intercept) Residual
## StdDev: 4.034 1.961
##
## Fixed effects: wt ~ sex + date
## Value Std.Error DF t-value p-value
## (Intercept) 19.041 1.5710 112 12.12 0
## sex 5.359 1.0512 68 5.10 0
## date 0.153 0.0047 112 32.45 0
## Correlation:
## (Intr) sex
## sex -0.930
## date -0.192 0.019
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.03418 -0.55096 0.02805 0.47680 2.22493
##
## Number of Observations: 183
## Number of Groups: 70
mxsheep2 <- update(mxsheep1, random = ~1 | ID/yrbirth, na.action = na.exclude,
method = "ML")
summary(mxsheep2)
## Linear mixed-effects model fit by maximum likelihood
## Data: NULL
## AIC BIC logLik
## 947.7 967 -467.9
##
## Random effects:
## Formula: ~1 | ID
## (Intercept)
## StdDev: 2.853
##
## Formula: ~1 | yrbirth %in% ID
## (Intercept) Residual
## StdDev: 2.853 1.961
##
## Fixed effects: wt ~ sex + date
## Value Std.Error DF t-value p-value
## (Intercept) 19.041 1.5710 112 12.12 0
## sex 5.359 1.0512 68 5.10 0
## date 0.153 0.0047 112 32.45 0
## Correlation:
## (Intr) sex
## sex -0.930
## date -0.192 0.019
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.03418 -0.55096 0.02805 0.47680 2.22493
##
## Number of Observations: 183
## Number of Groups:
## ID yrbirth %in% ID
## 70 70
anova(mxsheep1, mxsheep2)
## Model df AIC BIC logLik Test L.Ratio p-value
## mxsheep1 1 5 945.7 961.8 -467.9
## mxsheep2 2 6 947.7 967.0 -467.9 1 vs 2 2.274e-13 1
mxsheep3 <- update(mxsheep1, random = ~date | ID/sex, na.action = na.exclude,
method = "ML")
summary(mxsheep3)
## Linear mixed-effects model fit by maximum likelihood
## Data: NULL
## AIC BIC logLik
## 932.4 964.5 -456.2
##
## Random effects:
## Formula: ~date | ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 2.94543 (Intr)
## date 0.02808 -0.257
##
## Formula: ~date | sex %in% ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 2.94528 (Intr)
## date 0.02809 -0.257
## Residual 1.26222
##
## Fixed effects: wt ~ sex + date
## Value Std.Error DF t-value p-value
## (Intercept) 19.784 1.5418 112 12.832 0
## sex 4.956 1.0357 68 4.785 0
## date 0.150 0.0061 112 24.689 0
## Correlation:
## (Intr) sex
## sex -0.933
## date -0.153 0.007
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.9923 -0.4458 -0.0717 0.4054 1.9445
##
## Number of Observations: 183
## Number of Groups:
## ID sex %in% ID
## 70 70
anova(mxsheep2, mxsheep3)
## Model df AIC BIC logLik Test L.Ratio p-value
## mxsheep2 1 6 947.7 967.0 -467.9
## mxsheep3 2 10 932.4 964.5 -456.2 1 vs 2 23.32 1e-04
mxsheep4 <- update(mxsheep3, . ~ . - date, na.action = na.exclude, method = "ML")
summary(mxsheep4)
## Linear mixed-effects model fit by maximum likelihood
## Data: NULL
## AIC BIC logLik
## 1071 1100 -526.3
##
## Random effects:
## Formula: ~date | ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 4.5543 (Intr)
## date 0.1092 -0.783
##
## Formula: ~date | sex %in% ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 4.5580 (Intr)
## date 0.1093 -0.783
## Residual 1.1813
##
## Fixed effects: wt ~ sex
## Value Std.Error DF t-value p-value
## (Intercept) 25.311 1.533 113 16.513 0
## sex 4.779 1.039 68 4.599 0
## Correlation:
## (Intr)
## sex -0.944
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.52048 -0.38578 -0.03433 0.29185 2.09477
##
## Number of Observations: 183
## Number of Groups:
## ID sex %in% ID
## 70 70
anova(mxsheep3, mxsheep4)
## Model df AIC BIC logLik Test L.Ratio p-value
## mxsheep3 1 10 932.4 964.5 -456.2
## mxsheep4 2 9 1070.7 1099.6 -526.3 1 vs 2 140.3 <.0001
mxsheep5 <- update(mxsheep4, . ~ . - sex, na.action = na.exclude, method = "ML")
summary(mxsheep5)
## Linear mixed-effects model fit by maximum likelihood
## Data: NULL
## AIC BIC logLik
## 1087 1113 -535.5
##
## Random effects:
## Formula: ~date | ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 4.313 (Intr)
## date 0.109 -0.658
##
## Formula: ~date | sex %in% ID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 4.313 (Intr)
## date 0.109 -0.658
## Residual 1.177
##
## Fixed effects: wt ~ 1
## Value Std.Error DF t-value p-value
## (Intercept) 31.07 0.5788 113 53.68 0
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.467692 -0.327116 0.001173 0.304720 2.135781
##
## Number of Observations: 183
## Number of Groups:
## ID sex %in% ID
## 70 70
anova(mxsheep4, mxsheep5)
## Model df AIC BIC logLik Test L.Ratio p-value
## mxsheep4 1 9 1071 1100 -526.3
## mxsheep5 2 8 1087 1113 -535.5 1 vs 2 18.29 <.0001
anova(mxsheep1, mxsheep2, mxsheep3, mxsheep4, mxsheep5)
## Model df AIC BIC logLik Test L.Ratio p-value
## mxsheep1 1 5 945.7 961.8 -467.9
## mxsheep2 2 6 947.7 967.0 -467.9 1 vs 2 0.00 1e+00
## mxsheep3 3 10 932.4 964.5 -456.2 2 vs 3 23.32 1e-04
## mxsheep4 4 9 1070.7 1099.6 -526.3 3 vs 4 140.29 <.0001
## mxsheep5 5 8 1087.0 1112.6 -535.5 4 vs 5 18.29 <.0001
# * * * QUESTION 5 * * *
# Parameter estimates
# # # mxmodel3 (mixed model)
# Intercept 19.8
# Date Slope 0.15
# Sex Slope 4.96
# # # osheep (linear regression model)
# Intercept 20.1
# Date Slope 0.145
# Sex Slope 5.07
# Data shows that the parameters of the linear regression model are
# marginally higher than those of the mixed model. Also, in the mixed
# mode p-values = 0, which indicates that explanatory variables have
# significant effects on sheep's weight, While in the linear regression
# model values tended to zero , indicating a strong significance, in the
# mixed model effect model the effect of the explanatory variables on the
# response variable is absolutely significant.
# * * * QUESTION 6 * * *
plot(date, wt, col = c("orange", "green")[sex], main = "Effects of Date on Sheep's Weight",
xlab = "Date", ylab = "Weight")
legend(90, 25, c("Female", "Male"), pch = c(1), cex = 1, col = c("orange", "green"))
rgl <- seq(0, 120, 1)
male <- rep(2, 121)
datem <- 0.15 * (rgl) + 4.96 * (male) + 19.8
lines(rgl, datem, col = "green")
female <- rep(1, 121)
datef <- 0.15 * (rgl) + 4.96 * (female) + 19.8
lines(rgl, datef, col = "orange")