plot sepal length against sepal width
fit a regression model
## alpha beta sigma
## 1 6.526223 -0.2233611 0.1550809
The Normal Q-Q plot looks alright, but after looking at histograms it’s clear that sepal width is normally distributed and speal length is not. Also, the residuals of this model are structured. This model is not appropriate.
## Analysis of Variance Table
##
## Response: Sepal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Sepal.Width 1 1.412 1.41224 2.0744 0.1519
## Residuals 148 100.756 0.68078
plot the sepal length against species
produce a summary table for each species
summarySE(iris, measurevar="Sepal.Length", groupvars = "Species", na.rm = T)
## Species N Sepal.Length sd se ci
## 1 setosa 50 5.006 0.3524897 0.04984957 0.1001765
## 2 versicolor 50 5.936 0.5161711 0.07299762 0.1466942
## 3 virginica 50 6.588 0.6358796 0.08992695 0.1807150
##
## Call:
## lm(formula = Sepal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6880 -0.3285 -0.0060 0.3120 1.3120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0060 0.0728 68.762 < 2e-16 ***
## Speciesversicolor 0.9300 0.1030 9.033 8.77e-16 ***
## Speciesvirginica 1.5820 0.1030 15.366 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5148 on 147 degrees of freedom
## Multiple R-squared: 0.6187, Adjusted R-squared: 0.6135
## F-statistic: 119.3 on 2 and 147 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: Sepal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 63.212 31.606 119.26 < 2.2e-16 ***
## Residuals 147 38.956 0.265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M3
## alpha1 alpha2 alpha3 beta se
## 1 2.251393 1.458743 1.946817 0.8035609 0.438
M4
## alpha1 alpha2 alpha3 beta1 beta2 beta3 se
## 1 2.639001 0.9007335 1.267835 0.6904897 0.174588 0.2110448 0.4397
M3
M4
M3
## Analysis of Variance Table
##
## Response: Sepal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Sepal.Width 1 1.412 1.412 7.3628 0.00746 **
## Species 2 72.752 36.376 189.6512 < 2e-16 ***
## Residuals 146 28.004 0.192
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M4
## Analysis of Variance Table
##
## Response: Sepal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Sepal.Width 1 1.412 1.412 7.3030 0.007712 **
## Species 2 72.752 36.376 188.1091 < 2.2e-16 ***
## Sepal.Width:Species 2 0.157 0.079 0.4064 0.666777
## Residuals 144 27.846 0.193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: Sepal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## Sepal.Length 1 0.3913 0.3913 4.6851 0.03205 *
## Species 2 15.7225 7.8613 94.1304 < 2e-16 ***
## Residuals 146 12.1931 0.0835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: Sepal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## Sepal.Length 1 0.3913 0.3913 5.2757 0.02307 *
## Species 2 15.7225 7.8613 105.9948 < 2.2e-16 ***
## Sepal.Length:Species 2 1.5132 0.7566 10.2011 7.19e-05 ***
## Residuals 144 10.6800 0.0742
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M3 inverted
M4 inverted
## mean var skew
## 1 79.11932 11.77511 -0.4060653
LL Normal
LL.normal <- function(p, X){
L <- log(dnorm(x = X, p["mu"], p["sigma"]))
return(-sum(L))
}
LL Weibull
LL.weibull <- function(p, X){
L <- log(dweibull(X, p["shape"], p["scale"]))
return(-sum(L))
}
X = Height
p <- c(mu = 80, sigma = 4)
fit.normal <- optim(p, LL.normal, X=X, hessian = TRUE)
## Warning in dnorm(x = X, p["mu"], p["sigma"]): NaNs produced
pw <- c(shape = 26, scale = 81)
fit.weibull <- optim(pw, LL.weibull, X = X, hessian = TRUE)
c(AIC.Normal = 2*(fit.normal$value + 2*5),
AIC.Weibull = 2*(fit.weibull$value + 2*5))
## AIC.Normal AIC.Weibull
## 2819.440 2795.231
seNorm <- 1/sqrt(diag(fit.normal$hessian))
p.hatNorm <- fit.normal$par
data.frame(p.hatNorm, p.low = p.hatNorm - qnorm(0.975) * seNorm, p.high = p.hatNorm + qnorm(0.975) * seNorm)
## p.hatNorm p.low p.high
## mu 79.120191 78.827832 79.412551
## sigma 3.427565 3.220897 3.634234
Normal - Bootstrap
Weibull - Using the Hessian of the MLS’s
seWeibull <- 1/sqrt(diag(fit.weibull$hessian))
p.hatWeibull <- fit.weibull$par
data.frame(p.hatWeibull, p.low = p.hatWeibull - qnorm(0.975) * seWeibull, p.high = p.hatWeibull + qnorm(0.975) * seWeibull)
## p.hatWeibull p.low p.high
## shape 27.42804 25.69099 29.16510
## scale 80.70316 80.45217 80.95415
Weibull - Bootstrap
(glm.null1 <- glm(Y ~ 1))
##
## Call: glm(formula = Y ~ 1)
##
## Coefficients:
## (Intercept)
## 0.4
##
## Degrees of Freedom: 64 Total (i.e. Null); 64 Residual
## Null Deviance: 15.6
## Residual Deviance: 15.6 AIC: 95.7
## Warning in optim(coef0, fn = logLike.Binomial.null, X = X, Y = Y, hessian = TRUE): one-dimensional optimization by Nelder-Mead is unreliable:
## use "Brent" or optimize() directly
## $par
## beta0
## -0.4046875
##
## $value
## [1] 43.74576
##
## $counts
## function gradient
## 20 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## beta0
## beta0 15.60242
Null
## logL params AIC BIC
## 1 43.74576 1 91.49153 87.49153
Solea_solea ~ Salinity
## logL params AIC BIC
## 1 43.74576 3 80.56 68.56
LRT <- 2*(-glm.fit$value + glm.null2$value)
1-pchisq(LRT, 1)
## [1] 1.354947e-05
Temperature
## $par
## beta0 beta1
## -2.637519 0.100766
##
## $value
## [1] 43.31612
##
## $counts
## function gradient
## 91 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## beta0 beta1
## beta0 15.39404 341.8561
## beta1 341.85615 7675.0293
## beta0 beta1
## -2.637519 0.100766
Depth
## Sample season month Area depth temperature salinity transparency gravel
## 1 1 1 5 2 3.0 20 30 15 3.74
## 2 2 1 5 2 2.6 18 29 15 1.94
## 3 3 1 5 2 2.6 19 30 15 2.88
## 4 4 1 5 4 2.1 20 29 15 11.06
## 5 5 1 5 4 3.2 20 30 15 9.87
## 6 6 1 5 4 3.5 20 32 7 32.45
## large_sand med_fine_sand mud Solea_solea
## 1 13.15 11.93 71.18 0
## 2 4.99 5.43 87.63 0
## 3 8.98 16.85 71.29 1
## 4 11.96 21.95 55.03 0
## 5 28.60 19.49 42.04 0
## 6 7.39 9.43 50.72 0
## $par
## beta0 beta1
## -2.3249896 0.6153755
##
## $value
## [1] 38.07141
##
## $counts
## function gradient
## 71 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## beta0 beta1
## beta0 12.98812 39.71173
## beta1 39.71173 144.00384
## beta0 beta1
## -2.3249896 0.6153755
Generalized AIC, BIC, and LRT
AICfun <- function(fit){
par <- length(fit$par + 1)
c(2*(fit$value + 2*par))
}
BICfun <- function(fit){
par <- length(fit$par + 1)
c(2*(fit$value + par * log(1)))
}
LRTfun <- function(fit){
LRT <- 2*(-fit$value + glm.null2$value)
1-pchisq(LRT, 1)
}
data.frame(salinAIC = AICfun(glm.fit),
salinBIC = BICfun(glm.fit),
salinLRT = LRTfun(glm.fit),
depthAIC = AICfun(depth.fit),
depthBIC = BICfun(depth.fit),
depthLRT = LRTfun(depth.fit),
tempAIC = AICfun(temp.fit),
tempBIC = BICfun(temp.fit),
tempLRT = LRTfun(temp.fit))
## salinAIC salinBIC salinLRT depthAIC depthBIC depthLRT tempAIC
## 1 76.56 68.56 1.354947e-05 84.14281 76.14281 0.0007550023 94.63225
## tempBIC tempLRT
## 1 86.63225 0.3539409
Logistic model for temp and depth
summary(tempglm <- glm(Solea_solea ~ temperature, Solea, family = binomial))
##
## Call:
## glm(formula = Solea_solea ~ temperature, family = binomial, data = Solea)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2126 -1.0449 -0.8898 1.2719 1.5861
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6329 2.4443 -1.077 0.281
## temperature 0.1006 0.1095 0.919 0.358
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.492 on 64 degrees of freedom
## Residual deviance: 86.632 on 63 degrees of freedom
## AIC: 90.632
##
## Number of Fisher Scoring iterations: 4
summary(depthglm <- glm(Solea_solea ~ depth, Solea, family = binomial))
##
## Call:
## glm(formula = Solea_solea ~ depth, family = binomial, data = Solea)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7853 -0.8663 -0.7008 0.9957 1.8693
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3246 0.7006 -3.318 0.000907 ***
## depth 0.6152 0.2104 2.924 0.003457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.492 on 64 degrees of freedom
## Residual deviance: 76.143 on 63 degrees of freedom
## AIC: 80.143
##
## Number of Fisher Scoring iterations: 4
summary(salinglm <- glm(Solea_solea ~ salinity, Solea, family = binomial))
##
## Call:
## glm(formula = Solea_solea ~ salinity, family = binomial, data = Solea)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0674 -0.7146 -0.6362 0.7573 1.8997
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.66071 0.90167 2.951 0.003169 **
## salinity -0.12985 0.03494 -3.716 0.000202 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.492 on 64 degrees of freedom
## Residual deviance: 68.560 on 63 degrees of freedom
## AIC: 72.56
##
## Number of Fisher Scoring iterations: 4
anova(tempglm)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Solea_solea
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 64 87.492
## temperature 1 0.85927 63 86.632
anova(depthglm)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Solea_solea
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 64 87.492
## depth 1 11.349 63 76.143
anova(salinglm)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Solea_solea
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 64 87.492
## salinity 1 18.931 63 68.560