setwd("C:/Users/lisanjie2/Desktop/TEACHING/1_STATS_CalU/1_STAT_CalU_2016_by_NLB/1_STAT_Duq_2017/HOMEWORK/Duq_Biostats_Homework_4_Milk_regression/homework4_RMD_vs2")
dat2 <- read.csv("milk_subset.csv")
They note: “W/in particular phylogenetic groups, there are significant negative relationships between body mass & … fat … (Merchant et al. 1989; Derrickson et el 1996; Hinde & Milligan 2011). Spp w/ small body masses are expected to produce more concentrated & energetically dense milk b/c they have higher mass-specific metabolic demands & reduced digestive capacity due to smaller GI tracts (Blaxter 1961). Thus, smallbodied species should be incapable of ingesting greater quantities of milk to meet metabolic & nutritional demands & should instead require more concentrated & energy dense milk (Blaxter 1961).”
References
library(ggplot2)
qplot(y=fat.log10,
x=mass.log10,
data = dat2)
logit.log.mass <- lm(fat.logit ~ mass.log10, data = dat2)
summary(logit.log.mass)
##
## Call:
## lm(formula = fat.logit ~ mass.log10, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1076 -0.7710 -0.0754 0.7805 2.5916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.62319 0.30681 -8.550 3.2e-14 ***
## mass.log10 0.08713 0.07275 1.198 0.233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.217 on 128 degrees of freedom
## Multiple R-squared: 0.01108, Adjusted R-squared: 0.003358
## F-statistic: 1.435 on 1 and 128 DF, p-value: 0.2332
Compare just at the coefficients using coef()
coef(log.log.mass)
## (Intercept) mass.log10
## 0.84812647 0.02255102
coef(logit.log.mass)
## (Intercept) mass.log10
## -2.62319113 0.08713388
Look at the p values for the two slopes
summary(log.log.mass)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.84812647 0.11530526 7.3554882 2.016361e-11
## mass.log10 0.02255102 0.02733924 0.8248592 4.109861e-01
summary(logit.log.mass)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.62319113 0.30681324 -8.549798 3.197174e-14
## mass.log10 0.08713388 0.07274638 1.197776 2.332174e-01
The p value is ~0.4 for the log-log and ~0.2 for logit-log.
par(mfrow = c(2,2),
mar = c(4,4,2,1))
plot(log.log.mass)
QUESTIONS
Why plot provides information about… * … homogenity of variance? * … normality of residuals? * … outliers/influential poitns?
ANSWERS
Some thoughts:
par(mfrow = c(2,2),
mar = c(4,4,2,1))
plot(logit.log.mass)
Get the predictions with predict()
y.hat <- predict(log.log.mass)
Add prediction to the dataframe
dat2$y.hat <- y.hat
Plot predictions with points()
par(mfrow = c(1,1))
#plot raw dat
plot(fat.log10 ~ mass.log10, data = dat2)
#plot predicitons as points
points(y.hat ~ mass.log10, data = dat2, col = 2)
We can plot the predictions as a straight line like this by using the “type = ‘l’” arguement in points().
#plot raw dat
plot(fat.log10 ~ mass.log10, data = dat2)
#plot predicitons as points
points(y.hat ~ mass.log10,
data = dat2,
col = 2,
type = "l")
1st, get the range of the x variable
#get the range of x variable
observed.range <- range(dat2$mass.log10)
observed.range
## [1] 0.903090 8.230449
Make into a dataframee
#make new dataframe
newdat <- data.frame(mass.log10 = observed.range)
newdat
## mass.log10
## 1 0.903090
## 2 8.230449
y.hat2 <- predict(log.log.mass,
newdat = newdat)
y.hat2
## 1 2
## 0.8684921 1.0337315
Add the y.hat predictions to our ne wdataframe
newdat$y.hat2 <- y.hat2
newdat
## mass.log10 y.hat2
## 1 0.903090 0.8684921
## 2 8.230449 1.0337315
Plot raw data w/ line defined by the predictions
#plot raw dat
plot(fat.log10 ~ mass.log10, data = dat2)
#plot predicitons as points
points(y.hat2 ~ mass.log10,
data = newdat,
col = 2,
type = "l")
#plot raw dat
plot(fat.log10 ~ mass.log10,
data = dat2)
#plot model w/abline
abline(log.log.mass,
col = 2)