# Package setup: install dplyr and tidyr only when they are missing, then
# attach them. Checking first avoids re-downloading on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(dplyr)
library(tidyr)
# Given data: bacteria counts and the exposure time (minutes) at which each
# count was observed. The two vectors are paired element by element.
Bacteria <- c(175, 108, 95, 82, 71, 50, 49, 31, 28, 17, 16, 11)
Exposure <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
dat <- data.frame(Bacteria, Exposure)
# Print without row names so only the two data columns are shown.
print(dat, row.names = FALSE)
## Bacteria Exposure
## 175 1
## 108 2
## 95 3
## 82 4
## 71 5
## 50 6
## 49 7
## 31 8
## 28 9
## 17 10
## 16 11
## 11 12
Fit a simple linear regression model to the data. What is the value of R^2?
# Simple linear regression of bacteria count on exposure time, followed by
# the coefficient table and fit statistics.
model <- lm(formula = Bacteria ~ Exposure)
summary(object = model)
##
## Call:
## lm(formula = Bacteria ~ Exposure)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.323 -9.890 -7.323 2.463 45.282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 142.20 11.26 12.627 1.81e-07 ***
## Exposure -12.48 1.53 -8.155 9.94e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.3 on 10 degrees of freedom
## Multiple R-squared: 0.8693, Adjusted R-squared: 0.8562
## F-statistic: 66.51 on 1 and 10 DF, p-value: 9.944e-06
# Extract R^2 by its full element name; `$r.square` only worked through
# partial matching of list names.
summary(model)$r.squared
## [1] 0.8693011
--> Here we used the lm() function to create the regression model.
Check for model adequacy.
# Diagnostic plots of the untransformed fit, used to judge model adequacy.
plot(x = model)
Use Box-Cox to perform a power transformation, and transform the data as appropriate.
library(MASS)
# Run Box-Cox on the fitted model; the result carries the candidate powers
# in `x` and the matching likelihood values in `y`.
b <- boxcox(model)
lambda <- b[["x"]]
likelihood <- b[["y"]]
# Keep the power whose likelihood is largest.
z <- lambda[[which.max(likelihood)]]
print(z)
## [1] 0.1010101
--> As you can see, the Box-Cox procedure selects a power (lambda) of about 0.101; applying this power transformation to the response is intended to remove the nonlinearity.
Fit a simple linear regression model to the transformed data. What is the value of R^2?
# Raise the response to the Box-Cox power z and refit the regression on
# the transformed scale.
y <- Bacteria^z
newmodel <- lm(formula = y ~ Exposure)
summary(object = newmodel)
##
## Call:
## lm(formula = y ~ Exposure)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.026103 -0.008334 -0.003293 0.012661 0.025559
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.700726 0.010226 166.31 < 2e-16 ***
## Exposure -0.034957 0.001389 -25.16 2.25e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01662 on 10 degrees of freedom
## Multiple R-squared: 0.9844, Adjusted R-squared: 0.9829
## F-statistic: 632.9 on 1 and 10 DF, p-value: 2.255e-10
--> Here we created newmodel, the regression fitted to the transformed response.
Check for model adequacy using the transformed data (comment).
# Diagnostic plots of the transformed-response fit, used to judge adequacy.
plot(x = newmodel)
Estimate the number of bacteria at 10 minutes of exposure. How does this compare with the observed value?
# Fitted values of the transformed model, one per observation. The extractor
# takes only the model; the extra `Exposure` argument passed before was
# silently ignored.
pred <- fitted(newmodel)
# Select the observation whose exposure is 10 minutes (rather than relying on
# it being the 10th row) and undo the power transformation to get a count.
pred[Exposure == 10]^(1 / z)
## 10
## 19.67812
--> As you can see, the estimated number of bacteria at 10 minutes of exposure is 19.67812, compared with an observed value of 17.
Provide a 95% prediction interval on the number of bacteria at 10 minutes of exposure.
# 95% prediction interval at Exposure = 10 on the transformed (Bacteria^z)
# scale. The newdata column must be named exactly "Exposure": R is
# case-sensitive, and with `exposure = 10` predict() did not find the
# predictor in newdata, fell back to the 12-element global Exposure vector,
# and returned 12 rows instead of the single requested one.
pred_interval <- predict(
  newmodel,
  newdata = data.frame(Exposure = 10),
  interval = "prediction",
  level = 0.95
)
pred_interval
## fit lwr upr
## 1 1.351156 1.311127 1.391185
# Back-transform fit and both bounds to the original bacteria-count scale.
pred_interval^(1 / z)
## fit lwr upr
## 1 19.67812 14.61108 26.27301
--> As you can see, the 95% prediction interval for the number of bacteria at 10 minutes of exposure is: lower = 14.611076, upper = 26.27301, with a fitted value of 19.67812.