# Analysis
# Temperature readings (F), 20 observations.
x <- c(
  64, 65, 65, 64, 61, 55, 39, 41, 46, 59,
  56, 56, 62, 37, 37, 45, 57, 58, 60, 55
)

# Values in ascending order.
sort(x)
## [1] 37 37 39 41 45 46 55 55 56 56 57 58 59 60 61 62 64 64 65 65
# Quartiles, extremes and mean.
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 37.00 45.75 56.50 54.10 61.25 65.00
# Sample standard deviation.
sd(x)
## [1] 9.645834
hist(x, main = "Distribution of temperature(F)")

# Butterfat readings (%) as recorded, 20 observations. The last entry (0.446)
# is an order of magnitude below the rest; the script later treats it as a
# suspected recording error.
y_original <- c(
  4.65, 4.58, 4.67, 4.60, 4.83, 4.55, 5.14, 4.71, 4.69, 4.65,
  4.36, 4.82, 4.65, 4.66, 4.95, 4.60, 4.68, 4.65, 4.6, 0.446
)

# Values in ascending order.
sort(y_original)
## [1] 0.446 4.360 4.550 4.580 4.600 4.600 4.600 4.650 4.650 4.650 4.650 4.660
## [13] 4.670 4.680 4.690 4.710 4.820 4.830 4.950 5.140
# Quartiles, extremes and mean.
summary(y_original)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.446 4.600 4.650 4.474 4.695 5.140
# Sample standard deviation.
sd(y_original)
## [1] 0.961378
hist(y_original, main = "Distribution of butterfat(%)")

# Boxplots of both variables drawn on one shared axis.
boxplot(
  x, y_original,
  names = c("Temperature", "Butter fat"),
  main = "Boxplots of variables"
)

# Butterfat against temperature, with the suspect reading still included.
plot(
  x, y_original,
  main = "Scatterplot with suspected error",
  xlab = "Temperature(F)",
  ylab = "Butter fat(%)"
)

# Correlation between the two variables on the data as recorded.
cor(x, y_original)
## [1] -0.09911088
# Option 1: replace the suspect last reading (0.446) with 4.46.
# The first 19 values are taken from y_original so they are not typed twice.
Corrected_y_1 <- replace(y_original, length(y_original), 4.46)
sd(Corrected_y_1)
## [1] 0.1667175
summary(Corrected_y_1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.360 4.600 4.650 4.675 4.695 5.140
hist(Corrected_y_1, main = "Distribution of corrected butterfat")

# Correlation with temperature after the correction.
cor(x, Corrected_y_1)
## [1] -0.4532889
# Option 2: drop the last observation altogether.
y_without_last_obs <- y_original[-length(y_original)]
summary(y_without_last_obs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.360 4.600 4.650 4.686 4.700 5.140
sd(y_without_last_obs)
## [1] 0.1632044
# Correlation on all 20 recorded points, repeated here for comparison.
cor(x, y_original)
## [1] -0.09911088
# Temperature with the matching last observation removed.
x_last_obs_removed <- x[-length(x)]
cor(x_last_obs_removed, y_without_last_obs)
## [1] -0.4688517
# Option 3: replace the last reading with the mean of the other 19 readings.
z <- mean(y_without_last_obs)
Corrected_y_mean <- c(y_without_last_obs, z)
summary(Corrected_y_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.360 4.600 4.655 4.686 4.695 5.140
sd(Corrected_y_mean)
## [1] 0.1588515
cor(x, Corrected_y_mean)
## [1] -0.4687387
# Option 4: replace the last reading with a value predicted by regression.
# Build a data frame whose last butterfat entry is NA, then keep only the
# complete rows for model fitting.
df <- data.frame(
  Temperature = x,
  Butterfat = c(y_without_last_obs, NA)
)
df_complete <- df[complete.cases(df), ]
plot(x_last_obs_removed, y_without_last_obs, main = "scatterplot")

#install.packages("nlme")
library(nlme)
## Warning: package 'nlme' was built under R version 4.5.1

# Ordinary least squares fit of butterfat on temperature (complete rows only).
model_for_imputation <- lm(Butterfat ~ Temperature, data = df_complete)
summary(model_for_imputation)
##
## Call:
## lm(formula = Butterfat ~ Temperature, data = df_complete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31128 -0.06781 -0.00583 0.05437 0.33743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.103770 0.193754 26.342 3.19e-15 ***
## Temperature -0.007723 0.003529 -2.189 0.0429 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1483 on 17 degrees of freedom
## Multiple R-squared: 0.2198, Adjusted R-squared: 0.1739
## F-statistic: 4.79 on 1 and 17 DF, p-value: 0.04288

# Generalized least squares fit by maximum likelihood, with the residual
# variance modelled as a power of the fitted values (varPower).
# NOTE(review): the recorded power estimate (43.0) and residual standard error
# (1.94e-30) are extreme; confirm the variance function is numerically stable.
model_for_imputation2 <- gls(
  Butterfat ~ Temperature,
  data = df_complete,
  weights = varPower(form = ~ fitted(.)),
  method = "ML"
)
summary(model_for_imputation2)
## Generalized least squares fit by maximum likelihood
## Model: Butterfat ~ Temperature
## Data: df_complete
## AIC BIC logLik
## -17.1433 -13.36554 12.57165
##
## Variance function:
## Structure: Power of variance covariate
## Formula: ~fitted(.)
## Parameter estimates:
## power
## 43.00293
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 4.929666 0.25291501 19.491392 0.0000
## Temperature -0.004683 0.00421806 -1.110306 0.2823
##
## Correlation:
## (Intr)
## Temperature -0.995
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.67230855 -0.43743893 -0.07612137 0.41069990 2.00759179
##
## Residual standard error: 1.94374e-30
## Degrees of freedom: 19 total; 17 residual

# Diagnostic plots for the OLS fit, laid out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(model_for_imputation)

# Back to a single-panel layout for later plots.
par(mfrow = c(1, 1))
# Predict butterfat at the temperature of the 20th (suspect) observation.
# The prediction comes from the GLS fit (model_for_imputation2), not the OLS fit.
new_data <- data.frame(Temperature = x[20])
imputed_value <- predict(model_for_imputation2, newdata = new_data)

# Use the prediction itself as the 20th value instead of a re-typed, rounded
# copy (previously the literal 4.672082), so the imputed series stays in sync
# with the fitted model. as.numeric() drops the attributes predict() attaches.
y_predicted <- c(
  4.65, 4.58, 4.67, 4.60, 4.83, 4.55, 5.14, 4.71, 4.69, 4.65,
  4.36, 4.82, 4.65, 4.66, 4.95, 4.60, 4.68, 4.65, 4.6,
  as.numeric(imputed_value)
)

# The outputs below were recorded with the value typed in as 4.672082; the
# computed prediction is expected to agree to the printed precision.
summary(y_predicted)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.360 4.600 4.655 4.686 4.695 5.140
mean(y_predicted)
## [1] 4.685604
sd(y_predicted)
## [1] 0.1588834
cor(x, y_predicted)
## [1] -0.4690845
# Problem 1: sample of 25 integer observations and its mean.
y <- c(
  11, 7, 2, 7, 4, 8, 13, 3, 6, 6, 15, 8, 2,
  4, 5, 11, 11, 4, 9, 3, 9, 8, 5, 9, 6
)
mean(y)
## [1] 7.04
# Problem 1: Poisson probabilities with lambda = 2.3.
# P(X = 2)
dpois(x = 2, lambda = 2.3)
## [1] 0.2651846
# P(X >= 2), as the complement of P(X <= 1)
1 - ppois(q = 1, lambda = 2.3)
## [1] 0.6691458
# P(X = 0)
dpois(x = 0, lambda = 2.3)
## [1] 0.1002588
# P(X <= 5)
ppois(q = 5, lambda = 2.3)
## [1] 0.9700243