# Analysis

# Temperature readings (degrees F, per the histogram title below).
x <- c(
  64, 65, 65, 64, 61, 55, 39, 41, 46, 59,
  56, 56, 62, 37, 37, 45, 57, 58, 60, 55
)

# Ordered values, to eyeball the spread and repeated readings.
sort(x)
##  [1] 37 37 39 41 45 46 55 55 56 56 57 58 59 60 61 62 64 64 65 65

# Five-number summary plus the mean, followed by the sample standard deviation.
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   37.00   45.75   56.50   54.10   61.25   65.00
sd(x)
## [1] 9.645834

hist(x, main = "Distribution of temperature(F)")

# Butterfat readings (%, per the histogram title below), exactly as recorded.
# The last entry (0.446) is roughly a tenth of every other value; it is kept
# here on purpose because the rest of the script treats it as a suspected error.
y_original <- c(
  4.65, 4.58, 4.67, 4.60, 4.83, 4.55, 5.14, 4.71, 4.69, 4.65,
  4.36, 4.82, 4.65, 4.66, 4.95, 4.60, 4.68, 4.65, 4.6, 0.446
)

# Ordered values make the isolated low reading easy to spot.
sort(y_original)
##  [1] 0.446 4.360 4.550 4.580 4.600 4.600 4.600 4.650 4.650 4.650 4.650 4.660
## [13] 4.670 4.680 4.690 4.710 4.820 4.830 4.950 5.140

# Location and spread of the raw (uncorrected) data.
summary(y_original)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.446   4.600   4.650   4.474   4.695   5.140
sd(y_original)
## [1] 0.961378

hist(y_original, main = "Distribution of butterfat(%)")

# Boxplots of both variables drawn side by side in one panel.
boxplot(
  x, y_original,
  names = c("Temperature", "Butter fat"),
  main = "Boxplots of variables"
)

# Butterfat against temperature, with the suspect reading still included.
plot(
  x, y_original,
  main = "Scatterplot with suspected error",
  xlab = "Temperature(F)",
  ylab = "Butter fat(%)"
)

# Correlation (cor() default method) on the uncorrected data.
cor(x, y_original)
## [1] -0.09911088
# Correction 1: replace the last butterfat reading with 4.46
# (presumably the recorded .446 is a misplaced decimal point -- confirm).
# The vector is derived from y_original instead of re-typing all 20 values, so
# the first 19 readings cannot drift out of sync with the raw data.
Corrected_y_1 <- replace(y_original, length(y_original), 4.46)

# Spread and location after the correction.
sd(Corrected_y_1)
## [1] 0.1667175
summary(Corrected_y_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.360   4.600   4.650   4.675   4.695   5.140
hist(Corrected_y_1, main = "Distribution of corrected butterfat")

# Correlation with temperature once the last reading is 4.46.
cor(x, Corrected_y_1)
## [1] -0.4532889
# Correction 2: drop the last observation altogether.
# Both reduced vectors are derived from the raw data (negative indexing removes
# the final element) rather than being typed out a second time.
y_without_last_obs <- y_original[-length(y_original)]
summary(y_without_last_obs)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.360   4.600   4.650   4.686   4.700   5.140
sd(y_without_last_obs)
## [1] 0.1632044

# Correlation on the uncorrected data, repeated here
# (presumably as the baseline for the comparison below).
cor(x, y_original)
## [1] -0.09911088

# Temperature with the matching last reading removed, so the pairs stay aligned.
x_last_obs_removed <- x[-length(x)]
cor(x_last_obs_removed, y_without_last_obs)
## [1] -0.4688517
# Correction 3: replace the last reading with the mean of the other 19.
z <- mean(y_without_last_obs)

# Append the mean to the 19 retained readings instead of re-typing them, so the
# imputed series always matches y_without_last_obs.
Corrected_y_mean <- c(y_without_last_obs, z)
summary(Corrected_y_mean)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.360   4.600   4.655   4.686   4.695   5.140
sd(Corrected_y_mean)
## [1] 0.1588515

# Correlation with temperature after mean imputation.
cor(x, Corrected_y_mean)
## [1] -0.4687387
# Correction 4: replace the last reading with a value predicted by simple
# linear regression (SLR) of butterfat on temperature.

# Modelling frame: the last butterfat reading is set to NA so it is excluded
# from the fit. The column is derived from y_original rather than re-typed.
df <- data.frame(
  Temperature = x,
  Butterfat = replace(y_original, length(y_original), NA)
)

# Keep only rows with no missing values, i.e. every row except the last.
df_complete <- df[complete.cases(df), ]

plot(x_last_obs_removed, y_without_last_obs, main = "scatterplot")

# nlme supplies gls(), used for the second fit further down.
# install.packages("nlme")
library(nlme)
## Warning: package 'nlme' was built under R version 4.5.1

# First attempt: ordinary least squares (OLS) on the complete rows.
model_for_imputation <- lm(
  formula = Butterfat ~ Temperature,
  data = df_complete
)
summary(model_for_imputation)
## 
## Call:
## lm(formula = Butterfat ~ Temperature, data = df_complete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31128 -0.06781 -0.00583  0.05437  0.33743 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.103770   0.193754  26.342 3.19e-15 ***
## Temperature -0.007723   0.003529  -2.189   0.0429 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1483 on 17 degrees of freedom
## Multiple R-squared:  0.2198, Adjusted R-squared:  0.1739 
## F-statistic:  4.79 on 1 and 17 DF,  p-value: 0.04288
# Second attempt: generalized least squares (GLS) fitted by maximum likelihood,
# with the error variance modelled as a power of the fitted values.
# NOTE(review): the printed power estimate (~43) and residual standard error
# (~1.9e-30) look degenerate -- confirm this variance model is appropriate
# before relying on the fit.
model_for_imputation2 <- gls(
  Butterfat ~ Temperature,
  data = df_complete,
  weights = varPower(form = ~ fitted(.)),
  method = "ML"
)
summary(model_for_imputation2)
## Generalized least squares fit by maximum likelihood
##   Model: Butterfat ~ Temperature 
##   Data: df_complete 
##        AIC       BIC   logLik
##   -17.1433 -13.36554 12.57165
## 
## Variance function:
##  Structure: Power of variance covariate
##  Formula: ~fitted(.) 
##  Parameter estimates:
##    power 
## 43.00293 
## 
## Coefficients:
##                 Value  Std.Error   t-value p-value
## (Intercept)  4.929666 0.25291501 19.491392  0.0000
## Temperature -0.004683 0.00421806 -1.110306  0.2823
## 
##  Correlation: 
##             (Intr)
## Temperature -0.995
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.67230855 -0.43743893 -0.07612137  0.41069990  2.00759179 
## 
## Residual standard error: 1.94374e-30 
## Degrees of freedom: 19 total; 17 residual
# Diagnostic plots for the OLS fit, drawn on a 2 x 2 grid to check the model
# assumptions.
# NOTE(review): these diagnostics are for the lm() fit, while the imputed value
# further down is predicted from the gls() fit -- confirm that is intended.
# par() returns the previous settings, so the layout is restored to whatever was
# active before instead of being forced to a hard-coded c(1, 1).
old_par <- par(mfrow = c(2, 2))
plot(model_for_imputation)
par(old_par)
# Predict butterfat at the temperature of the last (suspect) observation
# using the GLS fit.
new_data <- data.frame(Temperature = x[length(x)])
imputed_value <- predict(model_for_imputation2, newdata = new_data)

# Use the prediction itself rather than a hand-copied rounded literal
# (previously 4.672082), so the series stays consistent if the model or data
# change. as.numeric() strips the attributes predict() attaches.
y_predicted <- c(y_without_last_obs, as.numeric(imputed_value))

# Location, spread and correlation after regression imputation.
summary(y_predicted)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.360   4.600   4.655   4.686   4.695   5.140
mean(y_predicted)
## [1] 4.685604
sd(y_predicted)
## [1] 0.1588834
cor(x, y_predicted)
## [1] -0.4690845
# Problem 1
# Observed values (25 of them) and their sample mean.
y <- c(
  11, 7, 2, 7, 4, 8, 13, 3, 6, 6, 15, 8, 2,
  4, 5, 11, 11, 4, 9, 3, 9, 8, 5, 9, 6
)
mean(y)
## [1] 7.04
# problem1
# NOTE(review): this heading repeats the one above -- confirm the problem number.
# Poisson probabilities for a count X with rate lambda = 2.3.

# P(X = 2)
dpois(2, lambda = 2.3)
## [1] 0.2651846

# P(X >= 2), computed as 1 - P(X <= 1)
1 - ppois(1, lambda = 2.3)
## [1] 0.6691458

# P(X = 0)
dpois(0, lambda = 2.3)
## [1] 0.1002588

# P(X <= 5)
ppois(5, lambda = 2.3)
## [1] 0.9700243