# Load the oil data set into a data frame.
# Spaces inside an R string literal need no escaping; a backslash followed by
# a space is not a recognised escape sequence and stops the script from
# parsing, so the path is written with plain spaces.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running on another machine.
Oil_data <- read.csv("/Users/ruiqianli/Desktop/EBAC_SB/Day 4/oil data.csv")
# Check the first four rows of the data
head(Oil_data, 4)
## Oil..Gal. Temp Insulation
## 1 275.3 40 3
## 2 363.8 27 3
## 3 164.3 40 10
## 4 40.8 73 6
# Check how many rows and columns the data has
dim(Oil_data)
## [1] 15 3
# Pearson correlation test between Temp and Oil..Gal.
# (method = "pearson" is the cor.test() default, spelled out for clarity)
cor.test(
  x = Oil_data$Temp,
  y = Oil_data$Oil..Gal.,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Oil_data$Temp and Oil_data$Oil..Gal.
## t = -6.3543, df = 13, p-value = 2.518e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9560495 -0.6447278
## sample estimates:
## cor
## -0.8697412
# Pearson correlation test between Insulation and Oil..Gal.
cor.test(
  x = Oil_data$Insulation,
  y = Oil_data$Oil..Gal.,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Oil_data$Insulation and Oil_data$Oil..Gal.
## t = -1.8942, df = 13, p-value = 0.08066
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7892992 0.0619363
## sample estimates:
## cor
## -0.4650825
# Scatter plots of Oil..Gal. against each predictor, each overlaid with a
# linear-model smoother (geom_smooth(method = "lm")).
# ggplot2 only needs to be attached once for both plots.
library(ggplot2)
ggplot(Oil_data, aes(x = Temp, y = Oil..Gal.)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

ggplot(Oil_data, aes(x = Insulation, y = Oil..Gal.)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot matrix of every column in Oil_data
library(car)
## Loading required package: carData
# The title names the data set being plotted.
scatterplotMatrix(Oil_data, col = "blue", main = "Oil data")

# Histogram of the Oil..Gal. column
hist(Oil_data$Oil..Gal.)

# Check for multicollinearity between the predictors (Temp and Insulation)
# using a Pearson correlation test on the pair.
library(car)
cor.test(
  x = Oil_data$Temp,
  y = Oil_data$Insulation,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Oil_data$Temp and Oil_data$Insulation
## t = 0.03217, df = 13, p-value = 0.9748
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5056519 0.5188137
## sample estimates:
## cor
## 0.008922039
# Model fit: linear regression of Oil..Gal. on Temp and Insulation
OilFit <- lm(
  formula = Oil..Gal. ~ Temp + Insulation,
  data = Oil_data
)
# Calculate standardised coefficients and print them alongside the usual
# regression summary
library(lm.beta)
OPBeta <- lm.beta(OilFit)
summary(OPBeta)
##
## Call:
## lm(formula = Oil..Gal. ~ Temp + Insulation, data = Oil_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.209 -16.806 0.164 14.105 53.154
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 562.1510 0.0000 21.0931 26.651 4.78e-12 ***
## Temp -5.4366 -0.8657 0.3362 -16.170 1.64e-09 ***
## Insulation -20.0123 -0.4574 2.3425 -8.543 1.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.01 on 12 degrees of freedom
## Multiple R-squared: 0.9656, Adjusted R-squared: 0.9599
## F-statistic: 168.5 on 2 and 12 DF, p-value: 1.654e-09
# Residual analysis: list the residuals of the fitted model, then draw the
# default diagnostic plots for it
resid(OilFit)
## 1 2 3 4 5 6
## -9.3508237 8.4736286 19.7354210 -4.4067023 0.1640724 -26.3333452
## 7 8 9 10 11 12
## -26.4478599 -17.9351579 0.8135510 -38.2094702 22.7499357 -15.6772811
## 13 14 15
## 53.1541451 27.4760151 5.7938715
plot(x = OilFit)




# Print the fitted model (call and coefficients)
print(OilFit)
##
## Call:
## lm(formula = Oil..Gal. ~ Temp + Insulation, data = Oil_data)
##
## Coefficients:
## (Intercept) Temp Insulation
## 562.151 -5.437 -20.012
# Prediction: estimate Oil..Gal. at Temp = 15 and Insulation = 10, together
# with a 95% prediction interval
newdata <- data.frame(Temp = 15, Insulation = 10)
predict(
  object = OilFit,
  newdata = newdata,
  interval = "prediction",
  level = 0.95
)
## fit lwr upr
## 1 280.4791 216.2476 344.7106