# Load the oil data set from CSV.
# NOTE: this is an absolute, machine-specific path; point it at your own copy
# of "oil data.csv" (or use a project-relative path) before running elsewhere.
Oil_data <- read.csv(
  "/Users/ruiqianli/Desktop/EBAC_SB/Day 4/oil data.csv"
)

# Check the first four rows of the data
head(Oil_data, 4)
##   Oil..Gal. Temp Insulation
## 1     275.3   40          3
## 2     363.8   27          3
## 3     164.3   40         10
## 4      40.8   73          6

# Check how many rows and columns the data has
dim(Oil_data)
## [1] 15  3
# Pearson correlation test between Temp and Oil..Gal.
# The recorded output below shows a strong negative correlation
# (r = -0.87, p = 2.518e-05).
cor.test(Oil_data$Temp,Oil_data$Oil..Gal.)
## 
##  Pearson's product-moment correlation
## 
## data:  Oil_data$Temp and Oil_data$Oil..Gal.
## t = -6.3543, df = 13, p-value = 2.518e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9560495 -0.6447278
## sample estimates:
##        cor 
## -0.8697412
# Pearson correlation test between Insulation and Oil..Gal.
# The recorded output below shows a weaker negative correlation (r = -0.47)
# whose 95% confidence interval includes 0 (p = 0.08066).
cor.test(Oil_data$Insulation,Oil_data$Oil..Gal.)
## 
##  Pearson's product-moment correlation
## 
## data:  Oil_data$Insulation and Oil_data$Oil..Gal.
## t = -1.8942, df = 13, p-value = 0.08066
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7892992  0.0619363
## sample estimates:
##        cor 
## -0.4650825
library(ggplot2)

# Oil..Gal. against Temp: points plus a fitted linear trend line
ggplot(data = Oil_data, mapping = aes(x = Temp, y = Oil..Gal.)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Oil..Gal. against Insulation: points plus a fitted linear trend line
ggplot(data = Oil_data, mapping = aes(x = Insulation, y = Oil..Gal.)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot matrix of every column in the data set
library(car)
## Loading required package: carData
# The title names the data actually being plotted.
scatterplotMatrix(Oil_data, col = "blue", main = "Oil data")

# Histogram of the Oil..Gal. column
hist(Oil_data$Oil..Gal.)

# Check for multicollinearity between the predictors (Temp and Insulation).
# NOTE(review): car is already attached earlier in this script, and nothing in
# this step appears to use it -- confirm whether this load is still needed.
library(car)
# The recorded output below shows essentially no linear association between
# the two predictors (r = 0.0089, p = 0.9748).
cor.test(Oil_data$Temp,Oil_data$Insulation)
## 
##  Pearson's product-moment correlation
## 
## data:  Oil_data$Temp and Oil_data$Insulation
## t = 0.03217, df = 13, p-value = 0.9748
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5056519  0.5188137
## sample estimates:
##         cor 
## 0.008922039
# Model fit: linear regression of Oil..Gal. on Temp and Insulation
OilFit <- lm(formula = Oil..Gal. ~ Temp + Insulation, data = Oil_data)

# Calculate standardised coefficients and show them alongside the usual
# regression summary (the "Standardized" column in the output below)
library(lm.beta)
OPBeta <- lm.beta(OilFit)
summary(OPBeta)
## 
## Call:
## lm(formula = Oil..Gal. ~ Temp + Insulation, data = Oil_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.209 -16.806   0.164  14.105  53.154 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept) 562.1510       0.0000    21.0931  26.651 4.78e-12 ***
## Temp         -5.4366      -0.8657     0.3362 -16.170 1.64e-09 ***
## Insulation  -20.0123      -0.4574     2.3425  -8.543 1.91e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.01 on 12 degrees of freedom
## Multiple R-squared:  0.9656, Adjusted R-squared:  0.9599 
## F-statistic: 168.5 on 2 and 12 DF,  p-value: 1.654e-09
# Residual analysis: list the residual of every observation in the fit
resid(OilFit)
##           1           2           3           4           5           6 
##  -9.3508237   8.4736286  19.7354210  -4.4067023   0.1640724 -26.3333452 
##           7           8           9          10          11          12 
## -26.4478599 -17.9351579   0.8135510 -38.2094702  22.7499357 -15.6772811 
##          13          14          15 
##  53.1541451  27.4760151   5.7938715

# Draw the diagnostic plots for the fitted lm object
plot(OilFit)

# Show the model call and the estimated coefficients
OilFit
## 
## Call:
## lm(formula = Oil..Gal. ~ Temp + Insulation, data = Oil_data)
## 
## Coefficients:
## (Intercept)         Temp   Insulation  
##     562.151       -5.437      -20.012
# Prediction: fitted Oil..Gal. at Temp = 15 and Insulation = 10, together
# with the bounds of a 95% prediction interval
newdata <- data.frame(Temp = 15, Insulation = 10)
predict(
  object = OilFit,
  newdata = newdata,
  interval = "prediction",
  level = 0.95
)
##        fit      lwr      upr
## 1 280.4791 216.2476 344.7106