# Simple linear regression: delivery time vs. sorting time
#
# Loads the delivery-time data set and draws an exploratory scatter plot.
# The model-fitting calls further down refer to the columns Delivery.Time and
# Sorting.Time without a `data =` argument, so the data frame is attached.

# NOTE(review): absolute, machine-specific path -- adjust to where the CSV lives.
data_path <- "E:\\EXCELR ASSIGMENTS\\delivery_time (1).csv"
dt <- read.csv(data_path)

# Fail early with a clear message if the columns used below are missing.
stopifnot(all(c("Delivery.Time", "Sorting.Time") %in% names(dt)))

# View() opens a spreadsheet-style viewer; only useful in an interactive session.
if (interactive()) {
  View(dt)
}

# colnames(dt) <- c("dt", "st")  # uncomment to change the column names

# Detach a copy left on the search path by a previous run, so that re-running
# the script does not stack attachments and emit masking messages.
if ("dt" %in% search()) {
  detach("dt", character.only = TRUE)
}
attach(dt)

# Exploratory scatter plot (x = Delivery.Time, y = Sorting.Time)
plot(Delivery.Time, Sorting.Time)

# Correlation coefficient between delivery time and sorting time
cor(x = Delivery.Time, y = Sorting.Time)
## [1] 0.8259973

# Least-squares fit with Sorting.Time as the response and Delivery.Time as
# the predictor, followed by the usual coefficient / R-squared summary.
# NOTE(review): the transformed models further down use Delivery.Time as the
# response -- confirm that this direction is the one intended here.
reg <- lm(formula = Sorting.Time ~ Delivery.Time)
summary(object = reg)
## 
## Call:
## lm(formula = Sorting.Time ~ Delivery.Time)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1388 -1.0014 -0.1045  0.5521  3.3507 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.75667    1.13395  -0.667    0.513    
## Delivery.Time  0.41374    0.06477   6.387 3.98e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.47 on 19 degrees of freedom
## Multiple R-squared:  0.6823, Adjusted R-squared:  0.6655 
## F-statistic:  40.8 on 1 and 19 DF,  p-value: 3.983e-06
# 95% confidence intervals for the intercept and slope of `reg`
confint(reg, level = 0.95)
##                    2.5 %    97.5 %
## (Intercept)   -3.1300583 1.6167115
## Delivery.Time  0.2781691 0.5493182

# Fitted values with prediction intervals for the observations used in the fit.
# The argument was previously misspelled `inteval`; it did not match any
# parameter of predict.lm() and was swallowed by `...`, so no interval was
# computed and only the point predictions were returned.
predict(reg, interval = "prediction")
# The values below were recorded from the earlier (misspelled) call and are the
# point predictions only, i.e. the `fit` column. The corrected call returns a
# matrix with `fit`, `lwr` and `upr` columns; re-run to refresh this output.
##         1         2         3         4         5         6         7 
##  7.931943  4.828866  7.414763  9.173174 11.241892  5.594291  7.104456 
##         8         9        10        11        12        13        14 
##  3.173891  6.649338  7.001020  7.447863  3.691071  6.144570  4.001378 
##        15        16        17        18        19        20        21 
##  4.220662  5.399832  4.932302  6.736224  2.553276  6.620376  8.138815
# Multiple R-squared for the above model is 0.6823 (adjusted 0.6655).
# We may have to transform the variables to get a better R-squared value.
# Applying transformations

# Logarithmic transformation of the predictor:
# Delivery.Time is modelled as a linear function of log(Sorting.Time).
reg_log <- lm(formula = Delivery.Time ~ log(Sorting.Time))
summary(object = reg_log)
## 
## Call:
## lm(formula = Delivery.Time ~ log(Sorting.Time))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0829 -2.0133 -0.1965  0.9351  7.0171 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.160      2.455   0.472    0.642    
## log(Sorting.Time)    9.043      1.373   6.587 2.64e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.873 on 19 degrees of freedom
## Multiple R-squared:  0.6954, Adjusted R-squared:  0.6794 
## F-statistic: 43.39 on 1 and 19 DF,  p-value: 2.642e-06
# 95% confidence intervals for the coefficients of the log model
confint(object = reg_log, level = 0.95)
##                      2.5 %    97.5 %
## (Intercept)       -3.97778  6.297147
## log(Sorting.Time)  6.16977 11.917057
# Fitted values with lower/upper prediction bounds for the fitted observations
predict(reg_log, interval = "predict")
## Warning in predict.lm(reg_log, interval = "predict"): predictions on current data refer to _future_ responses
##         fit        lwr      upr
## 1  21.98291 15.6099875 28.35584
## 2  13.69652  7.4628028 19.93023
## 3  17.36331 11.2049447 23.52167
## 4  21.03009 14.7287585 27.33143
## 5  21.98291 15.6099875 28.35584
## 6  17.36331 11.2049447 23.52167
## 7  18.75735 12.5700473 24.94466
## 8  11.09489  4.6786298 17.51115
## 9  21.98291 15.6099875 28.35584
## 10 21.03009 14.7287585 27.33143
## 11 19.96493 13.7271824 26.20268
## 12 13.69652  7.4628028 19.93023
## 13 18.75735 12.5700473 24.94466
## 14 11.09489  4.6786298 17.51115
## 15 11.09489  4.6786298 17.51115
## 16 13.69652  7.4628028 19.93023
## 17 17.36331 11.2049447 23.52167
## 18 18.75735 12.5700473 24.94466
## 19  7.42810  0.5911537 14.26505
## 20 18.75735 12.5700473 24.94466
## 21 15.71450  9.5493253 21.87967
# Multiple R-squared for the above model is 0.6954 (adjusted 0.6794).
# We may have to try a different transformation for a better R-squared value.
# Applying a different transformation

# Exponential model: log(Delivery.Time) is regressed on Sorting.Time,
# i.e. Delivery.Time is assumed to grow exponentially with Sorting.Time.
reg_exp <- lm(formula = log(Delivery.Time) ~ Sorting.Time)
summary(object = reg_exp)
## 
## Call:
## lm(formula = log(Delivery.Time) ~ Sorting.Time)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29209 -0.13364  0.02065  0.08421  0.41892 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.12137    0.10297  20.601 1.86e-14 ***
## Sorting.Time  0.10555    0.01544   6.836 1.59e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1755 on 19 degrees of freedom
## Multiple R-squared:  0.7109, Adjusted R-squared:  0.6957 
## F-statistic: 46.73 on 1 and 19 DF,  p-value: 1.593e-06
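# Multiple R-squared has increased from 0.6823 (untransformed model) to 0.7109.
# A higher R-squared value means better chances of getting a good model
# for delivery time and sorting time. Note that this model's response is
# log(Delivery.Time), so its R-squared is measured on the log scale.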