# Load the waist-circumference / adipose-tissue data set (wc-at.csv).
# file.choose() opens a file picker, so this script is meant to be run from
# an interactive session.
newdata <- read.csv(file.choose()) # choose the wc-at.csv data set
wc.at <- newdata

# Fail early with a clear error if the wrong file was picked: the rest of the
# script refers to the columns Waist and AT.
stopifnot(all(c("Waist", "AT") %in% names(wc.at)))

# View() needs a GUI data viewer, so only call it in interactive sessions.
if (interactive()) {
  View(wc.at)
}
# attach() is kept so that any later code referring to Waist / AT without a
# data frame prefix keeps working. The attached columns are a snapshot taken
# here: later modifications of wc.at are not reflected in them.
attach(wc.at)
# Quick structural checks on the loaded data frame. The "##" lines are the
# console output recorded from an earlier run.

# Number of rows and columns.
c(nrow(wc.at), ncol(wc.at))
## [1] 109   2

# First six observations.
head(wc.at, n = 6L)
##   Waist    AT
## 1 74.75 25.72
## 2 72.60 25.89
## 3 81.80 42.60
## 4 83.95 42.80
## 5 74.65 29.84
## 6 71.85 21.68

# Last two observations.
tail(wc.at, n = 2L)
##     Waist  AT
## 108 107.9 208
## 109 108.5 208

# Column names of the data frame.
names(wc.at)
## [1] "Waist" "AT"
# Open a fresh graphics device. dev.new() is available on every platform,
# whereas windows() only exists in Windows builds of R.
dev.new()

# Scatter plot of adipose tissue (AT) against waist circumference (Waist).
# Columns are looked up in wc.at itself rather than through attach(), so the
# plot always reflects the current contents of the data frame.
with(wc.at, plot(Waist, AT))
# Correlation coefficient between Waist and adipose tissue. The recorded
# output shows the same value for both argument orders.
with(wc.at, cor(AT, Waist))
## [1] 0.8185578
with(wc.at, cor(Waist, AT))
## [1] 0.8185578
library(caret)
## Warning: package 'caret' was built under R version 3.5.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2

# Split the data into a training set (about 70% of rows) and a test set (the
# remaining rows), partitioning on the response AT.
# The RNG seed is fixed first: createDataPartition() samples at random, so
# without a seed every run gives a different split and different summaries.
set.seed(123)
inTrain <- createDataPartition(y = wc.at$AT, p = 0.70, list = FALSE)
train <- wc.at[inTrain, ]
test <- wc.at[-inTrain, ]




# Show observations 1 to 70 of the full data set.
# NOTE(review): these are the first 70 rows of wc.at, not the random training
# partition stored in `train`.
wc.at[seq_len(70L), ]
##     Waist     AT
## 1   74.75  25.72
## 2   72.60  25.89
## 3   81.80  42.60
## 4   83.95  42.80
## 5   74.65  29.84
## 6   71.85  21.68
## 7   80.90  29.08
## 8   83.40  32.98
## 9   63.50  11.44
## 10  73.20  32.22
## 11  71.90  28.32
## 12  75.00  43.86
## 13  73.10  38.21
## 14  79.00  42.48
## 15  77.00  30.96
## 16  68.85  55.78
## 17  75.95  43.78
## 18  74.15  33.41
## 19  73.80  43.35
## 20  75.90  29.31
## 21  76.85  36.60
## 22  80.90  40.25
## 23  79.90  35.43
## 24  89.20  60.09
## 25  82.00  45.84
## 26  92.00  70.40
## 27  86.60  83.45
## 28  80.50  84.30
## 29  86.00  78.89
## 30  82.50  64.75
## 31  83.50  72.56
## 32  88.10  89.31
## 33  90.80  78.94
## 34  89.40  83.55
## 35 102.00 127.00
## 36  94.50 121.00
## 37  91.00 107.00
## 38 103.00 129.00
## 39  80.00  74.02
## 40  79.00  55.48
## 41  83.50  73.13
## 42  76.00  50.50
## 43  80.50  50.88
## 44  86.50 140.00
## 45  83.00  96.54
## 46 107.10 118.00
## 47  94.30 107.00
## 48  94.50 123.00
## 49  79.70  65.92
## 50  79.30  81.29
## 51  89.80 111.00
## 52  83.80  90.73
## 53  85.20 133.00
## 54  75.50  41.90
## 55  78.40  41.71
## 56  78.60  58.16
## 57  87.80  88.85
## 58  86.30 155.00
## 59  85.50  70.77
## 60  83.70  75.08
## 61  77.60  57.05
## 62  84.90  99.73
## 63  79.80  27.96
## 64 108.30 123.00
## 65 119.60  90.41
## 66 119.90 106.00
## 67  96.50 144.00
## 68 105.50 121.00
## 69 105.00  97.13
## 70 107.00 166.00
# Baseline model: simple linear regression of AT on Waist, fitted on the
# training partition only.
model1 <- lm(formula = AT ~ Waist, data = train)
summary(model1)
## 
## Call:
## lm(formula = AT ~ Waist, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -106.097  -18.437   -2.586   16.017   91.205 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -214.2821    27.1025  -7.906 1.66e-11 ***
## Waist          3.4347     0.2903  11.830  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.88 on 76 degrees of freedom
## Multiple R-squared:  0.6481, Adjusted R-squared:  0.6434 
## F-statistic: 139.9 on 1 and 76 DF,  p-value: < 2.2e-16
# Predict AT for the held-out test rows with the baseline model, then show
# the class of the result and the predicted values themselves.
pv <- predict(object = model1, newdata = test)
class(pv)
## [1] "numeric"
pv
##         2         3         4         8        13        15        16 
##  35.07651  66.67567  74.06026  72.17118  36.79386  50.18915  22.19642 
##        22        24        31        32        33        37        38 
##  63.58445  92.09238  72.51464  88.31422  97.58789  98.27483 139.49112 
##        47        50        52        53        54        57        70 
## 109.60931  58.08894  73.54505  78.35362  45.03712  87.28382 153.22989 
##        81        83        84        86        88        90        91 
## 141.20847 163.53396 170.40334 142.92581 158.38192 160.09927 120.60032 
##        93        95        98 
## 122.31767 118.88298 125.75236
# Turn the predictions into a data frame and bind it column-wise to the test
# rows, so observed and predicted AT sit side by side in `final`.
pv <- as.data.frame(pv)
final <- cbind(test, pv)
# Multiple R-squared for the linear model above is 0.6481 (see the summary
# output). The train/test split is random, so the exact value varies by run.
# Try transformations of the variables to see whether R-squared improves.

# Logarithmic transformation of the predictor: regress AT on log(Waist),
# using the training partition.
reg_log <- lm(formula = AT ~ log(Waist), data = train)
summary(reg_log)
## 
## Call:
## lm(formula = AT ~ log(Waist), data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -97.980 -20.885  -2.245  17.638  90.803 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1325.29     118.90  -11.15   <2e-16 ***
## log(Waist)    316.39      26.33   12.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.52 on 76 degrees of freedom
## Multiple R-squared:  0.6552, Adjusted R-squared:  0.6506 
## F-statistic: 144.4 on 1 and 76 DF,  p-value: < 2.2e-16
# Predicted AT for the test rows from the log(Waist) model.
predict(object = reg_log, newdata = test)
##         2         3         4         8        13        15        16 
##  30.44983  68.19954  76.40811  74.32842  32.62139  49.06661  13.66997 
##        22        24        31        32        33        37        38 
##  64.69914  95.60046  74.70757  91.67448 101.22539 101.92152 141.11308 
##        47        50        52        53        54        57        70 
## 113.19201  58.37895  75.84227  81.08443  42.84227  90.59525 153.16765 
##        81        83        84        86        88        90        91 
## 142.64526 161.91643 167.61737 144.17006 157.57228 159.02696 123.75045 
##        93        95        98 
## 125.36884 122.12375 128.58099
# Multiple R-squared for the log(Waist) model above is 0.6552 (see the summary
# output), only marginally better than the linear model (0.6481).
# Try a different transformation to look for a better R-squared value.

# Exponential model: regress log(AT) on Waist using the training partition,
# which corresponds to AT = exp(b0 + b1 * Waist) on the original scale.
reg_exp <- lm(formula = log(AT) ~ Waist, data = train)
summary(reg_exp)
## 
## Call:
## lm(formula = log(AT) ~ Waist, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.02689 -0.21352  0.06214  0.24493  0.84685 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.737666   0.275725   2.675  0.00914 ** 
## Waist       0.040080   0.002954  13.569  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3651 on 76 degrees of freedom
## Multiple R-squared:  0.7078, Adjusted R-squared:  0.704 
## F-statistic: 184.1 on 1 and 76 DF,  p-value: < 2.2e-16
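# Multiple R-squared has increased from 0.6552 (log(Waist) model) to 0.7078.
# A higher R-squared suggests a better fit for Waist and adipose tissue, but
# note that the response here is log(AT): this R-squared is measured on the
# log scale and is not directly comparable with the AT-scale models above.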





# Inverse-square transformation model: regress AT on 1 / Waist^2.
# (The object name sqrtmodel1 is kept so later code that uses it still works.)
# The arithmetic has to be wrapped in I(): inside a formula, "/" and "*" are
# model operators rather than arithmetic, so the bare term (1/(Waist*Waist))
# collapsed to an intercept-only fit and Waist never entered the model.
# NOTE(review): if a square-root transformation was intended, the formula
# should be AT ~ sqrt(Waist) instead -- confirm with the author.
sqrtmodel1 <- lm(AT ~ I(1 / (Waist * Waist)), data = train)
summary(sqrtmodel1)
# NOTE: the "##" summary output recorded below this call was produced by the
# old intercept-only formula and needs to be regenerated.
## 
## Call:
## lm(formula = AT ~ (1/(Waist * Waist)), data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -91.269 -50.679  -5.874  28.791 150.291 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  102.709      6.804    15.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60.09 on 77 degrees of freedom