# Load the data: a file picker opens; select the wc-at.csv data set.
newdata <- read.csv(file = file.choose())
wc.at <- newdata
# Open the data in the spreadsheet-style viewer.
View(wc.at)
# NOTE(review): attach() puts the columns of wc.at on the search path so they
# can be referenced by bare name (Waist, AT). Objects with the same names in
# the workspace would take precedence over the attached columns.
attach(wc.at)
# Number of rows and columns.
dim(wc.at)
## [1] 109 2
# First six rows.
head(wc.at, n = 6)
## Waist AT
## 1 74.75 25.72
## 2 72.60 25.89
## 3 81.80 42.60
## 4 83.95 42.80
## 5 74.65 29.84
## 6 71.85 21.68
# Last two rows.
tail(wc.at, n = 2)
## Waist AT
## 108 107.9 208
## 109 108.5 208
# Column names.
names(wc.at)
## [1] "Waist" "AT"
# Scatter plot of AT against Waist.
# dev.new() opens the default graphics device for the current platform;
# windows() is only available in Windows builds of R.
dev.new()
# with() looks the columns up in wc.at itself, so the plot does not depend on
# attach() or on same-named objects in the workspace. The axis labels remain
# "Waist" and "AT".
with(wc.at, plot(Waist, AT))
# Correlation coefficient between Waist and adipose tissue (AT)
cor(wc.at$AT, wc.at$Waist)
## [1] 0.8185578
# Same value with the arguments swapped (the correlation is symmetric).
cor(wc.at$Waist, wc.at$AT)
## [1] 0.8185578
library(caret)
## Warning: package 'caret' was built under R version 3.5.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2

# Split the data into a training set (about 70% of the rows) and a test set
# (the remaining rows), with createDataPartition() sampling on AT.
# The seed is fixed so that the random split, and every model fitted on it,
# is reproducible from run to run. The console output recorded in this file
# was produced without a fixed seed, so re-running will give different numbers.
set.seed(123)
# wc.at$AT is referenced explicitly so the split does not depend on attach().
inTrain <- createDataPartition(y = wc.at$AT, p = 0.70, list = FALSE)
train <- wc.at[inTrain, ]
test <- wc.at[-inTrain, ]
# Print the first 70 rows of the full data set (not the training subset).
wc.at[1:70, ]
## Waist AT
## 1 74.75 25.72
## 2 72.60 25.89
## 3 81.80 42.60
## 4 83.95 42.80
## 5 74.65 29.84
## 6 71.85 21.68
## 7 80.90 29.08
## 8 83.40 32.98
## 9 63.50 11.44
## 10 73.20 32.22
## 11 71.90 28.32
## 12 75.00 43.86
## 13 73.10 38.21
## 14 79.00 42.48
## 15 77.00 30.96
## 16 68.85 55.78
## 17 75.95 43.78
## 18 74.15 33.41
## 19 73.80 43.35
## 20 75.90 29.31
## 21 76.85 36.60
## 22 80.90 40.25
## 23 79.90 35.43
## 24 89.20 60.09
## 25 82.00 45.84
## 26 92.00 70.40
## 27 86.60 83.45
## 28 80.50 84.30
## 29 86.00 78.89
## 30 82.50 64.75
## 31 83.50 72.56
## 32 88.10 89.31
## 33 90.80 78.94
## 34 89.40 83.55
## 35 102.00 127.00
## 36 94.50 121.00
## 37 91.00 107.00
## 38 103.00 129.00
## 39 80.00 74.02
## 40 79.00 55.48
## 41 83.50 73.13
## 42 76.00 50.50
## 43 80.50 50.88
## 44 86.50 140.00
## 45 83.00 96.54
## 46 107.10 118.00
## 47 94.30 107.00
## 48 94.50 123.00
## 49 79.70 65.92
## 50 79.30 81.29
## 51 89.80 111.00
## 52 83.80 90.73
## 53 85.20 133.00
## 54 75.50 41.90
## 55 78.40 41.71
## 56 78.60 58.16
## 57 87.80 88.85
## 58 86.30 155.00
## 59 85.50 70.77
## 60 83.70 75.08
## 61 77.60 57.05
## 62 84.90 99.73
## 63 79.80 27.96
## 64 108.30 123.00
## 65 119.60 90.41
## 66 119.90 106.00
## 67 96.50 144.00
## 68 105.50 121.00
## 69 105.00 97.13
## 70 107.00 166.00
# Baseline model: simple linear regression of AT on Waist, fitted on the
# training rows only.
model1 <- lm(formula = AT ~ Waist, data = train)
# Coefficient table, residual summary and R-squared of the fit.
summary(object = model1)
##
## Call:
## lm(formula = AT ~ Waist, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.097 -18.437 -2.586 16.017 91.205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -214.2821 27.1025 -7.906 1.66e-11 ***
## Waist 3.4347 0.2903 11.830 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.88 on 76 degrees of freedom
## Multiple R-squared: 0.6481, Adjusted R-squared: 0.6434
## F-statistic: 139.9 on 1 and 76 DF, p-value: < 2.2e-16
# Predict AT for the held-out test rows with the baseline model.
pv <- predict(object = model1, newdata = test)
# The predictions come back as a named numeric vector.
class(x = pv)
## [1] "numeric"
# Print the predictions; the names are the row labels of the test rows.
pv
## 2 3 4 8 13 15 16
## 35.07651 66.67567 74.06026 72.17118 36.79386 50.18915 22.19642
## 22 24 31 32 33 37 38
## 63.58445 92.09238 72.51464 88.31422 97.58789 98.27483 139.49112
## 47 50 52 53 54 57 70
## 109.60931 58.08894 73.54505 78.35362 45.03712 87.28382 153.22989
## 81 83 84 86 88 90 91
## 141.20847 163.53396 170.40334 142.92581 158.38192 160.09927 120.60032
## 93 95 98
## 122.31767 118.88298 125.75236
# Convert the predictions to a one-column data frame (column "pv") and place
# them next to the test rows they belong to.
pv <- as.data.frame(x = pv)
final <- cbind(test, pv)
# R-squared for the fit shown above is 0.6481 (Multiple R-squared); the exact
# value changes with the random train/test split.
# A transformation of the variables may give a better R-squared value.
# Applying transformations
# Logarithmic transformation
# Logarithmic model: AT regressed on log(Waist), fitted on the training rows.
reg_log <- lm(formula = AT ~ log(Waist), data = train)
# Coefficient table, residual summary and R-squared of the fit.
summary(object = reg_log)
##
## Call:
## lm(formula = AT ~ log(Waist), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -97.980 -20.885 -2.245 17.638 90.803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1325.29 118.90 -11.15 <2e-16 ***
## log(Waist) 316.39 26.33 12.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.52 on 76 degrees of freedom
## Multiple R-squared: 0.6552, Adjusted R-squared: 0.6506
## F-statistic: 144.4 on 1 and 76 DF, p-value: < 2.2e-16
# Predicted AT for the test rows from the logarithmic model (printed only,
# not stored).
predict(object = reg_log, newdata = test)
## 2 3 4 8 13 15 16
## 30.44983 68.19954 76.40811 74.32842 32.62139 49.06661 13.66997
## 22 24 31 32 33 37 38
## 64.69914 95.60046 74.70757 91.67448 101.22539 101.92152 141.11308
## 47 50 52 53 54 57 70
## 113.19201 58.37895 75.84227 81.08443 42.84227 90.59525 153.16765
## 81 83 84 86 88 90 91
## 142.64526 161.91643 167.61737 144.17006 157.57228 159.02696 123.75045
## 93 95 98
## 125.36884 122.12375 128.58099
# R-squared for the log(Waist) fit shown above is 0.6552; the exact value
# changes with the random train/test split.
# A different transformation may give a better R-squared value.
# Applying different transformations
# Exponential model
# Exponential model: log(AT) regressed on Waist, fitted on the training rows.
reg_exp <- lm(formula = log(AT) ~ Waist, data = train)
# Coefficient table, residual summary and R-squared of the fit.
summary(object = reg_exp)
##
## Call:
## lm(formula = log(AT) ~ Waist, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02689 -0.21352 0.06214 0.24493 0.84685
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.737666 0.275725 2.675 0.00914 **
## Waist 0.040080 0.002954 13.569 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3651 on 76 degrees of freedom
## Multiple R-squared: 0.7078, Adjusted R-squared: 0.704
## F-statistic: 184.1 on 1 and 76 DF, p-value: < 2.2e-16
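# R-squared has increased from 0.6552 (log(Waist) model) to 0.7078 in the run
# shown above.
# A higher R-squared value suggests a better fit of adipose tissue (AT) on
# Waist. Note that this model's R-squared is computed on log(AT), so it is not
# directly comparable with the R-squared of the models fitted to AT itself.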
# Inverse-square transformation model: AT regressed on 1 / Waist^2, fitted on
# the training rows.
# The arithmetic is wrapped in I() because `/` and `*` are model operators
# inside a formula. Without I() the term was not treated as arithmetic and lm()
# fitted an intercept-only model (the output recorded below this call shows
# only an "(Intercept)" coefficient).
# NOTE: the console output recorded below predates this fix and will differ
# when the script is re-run.
# NOTE(review): this section was originally headed "Sqrt Transformation"; if a
# square-root transform was intended, use sqrt(Waist) as the predictor instead.
sqrtmodel1 <- lm(AT ~ I(1 / (Waist * Waist)), data = train)
summary(sqrtmodel1)
##
## Call:
## lm(formula = AT ~ (1/(Waist * Waist)), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.269 -50.679 -5.874 28.791 150.291
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 102.709 6.804 15.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.09 on 77 degrees of freedom