# Load the wc-at.csv data set (columns Waist and AT) from its fixed location.
wc.at <- read.csv(
  file = "C:\\Users\\Admin\\Desktop\\SARLAKG\\R _Codes\\Simple Linear Regression\\wc-at.csv"
)
# Open the data in the spreadsheet-style viewer.
View(wc.at)
# Put the columns on the search path. The bare names AT and Waist used in the
# rest of the script rely on this call, so it must stay ahead of them.
attach(wc.at)
# Average of the AT column.
mean(x = AT)
## [1] 101.894
# Number of rows and columns.
dim(x = wc.at)
## [1] 109 2
# Per-column summary statistics.
summary(object = wc.at)
## Waist AT
## Min. : 63.5 Min. : 11.44
## 1st Qu.: 80.0 1st Qu.: 50.88
## Median : 90.8 Median : 96.54
## Mean : 91.9 Mean :101.89
## 3rd Qu.:104.0 3rd Qu.:137.00
## Max. :121.0 Max. :253.00
# Normal Q-Q plot of AT, drawn in its own graphics window.
windows()
qqnorm(y = AT)

# Scatter plot with Waist on the x-axis and AT on the y-axis, in a second window.
windows()
plot(x = Waist, y = AT)

# Correlation coefficient between adipose tissue (AT) and Waist
cor(x = AT, y = Waist)
## [1] 0.8185578
# Simple linear regression: AT is the response, Waist the predictor.
m1 <- lm(formula = AT ~ Waist)
summary(object = m1)
##
## Call:
## lm(formula = AT ~ Waist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.288 -19.143 -2.939 16.376 90.342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -215.9815 21.7963 -9.909 <2e-16 ***
## Waist 3.4589 0.2347 14.740 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.06 on 107 degrees of freedom
## Multiple R-squared: 0.67, Adjusted R-squared: 0.667
## F-statistic: 217.3 on 1 and 107 DF, p-value: < 2.2e-16
# Predicted AT from the linear model for every row of the original data.
pv <- predict(object = m1, newdata = wc.at)
class(x = pv)
## [1] "numeric"
# Wrap the predictions in a one-column data frame; the column takes its name
# ("pv") from the variable, so the variable name is kept as is. Then print it.
pv <- as.data.frame(pv)
pv
## pv
## 1 42.568252
## 2 35.131704
## 3 66.953210
## 4 74.389758
## 5 42.222366
## 6 32.537559
## 7 63.840237
## 8 72.487385
## 9 3.656083
## 10 37.207020
## 11 32.710502
## 12 43.432966
## 13 36.861134
## 14 57.268404
## 15 50.350685
## 16 22.160981
## 17 46.718883
## 18 40.492936
## 19 39.282335
## 20 46.545940
## 21 49.831856
## 22 63.840237
## 23 60.381377
## 24 92.548770
## 25 67.644982
## 26 102.233576
## 27 83.555735
## 28 62.456693
## 29 81.480420
## 30 69.374412
## 31 72.833271
## 32 88.744024
## 33 98.082945
## 34 93.240542
## 35 136.822170
## 36 110.880725
## 37 98.774717
## 38 140.281029
## 39 60.727263
## 40 57.268404
## 41 72.833271
## 42 46.891826
## 43 62.456693
## 44 83.209849
## 45 71.103842
## 46 154.462353
## 47 110.188953
## 48 110.880725
## 49 59.689606
## 50 58.306062
## 51 94.624085
## 52 73.870929
## 53 78.713332
## 54 45.162396
## 55 55.193088
## 56 55.884860
## 57 87.706367
## 58 82.518078
## 59 79.750990
## 60 73.525043
## 61 52.426001
## 62 77.675674
## 63 60.035492
## 64 158.612984
## 65 197.698095
## 66 198.735753
## 67 117.798443
## 68 148.928178
## 69 147.198748
## 70 154.116467
## 71 154.116467
## 72 133.363311
## 73 119.527873
## 74 129.904451
## 75 157.575326
## 76 129.904451
## 77 140.281029
## 78 143.739889
## 79 150.657608
## 80 161.034186
## 81 142.010459
## 82 164.493045
## 83 164.493045
## 84 171.410764
## 85 159.304756
## 86 143.739889
## 87 167.951905
## 88 159.304756
## 89 202.540498
## 90 161.034186
## 91 121.257303
## 92 148.928178
## 93 122.986732
## 94 110.880725
## 95 119.527873
## 96 147.198748
## 97 150.657608
## 98 126.445592
## 99 98.774717
## 100 138.551600
## 101 150.657608
## 102 161.380072
## 103 181.787342
## 104 133.363311
## 105 130.250337
## 106 106.730093
## 107 136.130398
## 108 157.229440
## 109 159.304756
# Place the predictions next to the observed data and save the result.
final <- cbind(wc.at, pv)
# Relative file name: the file is written into the current working directory.
write.csv(x = final, file = "Aug1.csv")
# Print the working directory the file was written to.
getwd()
## [1] "C:/Users/Admin/Desktop/SARLAKG/R _Codes/Simple Linear Regression"
# Load the new observations to be scored (newdata.csv).
test <- read.csv(
  file = "C:\\Users\\Admin\\Desktop\\SARLAKG\\R _Codes\\Simple Linear Regression\\newdata.csv"
)
# Predict AT for the new observations with the linear model and print the result.
pv1 <- predict(object = m1, newdata = test)
pv1
## 1 2 3 4
## 74.38976 42.22237 32.53756 9.88203
# Wrap the new-data predictions in a data frame, as was done earlier for pv.
# This line previously re-converted pv, which was already a data frame, so it
# had no effect and pv1 stayed a plain numeric vector.
pv1 <- as.data.frame(pv1)
# Print the working directory before it is changed.
getwd()
## [1] "C:/Users/Admin/Desktop/SARLAKG/R _Codes/Simple Linear Regression"
# NOTE(review): setwd() changes the working directory for everything that runs
# after this line; any later relative file paths resolve against this folder.
setwd("C:\\Users\\Admin\\Desktop\\SARLAKG")
# R-squared for the linear model above is 0.67 (adjusted 0.667).
# A transformation of the variables may give a better R-squared value.
# Applying transformations
# Square-root transformation of the predictor
# NOTE: the fitted model uses sqrt(Waist), not log(Waist). The object keeps the
# name reg_log only because later statements refer to it by that name.
reg_log <- lm(formula = AT ~ sqrt(Waist)) # Regression on the square root of Waist
summary(object = reg_log)
##
## Call:
## lm(formula = AT ~ sqrt(Waist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.880 -18.732 -1.924 15.319 90.270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -533.34 42.86 -12.45 <2e-16 ***
## sqrt(Waist) 66.44 4.47 14.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.88 on 107 degrees of freedom
## Multiple R-squared: 0.6737, Adjusted R-squared: 0.6706
## F-statistic: 220.9 on 1 and 107 DF, p-value: < 2.2e-16
# Predicted AT for the new observations from the AT ~ sqrt(Waist) model.
predict(object = reg_log, newdata = test)
## 1 2 3 4
## 75.436465 40.726568 29.857455 3.572794
# R-squared value for the above model is 0.6737 (adjusted 0.6706).
# We may need a different transformation to get a better R-squared value.
# Applying different transformations
# Exponential model
# Fit the exponential form by regressing log(AT) on Waist.
reg_exp <- lm(formula = log(AT) ~ Waist)
summary(object = reg_exp)
##
## Call:
## lm(formula = log(AT) ~ Waist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.05086 -0.21688 0.03623 0.23044 0.82862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.741021 0.232628 3.185 0.00189 **
## Waist 0.040252 0.002504 16.073 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3529 on 107 degrees of freedom
## Multiple R-squared: 0.7071, Adjusted R-squared: 0.7044
## F-statistic: 258.3 on 1 and 107 DF, p-value: < 2.2e-16
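# R-squared value has increased from 0.67 to 0.7071.
# The higher the R-squared value, the better the chances of getting a good model
# for Waist and adipose tissue.
# Caveat: this R-squared is measured on log(AT), so it is not directly
# comparable with the R-squared of the models fitted to AT itself.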