# Load the Waist / adipose tissue (AT) data set from wc-at.csv.
wc.at <- read.csv("C:\\Users\\Admin\\Desktop\\SARLAKG\\R _Codes\\Simple Linear Regression\\wc-at.csv")
# View() opens a GUI data viewer, so call it only in an interactive session;
# this keeps the script runnable under Rscript / R CMD BATCH.
if (interactive()) {
  View(wc.at)
}
# NOTE(review): attach() is retained only because other statements in this
# script may refer to AT and Waist unqualified. Prefer wc.at$AT or
# `data = wc.at` in new code.
attach(wc.at)
# Mean of the response, referenced through the data frame explicitly.
mean(wc.at$AT)
## [1] 101.894
# Number of rows and columns.
dim(wc.at)
## [1] 109   2
# Per-column summary statistics.
summary(wc.at)
##      Waist             AT        
##  Min.   : 63.5   Min.   : 11.44  
##  1st Qu.: 80.0   1st Qu.: 50.88  
##  Median : 90.8   Median : 96.54  
##  Mean   : 91.9   Mean   :101.89  
##  3rd Qu.:104.0   3rd Qu.:137.00  
##  Max.   :121.0   Max.   :253.00
# Normal Q-Q plot of AT in its own graphics device.
# dev.new() opens the default device on any platform, whereas windows() is
# only available on Windows builds of R.
dev.new()
qqnorm(wc.at$AT)

# Scatter plot of AT (y) against Waist (x) in a second device.
# with() keeps the axis labels as "Waist" and "AT" without relying on attach().
dev.new()
with(wc.at, plot(Waist, AT)) # plot(x, y)

# Correlation coefficient between Waist and adipose tissue (AT)
cor(wc.at$AT, wc.at$Waist)
## [1] 0.8185578
# Simple linear regression of AT on Waist.
# `data = wc.at` resolves the variables inside the data frame, so the fit does
# not depend on attach() or on objects named AT / Waist in the workspace.
m1 <- lm(AT ~ Waist, data = wc.at) # lm(y ~ x)
summary(m1)
## 
## Call:
## lm(formula = AT ~ Waist, data = wc.at)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -107.288  -19.143   -2.939   16.376   90.342 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -215.9815    21.7963  -9.909   <2e-16 ***
## Waist          3.4589     0.2347  14.740   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.06 on 107 degrees of freedom
## Multiple R-squared:   0.67,  Adjusted R-squared:  0.667 
## F-statistic: 217.3 on 1 and 107 DF,  p-value: < 2.2e-16
# Fitted AT values from m1 for every row of the training data
pv <- predict(object = m1, newdata = wc.at)
class(pv)
## [1] "numeric"
# Convert the numeric vector into a one-column data frame (column name: pv)
pv <- as.data.frame(pv)
print(pv)
##             pv
## 1    42.568252
## 2    35.131704
## 3    66.953210
## 4    74.389758
## 5    42.222366
## 6    32.537559
## 7    63.840237
## 8    72.487385
## 9     3.656083
## 10   37.207020
## 11   32.710502
## 12   43.432966
## 13   36.861134
## 14   57.268404
## 15   50.350685
## 16   22.160981
## 17   46.718883
## 18   40.492936
## 19   39.282335
## 20   46.545940
## 21   49.831856
## 22   63.840237
## 23   60.381377
## 24   92.548770
## 25   67.644982
## 26  102.233576
## 27   83.555735
## 28   62.456693
## 29   81.480420
## 30   69.374412
## 31   72.833271
## 32   88.744024
## 33   98.082945
## 34   93.240542
## 35  136.822170
## 36  110.880725
## 37   98.774717
## 38  140.281029
## 39   60.727263
## 40   57.268404
## 41   72.833271
## 42   46.891826
## 43   62.456693
## 44   83.209849
## 45   71.103842
## 46  154.462353
## 47  110.188953
## 48  110.880725
## 49   59.689606
## 50   58.306062
## 51   94.624085
## 52   73.870929
## 53   78.713332
## 54   45.162396
## 55   55.193088
## 56   55.884860
## 57   87.706367
## 58   82.518078
## 59   79.750990
## 60   73.525043
## 61   52.426001
## 62   77.675674
## 63   60.035492
## 64  158.612984
## 65  197.698095
## 66  198.735753
## 67  117.798443
## 68  148.928178
## 69  147.198748
## 70  154.116467
## 71  154.116467
## 72  133.363311
## 73  119.527873
## 74  129.904451
## 75  157.575326
## 76  129.904451
## 77  140.281029
## 78  143.739889
## 79  150.657608
## 80  161.034186
## 81  142.010459
## 82  164.493045
## 83  164.493045
## 84  171.410764
## 85  159.304756
## 86  143.739889
## 87  167.951905
## 88  159.304756
## 89  202.540498
## 90  161.034186
## 91  121.257303
## 92  148.928178
## 93  122.986732
## 94  110.880725
## 95  119.527873
## 96  147.198748
## 97  150.657608
## 98  126.445592
## 99   98.774717
## 100 138.551600
## 101 150.657608
## 102 161.380072
## 103 181.787342
## 104 133.363311
## 105 130.250337
## 106 106.730093
## 107 136.130398
## 108 157.229440
## 109 159.304756
# Place the fitted values next to the observed data
final <- data.frame(wc.at, pv, check.names = FALSE)
# Save the combined table; the relative path resolves against the current
# working directory, which getwd() reports below
write.csv(x = final, file = "Aug1.csv")
getwd()
## [1] "C:/Users/Admin/Desktop/SARLAKG/R _Codes/Simple Linear Regression"
# New observations to score, read from newdata.csv.
# predict() needs a Waist column here because Waist is the predictor in m1.
test <- read.csv(file = "C:\\Users\\Admin\\Desktop\\SARLAKG\\R _Codes\\Simple Linear Regression\\newdata.csv")

# Predicted AT from the linear model m1 for each row of `test`
pv1 <- predict(object = m1, newdata = test)
print(pv1)
##        1        2        3        4 
## 74.38976 42.22237 32.53756  9.88203
# NOTE(review): a second `pv <- as.data.frame(pv)` used to sit here. pv is
# already a data frame at this point, so the call did nothing and was removed.
# If the intent was to convert pv1, do that explicitly.
getwd()
## [1] "C:/Users/Admin/Desktop/SARLAKG/R _Codes/Simple Linear Regression"
# NOTE(review): setwd() changes the working directory for everything that runs
# after this line, and the path is machine-specific. It is kept in case later
# code relies on it; confirm whether it is still needed.
setwd("C:\\Users\\Admin\\Desktop\\SARLAKG")
# Adjusted R-squared for the linear model m1 is 0.667 (multiple R-squared 0.67).
# We may have to transform the variables to get a better R-squared value.
# Applying transformations

# Square-root transformation of the predictor
# The fitted formula is AT ~ sqrt(Waist), as the recorded output below shows;
# no logarithm is involved. The object is still called reg_log so that any
# code referring to that name keeps working.
# `data = wc.at` makes the fit independent of attach().
reg_log <- lm(AT ~ sqrt(Waist), data = wc.at)  # regression on sqrt(Waist)
summary(reg_log)
## 
## Call:
## lm(formula = AT ~ sqrt(Waist), data = wc.at)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -102.880  -18.732   -1.924   15.319   90.270 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -533.34      42.86  -12.45   <2e-16 ***
## sqrt(Waist)    66.44       4.47   14.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.88 on 107 degrees of freedom
## Multiple R-squared:  0.6737, Adjusted R-squared:  0.6706 
## F-statistic: 220.9 on 1 and 107 DF,  p-value: < 2.2e-16
# Predicted AT from the square-root model for the new observations in `test`
predict(reg_log, newdata = test)
##         1         2         3         4 
## 75.436465 40.726568 29.857455  3.572794
# Multiple R-squared for the above model is 0.6737 (adjusted 0.6706).
# We may have to try a different transformation for a better R-squared value.
# Applying a different transformation

# Exponential model
# Regressing log(AT) on Waist corresponds to AT = exp(a + b * Waist).
# `data = wc.at` makes the fit independent of attach().
reg_exp <- lm(log(AT) ~ Waist, data = wc.at) # regression using exponential model
summary(reg_exp)
## 
## Call:
## lm(formula = log(AT) ~ Waist, data = wc.at)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.05086 -0.21688  0.03623  0.23044  0.82862 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.741021   0.232628   3.185  0.00189 ** 
## Waist       0.040252   0.002504  16.073  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3529 on 107 degrees of freedom
## Multiple R-squared:  0.7071, Adjusted R-squared:  0.7044 
## F-statistic: 258.3 on 1 and 107 DF,  p-value: < 2.2e-16
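# Multiple R-squared has increased from 0.67 to 0.7071.
# Caution: this model's R-squared is measured on log(AT), so it is not directly
# comparable with the R-squared of the models fitted to AT itself.
# The higher the R-squared value, the better the chances of getting a good model
# for Waist and adipose tissue.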