# Implementation of Linear Regression
# Load the wc-at data set (columns Waist and AT) and `x`, the data frame that
# is later passed to predict() as `newdata`.
# NOTE(review): both paths are absolute, machine-specific Windows paths; the
# script will not run elsewhere without editing them.
wc.at <- read.csv("E:\\DataScience Yogesh\\R _Codes\\Simple Linear Regression\\wc-at.csv") # choose the wc-at.csv data set
x <- read.csv("E:\\DataScience Yogesh\\R _Codes\\Simple Linear Regression\\x.csv")
# Number of rows and columns.
dim(wc.at)
## [1] 109 2
#View(wc.at)
#var(Waist)
# NOTE(review): attach() puts the columns of wc.at on the search path so that
# bare `AT` / `Waist` references resolve. It is generally discouraged (names can
# be masked silently); prefer `wc.at$Waist` or `with(wc.at, ...)`. Left in place
# because code elsewhere in the file may depend on it.
attach(wc.at)
# Five-number summary and mean of each column.
summary(wc.at)
## Waist AT
## Min. : 63.5 Min. : 11.44
## 1st Qu.: 80.0 1st Qu.: 50.88
## Median : 90.8 Median : 96.54
## Mean : 91.9 Mean :101.89
## 3rd Qu.:104.0 3rd Qu.:137.00
## Max. :121.0 Max. :253.00
#windows()
# Scatter plot of AT (x axis) against Waist (y axis).
# General form: plot(x, y)
with(wc.at, plot(AT, Waist))

# Correlation coefficient between Waist and AT.
# General form: cor(x, y); the result is symmetric in its arguments.
with(wc.at, cor(AT, Waist))
## [1] 0.8185578
with(wc.at, cor(Waist, AT))
## [1] 0.8185578

# Structure of the data set.
#dim(wc.at)
class(wc.at)
## [1] "data.frame"
colnames(wc.at)
## [1] "Waist" "AT"
str(wc.at)
## 'data.frame': 109 obs. of 2 variables:
## $ Waist: num 74.8 72.6 81.8 84 74.7 ...
## $ AT : num 25.7 25.9 42.6 42.8 29.8 ...
summary(wc.at)
## Waist AT
## Min. : 63.5 Min. : 11.44
## 1st Qu.: 80.0 1st Qu.: 50.88
## Median : 90.8 Median : 96.54
## Mean : 91.9 Mean :101.89
## 3rd Qu.:104.0 3rd Qu.:137.00
## Max. :121.0 Max. :253.00

# Standard deviation of the predictor.
with(wc.at, sd(Waist))
## [1] 13.55912
## [1] 13.55912
# Implementation of Linear Regression: simple linear model
# Model 1: simple linear regression of AT on Waist, no transformation.
lmodel1 <- lm(formula = AT ~ Waist, data = wc.at)
# Coefficients, residual summary and goodness-of-fit statistics.
summary(lmodel1)
##
## Call:
## lm(formula = AT ~ Waist, data = wc.at)
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.288 -19.143 -2.939 16.376 90.342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -215.9815 21.7963 -9.909 <2e-16 ***
## Waist 3.4589 0.2347 14.740 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.06 on 107 degrees of freedom
## Multiple R-squared: 0.67, Adjusted R-squared: 0.667
## F-statistic: 217.3 on 1 and 107 DF, p-value: < 2.2e-16
# Predict AT for the observations in `x` using the linear model.
pred <- predict(lmodel1, newdata = x)
pred
## 1 2 3 4
## 40.31999 -19.86416 36.86113 9.88203
class(pred)
## [1] "numeric"
pred <- as.data.frame(pred)
# Predictions for the training data itself. predict.lm() takes `newdata`, not
# `data`: an unknown `data =` argument is silently absorbed by `...` and the
# call only "worked" because predict() then falls back to the fitted values.
# Passing `newdata` makes the intent explicit and yields the same numbers.
pred1 <- predict(lmodel1, newdata = wc.at)
pred1
## 1 2 3 4 5 6
## 42.568252 35.131704 66.953210 74.389758 42.222366 32.537559
## 7 8 9 10 11 12
## 63.840237 72.487385 3.656083 37.207020 32.710502 43.432966
## 13 14 15 16 17 18
## 36.861134 57.268404 50.350685 22.160981 46.718883 40.492936
## 19 20 21 22 23 24
## 39.282335 46.545940 49.831856 63.840237 60.381377 92.548770
## 25 26 27 28 29 30
## 67.644982 102.233576 83.555735 62.456693 81.480420 69.374412
## 31 32 33 34 35 36
## 72.833271 88.744024 98.082945 93.240542 136.822170 110.880725
## 37 38 39 40 41 42
## 98.774717 140.281029 60.727263 57.268404 72.833271 46.891826
## 43 44 45 46 47 48
## 62.456693 83.209849 71.103842 154.462353 110.188953 110.880725
## 49 50 51 52 53 54
## 59.689606 58.306062 94.624085 73.870929 78.713332 45.162396
## 55 56 57 58 59 60
## 55.193088 55.884860 87.706367 82.518078 79.750990 73.525043
## 61 62 63 64 65 66
## 52.426001 77.675674 60.035492 158.612984 197.698095 198.735753
## 67 68 69 70 71 72
## 117.798443 148.928178 147.198748 154.116467 154.116467 133.363311
## 73 74 75 76 77 78
## 119.527873 129.904451 157.575326 129.904451 140.281029 143.739889
## 79 80 81 82 83 84
## 150.657608 161.034186 142.010459 164.493045 164.493045 171.410764
## 85 86 87 88 89 90
## 159.304756 143.739889 167.951905 159.304756 202.540498 161.034186
## 91 92 93 94 95 96
## 121.257303 148.928178 122.986732 110.880725 119.527873 147.198748
## 97 98 99 100 101 102
## 150.657608 126.445592 98.774717 138.551600 150.657608 161.380072
## 103 104 105 106 107 108
## 181.787342 133.363311 130.250337 106.730093 136.130398 157.229440
## 109
## 159.304756
# R-squared value for the above model is 0.67.
# We may have to transform the variables to get a better R-squared value.
# Applying transformations
# Logarithmic transformation
# Model 2: regression using a logarithmic transformation of the predictor,
# i.e. AT ~ log(Waist).
lmodel2 <- lm(formula = AT ~ log(Waist), data = wc.at)
# Coefficients, residual summary and goodness-of-fit statistics.
summary(lmodel2)
##
## Call:
## lm(formula = AT ~ log(Waist), data = wc.at)
##
## Residuals:
## Min 1Q Median 3Q Max
## -98.473 -18.273 -2.374 14.538 90.400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1328.34 95.92 -13.85 <2e-16 ***
## log(Waist) 317.14 21.26 14.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.8 on 107 degrees of freedom
## Multiple R-squared: 0.6753, Adjusted R-squared: 0.6723
## F-statistic: 222.6 on 1 and 107 DF, p-value: < 2.2e-16
# Predict AT for the observations in `x` using the logarithmic model.
pred <- predict(lmodel2, newdata = x)
class(pred)
## [1] "numeric"
pred <- as.data.frame(pred)
# Keep the predictions as a plain numeric vector as well. This line previously
# called predict() on lmodel1, which silently replaced the log-model
# predictions with those of the untransformed linear model.
pred <- predict(lmodel2, newdata = x)
# R-squared for the above model is 0.6753 (adjusted R-squared: 0.6723).
# We may have to try a different transformation to get a better R-squared value.
# Applying different transformations
# Exponential model
# Model 3: exponential model, fitted as a linear regression of log(AT) on Waist.
lmodel3 <- lm(formula = log(AT) ~ (Waist), data = wc.at)
# Coefficients, residual summary and goodness-of-fit statistics
# (all on the log(AT) scale).
summary(lmodel3)
##
## Call:
## lm(formula = log(AT) ~ (Waist), data = wc.at)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.05086 -0.21688 0.03623 0.23044 0.82862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.741021 0.232628 3.185 0.00189 **
## Waist 0.040252 0.002504 16.073 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3529 on 107 degrees of freedom
## Multiple R-squared: 0.7071, Adjusted R-squared: 0.7044
## F-statistic: 258.3 on 1 and 107 DF, p-value: < 2.2e-16
# Predictions for the training data on the log(AT) scale. predict.lm() takes
# `newdata`, not `data`: an unknown `data =` argument is silently absorbed by
# `...`, so the call only returned the right numbers because predict() falls
# back to the fitted values when `newdata` is missing.
pred <- predict(lmodel3, newdata = wc.at)
# Back-transform from log(AT) to the original AT scale.
exp(pred)
## 1 2 3 4 5 6 7
## 42.51636 38.99160 56.46768 61.57225 42.34557 37.83206 54.45863
## 8 9 10 11 12 13 14
## 60.22409 27.03275 39.94477 37.90828 42.94637 39.78430 50.44894
## 15 16 17 18 19 20 21
## 46.54674 33.52863 44.62043 41.50183 40.92124 44.53072 46.26654
## 22 23 24 25 26 27 28
## 54.45863 52.31007 76.06106 56.92411 85.13542 68.50318 53.58881
## 29 30 31 32 33 34 35
## 66.86854 58.08138 60.46699 72.76672 81.12087 76.67586 127.32812
## 36 37 38 39 40 41 42
## 94.14859 81.77657 132.55793 52.52106 50.44894 60.46699 44.71033
## 43 44 45 46 47 48 49
## 53.58881 68.22799 59.26218 156.34337 93.39369 94.14859 51.89064
## 50 51 52 53 54 55 56
## 51.06184 77.92041 61.20160 64.74955 43.81947 49.24512 49.64317
## 57 58 59 60 61 62 63
## 71.89330 67.68093 65.53619 60.95574 47.68459 63.97235 52.09993
## 64 65 66 67 68 69 70
## 164.08057 258.58125 261.72273 102.04145 146.59165 143.67081 155.71532
## 71 72 73 74 75 76 77
## 155.71532 122.30464 104.11596 117.47935 162.11110 117.47935 132.55793
## 78 79 80 81 82 83 84
## 138.00256 149.57188 168.76957 135.25285 175.70153 175.70153 190.43131
## 85 86 87 88 89 90 91
## 165.40683 138.00256 182.91822 165.40683 273.57159 168.76957 106.23265
## 92 93 94 95 96 97 98
## 146.59165 108.39237 94.14859 104.11596 143.67081 149.57188 112.84443
## 99 100 101 102 103 104 105
## 81.77657 129.91671 149.57188 169.45028 214.87336 122.30464 117.95318
## 106 107 108 109
## 89.70902 126.30718 161.45987 165.40683
# At this point `pred` still holds the log-scale predictions from lmodel3.
class(pred)
## [1] "numeric"
# Back-transformed predictions as a data frame; its single column is named
# after the expression passed in, i.e. `exp(pred)`.
PV <- as.data.frame(exp(pred))
# Replace the log-scale predictions with their back-transformed values.
pred<-exp(pred)