# Implementation of Linear Regression

# ---- Load data ----
# wc.at: data set with the columns Waist and AT (wc-at.csv).
# x:     second data set (x.csv); used further down as `newdata` for predict().
# NOTE(review): both paths are absolute, machine-specific Windows paths.
wc.at <- read.csv(
  file = "E:\\DataScience Yogesh\\R _Codes\\Simple Linear Regression\\wc-at.csv"
)
x <- read.csv(
  file = "E:\\DataScience Yogesh\\R _Codes\\Simple Linear Regression\\x.csv"
)

# Number of rows and columns in the training data.
dim(wc.at)
## [1] 109   2
#View(wc.at)
#var(Waist)

# Put the columns of wc.at on the search path so that `AT` and `Waist` can be
# referenced by bare name in the exploratory calls that follow.
# NOTE(review): attach() is generally discouraged; it is kept here because
# later statements reference the bare column names.
attach(wc.at)

# Five-number summary plus mean for each column.
summary(object = wc.at)
##      Waist             AT        
##  Min.   : 63.5   Min.   : 11.44  
##  1st Qu.: 80.0   1st Qu.: 50.88  
##  Median : 90.8   Median : 96.54  
##  Mean   : 91.9   Mean   :101.89  
##  3rd Qu.:104.0   3rd Qu.:137.00  
##  Max.   :121.0   Max.   :253.00
#windows()
# ---- Exploratory analysis ----
# `AT` and `Waist` resolve through the attach(wc.at) call made earlier.

# Scatter plot: AT on the horizontal axis, Waist on the vertical axis.
plot(x = AT, y = Waist)

#plot(x,y) # Syntax
# Correlation coefficient between AT and Waist.
#cor(x,y) # Syntax
# The coefficient is symmetric, so both argument orders print the same value.
cor(x = AT, y = Waist)
## [1] 0.8185578
cor(x = Waist, y = AT)
## [1] 0.8185578
#dim(wc.at)

# Object type, column names and column types of the data set.
class(wc.at)
## [1] "data.frame"
names(wc.at)
## [1] "Waist" "AT"
str(wc.at)
## 'data.frame':    109 obs. of  2 variables:
##  $ Waist: num  74.8 72.6 81.8 84 74.7 ...
##  $ AT   : num  25.7 25.9 42.6 42.8 29.8 ...

# Per-column summary statistics.
summary(object = wc.at)
##      Waist             AT        
##  Min.   : 63.5   Min.   : 11.44  
##  1st Qu.: 80.0   1st Qu.: 50.88  
##  Median : 90.8   Median : 96.54  
##  Mean   : 91.9   Mean   :101.89  
##  3rd Qu.:104.0   3rd Qu.:137.00  
##  Max.   :121.0   Max.   :253.00

# Standard deviation of the predictor.
sd(x = Waist)
## [1] 13.55912

# Implementation of the Linear Model (AT ~ Waist)

# ---- Model 1: simple linear regression ----
# Regress AT on Waist using the untransformed columns of wc.at.
lmodel1 <- lm(formula = AT ~ Waist, data = wc.at)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = lmodel1)
## 
## Call:
## lm(formula = AT ~ Waist, data = wc.at)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -107.288  -19.143   -2.939   16.376   90.342 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -215.9815    21.7963  -9.909   <2e-16 ***
## Waist          3.4589     0.2347  14.740   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.06 on 107 degrees of freedom
## Multiple R-squared:   0.67,  Adjusted R-squared:  0.667 
## F-statistic: 217.3 on 1 and 107 DF,  p-value: < 2.2e-16
# Predict AT for the rows of `x` with model 1.
pred <- predict(object = lmodel1, newdata = x)
pred
##         1         2         3         4 
##  40.31999 -19.86416  36.86113   9.88203

# predict() returns a plain named numeric vector ...
class(pred)
## [1] "numeric"
# ... which is then wrapped in a one-column data frame (column name "pred").
pred <- as.data.frame(pred)

# Predictions of model 1 for every row of the training data.
# predict.lm() has no `data` argument: anything passed under that name is
# swallowed by `...` and ignored. `newdata` is the argument that supplies the
# rows to predict for, so it is used here to make the intent explicit. The
# resulting values are the model's fitted values for wc.at.
pred1 <- predict(lmodel1, newdata = wc.at)
pred1
##          1          2          3          4          5          6 
##  42.568252  35.131704  66.953210  74.389758  42.222366  32.537559 
##          7          8          9         10         11         12 
##  63.840237  72.487385   3.656083  37.207020  32.710502  43.432966 
##         13         14         15         16         17         18 
##  36.861134  57.268404  50.350685  22.160981  46.718883  40.492936 
##         19         20         21         22         23         24 
##  39.282335  46.545940  49.831856  63.840237  60.381377  92.548770 
##         25         26         27         28         29         30 
##  67.644982 102.233576  83.555735  62.456693  81.480420  69.374412 
##         31         32         33         34         35         36 
##  72.833271  88.744024  98.082945  93.240542 136.822170 110.880725 
##         37         38         39         40         41         42 
##  98.774717 140.281029  60.727263  57.268404  72.833271  46.891826 
##         43         44         45         46         47         48 
##  62.456693  83.209849  71.103842 154.462353 110.188953 110.880725 
##         49         50         51         52         53         54 
##  59.689606  58.306062  94.624085  73.870929  78.713332  45.162396 
##         55         56         57         58         59         60 
##  55.193088  55.884860  87.706367  82.518078  79.750990  73.525043 
##         61         62         63         64         65         66 
##  52.426001  77.675674  60.035492 158.612984 197.698095 198.735753 
##         67         68         69         70         71         72 
## 117.798443 148.928178 147.198748 154.116467 154.116467 133.363311 
##         73         74         75         76         77         78 
## 119.527873 129.904451 157.575326 129.904451 140.281029 143.739889 
##         79         80         81         82         83         84 
## 150.657608 161.034186 142.010459 164.493045 164.493045 171.410764 
##         85         86         87         88         89         90 
## 159.304756 143.739889 167.951905 159.304756 202.540498 161.034186 
##         91         92         93         94         95         96 
## 121.257303 148.928178 122.986732 110.880725 119.527873 147.198748 
##         97         98         99        100        101        102 
## 150.657608 126.445592  98.774717 138.551600 150.657608 161.380072 
##        103        104        105        106        107        108 
## 181.787342 133.363311 130.250337 106.730093 136.130398 157.229440 
##        109 
## 159.304756
# R-squared value for the above model is 0.67.
# We may have to transform the variables to get a better R-squared value.
# Applying transformations

# ---- Model 2: logarithmic transformation ----
# Regress AT on the natural log of Waist.
lmodel2 <- lm(formula = AT ~ log(Waist), data = wc.at)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = lmodel2)
## 
## Call:
## lm(formula = AT ~ log(Waist), data = wc.at)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -98.473 -18.273  -2.374  14.538  90.400 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1328.34      95.92  -13.85   <2e-16 ***
## log(Waist)    317.14      21.26   14.92   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.8 on 107 degrees of freedom
## Multiple R-squared:  0.6753, Adjusted R-squared:  0.6723 
## F-statistic: 222.6 on 1 and 107 DF,  p-value: < 2.2e-16

# Predict AT for the rows of `x` with model 2; the log() in the formula is
# applied to the Waist column of `newdata` by predict().
pred <- predict(object = lmodel2, newdata = x)
class(pred)
## [1] "numeric"
# Wrap the numeric predictions in a one-column data frame (column "pred").
pred <- as.data.frame(pred)


# Numeric predictions for `x` from the log-transformed model of this section
# (lmodel2). This statement previously predicted from lmodel1, which left
# `pred` holding model-1 values at the end of the model-2 section.
# NOTE(review): confirm lmodel2 is the intended model here.
pred <- predict(lmodel2, newdata = x)

# Adjusted R-squared for the log model above is 0.6723 (multiple R-squared: 0.6753).
# We may have to try a different transformation to get a better R-squared value.
# Applying a different transformation

# ---- Model 3: exponential model ----
# Regress log(AT) on Waist, so predictions from this model are on the log
# scale and need exp() to get back to the scale of AT.
lmodel3 <- lm(formula = log(AT) ~ (Waist), data = wc.at)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = lmodel3)
## 
## Call:
## lm(formula = log(AT) ~ (Waist), data = wc.at)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.05086 -0.21688  0.03623  0.23044  0.82862 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.741021   0.232628   3.185  0.00189 ** 
## Waist       0.040252   0.002504  16.073  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3529 on 107 degrees of freedom
## Multiple R-squared:  0.7071, Adjusted R-squared:  0.7044 
## F-statistic: 258.3 on 1 and 107 DF,  p-value: < 2.2e-16
# Predictions of model 3 for every row of the training data, on the log scale.
# predict.lm() has no `data` argument: anything passed under that name is
# swallowed by `...` and ignored. `newdata` is the argument that supplies the
# rows to predict for, so it is used here to make the intent explicit.
pred <- predict(lmodel3, newdata = wc.at)

# Back-transform from log(AT) to the scale of AT.
exp(pred)
##         1         2         3         4         5         6         7 
##  42.51636  38.99160  56.46768  61.57225  42.34557  37.83206  54.45863 
##         8         9        10        11        12        13        14 
##  60.22409  27.03275  39.94477  37.90828  42.94637  39.78430  50.44894 
##        15        16        17        18        19        20        21 
##  46.54674  33.52863  44.62043  41.50183  40.92124  44.53072  46.26654 
##        22        23        24        25        26        27        28 
##  54.45863  52.31007  76.06106  56.92411  85.13542  68.50318  53.58881 
##        29        30        31        32        33        34        35 
##  66.86854  58.08138  60.46699  72.76672  81.12087  76.67586 127.32812 
##        36        37        38        39        40        41        42 
##  94.14859  81.77657 132.55793  52.52106  50.44894  60.46699  44.71033 
##        43        44        45        46        47        48        49 
##  53.58881  68.22799  59.26218 156.34337  93.39369  94.14859  51.89064 
##        50        51        52        53        54        55        56 
##  51.06184  77.92041  61.20160  64.74955  43.81947  49.24512  49.64317 
##        57        58        59        60        61        62        63 
##  71.89330  67.68093  65.53619  60.95574  47.68459  63.97235  52.09993 
##        64        65        66        67        68        69        70 
## 164.08057 258.58125 261.72273 102.04145 146.59165 143.67081 155.71532 
##        71        72        73        74        75        76        77 
## 155.71532 122.30464 104.11596 117.47935 162.11110 117.47935 132.55793 
##        78        79        80        81        82        83        84 
## 138.00256 149.57188 168.76957 135.25285 175.70153 175.70153 190.43131 
##        85        86        87        88        89        90        91 
## 165.40683 138.00256 182.91822 165.40683 273.57159 168.76957 106.23265 
##        92        93        94        95        96        97        98 
## 146.59165 108.39237  94.14859 104.11596 143.67081 149.57188 112.84443 
##        99       100       101       102       103       104       105 
##  81.77657 129.91671 149.57188 169.45028 214.87336 122.30464 117.95318 
##       106       107       108       109 
##  89.70902 126.30718 161.45987 165.40683
class(pred)
## [1] "numeric"

# One-column data frame of the back-transformed predictions. The expression
# is kept as `exp(pred)` so the resulting column name stays "exp(pred)".
PV <- as.data.frame(exp(pred))

# From here on `pred` holds the predictions on the original AT scale.
pred <- exp(pred)