# Simple Linear Regression

library(e1071)
## Warning: package 'e1071' was built under R version 3.5.1
# Read the employee data set from its CSV file into a data frame.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact location exists.
Emp_Data <- read.csv(
  file = "E:\\Data Science\\data science\\assignments\\Simple Linear Regression\\emp_data.csv"
)

# Put the columns of Emp_Data on the search path so they can be referred to by
# bare name.
# NOTE(review): attach() is generally discouraged because a global variable
# with the same name silently masks an attached column; it is kept because
# code further down the file may refer to the columns by bare name.
attach(what = Emp_Data)

# Descriptive statistics for the two columns of Emp_Data. Columns are accessed
# explicitly through Emp_Data$... so the results do not depend on what happens
# to be on the search path.

# First Moment Business Decision
# Minimum, quartiles, median, mean and maximum of every column.
summary(Emp_Data)
##   Salary_hike   Churn_out_rate 
##  Min.   :1580   Min.   :60.00  
##  1st Qu.:1618   1st Qu.:65.75  
##  Median :1675   Median :71.00  
##  Mean   :1689   Mean   :72.90  
##  3rd Qu.:1724   3rd Qu.:78.75  
##  Max.   :1870   Max.   :92.00
# Second Moment Business Decision
# Sample variance and standard deviation of each column.
var(Emp_Data$Salary_hike)
## [1] 8481.822
var(Emp_Data$Churn_out_rate)
## [1] 105.2111
sd(Emp_Data$Salary_hike)
## [1] 92.09681
sd(Emp_Data$Churn_out_rate)
## [1] 10.25725
# Third Moment Business Decision
# skewness() comes from e1071; positive values indicate a longer right tail.
skewness(Emp_Data$Salary_hike)
## [1] 0.6180303
skewness(Emp_Data$Churn_out_rate)
## [1] 0.466011
# Fourth Moment Business decision
# kurtosis() comes from e1071 and reports excess kurtosis (0 for a normal
# distribution), so negative values indicate lighter tails than normal.
kurtosis(Emp_Data$Salary_hike)
## [1] -0.9358547
kurtosis(Emp_Data$Churn_out_rate)
## [1] -1.162193
# Scatter plot of Salary_hike (y) against Churn_out_rate (x). Columns are taken
# explicitly from Emp_Data; the axis labels are set by hand so they still show
# the plain column names.
plot(
  Emp_Data$Churn_out_rate, Emp_Data$Salary_hike,
  col = "blue",
  xlab = "Churn_out_rate", ylab = "Salary_hike"
)

# Correlation coefficient
# Pearson correlation (the default method of cor()) between the two columns.
cor(Emp_Data$Churn_out_rate, Emp_Data$Salary_hike)
## [1] -0.9117216
# Fit a simple linear regression with Salary_hike as the response and
# Churn_out_rate as the single predictor. The columns are taken from Emp_Data
# through `data =`, so the fit does not depend on what is on the search path.
# NOTE(review): confirm that Salary_hike (rather than Churn_out_rate) is the
# intended response variable.
model1 <- lm(Salary_hike ~ Churn_out_rate, data = Emp_Data)

# Residual summary, coefficient table, R-squared and overall F-test.
summary(model1)
## 
## Call:
## lm(formula = Salary_hike ~ Churn_out_rate, data = Emp_Data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.97 -23.13 -21.41  19.24  75.80 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2285.365     95.912  23.828 1.02e-08 ***
## Churn_out_rate   -8.186      1.304  -6.277 0.000239 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.13 on 8 degrees of freedom
## Multiple R-squared:  0.8312, Adjusted R-squared:  0.8101 
## F-statistic:  39.4 on 1 and 8 DF,  p-value: 0.0002386
# 95% confidence intervals for the intercept and slope.
confint(model1, level = 0.95)
##                     2.5 %      97.5 %
## (Intercept)    2064.19292 2506.537671
## Churn_out_rate  -11.19332   -5.178839
# No newdata is supplied, so predictions are made for the rows used to fit the
# model; lwr/upr are prediction-interval bounds, which is why R warns that they
# refer to future responses.
predict(model1, interval = "predict")
## Warning in predict.lm(model1, interval = "predict"): predictions on current data refer to _future_ responses
##         fit      lwr      upr
## 1  1532.246 1419.468 1645.023
## 2  1589.548 1485.897 1693.200
## 3  1630.479 1531.103 1729.854
## 4  1671.409 1574.149 1768.669
## 5  1695.967 1598.875 1793.060
## 6  1712.340 1614.894 1809.785
## 7  1728.712 1630.545 1826.879
## 8  1753.270 1653.350 1853.190
## 9  1777.828 1675.388 1880.269
## 10 1794.200 1689.680 1898.721
# R-squared value for the above model is 0.8312.
# Residual standard error is 40.13 on 8 degrees of freedom.
# Apply different transformations 

# Log-log model: regress log(Salary_hike) on log(Churn_out_rate). The columns
# are taken from Emp_Data through `data =`, so the fit does not depend on what
# is on the search path.
model2 <- lm(log(Salary_hike) ~ log(Churn_out_rate), data = Emp_Data)
# Residual summary, coefficient table, R-squared and overall F-test. All
# residual-based figures are in log(Salary_hike) units.
summary(model2)
## 
## Call:
## lm(formula = log(Salary_hike) ~ log(Churn_out_rate), data = Emp_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.017164 -0.012812 -0.009309  0.008381  0.034919 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          9.00424    0.19663  45.794 5.71e-11 ***
## log(Churn_out_rate) -0.36769    0.04591  -8.008 4.33e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01895 on 8 degrees of freedom
## Multiple R-squared:  0.8891, Adjusted R-squared:  0.8752 
## F-statistic: 64.13 on 1 and 8 DF,  p-value: 4.335e-05
# 95% confidence intervals for the intercept and slope (log-log scale).
confint(model2, level = 0.95)
##                          2.5 %     97.5 %
## (Intercept)          8.5508229  9.4576615
## log(Churn_out_rate) -0.4735731 -0.2618155
# No newdata is supplied, so predictions are made for the rows used to fit the
# model. fit/lwr/upr are on the log scale because the response is
# log(Salary_hike); apply exp() to express them in Salary_hike units.
predict(model2, interval = "predict")
## Warning in predict.lm(model2, interval = "predict"): predictions on current data refer to _future_ responses
##         fit      lwr      upr
## 1  7.341606 7.289128 7.394085
## 2  7.370705 7.321756 7.419654
## 3  7.392996 7.345914 7.440078
## 4  7.416727 7.370722 7.462731
## 5  7.431737 7.385898 7.477575
## 6  7.442095 7.396133 7.488057
## 7  7.452753 7.406464 7.499042
## 8  7.469344 7.422151 7.516537
## 9  7.486718 7.438092 7.535345
## 10 7.498775 7.448882 7.548668
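# R-squared value for the above model is 0.8891, which is higher than model1's 0.8312
# (the two models have different responses, so treat the comparison as indicative only).
# Residual standard error is 0.01895 on 8 degrees of freedom, but it is measured in
# log(Salary_hike) units, so it cannot be compared directly with model1's 40.13.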