# Simple Linear Regression
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.1
# Load the salary data set. The analysis below uses two columns:
# YearsExperience and Salary.
# NOTE(review): the path is an absolute, machine-specific Windows location;
# adjust it (or make it project-relative) before running on another machine.
Salary_Data <- read.csv("E:\\Data Science\\data science\\assignments\\Simple Linear Regression\\Salary_Data.csv")

# Fail early with a clear error if the expected columns are missing, rather
# than silently producing NULL vectors further down.
stopifnot(all(c("YearsExperience", "Salary") %in% names(Salary_Data)))

# Expose the analysis columns as ordinary vectors instead of attach()-ing the
# data frame: attach() places a copy on the search path, where it can be
# masked by (or mask) other objects with the same name. The bare names
# Salary and YearsExperience used later in the script keep working.
Salary <- Salary_Data[["Salary"]]
YearsExperience <- Salary_Data[["YearsExperience"]]
# Descriptive statistics, organised by statistical moment.
# Columns are referenced explicitly through Salary_Data so the results do not
# depend on which objects happen to be on the search path.

# First Moment Business Decision: quartiles and mean of every column
summary(Salary_Data)
## YearsExperience Salary
## Min. : 1.100 Min. : 37731
## 1st Qu.: 3.200 1st Qu.: 56721
## Median : 4.700 Median : 65237
## Mean : 5.313 Mean : 76003
## 3rd Qu.: 7.700 3rd Qu.:100545
## Max. :10.500 Max. :122391
# Second Moment Business Decision: variance and standard deviation
var(Salary_Data[["Salary"]])
## [1] 751550960
var(Salary_Data[["YearsExperience"]])
## [1] 8.053609
sd(Salary_Data[["Salary"]])
## [1] 27414.43
sd(Salary_Data[["YearsExperience"]])
## [1] 2.837888
# Third Moment Business Decision: skewness (skewness() comes from e1071)
skewness(Salary_Data[["Salary"]])
## [1] 0.3194946
skewness(Salary_Data[["YearsExperience"]])
## [1] 0.3424477
# Fourth Moment Business Decision: kurtosis (kurtosis() comes from e1071)
kurtosis(Salary_Data[["Salary"]])
## [1] -1.395477
kurtosis(Salary_Data[["YearsExperience"]])
## [1] -1.17293
# Scatter plot with Salary on the x-axis and YearsExperience on the y-axis.
# The formula interface with data = draws the same plot (same axis labels)
# without relying on attach()-ed column names.
plot(YearsExperience ~ Salary, data = Salary_Data, col = "blue")

# Correlation coefficient between the two variables
cor(Salary_Data[["Salary"]], Salary_Data[["YearsExperience"]])
## [1] 0.9782416
# Fit a simple linear regression with YearsExperience as the response and
# Salary as the single predictor. Passing data = ties the model to the data
# frame explicitly, so the fit does not depend on the search path and
# predict(model1, newdata = ...) can look the predictor up by name.
# NOTE(review): this regresses experience on salary. If the goal is to predict
# salary from experience, the formula should be Salary ~ YearsExperience and
# the recorded output below would need to be regenerated -- confirm intent.
model1 <- lm(YearsExperience ~ Salary, data = Salary_Data)
summary(model1)
##
## Call:
## lm(formula = YearsExperience ~ Salary, data = Salary_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.12974 -0.46457 0.04105 0.54311 0.79669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.383e+00 3.273e-01 -7.281 6.3e-08 ***
## Salary 1.013e-04 4.059e-06 24.950 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5992 on 28 degrees of freedom
## Multiple R-squared: 0.957, Adjusted R-squared: 0.9554
## F-statistic: 622.5 on 1 and 28 DF, p-value: < 2.2e-16
# 95% confidence intervals for the intercept and the Salary slope
confint(model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -3.053603e+00 -1.7127178614
## Salary 9.295173e-05 0.0001095796
# Fitted values with lower/upper prediction-interval bounds for every row of
# the training data. The interval type is spelled out in full ("prediction")
# instead of relying on partial matching of the abbreviated "predict".
# No newdata is supplied, so predict.lm() warns that the intervals describe
# future responses at the observed predictor values.
predict(model1, interval = "prediction")
## Warning in predict.lm(model1, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 1.600934 0.3165619 2.885307
## 2 2.295819 1.0237773 3.567861
## 3 1.437694 0.1500755 2.725313
## 4 2.024427 0.7478589 3.300996
## 5 1.656428 0.3731291 2.939727
## 6 3.352729 2.0947042 4.610754
## 7 3.707969 2.4533424 4.962595
## 8 3.130248 1.8697562 4.390740
## 9 4.142905 2.8915256 5.394284
## 10 3.408121 2.1506703 4.665572
## 11 4.018652 2.7664480 5.270856
## 12 3.266856 2.0079094 4.525802
## 13 3.384628 2.1269353 4.642320
## 14 3.397185 2.1396216 4.654747
## 15 3.805285 2.5514728 5.059097
## 16 4.496626 3.2471410 5.746111
## 17 4.303310 3.0528728 5.553747
## 18 6.030801 4.7817265 7.279875
## 19 5.856117 4.6076374 7.104597
## 20 7.129735 5.8731707 8.386300
## 21 6.906748 5.6522247 8.161272
## 22 7.568520 6.3071722 8.829867
## 23 7.875253 6.6099641 9.140542
## 24 9.142087 7.8554139 10.428759
## 25 8.698442 7.4201795 9.976704
## 26 8.308670 7.0369817 9.580359
## 27 9.461782 8.1684469 10.755118
## 28 9.022897 7.7385799 10.307214
## 29 10.010845 8.7049141 11.316775
## 30 9.958288 8.6536250 11.262951
# The R-squared value for the above model is 0.957, i.e. Salary explains about
# 95.7% of the variance in YearsExperience, so the model fits the data well.