# Simple Linear Regression Q3

# Exploratory analysis and simple linear regression of employee churn-out
# rate (Churn_out_rate) on salary hike (Salary_hike).
# Lines starting with `##` are console output recorded from a run of this
# script; they are kept for reference.

library(e1071) # provides skewness() and kurtosis()

a <- read.csv("C:\\Users\\Harisha\\Desktop\\Datascience Assignments\\Simple linear regression\\emp_data 3.csv")

# Columns are referenced explicitly as a$<column> (and via `data = a` in the
# model) instead of attach(a), so nothing on the search path can silently
# mask or be masked by the data frame's columns.

# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(a)
}

# 1st moment business decision (mean, median, range)
summary(a)
##   Salary_hike   Churn_out_rate 
##  Min.   :1580   Min.   :60.00  
##  1st Qu.:1618   1st Qu.:65.75  
##  Median :1675   Median :71.00  
##  Mean   :1689   Mean   :72.90  
##  3rd Qu.:1724   3rd Qu.:78.75  
##  Max.   :1870   Max.   :92.00

# 2nd moment business decision (variance, SD)
# var() on the whole data frame returns the variance-covariance matrix.
var(a)
##                Salary_hike Churn_out_rate
## Salary_hike      8481.8222      -861.2667
## Churn_out_rate   -861.2667       105.2111
sd(a$Salary_hike)
## [1] 92.09681
sd(a$Churn_out_rate)
## [1] 10.25725

# 3rd & 4th moment business decision (skewness, kurtosis)
# Skewness
skewness(a$Salary_hike)
## [1] 0.6180303
# Kurtosis (e1071 reports excess kurtosis, i.e. 0 for a normal distribution)
kurtosis(a$Salary_hike)
## [1] -0.9358547

barplot(a$Salary_hike)

# Title and axis label are set explicitly so they read "Salary_hike" rather
# than the expression "a$Salary_hike".
hist(
  a$Salary_hike,
  main = "Histogram of Salary_hike",
  xlab = "Salary_hike"
)

# The histogram is right-skewed, consistent with the positive skewness above.
boxplot(a$Salary_hike, horizontal = TRUE)

# The boxplot shows no outliers.
qqnorm(a$Salary_hike)
qqline(a$Salary_hike)

# Author's reading of the Q-Q plot: the points follow the reference line,
# i.e. Salary_hike is approximately normally distributed.
cor(a$Salary_hike, a$Churn_out_rate)
## [1] -0.9117216
# The two variables are strongly negatively correlated.
plot(a)

# The scatter plot also shows a strong negative relationship between the
# two variables.

# Fit the simple linear regression Churn_out_rate = b0 + b1 * Salary_hike.
SLR3 <- lm(Churn_out_rate ~ Salary_hike, data = a)
summary(SLR3)
## 
## Call:
## lm(formula = Churn_out_rate ~ Salary_hike, data = a)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.804 -3.059 -1.819  2.430  8.072 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 244.36491   27.35194   8.934 1.96e-05 ***
## Salary_hike  -0.10154    0.01618  -6.277 0.000239 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.469 on 8 degrees of freedom
## Multiple R-squared:  0.8312, Adjusted R-squared:  0.8101 
## F-statistic:  39.4 on 1 and 8 DF,  p-value: 0.0002386

# The R-squared value is > 0.8, hence it is a good model.