# Simple Linear Regression-Q3
## prediction model for Churn_out_rate

# Load the employee data set: Churn_out_rate is the response (y) and
# Salary_hike is the predictor (x).
Q3 <- read.csv("D:\\DataScience\\Assignments\\SimpleLinearRegression\\emp_data.csv")

# Fail early if the expected columns are missing, instead of erroring later
# inside a statistics or plotting call.
stopifnot(c("Salary_hike", "Churn_out_rate") %in% colnames(Q3))

# NOTE(review): attach() is retained only because other statements in this
# script may refer to the columns by bare name; prefer Q3$column or
# `data = Q3` in new code.
attach(Q3)

# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  View(Q3)
}

# Column names in the data set Q3
colnames(Q3)
## [1] "Salary_hike"    "Churn_out_rate"
# First moment business decision (mean, median, range)
# Columns are referenced through Q3 explicitly so the results do not depend
# on attach() or on a same-named variable in the global environment.

summary(Q3$Salary_hike)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1580    1618    1675    1689    1724    1870
summary(Q3$Churn_out_rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   60.00   65.75   71.00   72.90   78.75   92.00
# In both summaries the mean exceeds the median (1689 > 1675 and
# 72.90 > 71.00), which suggests some positive skewness in both variables.

# Second moment business decision (variance, standard deviation)

var(Q3$Salary_hike)
## [1] 8481.822
var(Q3$Churn_out_rate)
## [1] 105.2111
sd(Q3$Salary_hike)
## [1] 92.09681
sd(Q3$Churn_out_rate)
## [1] 10.25725
# Third and fourth moment business decisions (skewness and kurtosis)

library(e1071)

# Positive skewness value => right-skewed distribution.
skewness(Q3$Salary_hike)
## [1] 0.6180303
kurtosis(Q3$Salary_hike)
## [1] -0.9358547
barplot(Q3$Salary_hike)

# main/xlab are set explicitly so the plot keeps the plain column name
# rather than the literal expression "Q3$Salary_hike".
hist(Q3$Salary_hike,
     main = "Histogram of Salary_hike",
     xlab = "Salary_hike")

# The histogram confirms that there is some positive skewness.

# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(Q3$Salary_hike, horizontal = TRUE)

# The boxplot shows no outliers, but some positive skew.


qqnorm(Q3$Salary_hike)
qqline(Q3$Salary_hike)

# Based on the normal Q-Q plot, the Salary_hike points were judged to follow
# the reference line, i.e. the data are approximately normally distributed.
# Correlation coefficient between Churn_out_rate and Salary_hike
# (|r| > 0.85 is treated as a strong correlation)

cor(Q3$Churn_out_rate, Q3$Salary_hike)
## [1] -0.9117216
# r = -0.91 indicates a strong negative linear relationship. Note that r is
# not a model accuracy: the share of variance a simple linear model can
# explain is r^2 = (-0.9117)^2, i.e. about 0.83.

plot(Q3)

# The scatter plot shows the relationship between the two variables.
# It is consistent with the strong negative correlation computed above.
# Fit the simple linear regression Churn_out_rate = B0 + B1 * Salary_hike.
# data = Q3 takes both variables from the data frame explicitly instead of
# relying on attach()ed names found on the search path.
Model3 <- lm(Churn_out_rate ~ Salary_hike, data = Q3)


summary(Model3)
## 
## Call:
## lm(formula = Churn_out_rate ~ Salary_hike, data = Q3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.804 -3.059 -1.819  2.430  8.072 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 244.36491   27.35194   8.934 1.96e-05 ***
## Salary_hike  -0.10154    0.01618  -6.277 0.000239 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.469 on 8 degrees of freedom
## Multiple R-squared:  0.8312, Adjusted R-squared:  0.8101 
## F-statistic:  39.4 on 1 and 8 DF,  p-value: 0.0002386
# Based on the summary, both the intercept (B0, p = 1.96e-05) and the slope
# (B1, p = 0.000239) are statistically significant (p < 0.05), so both
# coefficients can be used:
#   Churn_out_rate = 244.36 - 0.1015 * Salary_hike

# R^2 is 0.83 (> 0.8), so this is considered a strong model.