# Simple Linear Regression-Q4
## Prediction model for salary hike: Salary = y (output), YearsExperience = x (input)


# Setup ----
library(e1071)  # provides skewness() and kurtosis()

# Location of the salary data set, kept in one place so it is easy to change.
data_path <- "D:\\DataScience\\Assignments\\SimpleLinearRegression\\Salary_Data.csv"

# Import the data set.
Q4 <- read.csv(data_path)

# Open the data viewer only when a person is at the console; the call is
# skipped in batch runs, where there is nobody to look at the viewer.
if (interactive()) {
  View(Q4)
}

# Extract the two columns explicitly instead of calling attach(Q4).
# attach() places a copy of the data frame on the search path, where its
# columns can silently mask (or be masked by) other objects. The vector names
# match the column names, so the plot labels below stay the same.
Salary <- Q4$Salary
YearsExperience <- Q4$YearsExperience

# Column names in the data set Q4
colnames(Q4)
## [1] "YearsExperience" "Salary"

# First moment business decision (mean, median, range) ----
summary(YearsExperience)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.100   3.200   4.700   5.313   7.700  10.500
summary(Salary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   37731   56721   65237   76003  100545  122391
# The mean of Salary is above its median, i.e. some positive skewness.

# Second moment business decision (variance, standard deviation) ----
var(YearsExperience)
## [1] 8.053609
var(Salary)
## [1] 751550960
sd(YearsExperience)
## [1] 2.837888
sd(Salary)
## [1] 27414.43

# Third and fourth moment business decision (skewness, kurtosis) ----

# Salary: shape statistics and distribution plots
skewness(Salary)
## [1] 0.3194946
kurtosis(Salary)
## [1] -1.395477
barplot(Salary)

# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(Salary, horizontal = TRUE)

hist(Salary)

# Normal Q-Q plot: points close to the reference line indicate that the
# variable is approximately normally distributed.
qqnorm(Salary)
qqline(Salary)

# Observation recorded from the box plot: no outliers in Salary.
# Observation recorded from the Q-Q plot: Salary is treated as approximately
# normally distributed (the Q-Q plot checks normality, not linearity).

# YearsExperience: shape statistics and distribution plots
skewness(YearsExperience)
## [1] 0.3424477
kurtosis(YearsExperience)
## [1] -1.17293
barplot(YearsExperience)

boxplot(YearsExperience, horizontal = TRUE)

hist(YearsExperience)

qqnorm(YearsExperience)
qqline(YearsExperience)

# Observation recorded from the box plot: no outliers in YearsExperience.
# Observation recorded from the Q-Q plot: YearsExperience is treated as
# approximately normally distributed.

# Correlation ----
# Correlation coefficient between output and input; |r| > 0.85 is treated
# here as a strong correlation.
cor(Salary, YearsExperience)
## [1] 0.9782416
# r = 0.978 is a very strong positive linear relationship. It is not an
# "accuracy"; the share of variance explained is r^2 (about 0.957), which is
# the Multiple R-squared reported by the model below.

# Scatter plot of the two variables, used to inspect their relationship.
plot(Q4)

# The scatter plot is expected to show a strong positive correlation.

# Simple linear regression ----
# Passing data = Q4 ties the formula's variables to the data frame instead of
# relying on whatever objects of those names are visible on the search path.
Model3 <- lm(Salary ~ YearsExperience, data = Q4)

summary(Model3)
## 
## Call:
## lm(formula = Salary ~ YearsExperience, data = Q4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7958.0 -4088.5  -459.9  3372.6 11448.0 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      25792.2     2273.1   11.35 5.51e-12 ***
## YearsExperience   9450.0      378.8   24.95  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5788 on 28 degrees of freedom
## Multiple R-squared:  0.957,  Adjusted R-squared:  0.9554 
## F-statistic: 622.5 on 1 and 28 DF,  p-value: < 2.2e-16
# R-squared is 0.957, above the 0.8 threshold used here for a strong model,
# and both coefficients are significant (p < 0.001).