# Simple linear regression Q4
# Load the salary data set into the data frame `a`.
a <- read.csv("C:\\Users\\Harisha\\Desktop\\Datascience Assignments\\Simple linear regression\\Salary_Data.csv")
# Fail early with a clear message if the expected columns are missing.
stopifnot(all(c("YearsExperience", "Salary") %in% names(a)))
# Expose the two columns as plain vectors instead of calling attach(a):
# code further down may refer to them by bare name, and explicit copies
# avoid the search-path masking problems that attach() causes.
YearsExperience <- a$YearsExperience
Salary <- a$Salary
# The spreadsheet-style viewer is only useful in an interactive session.
if (interactive()) {
  View(a)
}
# First moment business decision (mean, median, range)
summary(a)
## YearsExperience Salary
## Min. : 1.100 Min. : 37731
## 1st Qu.: 3.200 1st Qu.: 56721
## Median : 4.700 Median : 65237
## Mean : 5.313 Mean : 76003
## 3rd Qu.: 7.700 3rd Qu.:100545
## Max. :10.500 Max. :122391
# Second moment business decision (variance, standard deviation).
# Columns are taken from `a` explicitly so nothing depends on attach().
var(a$YearsExperience)
## [1] 8.053609
var(a$Salary)
## [1] 751550960
sd(a$YearsExperience)
## [1] 2.837888
sd(a$Salary)
## [1] 27414.43
# Third and fourth moment business decisions (skewness and kurtosis)
library(e1071) # provides skewness() and kurtosis()
skewness(a$Salary)
## [1] 0.3194946
kurtosis(a$Salary)
## [1] -1.395477
# Visual distribution checks for Salary.
barplot(a$Salary)
# Title and axis label are set explicitly so they read "Salary", not "a$Salary".
hist(a$Salary, main = "Histogram of Salary", xlab = "Salary")
boxplot(a$Salary, horizontal = TRUE)
qqnorm(a$Salary)
qqline(a$Salary)
skewness(a$YearsExperience)
## [1] 0.3424477
kurtosis(a$YearsExperience)
## [1] -1.17293
# Visual distribution checks for YearsExperience, mirroring the Salary ones.
barplot(a$YearsExperience)
hist(
  a$YearsExperience,
  main = "Histogram of YearsExperience",
  xlab = "YearsExperience"
)
# This was a second barplot() call; the parallel Salary sequence has a
# boxplot in this position, so a boxplot is drawn here as well.
boxplot(a$YearsExperience, horizontal = TRUE)
qqnorm(a$YearsExperience)
qqline(a$YearsExperience)
# Correlation between salary and years of experience; columns are read
# from `a` explicitly so the call does not depend on attach().
cor(a$Salary, a$YearsExperience)
## [1] 0.9782416
# Plot the data frame (for two numeric columns this is a scatter plot).
plot(a)
# Fit a simple linear regression of Salary on YearsExperience.
# Passing data = a makes the model look the variables up in the data frame
# rather than on the search path, so it does not depend on attach().
SLR4 <- lm(Salary ~ YearsExperience, data = a)
summary(SLR4)
##
## Call:
## lm(formula = Salary ~ YearsExperience, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7958.0 -4088.5 -459.9 3372.6 11448.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25792.2 2273.1 11.35 5.51e-12 ***
## YearsExperience 9450.0 378.8 24.95 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5788 on 28 degrees of freedom
## Multiple R-squared: 0.957, Adjusted R-squared: 0.9554
## F-statistic: 622.5 on 1 and 28 DF, p-value: < 2.2e-16
# Multiple R-squared is 0.957 (> 0.8), hence it is a strong model.