# Load the salary dataset from the local course folder into a data frame.
df <- read.csv(
  file = "g:\\Rstudio\\udemy\\ml\\Machine Learning AZ\\Part 2 - Regression\\Section 4 - Simple Linear Regression\\Salary_Data.csv",
  header = TRUE
)
# Show the first six rows as a quick sanity check of the import.
head(df, n = 6L)
# Three ways to keep only the rows whose Salary is above 40000.

# 1. Logical indexing with [ ]
df1 <- df[df$Salary > 40000, ]
df1

# 2. subset(): columns can be referenced by bare name inside the condition
df2 <- subset(df, Salary > 40000)
df2

# 3. Row positions from which() (also drops rows where the condition is NA)
# A bare filter(df, ...) call is deliberately NOT used here: unless dplyr is
# attached, `filter` resolves to stats::filter(), a linear filter for time
# series, which returns a ts object instead of the matching rows.
# With dplyr installed, the equivalent is dplyr::filter(df, Salary > 40000).
df3 <- df[which(df$Salary > 40000), ]
df3
Time Series:
Start = 1
End = 30
Frequency = 1
[,1] [,2]
1 NA NA
2 NA NA
3 NA NA
4 NA NA
5 NA NA
6 NA NA
7 NA NA
8 NA NA
9 NA NA
10 NA NA
11 NA NA
12 NA NA
13 NA NA
14 NA NA
15 130.3 1940001
16 NA NA
17 NA NA
18 NA NA
19 NA NA
20 NA NA
21 NA NA
22 NA NA
23 NA NA
24 NA NA
25 NA NA
26 NA NA
27 NA NA
28 NA NA
29 NA NA
30 NA NA
# Missing values: this dataset has none, so no imputation is performed.
# Template kept for datasets that need mean imputation:
# dataset$Age = ifelse(is.na(dataset$Age),ave(dataset$Age, FUN=function(x) mean(x, na.rm =TRUE)), dataset$Age)
# dataset$Salary = ifelse(is.na(dataset$Salary), ave(dataset$Salary, FUN =function(x) mean(x, na.rm =TRUE)),dataset$Salary)
# Categorical encoding: not needed for this dataset.
# Template kept for datasets with categorical columns:
# dataset$Country <- factor(dataset$Country,levels = c("France", "Spain","Germany"), labels =c(1,2,3))
# dataset$Purchased <- factor(dataset$Purchased, levels =c("No","Yes"), labels= c(0,1))

# Split the data into training and test sets.
dataset <- df
library(caTools)
set.seed(123) # fixed seed so the split is reproducible
# The split is computed on the dependent variable (Salary) with a 2/3 ratio;
# `split` is a logical vector marking the rows assigned to the training set.
split <- sample.split(dataset$Salary, SplitRatio = 2 / 3)
# Rows flagged TRUE form the training set, the remaining rows the test set.
training_set <- subset(dataset, split)
test_set <- subset(dataset, !split)
# Feature scaling is left disabled.
# training_set[,2:3] = scale(training_set[,2:3])
# test_set[,2:3] = scale(test_set[,2:3])
# Print both partitions for inspection.
training_set
test_set
# Fit a simple linear regression of Salary on YearsExperience,
# using only the training rows.
regressor <- lm(Salary ~ YearsExperience, data = training_set)
# Report residuals, coefficients and goodness-of-fit statistics.
summary(regressor)
Call:
lm(formula = Salary ~ YearsExperience, data = training_set)
Residuals:
Min 1Q Median 3Q Max
-7325.1 -3814.4 427.7 3559.7 8884.6
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25592 2646 9.672 1.49e-08 ***
YearsExperience 9365 421 22.245 1.52e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5391 on 18 degrees of freedom
Multiple R-squared: 0.9649, Adjusted R-squared: 0.963
F-statistic: 494.8 on 1 and 18 DF, p-value: 1.524e-14
# Predict Salary for the held-out test rows with the fitted model,
# then print the predictions.
y_pred <- predict(object = regressor, newdata = test_set)
y_pred
2 4 5 8 11 16 20 21 24
37766.77 44322.33 46195.35 55560.43 62115.99 71481.07 81782.66 89274.72 102385.84
26
109877.90
# Visualizing the training set results:
# observed training points in red, fitted regression line in blue.
library(ggplot2)
ggplot(training_set, aes(x = YearsExperience)) +
  geom_point(aes(y = Salary), colour = "red") +
  geom_line(
    aes(y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  labs(
    title = "Salary vs Experience (Training Set)",
    x = "Years of Experience",
    y = "Salary"
  )

# Visualizing the test set results:
# red points are the held-out test observations; the blue line is the model
# fitted on the training set, so it is intentionally drawn from training_set
# (predictions over the training rows trace the same regression line).
ggplot() +
  geom_point(
    data = test_set,
    aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = training_set,
    aes(x = YearsExperience, y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  # Title corrected: this plot shows the test set, not the training set.
  ggtitle("Salary vs Experience (Test Set)") +
  xlab("Years of Experience") +
  ylab("Salary")

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmRmIDwtICByZWFkLmNzdigiZzpcXFJzdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCAyIC0gUmVncmVzc2lvblxcU2VjdGlvbiA0IC0gU2ltcGxlIExpbmVhciBSZWdyZXNzaW9uXFxTYWxhcnlfRGF0YS5jc3YiKQ0KaGVhZChkZikNCmBgYA0KDQpgYGB7cn0NCiMgdGVzdCBmaWx0ZXJpbmcgdXNpbmcgW10NCg0KZGYxIDwtIGRmW2RmJFNhbGFyeT40MDAwMCxdDQpkZjENCg0KIyB0ZXN0IGZpbHRlcmluZyB1c2luZyBzdWJzZXQgDQpkZjIgPC0gc3Vic2V0KGRmLCBkZiRTYWxhcnk+NDAwMDApDQpkZjINCg0KIyB0ZXN0IGZpbHRlcmluZyB1c2luZyBmaWx0ZXINCmRmMyA8LSAgZmlsdGVyKGRmLGRmJFNhbGFyeT40MDAwMCkNCmRmMw0KYGBgDQoNCmBgYHtyfQ0KDQojIHRha2luZyBjYXJlIG9mIG1pc3NpbmcgdmFsdWVzDQojIE5vIG1pc3NpbmcgdmFsdWVzIA0KIyBkYXRhc2V0JEFnZSA9IGlmZWxzZShpcy5uYShkYXRhc2V0JEFnZSksYXZlKGRhdGFzZXQkQWdlLCBGVU49ZnVuY3Rpb24oeCkgbWVhbih4LCBuYS5ybSA9VFJVRSkpLCAgICAgICAgICAgICAgICAgICAgIGRhdGFzZXQkQWdlKQ0KDQojIGRhdGFzZXQkU2FsYXJ5ID0gaWZlbHNlKGlzLm5hKGRhdGFzZXQkU2FsYXJ5KSwgYXZlKGRhdGFzZXQkU2FsYXJ5LCBGVU4gPWZ1bmN0aW9uKHgpIG1lYW4oeCwgbmEucm0gPVRSVUUpKSxkYXRhc2V0JFNhbGFyeSkNCg0KIyBjb252ZXJ0IGNvdW50cnkgaW50byBmYWN0b3JzDQojIE5vIG5lZWQgdG8gY29udmVydCB0aGlzIGRhdGFzZXQNCiMgZGF0YXNldCRDb3VudHJ5IDwtICBmYWN0b3IoZGF0YXNldCRDb3VudHJ5LGxldmVscyA9IGMoIkZyYW5jZSIsICJTcGFpbiIsIkdlcm1hbnkiKSwgICAgICAgICAgICAgICAgICAgICAgICAgICBsYWJlbHMgPWMoMSwyLDMpKQ0KDQojIGRhdGFzZXQkUHVyY2hhc2VkIDwtIGZhY3RvcihkYXRhc2V0JFB1cmNoYXNlZCwgbGV2ZWxzID1jKCJObyIsIlllcyIpLCAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYWJlbHM9IGMoMCwxKSkNCg0KIyBTcGxpdCBkYXRhIGludG8gdHJhaW5pbmcgYW5kIHRlc3RpbmcgZGF0YXNldHMNCmRhdGFzZXQgPC0gIGRmDQoNCmxpYnJhcnkoY2FUb29scykNCnNldC5zZWVkKDEyMykNCiMgcmVtZW1iZXIgdG8gc3BsaXQgb24gdGhlIGRlcGVuZGVudCB2YXJpYWJsZSAoeSkNCnNwbGl0ID0gc2FtcGxlLnNwbGl0KGRhdGFzZXQkU2FsYXJ5LCBTcGxpdFJhdGlvID0gMi8zKQ0KDQojIGNyZWF0ZSB0cmFpbmluZyBzZXQNCnRyYWluaW5nX3NldCA9IHN1YnNldChkYXRhc2V0LCBzcGxpdCA9PVRSVUUpDQp0ZXN0X3NldCA9c3Vic2V0KGRhdGFzZXQsIHNwbGl0ID09IEZBTFNFKQ0KDQojIGZlYXR1cmUgc2NhbGluZw0KDQojIHRyYWluaW5nX3NldFssMjozXSA9IHNjYWxlKHRyYWluaW5nX3NldFssMjozXSkNCiMgdGVzdF9zZXRbLDI6
M10gPSBzY2FsZSh0ZXN0X3NldFssMjozXSkNCg0KdHJhaW5pbmdfc2V0DQp0ZXN0X3NldA0KDQpgYGANCg0KYGBge3J9DQojIGZpdHRpbmcgc2ltcGxlIGxpbmVhciByZWdyZXNzaW9uIHRvIHRoZSB0cmFpbmluZyBzZXQNCnJlZ3Jlc3NvciA9IGxtKGZvcm11bGEgPSBTYWxhcnkgfiBZZWFyc0V4cGVyaWVuY2UsIGRhdGE9IHRyYWluaW5nX3NldCkNCnN1bW1hcnkocmVncmVzc29yDQogICAgICAgICkNCg0KYGBgDQpgYGB7cn0NCnlfcHJlZCA9ICBwcmVkaWN0KHJlZ3Jlc3NvciwgbmV3ZGF0YSA9IHRlc3Rfc2V0KQ0KDQp5X3ByZWQNCmBgYA0KDQpgYGB7cn0NCiMgVmlzdWFsaXppbmcgdGhlIHRyYWluaW5nIHNldCByZXN1bHRzDQpsaWJyYXJ5KGdncGxvdDIpDQpnZ3Bsb3QoKSsNCiAgZ2VvbV9wb2ludChhZXMoeD10cmFpbmluZ19zZXQkWWVhcnNFeHBlcmllbmNlLCB5PXRyYWluaW5nX3NldCRTYWxhcnkpLA0KICAgICAgICAgICAgIGNvbG91cj0icmVkIikgKw0KICBnZW9tX2xpbmUoYWVzKHg9dHJhaW5pbmdfc2V0JFllYXJzRXhwZXJpZW5jZSwgeT1wcmVkaWN0KHJlZ3Jlc3NvciwgbmV3ZGF0YSA9IHRyYWluaW5nX3NldCkpLA0KICAgICAgICAgICAgY29sb3VyPSJibHVlIikrDQogIGdndGl0bGUoIlNhbGFyeSB2cyBFeHBlcmllbmNlIChUcmFpbmluZyBTZXQpIikrDQogIHhsYWIoIlllYXJzIG9mIEV4cGVyaWVuY2UiKSsNCiAgeWxhYigiU2FsYXJ5IikNCg0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KCkrDQogIGdlb21fcG9pbnQoYWVzKHg9dGVzdF9zZXQkWWVhcnNFeHBlcmllbmNlLCB5PXRlc3Rfc2V0JFNhbGFyeSksDQogICAgICAgICAgICAgY29sb3VyPSJyZWQiKSArDQogIGdlb21fbGluZShhZXMoeD10cmFpbmluZ19zZXQkWWVhcnNFeHBlcmllbmNlLCB5PXByZWRpY3QocmVncmVzc29yLCBuZXdkYXRhID0gdHJhaW5pbmdfc2V0KSksDQogICAgICAgICAgICBjb2xvdXI9ImJsdWUiKSsNCiAgZ2d0aXRsZSgiU2FsYXJ5IHZzIEV4cGVyaWVuY2UgKFRyYWluaW5nIFNldCkiKSsNCiAgeGxhYigiWWVhcnMgb2YgRXhwZXJpZW5jZSIpKw0KICB5bGFiKCJTYWxhcnkiKQ0KYGBgDQoNCg==