df <-  read.csv("g:\\Rstudio\\udemy\\ml\\Machine Learning AZ\\Part 2 - Regression\\Section 4 - Simple Linear Regression\\Salary_Data.csv")
head(df)
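# quick look at the structure (a small addition; Salary_Data.csv here should
# have 30 rows and two numeric columns, YearsExperience and Salary)
str(df)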
# test filtering using []
df1 <- df[df$Salary > 40000, ]
df1
# test filtering using subset()
df2 <- subset(df, Salary > 40000)
df2
# test filtering using dplyr's filter()
# note: dplyr must be attached first; otherwise filter() falls through to
# stats::filter(), which treats the data as a time series and returns NAs
library(dplyr)
df3 <- filter(df, Salary > 40000)
df3
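# quick sanity check: all three approaches should keep the same rows
# (dplyr::filter() resets row names, so compare counts rather than identical())
nrow(df1) == nrow(df2)
nrow(df2) == nrow(df3)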
# taking care of missing values
# No missing values in this dataset
# dataset$Age = ifelse(is.na(dataset$Age),
#                      ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
#                      dataset$Age)
# dataset$Salary = ifelse(is.na(dataset$Salary),
#                         ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
#                         dataset$Salary)
# convert Country into a factor
# No need to convert for this dataset
# dataset$Country <- factor(dataset$Country,
#                           levels = c("France", "Spain", "Germany"),
#                           labels = c(1, 2, 3))
# dataset$Purchased <- factor(dataset$Purchased,
#                             levels = c("No", "Yes"),
#                             labels = c(0, 1))
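# the commented-out imputation pattern can be tried on a tiny made-up data
# frame (illustration only, not the course dataset)
toy <- data.frame(Age = c(44, NA, 27), Salary = c(72000, 48000, NA))
toy$Age <- ifelse(is.na(toy$Age),
                  ave(toy$Age, FUN = function(x) mean(x, na.rm = TRUE)),
                  toy$Age)
toy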
# Split data into training and test sets
dataset <- df
library(caTools)
set.seed(123)
# remember to split on the dependent variable (y)
split = sample.split(dataset$Salary, SplitRatio = 2/3)
# create training and test sets
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
# feature scaling
# not needed here: lm() works on the original scale, so the template's
# scaling lines stay commented out
# training_set[, 2:3] = scale(training_set[, 2:3])
# test_set[, 2:3] = scale(test_set[, 2:3])
training_set
test_set
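# quick check of the split sizes: SplitRatio = 2/3 keeps 20 of the 30 rows
# for training and 10 for testing with this seed
nrow(training_set)
nrow(test_set)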
# fitting simple linear regression to the training set
regressor = lm(formula = Salary ~ YearsExperience, data = training_set)
summary(regressor)

Call:
lm(formula = Salary ~ YearsExperience, data = training_set)

Residuals:
    Min      1Q  Median      3Q     Max 
-7325.1 -3814.4   427.7  3559.7  8884.6 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)        25592       2646   9.672 1.49e-08 ***
YearsExperience     9365        421  22.245 1.52e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5391 on 18 degrees of freedom
Multiple R-squared:  0.9649,    Adjusted R-squared:  0.963 
F-statistic: 494.8 on 1 and 18 DF,  p-value: 1.524e-14
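# the fitted coefficients can also be pulled out directly; with this seed the
# slope is roughly 9365, i.e. each extra year of experience adds about $9.4k
# to the predicted salary
coef(regressor)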
y_pred =  predict(regressor, newdata = test_set)
y_pred
        2         4         5         8        11        16        20        21        24 
 37766.77  44322.33  46195.35  55560.43  62115.99  71481.07  81782.66  89274.72 102385.84 
       26 
109877.90 
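# a simple way to gauge test-set accuracy (not part of the original notebook):
# root-mean-square error of the predictions against the actual salaries
rmse <- sqrt(mean((test_set$Salary - y_pred)^2))
rmse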
# Visualizing the training set results
library(ggplot2)
ggplot()+
  geom_point(aes(x=training_set$YearsExperience, y=training_set$Salary),
             colour="red") +
  geom_line(aes(x=training_set$YearsExperience, y=predict(regressor, newdata = training_set)),
            colour="blue")+
  ggtitle("Salary vs Experience (Training Set)")+
  xlab("Years of Experience")+
  ylab("Salary")

ggplot()+
  geom_point(aes(x=test_set$YearsExperience, y=test_set$Salary),
             colour="red") +
  # the regression line comes from the training fit; the model does not
  # change, so predicting on training_set draws the same line
  geom_line(aes(x=training_set$YearsExperience, y=predict(regressor, newdata = training_set)),
            colour="blue")+
  ggtitle("Salary vs Experience (Test Set)")+
  xlab("Years of Experience")+
  ylab("Salary")
