Importing the Dataset

dataset <- read.csv("~/R Studio/Machine Learning A-Z (Codes and Datasets)/Part 2 - Regression/Section 6 - Polynomial Regression/R/Position_Salaries.csv")
dataset = dataset[2:3]
head(dataset)
##   Level Salary
## 1     1  45000
## 2     2  50000
## 3     3  60000
## 4     4  80000
## 5     5 110000
## 6     6 150000

Splitting the Dataset into the Training Set and Test Set

library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(123)
trainIndex <- createDataPartition(dataset$Salary, p = 2/3, list = FALSE)
training_set <- dataset[trainIndex, ]
test_set <- dataset[-trainIndex, ]

Feature Scaling (for Numeric Columns)

numeric_cols <- sapply(training_set, is.numeric)
training_set[numeric_cols] <- scale(training_set[numeric_cols])
test_set[numeric_cols] <- scale(test_set[numeric_cols])

Fitting Linear Regression to the Dataset

lin_reg = lm(formula = Salary ~ .,
             data = dataset)
summary(lin_reg)
## 
## Call:
## lm(formula = Salary ~ ., data = dataset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -170818 -129720  -40379   65856  386545 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -195333     124790  -1.565  0.15615   
## Level          80879      20112   4.021  0.00383 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 182700 on 8 degrees of freedom
## Multiple R-squared:  0.669,  Adjusted R-squared:  0.6277 
## F-statistic: 16.17 on 1 and 8 DF,  p-value: 0.003833

Fitting Polynomial Regression to the Dataset

dataset$Level2 = dataset$Level^2
dataset$Level3 = dataset$Level^3
dataset$Level4 = dataset$Level^4
poly_reg = lm(formula = Salary ~ .,
              data = dataset)
summary(poly_reg)
## 
## Call:
## lm(formula = Salary ~ ., data = dataset)
## 
## Residuals:
##      1      2      3      4      5      6      7      8      9     10 
##  -8357  18240   1358 -14633 -11725   6725  15997  10006 -28695  11084 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  184166.7    67768.0   2.718  0.04189 * 
## Level       -211002.3    76382.2  -2.762  0.03972 * 
## Level2        94765.4    26454.2   3.582  0.01584 * 
## Level3       -15463.3     3535.0  -4.374  0.00719 **
## Level4          890.2      159.8   5.570  0.00257 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20510 on 5 degrees of freedom
## Multiple R-squared:  0.9974, Adjusted R-squared:  0.9953 
## F-statistic: 478.1 on 4 and 5 DF,  p-value: 1.213e-06

Visualising the Linear Regression Results

ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = dataset$Level, y = predict(lin_reg, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Linear Regression)') +
  xlab('Level') +
  ylab('Salary')

Visualising the Polynomial Regression Results

ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = dataset$Level, y = predict(poly_reg, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Polynomial Regression)') +
  xlab('Level') +
  ylab('Salary')

Visualising the Regression Model Results (for Higher Resolution and Smoother Curve)

x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(poly_reg,
                                        newdata = data.frame(Level = x_grid,
                                                             Level2 = x_grid^2,
                                                             Level3 = x_grid^3,
                                                             Level4 = x_grid^4))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Polynomial Regression)') +
  xlab('Level') +
  ylab('Salary')