importing libraries

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5

importing dataset

# Read Position_Salaries.csv (path is relative to the working directory);
# the first row of the file supplies the column names.
dataset <- read.csv(file = "Position_Salaries.csv", header = TRUE)

dataset

keeping only necessary columns

# Retain only the second and third columns, selected by position; every
# other column is discarded. `drop = FALSE` keeps the result a data frame.
dataset <- dataset[, 2:3, drop = FALSE]

dataset

fitting polynomial regression to the dataset

# Append columns Level2 ... Level6 holding the 2nd through 6th powers of
# Level, so that a linear model on these columns is a degree-6 polynomial
# in Level.

dataset[paste0("Level", 2:6)] <- lapply(
  2:6,
  function(degree) dataset$Level^degree
)

# Fit the polynomial regression as an ordinary linear model: `.` expands
# to every column of `dataset` other than the response, i.e. Level and
# the power columns Level2 ... Level6.
poly_reg <- lm(formula = Salary ~ ., data = dataset)

# Coefficient estimates, residuals and goodness-of-fit statistics.
summary(poly_reg)
## 
## Call:
## lm(formula = Salary ~ ., data = dataset)
## 
## Residuals:
##       1       2       3       4       5       6       7       8       9      10 
##  -190.6   913.2 -1383.4   122.4  1191.1   717.9 -3940.6  4110.7 -1867.7   326.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  82166.667  46776.837   1.757   0.1772  
## Level       -82943.998  87810.183  -0.945   0.4146  
## Level2       65896.402  57802.944   1.140   0.3371  
## Level3      -24381.957  17847.332  -1.366   0.2653  
## Level4        4929.087   2806.401   1.756   0.1773  
## Level5        -495.433    217.397  -2.279   0.1070  
## Level6          19.792      6.576   3.010   0.0572 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3686 on 3 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9998 
## F-statistic:  9896 on 6 and 3 DF,  p-value: 1.571e-06

predicting and visualizing the polynomial regression results

# Adding higher powers of Level to the model lets the fitted curve follow
# the observed points more closely.

# Fitted salaries for the same rows the model was trained on.
poly_pred <- predict(poly_reg, newdata = dataset)

# Gather everything the plot needs into one data frame so the aesthetics
# can refer to plain column names instead of `dataset$...` vectors.
plot_data <- data.frame(
  Level = dataset$Level,
  Salary = dataset$Salary,
  Predicted = poly_pred
)

# Red points: observed salaries. Blue line: model predictions.
ggplot(plot_data, aes(x = Level)) +
  geom_point(aes(y = Salary), colour = "red") +
  geom_line(aes(y = Predicted), colour = "blue") +
  ggtitle("Salary Prediction by Level (Polynomial Regression)") +
  xlab("Levels") +
  ylab("Salary")

predicting a new result with polynomial regression

# Level at which to predict a salary.
my_level <- 5

# The model was fitted on Level, Level2, ..., Level6, so the new
# observation must carry the same column names: Level is the value itself
# and LevelK is the value raised to the K-th power.
p_pred <- predict(
  poly_reg,
  newdata = as.data.frame(
    setNames(
      as.list(my_level^(1:6)),
      c("Level", paste0("Level", 2:6))
    )
  )
)

p_pred
##        1 
## 108808.9