# Preview the dataset
# Print the leading six rows of the `pressure` data frame.
head(pressure, n = 6L)
##   temperature pressure
## 1           0   0.0002
## 2          20   0.0012
## 3          40   0.0060
## 4          60   0.0300
## 5          80   0.0900
## 6         100   0.2700

Step 2 - Visualize the data

Next, to decide if a polynomial model is appropriate for our dataset, we use a scatter plot and visualize the relationship between dependent and independent variables.

# Scatter plot of pressure against temperature, used to judge by eye whether
# the relationship between the two variables is linear.
library(ggplot2)
# Columns are mapped through `data =` rather than `pressure$...` inside aes(),
# so the axes are labelled "temperature" and "pressure".
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_point(colour = "blue")

From the above plot, we can observe that there is a nonlinear relationship between the dependent and independent variables. Therefore, we can use the polynomial regression model.

## Splitting The Data

# Split the rows of `pressure` into a training set (about two thirds of the
# rows) and a testing set (the remainder), using base R only.
set.seed(123)  # fixed seed so the same rows are selected on every run
n_obs <- nrow(pressure)
train_rows <- sample(seq_len(n_obs), size = floor(n_obs * 2 / 3))
# `split` is a logical vector with one entry per row: TRUE marks training rows.
split <- seq_len(n_obs) %in% train_rows
training <- pressure[split, , drop = FALSE]
testing <- pressure[!split, , drop = FALSE]

Applying Polynomial Regression model

# Append the 2nd, 3rd and 4th powers of temperature to `pressure` as extra
# predictor columns (temperature2, temperature3, temperature4).
pressure <- transform(
  pressure,
  temperature2 = temperature^2,
  temperature3 = temperature^3,
  temperature4 = temperature^4
)

# Keep a copy of the augmented data under the name used for modelling.
poly_pressure1 <- pressure

# Regress pressure on every other column, i.e. a 4th-degree polynomial in
# temperature, then print the fit summary.
polynomial_reg <- lm(formula = pressure ~ ., data = poly_pressure1)
summary(polynomial_reg)
## 
## Call:
## lm(formula = pressure ~ ., data = poly_pressure1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1989 -4.2112  0.2224  4.0172  7.0729 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.453e+00  4.645e+00   1.389 0.186418    
## temperature  -7.992e-01  1.893e-01  -4.223 0.000852 ***
## temperature2  1.588e-02  2.226e-03   7.135 5.06e-06 ***
## temperature3 -1.052e-04  9.415e-06 -11.179 2.31e-08 ***
## temperature4  2.341e-07  1.297e-08  18.056 4.28e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.38 on 14 degrees of freedom
## Multiple R-squared:  0.9996, Adjusted R-squared:  0.9994 
## F-statistic:  7841 on 4 and 14 DF,  p-value: < 2.2e-16

Plotting and forecasting

library(ggplot2)
# Observed data (red points) with the fitted polynomial curve (blue line).
# Columns are mapped through `data =` instead of `poly_pressure1$...` inside
# aes(); the fitted values come from predicting on the same data frame.
ggplot(data = poly_pressure1, mapping = aes(x = temperature)) +
  geom_point(aes(y = pressure), colour = "red") +
  geom_line(
    aes(y = predict(polynomial_reg, newdata = poly_pressure1)),
    colour = "blue"
  ) +
  ggtitle("Polynomial Regression Model") +
  xlab("Temperature") +
  ylab("Pressure")

### Predicting for a new temperature value

# One-row predictor frame for a temperature of 250. The column names must
# match the model terms: temperature, temperature2, temperature3, temperature4,
# holding 250 raised to the powers 1 through 4.
predict_lvl1 <- as.data.frame(
  as.list(
    setNames(250^(1:4), c("temperature", paste0("temperature", 2:4)))
  )
)

# Forecast the pressure at that temperature from the fitted model.
pred_pressure <- predict(object = polynomial_reg, newdata = predict_lvl1)
pred_pressure # show the forecast value
##        1 
## 69.32188

Plotting the predicted data point with the original dataset

# One-row data frame for the forecast at temperature 250: the four polynomial
# terms (250 to the powers 1 through 4) followed by the predicted pressure.
new_pressure_row <- do.call(
  data.frame,
  c(
    as.list(
      setNames(250^(1:4), c("temperature", paste0("temperature", 2:4)))
    ),
    list(pressure = pred_pressure, stringsAsFactors = FALSE)
  )
)

# Append the forecast row to the modelling data; rbind() matches the columns
# by name, so the differing column order is not a problem.
plot_pressure <- rbind(poly_pressure1, new_pressure_row)

# Draw the observed points plus the forecast (red), highlight the forecast for
# temperature 250 (green), and overlay the fitted polynomial curve (blue).
# Each layer takes its columns from a data frame passed via `data =` instead
# of using `df$column` inside aes().
ggplot(mapping = aes(x = temperature, y = pressure)) +
  geom_point(data = plot_pressure, colour = "red") +
  geom_point(data = new_pressure_row, colour = "green", size = 3) +
  geom_line(
    data = poly_pressure1,
    aes(y = predict(polynomial_reg, newdata = poly_pressure1)),
    colour = "blue"
  ) +
  ggtitle("Polynomial Regression Model Prediction") +
  xlab("Temperature") +
  ylab("Pressure")

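## Conclusion

The predicted value lies on the fitted regression line, and the prediction appears to agree well with the observed data. Note that a prediction made by the model always falls on its own fitted curve, so the model should still be checked against held-out data.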

Advantages and disadvantages of Polynomial regression

Advantages:

Polynomial regression can work on a dataset of any size. It can also work very well on non-linear problems.

### Disadvantages:

One of the main disadvantages of using polynomial regression is that we need to choose the right polynomial degree to get a good bias-variance trade-off.