# Preview the data
# Show the first six rows of `pressure` to check its columns and value ranges.
head(x = pressure, n = 6L)
## temperature pressure
## 1 0 0.0002
## 2 20 0.0012
## 3 40 0.0060
## 4 60 0.0300
## 5 80 0.0900
## 6 100 0.2700
Next, to decide if a polynomial model is appropriate for our dataset, we use a scatter plot and visualize the relationship between dependent and independent variables.
# Scatter plot of pressure against temperature to inspect the shape of the
# relationship before choosing a model.
library(ggplot2)
# Columns are mapped through the `data` argument instead of `pressure$...`
# inside aes(), so the axis titles read "temperature" and "pressure".
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point(colour = "blue")
From the above plot, we can observe that there is a nonlinear relationship between the dependent and independent variables. Therefore, we can use a polynomial regression model.

## Splitting The Data
# Randomly assign about two thirds of the rows to the training set and the
# rest to the testing set.
# A fixed seed makes the split reproducible. Base R sampling is used so this
# step does not depend on a package that has not been attached.
set.seed(123)
n_obs <- nrow(pressure)
# `split` is a logical vector with one entry per row: TRUE marks training rows.
split <- seq_len(n_obs) %in% sample.int(n_obs, size = round(n_obs * 2 / 3))
training <- subset(pressure, split == TRUE)
testing <- subset(pressure, split == FALSE)
# Add the squared, cubed and fourth-power temperature terms as new columns.
pressure <- transform(
  pressure,
  temperature2 = temperature^2,
  temperature3 = temperature^3,
  temperature4 = temperature^4
)
# Keep a copy of the augmented data frame under the name used for modelling.
poly_pressure1 <- pressure
# Regress pressure on every other column (temperature and its powers),
# using all rows of the augmented data frame.
polynomial_reg <- lm(formula = pressure ~ ., data = poly_pressure1)
summary(polynomial_reg)
##
## Call:
## lm(formula = pressure ~ ., data = poly_pressure1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1989 -4.2112 0.2224 4.0172 7.0729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.453e+00 4.645e+00 1.389 0.186418
## temperature -7.992e-01 1.893e-01 -4.223 0.000852 ***
## temperature2 1.588e-02 2.226e-03 7.135 5.06e-06 ***
## temperature3 -1.052e-04 9.415e-06 -11.179 2.31e-08 ***
## temperature4 2.341e-07 1.297e-08 18.056 4.28e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.38 on 14 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9994
## F-statistic: 7841 on 4 and 14 DF, p-value: < 2.2e-16
library(ggplot2)
# Overlay the fitted polynomial curve (blue) on the observed data (red).
# Columns are mapped through the `data` argument rather than `$` inside
# aes(); the axis titles are set explicitly below.
ggplot(poly_pressure1, aes(x = temperature)) +
  geom_point(aes(y = pressure), colour = "red") +
  geom_line(
    aes(y = predict(polynomial_reg, newdata = poly_pressure1)),
    colour = "blue"
  ) +
  ggtitle("Polynomial Regression Model") +
  xlab("Temperature") +
  ylab("Pressure")
### Predicting pressure for a new temperature value
# Build a one-row data frame holding the new temperature and its powers,
# with column names matching the predictors of polynomial_reg.
predict_lvl1 <- data.frame(temperature = 250)
predict_lvl1$temperature2 <- predict_lvl1$temperature^2
predict_lvl1$temperature3 <- predict_lvl1$temperature^3
predict_lvl1$temperature4 <- predict_lvl1$temperature^4
# Predict pressure for 250 temperature
pred_pressure <- predict(polynomial_reg, newdata = predict_lvl1)
# Predicted pressure value
pred_pressure
## 1
## 69.32188
# Assemble one row containing the new temperature, its powers and the
# predicted pressure, so it can be appended to the modelling data.
new_pressure_row <- local({
  new_temp <- 250
  data.frame(
    temperature = new_temp,
    temperature2 = new_temp^2,
    temperature3 = new_temp^3,
    temperature4 = new_temp^4,
    pressure = pred_pressure,
    stringsAsFactors = FALSE
  )
})
# Bind the predicted value to the original dataset; rbind matches the
# columns by name.
plot_pressure <- rbind(poly_pressure1, new_pressure_row)
# Plot the observed points plus the appended prediction (red), highlight the
# predicted point at temperature 250 (green), and draw the fitted curve (blue).
# Columns are mapped through the `data` argument rather than `$` inside aes().
ggplot(plot_pressure, aes(x = temperature, y = pressure)) +
  geom_point(colour = "red") +
  # annotate() draws the single highlighted point once, independent of the
  # number of rows in the plot data.
  annotate("point", x = 250, y = pred_pressure, colour = "green", size = 3) +
  geom_line(
    data = poly_pressure1,
    aes(y = predict(polynomial_reg, newdata = poly_pressure1)),
    colour = "blue"
  ) +
  ggtitle("Polynomial Regression Model Prediction") +
  xlab("Temperature") +
  ylab("Pressure")
## Conclusion

The predicted value lies on the fitted regression line, and the prediction appears to be accurate.
Polynomial regression can work on a dataset of any size, and it can work very well on non-linear problems.

### Disadvantages:

One of the main disadvantages of polynomial regression is that we need to choose the right polynomial degree to achieve a good bias-variance trade-off.