A combined cycle power plant (CCPP) is composed of gas turbines (GT), steam turbines (ST) and heat recovery steam generators. In a CCPP, the electricity is generated by gas and steam turbines, which are combined in one cycle, and is transferred from one turbine to another. While the Vacuum is collected from and has an effect on the Steam Turbine, the other three ambient variables affect the GT performance. For comparability with our baseline studies, and to allow 5x2 fold statistical tests to be carried out, we provide the data shuffled five times. For each shuffling, 2-fold CV is carried out and the resulting 10 measurements are used for statistical testing. Using the data provided, we have to predict the Energy Output (PE).
# Report the dataset's dimensions as (rows, columns).
dim(mydata)
## [1] 9568 5
# Show each column's storage type together with its first few values.
str(mydata)
## Classes 'tbl_df', 'tbl' and 'data.frame': 9568 obs. of 5 variables:
## $ AT: num 14.96 25.18 5.11 20.86 10.82 ...
## $ V : num 41.8 63 39.4 57.3 37.5 ...
## $ AP: num 1024 1020 1012 1010 1009 ...
## $ RH: num 73.2 59.1 92.1 76.6 96.6 ...
## $ PE: num 463 444 489 446 474 ...
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Per-variable descriptive statistics: counts, missing values, distinct values,
# mean, quantiles, and the lowest/highest observations.
describe(mydata)
## mydata
##
## 5 Variables 9568 Observations
## ---------------------------------------------------------------------------
## AT
## n missing distinct Info Mean Gmd .05 .10
## 9568 0 2773 1 19.65 8.578 7.46 9.34
## .25 .50 .75 .90 .95
## 13.51 20.34 25.72 29.24 30.92
##
## lowest : 1.81 2.34 2.58 2.64 2.71, highest: 35.10 35.20 35.56 35.77 37.11
## ---------------------------------------------------------------------------
## V
## n missing distinct Info Mean Gmd .05 .10
## 9568 0 634 1 54.31 14.45 38.73 39.72
## .25 .50 .75 .90 .95
## 41.74 52.08 66.54 71.58 73.68
##
## lowest : 25.36 25.88 34.03 34.69 35.19, highest: 79.05 79.74 80.18 80.25 81.56
## ---------------------------------------------------------------------------
## AP
## n missing distinct Info Mean Gmd .05 .10
## 9568 0 2517 1 1013 6.683 1004 1006
## .25 .50 .75 .90 .95
## 1009 1013 1017 1021 1023
##
## lowest : 992.89 993.11 993.31 993.74 993.82
## highest: 1033.14 1033.19 1033.25 1033.29 1033.30
## ---------------------------------------------------------------------------
## RH
## n missing distinct Info Mean Gmd .05 .10
## 9568 0 4546 1 73.31 16.58 46.58 52.76
## .25 .50 .75 .90 .95
## 63.33 74.97 84.83 91.00 94.60
##
## lowest : 25.56 25.89 26.30 26.67 28.16, highest: 100.12 100.13 100.14 100.15 100.16
## ---------------------------------------------------------------------------
## PE
## n missing distinct Info Mean Gmd .05 .10
## 9568 0 4836 1 454.4 19.56 430.8 433.4
## .25 .50 .75 .90 .95
## 439.8 451.5 468.4 479.0 483.5
##
## lowest : 420.26 421.57 425.11 425.12 425.14, highest: 495.21 495.23 495.24 495.35 495.76
## ---------------------------------------------------------------------------
# Minimum, quartiles, median, mean and maximum for every column.
summary(mydata)
## AT V AP RH
## Min. : 1.81 Min. :25.36 Min. : 992.9 Min. : 25.56
## 1st Qu.:13.51 1st Qu.:41.74 1st Qu.:1009.1 1st Qu.: 63.33
## Median :20.34 Median :52.08 Median :1012.9 Median : 74.97
## Mean :19.65 Mean :54.31 Mean :1013.3 Mean : 73.31
## 3rd Qu.:25.72 3rd Qu.:66.54 3rd Qu.:1017.3 3rd Qu.: 84.83
## Max. :37.11 Max. :81.56 Max. :1033.3 Max. :100.16
## PE
## Min. :420.3
## 1st Qu.:439.8
## Median :451.6
## Mean :454.4
## 3rd Qu.:468.4
## Max. :495.8
# Fix the RNG seed so the same rows are drawn on every run.
set.seed(100)
# Keep a random subset of 200 rows (drawn without replacement) for the analysis.
df <- mydata[sample.int(nrow(mydata), size = 200), ]
# Print the sampled tibble.
df
## # A tibble: 200 x 5
## AT V AP RH PE
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 31.2 68.7 1006. 52.1 437.
## 2 12.7 44.9 1008. 71.8 463.
## 3 32.4 44.4 1005. 44.4 430.
## 4 17.5 63.9 1020. 82.6 453.
## 5 11.7 40.2 1018. 82.5 475.
## 6 22.0 51.2 1009. 82.6 447.
## 7 17.3 58.6 1016. 87.2 455.
## 8 24.3 56.6 1014. 70.4 443.
## 9 22.3 44.9 1010. 78.9 447.
## 10 28.6 69.8 1004. 67.3 428.
## # ... with 190 more rows
# Confirm the sample holds 200 rows.
nrow(df)
## [1] 200
# Put df's columns on the search path so that later calls can refer to them by
# bare name (AT, V, AP, RH, PE).
# NOTE(review): the message printed after this call reports that these names
# now mask same-named objects from mydata, i.e. two data frames with identical
# column names are attached. Which one a bare name resolves to depends on attach
# order; prefer with(df, ...) or data = df in new code.
attach(df)
## The following objects are masked from mydata:
##
## AP, AT, PE, RH, V
# Distribution of each of the five variables, followed by a scatterplot matrix.
# Five histograms need six cells: a 2 x 2 grid would push the last one onto a
# separate page. par() returns the previous settings so they can be restored.
old_par <- par(mfrow = c(2, 3))
# Evaluate inside df so the sampled columns are used explicitly; the bare names
# keep the default titles ("Histogram of AT", ...) and axis labels.
with(df, {
  hist(AT, col = "Light Blue")
  hist(V, col = "Green")
  hist(AP, col = "blue")
  hist(RH, col = "Orange")
  hist(PE, col = "Pink")
})
# Restore the graphics layout so later plots are not squeezed into grid cells.
par(old_par)
# Scatterplot matrix of every pair of columns in the sample.
plot(df, col = "Blue")
Simple Linear Regression - Independent Variable Ambient Temperature
# Pearson correlation test between energy output (PE) and ambient temperature
# (AT). Evaluated inside df via with() so the sampled rows are used explicitly
# instead of whichever attached data frame currently owns the names PE and AT.
with(df, cor.test(PE, AT))
##
## Pearson's product-moment correlation
##
## data: PE and AT
## t = -48.106, df = 198, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9694332 -0.9471683
## sample estimates:
## cor
## -0.9597832
# Scatter plot of energy output against ambient temperature. The formula
# interface reads both columns from df, so it does not rely on attach().
plot(
  PE ~ AT,
  data = df,
  col = "Red",
  main = "Energy Produced Vs Ambient Temp"
)
# Simple linear regression of PE on AT, then its coefficient table and fit
# statistics.
fitPE1 <- lm(PE ~ AT, data = df)
summary(fitPE1)
##
## Call:
## lm(formula = PE ~ AT, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6247 -3.7930 0.0608 3.5700 17.9346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 497.23212 0.97080 512.19 <2e-16 ***
## AT -2.20199 0.04577 -48.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.005 on 198 degrees of freedom
## Multiple R-squared: 0.9212, Adjusted R-squared: 0.9208
## F-statistic: 2314 on 1 and 198 DF, p-value: < 2.2e-16
Simple Linear Regression - Independent Variable Exhaust Vacuum
# Pearson correlation test between energy output (PE) and exhaust vacuum (V).
# Evaluated inside df via with() so the sampled rows are used explicitly
# instead of whichever attached data frame currently owns the names PE and V.
with(df, cor.test(PE, V))
##
## Pearson's product-moment correlation
##
## data: PE and V
## t = -22.622, df = 198, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8837608 -0.8052626
## sample estimates:
## cor
## -0.8491356
# Scatter plot of energy output against exhaust vacuum. The formula interface
# reads both columns from df, so it does not rely on attach().
plot(
  PE ~ V,
  data = df,
  col = "Red",
  main = "Energy Produced Vs Exhaust Vacuum"
)
# Simple linear regression of PE on V, then its coefficient table and fit
# statistics.
fitPE2 <- lm(PE ~ V, data = df)
summary(fitPE2)
##
## Call:
## lm(formula = PE ~ V, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.22 -5.54 0.68 6.09 20.39
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 520.26200 3.01481 172.57 <2e-16 ***
## V -1.22236 0.05403 -22.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.416 on 198 degrees of freedom
## Multiple R-squared: 0.721, Adjusted R-squared: 0.7196
## F-statistic: 511.8 on 1 and 198 DF, p-value: < 2.2e-16
Simple Linear Regression - Independent Variable Ambient Pressure
# Pearson correlation test between energy output (PE) and ambient pressure
# (AP). Evaluated inside df via with() so the sampled rows are used explicitly
# instead of whichever attached data frame currently owns the names PE and AP.
with(df, cor.test(PE, AP))
##
## Pearson's product-moment correlation
##
## data: PE and AP
## t = 6.9888, df = 198, p-value = 4.137e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3262174 0.5496451
## sample estimates:
## cor
## 0.4448257
# Scatter plot of energy output against ambient pressure. The formula
# interface reads both columns from df, so it does not rely on attach().
plot(
  PE ~ AP,
  data = df,
  col = "Red",
  main = "Energy Produced Vs Ambient Pressure"
)
# Simple linear regression of PE on AP, then its coefficient table and fit
# statistics.
fitPE3 <- lm(PE ~ AP, data = df)
summary(fitPE3)
##
## Call:
## lm(formula = PE ~ AP, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.696 -11.633 -3.776 10.749 43.704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -989.0473 206.4478 -4.791 3.25e-06 ***
## AP 1.4241 0.2038 6.989 4.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.97 on 198 degrees of freedom
## Multiple R-squared: 0.1979, Adjusted R-squared: 0.1938
## F-statistic: 48.84 on 1 and 198 DF, p-value: 4.137e-11
Simple Linear Regression - Independent Variable Relative Humidity
# Pearson correlation test between energy output (PE) and relative humidity
# (RH). Evaluated inside df via with() so the sampled rows are used explicitly
# instead of whichever attached data frame currently owns the names PE and RH.
with(df, cor.test(PE, RH))
##
## Pearson's product-moment correlation
##
## data: PE and RH
## t = 7.5716, df = 198, p-value = 1.371e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3586883 0.5748007
## sample estimates:
## cor
## 0.4738484
# Scatter plot of energy output against relative humidity. The formula
# interface reads both columns from df, so it does not rely on attach().
plot(
  PE ~ RH,
  data = df,
  col = "Red",
  main = "Energy Produced Vs Relative Humidity"
)
# Simple linear regression of PE on RH, then its coefficient table and fit
# statistics.
fitPE4 <- lm(PE ~ RH, data = df)
summary(fitPE4)
##
## Call:
## lm(formula = PE ~ RH, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.264 -11.644 -2.894 11.719 37.520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 412.92830 5.50384 75.025 < 2e-16 ***
## RH 0.56009 0.07397 7.572 1.37e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.7 on 198 degrees of freedom
## Multiple R-squared: 0.2245, Adjusted R-squared: 0.2206
## F-statistic: 57.33 on 1 and 198 DF, p-value: 1.371e-12
Multiple Linear Regression
library("corrplot")
## Warning: package 'corrplot' was built under R version 3.5.1
## corrplot 0.84 loaded
# Pairwise Pearson correlations between all columns of the sample.
cm <- cor(df, method = "pearson")
# Draw the upper triangle of the correlation matrix as circles, with the
# variables ordered by hierarchical clustering and black labels set at 45.
corrplot(
  cm,
  method = "circle",
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)
From the above correlation plot, we see that Ambient Temperature and Exhaust Vacuum have a strong negative correlation with Energy Produced, whereas Ambient Pressure and Relative Humidity have a weaker, positive correlation with Energy Produced.
# Multiple linear regression of PE on all four ambient variables, followed by
# its coefficient table and fit statistics.
# NOTE(review): nothing in this chunk appears to draw random numbers, so the
# seed looks redundant for the fit itself; confirm before removing it, since
# later code may rely on the RNG state it sets.
set.seed(100)
fitPE5 <- lm (PE ~ AT+V+AP+RH, data=df)
summary(fitPE5)
##
## Call:
## lm(formula = PE ~ AT + V + AP + RH, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.1031 -2.9245 -0.5918 3.2924 15.9344
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 465.82947 64.66649 7.204 1.25e-11 ***
## AT -2.13080 0.09374 -22.732 < 2e-16 ***
## V -0.17330 0.04760 -3.641 0.000348 ***
## AP 0.04965 0.06326 0.785 0.433527
## RH -0.14922 0.02737 -5.451 1.50e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.374 on 195 degrees of freedom
## Multiple R-squared: 0.9407, Adjusted R-squared: 0.9395
## F-statistic: 773.4 on 4 and 195 DF, p-value: < 2.2e-16
End of Document