library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(R2jags)
## Warning: package 'R2jags' was built under R version 4.3.3
## Loading required package: rjags
## Warning: package 'rjags' was built under R version 4.3.3
## Loading required package: coda
## Warning: package 'coda' was built under R version 4.3.3
## Linked to JAGS 4.3.1
## Loaded modules: basemod,bugs
##
## Attaching package: 'R2jags'
##
## The following object is masked from 'package:coda':
##
## traceplot
library(ggmcmc)
df = read.csv("C:/Users/panne/Desktop/22_23/SDS II/2024/ccpp.csv")
names(df)
## [1] "AT" "V" "AP" "RH" "PE" "X"
head(df)
## AT V AP RH PE X
## 1 14.96 41.76 1024.07 73.17 463.26 NA
## 2 25.18 62.96 1020.04 59.08 444.37 NA
## 3 5.11 39.40 1012.16 92.14 488.56 NA
## 4 20.86 57.32 1010.24 76.64 446.48 NA
## 5 10.82 37.50 1009.23 96.62 473.90 NA
## 6 26.27 59.44 1012.23 58.77 443.67 NA
power_df <- df %>% select(c('AT','V','AP','RH','PE'))
head(power_df)
## AT V AP RH PE
## 1 14.96 41.76 1024.07 73.17 463.26
## 2 25.18 62.96 1020.04 59.08 444.37
## 3 5.11 39.40 1012.16 92.14 488.56
## 4 20.86 57.32 1010.24 76.64 446.48
## 5 10.82 37.50 1009.23 96.62 473.90
## 6 26.27 59.44 1012.23 58.77 443.67
power_df <- power_df %>% slice(1:1000)
head(power_df)
## AT V AP RH PE
## 1 14.96 41.76 1024.07 73.17 463.26
## 2 25.18 62.96 1020.04 59.08 444.37
## 3 5.11 39.40 1012.16 92.14 488.56
## 4 20.86 57.32 1010.24 76.64 446.48
## 5 10.82 37.50 1009.23 96.62 473.90
## 6 26.27 59.44 1012.23 58.77 443.67
# Descriptive statistics
ybar <-mean(power_df$PE)
std <-sd(power_df$PE)
s2 <-var(power_df$PE)
n <-length(power_df$PE)
descriptive <-c(n,ybar,std,s2)
names(descriptive) <-c("N","Ybar","std","s^2")
descriptive
## N Ybar std s^2
## 1000.00000 455.26359 16.94249 287.04782
summary(power_df$PE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 425.3 441.0 452.6 455.3 469.2 495.8
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
corPlot(power_df, cex = 1.2)
y = power_df$PE
x = power_df$AT
n = length(y)
hist(y, xlab = "Energy Output (PE)", main = paste("Mean=", mean(y), ", SD=", sd(y)))
plot(x, y, xlab = "Hourly Avg. Ambient Temperature (AT)",
ylab = "Energy Output (PE)",
main = paste("Correlation=", cor(x, y)))
#The data list with all the predictors
dataList1 <-list( y = power_df$PE,
x = (power_df$AT - mean(power_df$AT))/sd(power_df$AT),
n = length(power_df$PE))
# BAYESIAN MODELS ----------------------------------------------------------
model1 = function() {
#Likelihood
for (i in 1:n){
y[i] ~ dnorm(mu[i], 1/sigma^2)
mu[i] <- b0 + b1*x[i]
}
#Prior for beta
b0 ~ dnorm(0, 0.001)
b1 ~ dnorm(0, 0.001)
#Prior for sigma
sigma ~ dexp(1/1.5)
}
#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")
# mcmc estimates
jags_model1 = jags(data = dataList1,
#inits = initsList,
model.file = model1,
parameters.to.save = params,
n.chains = 5,n.iter = 1e4,
n.burnin = 2e3)
## module glm loaded
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 3
## Total graph size: 3688
##
## Initializing model
print(jags_model1)
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac7f9b2d0c", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.253 0.169 454.926 455.139 455.254 455.367 455.585 1.001
## b1 -16.060 0.172 -16.395 -16.176 -16.063 -15.943 -15.728 1.001
## sigma 5.393 0.120 5.163 5.312 5.389 5.470 5.634 1.001
## deviance 6210.114 2.490 6207.319 6208.296 6209.449 6211.226 6216.473 1.001
## n.eff
## b0 5000
## b1 5000
## sigma 5000
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 6213.2
## DIC is an estimate of expected predictive error (lower deviance is better).
plot(jags_model1)
# summary
jags_model1
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac7f9b2d0c", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.253 0.169 454.926 455.139 455.254 455.367 455.585 1.001
## b1 -16.060 0.172 -16.395 -16.176 -16.063 -15.943 -15.728 1.001
## sigma 5.393 0.120 5.163 5.312 5.389 5.470 5.634 1.001
## deviance 6210.114 2.490 6207.319 6208.296 6209.449 6211.226 6216.473 1.001
## n.eff
## b0 5000
## b1 5000
## sigma 5000
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 6213.2
## DIC is an estimate of expected predictive error (lower deviance is better).
jags_model1$BUGSoutput$summary
## mean sd 2.5% 25% 50% 75%
## b0 455.252781 0.1689331 454.926256 455.138543 455.25378 455.366970
## b1 -16.060059 0.1724330 -16.394823 -16.175893 -16.06265 -15.943373
## deviance 6210.113947 2.4896243 6207.318588 6208.296218 6209.44949 6211.225934
## sigma 5.392668 0.1202535 5.163111 5.311609 5.38851 5.469684
## 97.5% Rhat n.eff
## b0 455.584866 1.000624 5000
## b1 -15.727924 1.000900 5000
## deviance 6216.472752 1.000612 5000
## sigma 5.634289 1.000601 5000
# diagnostic plots
ggs_model_1 = ggs(as.mcmc(jags_model1))
ggs_density(ggs_model_1) # visualize densities
ggs_traceplot(ggs_model_1)
traceplot(jags_model1, ask = FALSE, mfrow = c(2, 2))
ggs_running(ggs_model_1)
ggs_autocorrelation(ggs_model_1)
#The data list with all the predictors
#xc = x - mean(x)
dataList2 <-list( y = power_df$PE,
x = (power_df$V - mean(power_df$V))/sd(power_df$V),
n = length(power_df$PE))
# BAYESIAN MODELS ----------------------------------------------------------
model2 = function() {
#Likelihood
for (i in 1:n){
y[i] ~ dnorm(mu[i], 1/sigma^2)
mu[i] <- b0 + b1*x[i]
}
#Prior for beta
b0 ~ dnorm(0, 0.001)
b1 ~ dnorm(0, 0.001)
#Prior for sigma
sigma ~ dexp(1/1.5)
}
#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")
# mcmc estimates
jags_model2 = jags(data = dataList2,
#inits = initsList,
model.file = model2,
parameters.to.save = params,
n.chains = 5,n.iter = 1e4,
n.burnin = 2e3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 3
## Total graph size: 2874
##
## Initializing model
print(jags_model2)
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4c923268", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.239 0.262 454.726 455.064 455.233 455.421 455.740 1.001
## b1 -14.763 0.263 -15.275 -14.941 -14.766 -14.590 -14.242 1.002
## sigma 8.308 0.187 7.952 8.179 8.307 8.431 8.678 1.001
## deviance 7076.497 2.446 7073.681 7074.704 7075.891 7077.677 7082.731 1.001
## n.eff
## b0 4800
## b1 2400
## sigma 4200
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.0 and DIC = 7079.5
## DIC is an estimate of expected predictive error (lower deviance is better).
plot(jags_model2)
# summary
jags_model2
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4c923268", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.239 0.262 454.726 455.064 455.233 455.421 455.740 1.001
## b1 -14.763 0.263 -15.275 -14.941 -14.766 -14.590 -14.242 1.002
## sigma 8.308 0.187 7.952 8.179 8.307 8.431 8.678 1.001
## deviance 7076.497 2.446 7073.681 7074.704 7075.891 7077.677 7082.731 1.001
## n.eff
## b0 4800
## b1 2400
## sigma 4200
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.0 and DIC = 7079.5
## DIC is an estimate of expected predictive error (lower deviance is better).
jags_model2$BUGSoutput$summary
## mean sd 2.5% 25% 50% 75%
## b0 455.238674 0.2619321 454.72650 455.063764 455.233471 455.42144
## b1 -14.762805 0.2634963 -15.27471 -14.941408 -14.765526 -14.58988
## deviance 7076.497469 2.4457223 7073.68102 7074.704431 7075.890575 7077.67660
## sigma 8.307896 0.1868884 7.95195 8.178914 8.306819 8.43092
## 97.5% Rhat n.eff
## b0 455.739579 1.001117 4800
## b1 -14.241550 1.001756 2400
## deviance 7082.730546 1.000796 5000
## sigma 8.678062 1.001218 4200
# diagnostic plots
ggs_model_2 = ggs(as.mcmc(jags_model2))
ggs_density(ggs_model_2) # visualize densities
ggs_traceplot(ggs_model_2)
traceplot(jags_model2, ask = FALSE, mfrow = c(2, 2))
ggs_running(ggs_model_2)
ggs_autocorrelation(ggs_model_2)
#The data list with all the predictors
#xc = x - mean(x)
dataList3 <-list( y = power_df$PE,
x = (power_df$AP - mean(power_df$AP))/sd(power_df$AP),
n = length(power_df$PE))
# BAYESIAN MODELS ----------------------------------------------------------
model3 = function() {
#Likelihood
for (i in 1:n){
y[i] ~ dnorm(mu[i], 1/sigma^2)
mu[i] <- b0 + b1*x[i]
}
#Prior for beta
b0 ~ dnorm(0, 0.001)
b1 ~ dnorm(0, 0.001)
#Prior for sigma
sigma ~ dexp(1/1.5)
}
#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")
# mcmc estimates
jags_model3 = jags(data = dataList3,
#inits = initsList,
model.file = model3,
parameters.to.save = params,
n.chains = 5,n.iter = 1e4,
n.burnin = 2e3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 3
## Total graph size: 3620
##
## Initializing model
print(jags_model3)
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ace963de2", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.177 0.462 454.302 454.863 455.181 455.484 456.092 1.002
## b1 8.616 0.460 7.709 8.310 8.617 8.921 9.523 1.001
## sigma 14.535 0.323 13.918 14.317 14.523 14.749 15.183 1.001
## deviance 8199.512 2.534 8196.684 8197.672 8198.875 8200.596 8206.045 1.002
## n.eff
## b0 1800
## b1 5000
## sigma 5000
## deviance 2700
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.2 and DIC = 8202.7
## DIC is an estimate of expected predictive error (lower deviance is better).
plot(jags_model3)
# summary
jags_model3
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ace963de2", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.177 0.462 454.302 454.863 455.181 455.484 456.092 1.002
## b1 8.616 0.460 7.709 8.310 8.617 8.921 9.523 1.001
## sigma 14.535 0.323 13.918 14.317 14.523 14.749 15.183 1.001
## deviance 8199.512 2.534 8196.684 8197.672 8198.875 8200.596 8206.045 1.002
## n.eff
## b0 1800
## b1 5000
## sigma 5000
## deviance 2700
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.2 and DIC = 8202.7
## DIC is an estimate of expected predictive error (lower deviance is better).
jags_model3$BUGSoutput$summary
## mean sd 2.5% 25% 50% 75%
## b0 455.177312 0.4622819 454.301540 454.863416 455.180644 455.484491
## b1 8.616282 0.4595163 7.709244 8.309739 8.616883 8.921291
## deviance 8199.512346 2.5344726 8196.683704 8197.671757 8198.874911 8200.596458
## sigma 14.535262 0.3233232 13.917635 14.316743 14.523403 14.748731
## 97.5% Rhat n.eff
## b0 456.09196 1.002200 1800
## b1 9.52305 1.000689 5000
## deviance 8206.04527 1.001611 2700
## sigma 15.18332 1.000819 5000
# diagnostic plots
ggs_model_3 = ggs(as.mcmc(jags_model3))
ggs_density(ggs_model_3) # visualize densities
ggs_traceplot(ggs_model_3)
traceplot(jags_model3, ask = FALSE, mfrow = c(2, 2))
ggs_running(ggs_model_3)
ggs_autocorrelation(ggs_model_3)
#The data list with all the predictors
#xc = x - mean(x)
dataList4 <-list( y = power_df$PE,
x = (power_df$RH - mean(power_df$RH))/sd(power_df$RH),
n = length(power_df$PE))
# BAYESIAN MODELS ----------------------------------------------------------
model4 = function() {
#Likelihood
for (i in 1:n){
y[i] ~ dnorm(mu[i], 1/sigma^2)
mu[i] <- b0 + b1*x[i]
}
#Prior for beta
b0 ~ dnorm(0, 0.001)
b1 ~ dnorm(0, 0.001)
#Prior for sigma
sigma ~ dexp(1/1.5)
}
#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")
# mcmc estimates
jags_model4 = jags(data = dataList4,
#inits = initsList,
model.file = model4,
parameters.to.save = params,
n.chains = 5,n.iter = 1e4,
n.burnin = 2e3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 3
## Total graph size: 3838
##
## Initializing model
print(jags_model4)
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac72e93af5", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.142 0.492 454.167 454.806 455.148 455.475 456.098 1.001
## b1 6.421 0.498 5.451 6.089 6.422 6.753 7.403 1.001
## sigma 15.633 0.350 14.968 15.395 15.624 15.867 16.345 1.001
## deviance 8345.111 2.494 8342.256 8343.256 8344.460 8346.284 8351.565 1.001
## n.eff
## b0 5000
## b1 5000
## sigma 5000
## deviance 3300
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 8348.2
## DIC is an estimate of expected predictive error (lower deviance is better).
plot(jags_model4)
# summary
jags_model4
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac72e93af5", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.142 0.492 454.167 454.806 455.148 455.475 456.098 1.001
## b1 6.421 0.498 5.451 6.089 6.422 6.753 7.403 1.001
## sigma 15.633 0.350 14.968 15.395 15.624 15.867 16.345 1.001
## deviance 8345.111 2.494 8342.256 8343.256 8344.460 8346.284 8351.565 1.001
## n.eff
## b0 5000
## b1 5000
## sigma 5000
## deviance 3300
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 8348.2
## DIC is an estimate of expected predictive error (lower deviance is better).
jags_model4$BUGSoutput$summary
## mean sd 2.5% 25% 50% 75%
## b0 455.141905 0.4924199 454.166983 454.805863 455.14849 455.474883
## b1 6.420655 0.4976479 5.451137 6.088664 6.42197 6.752661
## deviance 8345.110648 2.4944358 8342.255946 8343.256200 8344.46015 8346.284374
## sigma 15.633351 0.3503549 14.968269 15.394833 15.62376 15.867222
## 97.5% Rhat n.eff
## b0 456.097888 1.000810 5000
## b1 7.402542 1.000737 5000
## deviance 8351.565222 1.001421 3300
## sigma 16.344577 1.000864 5000
# diagnostic plots
ggs_model_4 = ggs(as.mcmc(jags_model4))
ggs_density(ggs_model_4) # visualize densities
ggs_traceplot(ggs_model_4)
traceplot(jags_model4, ask = FALSE, mfrow = c(2, 2))
ggs_running(ggs_model_4)
ggs_autocorrelation(ggs_model_4)
#The data list with all the predictors
#xc = x - mean(x)
dataList5 <-list( y = power_df$PE,
AT = (power_df$AT - mean(power_df$AT))/sd(power_df$AT),
V = (power_df$V - mean(power_df$V))/sd(power_df$V),
AP = (power_df$AP - mean(power_df$AP))/sd(power_df$AP),
RH = (power_df$RH - mean(power_df$RH))/sd(power_df$RH),
n = length(power_df$PE))
# BAYESIAN MODELS ----------------------------------------------------------
model5 = function() {
#Likelihood
for (i in 1:n){
y[i] ~ dnorm(mu[i], 1/sigma^2)
mu[i] <- b0 + b1*AT[i] + b2*V[i] + b3*AP[i] + b4*RH[i]
}
#Prior for beta
b0 ~ dnorm(0, 0.001) #the intercept
b1 ~ dnorm(0, 0.001) #slope of Temperature (AT)
b2 ~ dnorm(0, 0.001) #slope of Exhaust Vacuum (V)
b3 ~ dnorm(0, 0.001) #slope of Ambient Pressure (AP)
b4 ~ dnorm(0, 0.001) #slope of Relative Humidity (RH)
#Prior for sigma
sigma ~ dexp(1/1.5)
}
#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","b2","b3","b4","sigma")
# mcmc estimates
jags_model5 = jags(data = dataList5,
#inits = initsList,
model.file = model5,
parameters.to.save = params,
n.chains = 5,n.iter = 1e4,
n.burnin = 2e3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 6
## Total graph size: 9001
##
## Initializing model
print(jags_model5)
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4ee81f4e", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.252 0.143 454.961 455.156 455.250 455.350 455.531 1.001
## b1 -14.801 0.345 -15.472 -15.030 -14.806 -14.569 -14.129 1.001
## b2 -2.796 0.285 -3.354 -2.987 -2.795 -2.605 -2.219 1.001
## b3 0.440 0.170 0.099 0.327 0.441 0.552 0.775 1.001
## b4 -2.454 0.186 -2.820 -2.583 -2.451 -2.327 -2.093 1.001
## sigma 4.480 0.099 4.290 4.413 4.478 4.547 4.679 1.001
## deviance 5838.366 3.451 5833.635 5835.825 5837.689 5840.242 5846.734 1.001
## n.eff
## b0 5000
## b1 4400
## b2 5000
## b3 5000
## b4 4000
## sigma 4400
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 6.0 and DIC = 5844.3
## DIC is an estimate of expected predictive error (lower deviance is better).
plot(jags_model5)
# summary
jags_model5
## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4ee81f4e", fit using jags,
## 5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
## n.sims = 5000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 455.252 0.143 454.961 455.156 455.250 455.350 455.531 1.001
## b1 -14.801 0.345 -15.472 -15.030 -14.806 -14.569 -14.129 1.001
## b2 -2.796 0.285 -3.354 -2.987 -2.795 -2.605 -2.219 1.001
## b3 0.440 0.170 0.099 0.327 0.441 0.552 0.775 1.001
## b4 -2.454 0.186 -2.820 -2.583 -2.451 -2.327 -2.093 1.001
## sigma 4.480 0.099 4.290 4.413 4.478 4.547 4.679 1.001
## deviance 5838.366 3.451 5833.635 5835.825 5837.689 5840.242 5846.734 1.001
## n.eff
## b0 5000
## b1 4400
## b2 5000
## b3 5000
## b4 4000
## sigma 4400
## deviance 5000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 6.0 and DIC = 5844.3
## DIC is an estimate of expected predictive error (lower deviance is better).
jags_model5$BUGSoutput$summary
## mean sd 2.5% 25% 50%
## b0 455.2515888 0.14267619 454.96134381 455.1556887 455.2495713
## b1 -14.8010127 0.34493598 -15.47222944 -15.0298845 -14.8055004
## b2 -2.7959705 0.28538015 -3.35367785 -2.9870088 -2.7954063
## b3 0.4398322 0.17020483 0.09914486 0.3274215 0.4410526
## b4 -2.4537975 0.18579882 -2.82033534 -2.5825010 -2.4512534
## deviance 5838.3663480 3.45143501 5833.63504093 5835.8251387 5837.6888110
## sigma 4.4804887 0.09922034 4.29019799 4.4127510 4.4778430
## 75% 97.5% Rhat n.eff
## b0 455.3501648 455.5310487 1.000807 5000
## b1 -14.5692496 -14.1293787 1.001186 4400
## b2 -2.6048753 -2.2192213 1.000914 5000
## b3 0.5517485 0.7752951 1.000783 5000
## b4 -2.3273537 -2.0925050 1.001250 4000
## deviance 5840.2418610 5846.7338837 1.000907 5000
## sigma 4.5470740 4.6789008 1.001179 4400
# diagnostic plots
ggs_model_5 = ggs(as.mcmc(jags_model5))
ggs_density(ggs_model_5) # visualize densities
ggs_traceplot(ggs_model_5)
traceplot(jags_model5, ask = FALSE, mfrow = c(2, 2))
ggs_running(ggs_model_5)
ggs_autocorrelation(ggs_model_5)
# calculate wAIC with JAGS
# https://sourceforge.net/p/mcmc-jags/discussion/610036/thread/8211df61/#ea5c
samples <- jags.samples(jags_model1$model,c("WAIC","deviance"), type = "mean",
n.iter = 6000,
n.burnin = 2000,
n.thin = 4)
samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m1 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m1
## waic p_waic
## 6213.4 3.3
samples <- jags.samples(jags_model2$model,c("WAIC","deviance"), type = "mean",
n.iter = 6000,
n.burnin = 2000,
n.thin = 4)
samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m2 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m2
## waic p_waic
## 7079.6 3.1
samples <- jags.samples(jags_model3$model,c("WAIC","deviance"), type = "mean",
n.iter = 6000,
n.burnin = 2000,
n.thin = 4)
samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m3 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m3
## waic p_waic
## 8202.4 2.9
samples <- jags.samples(jags_model4$model,c("WAIC","deviance"), type = "mean",
n.iter = 6000,
n.burnin = 2000,
n.thin = 4)
samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m4 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m4
## waic p_waic
## 8347.6 2.5
samples <- jags.samples(jags_model5$model,c("WAIC","deviance"), type = "mean",
n.iter = 6000,
n.burnin = 2000,
n.thin = 4)
samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m5 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m5
## waic p_waic
## 5844.6 6.2
data.frame(model = c('jags_model1','jags_model2','jags_model3',
'jags_model4','jags_model5'),
waic = c(waic_m1[1], waic_m2[1], waic_m3[1],waic_m4[1], waic_m5[1]),
p_waic = c(waic_m1[2], waic_m2[2], waic_m3[2], waic_m4[2], waic_m5[2])) %>%
arrange(waic)
## model waic p_waic
## 1 jags_model5 5844.6 6.2
## 2 jags_model1 6213.4 3.3
## 3 jags_model2 7079.6 3.1
## 4 jags_model3 8202.4 2.9
## 5 jags_model4 8347.6 2.5
Diagnostics look better for model5, DIC is smaller: mode5 1 seems preferable
chainMat <- jags_model5$BUGSoutput$sims.matrix
# Intervals
cred <- 0.95
(beta.ET.jags <- apply(chainMat, 2, quantile, prob=c((1-cred)/2, 1-(1-cred)/2)))
## b0 b1 b2 b3 b4 deviance sigma
## 2.5% 454.9613 -15.47223 -3.353678 0.09914486 -2.820335 5833.635 4.290198
## 97.5% 455.5310 -14.12938 -2.219221 0.77529509 -2.092505 5846.734 4.678901
# Point estimates
(beta.hat.jags <- colMeans(chainMat))
## b0 b1 b2 b3 b4 deviance
## 455.2515888 -14.8010127 -2.7959705 0.4398322 -2.4537975 5838.3663480
## sigma
## 4.4804887
# What about the HPD?
(beta.HPD.jags <- coda::HPDinterval(as.mcmc(chainMat)))
## lower upper
## b0 454.9718287 455.5389418
## b1 -15.4701505 -14.1254741
## b2 -3.3948655 -2.2659968
## b3 0.1007402 0.7769558
## b4 -2.8091080 -2.0822216
## deviance 5833.1354173 5845.3504672
## sigma 4.2800273 4.6665026
## attr(,"Probability")
## [1] 0.95
# FREQUENTIST MODEL
#with all data
mod_0 = lm(PE ~ ., data=power_df)
summary(mod_0)
##
## Call:
## lm(formula = PE ~ ., data = power_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3885 -3.3483 -0.1442 3.1255 16.9664
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 445.91458 28.51897 15.636 < 2e-16 ***
## AT -2.00992 0.04765 -42.177 < 2e-16 ***
## V -0.22669 0.02310 -9.814 < 2e-16 ***
## AP 0.07157 0.02764 2.590 0.00975 **
## RH -0.16684 0.01271 -13.124 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.481 on 995 degrees of freedom
## Multiple R-squared: 0.9303, Adjusted R-squared: 0.9301
## F-statistic: 3322 on 4 and 995 DF, p-value: < 2.2e-16
fm1rmse<- sqrt(mean(mod_0$residuals^2))
fm1rmse
## [1] 4.469364
plot(mod_0, 1) # looks ok
plot(mod_0, 2) # looks ok