SDSII_Final Project

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.3

## Warning: package 'dplyr' was built under R version 4.3.3

## Warning: package 'stringr' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(GGally)

## Warning: package 'GGally' was built under R version 4.3.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(R2jags)

## Warning: package 'R2jags' was built under R version 4.3.3

## Loading required package: rjags

## Warning: package 'rjags' was built under R version 4.3.3

## Loading required package: coda

## Warning: package 'coda' was built under R version 4.3.3

## Linked to JAGS 4.3.1
## Loaded modules: basemod,bugs
## 
## Attaching package: 'R2jags'
## 
## The following object is masked from 'package:coda':
## 
##     traceplot

library(ggmcmc)

df = read.csv("C:/Users/panne/Desktop/22_23/SDS II/2024/ccpp.csv")
names(df)

## [1] "AT" "V"  "AP" "RH" "PE" "X"

head(df)

##      AT     V      AP    RH     PE  X
## 1 14.96 41.76 1024.07 73.17 463.26 NA
## 2 25.18 62.96 1020.04 59.08 444.37 NA
## 3  5.11 39.40 1012.16 92.14 488.56 NA
## 4 20.86 57.32 1010.24 76.64 446.48 NA
## 5 10.82 37.50 1009.23 96.62 473.90 NA
## 6 26.27 59.44 1012.23 58.77 443.67 NA

power_df <- df %>% select(c('AT','V','AP','RH','PE'))
head(power_df)

##      AT     V      AP    RH     PE
## 1 14.96 41.76 1024.07 73.17 463.26
## 2 25.18 62.96 1020.04 59.08 444.37
## 3  5.11 39.40 1012.16 92.14 488.56
## 4 20.86 57.32 1010.24 76.64 446.48
## 5 10.82 37.50 1009.23 96.62 473.90
## 6 26.27 59.44 1012.23 58.77 443.67

power_df <-  power_df %>% slice(1:1000)
head(power_df)

##      AT     V      AP    RH     PE
## 1 14.96 41.76 1024.07 73.17 463.26
## 2 25.18 62.96 1020.04 59.08 444.37
## 3  5.11 39.40 1012.16 92.14 488.56
## 4 20.86 57.32 1010.24 76.64 446.48
## 5 10.82 37.50 1009.23 96.62 473.90
## 6 26.27 59.44 1012.23 58.77 443.67

# Descriptive statistics
ybar <-mean(power_df$PE)
std  <-sd(power_df$PE)
s2   <-var(power_df$PE)
n    <-length(power_df$PE)

descriptive <-c(n,ybar,std,s2)
names(descriptive) <-c("N","Ybar","std","s^2")

descriptive

##          N       Ybar        std        s^2 
## 1000.00000  455.26359   16.94249  287.04782

summary(power_df$PE)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   425.3   441.0   452.6   455.3   469.2   495.8

library(psych)

## Warning: package 'psych' was built under R version 4.3.3

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

corPlot(power_df, cex = 1.2)

y = power_df$PE
x = power_df$AT
n = length(y)

hist(y, xlab = "Energy Output (PE)", main = paste("Mean=", mean(y), ", SD=", sd(y)))

plot(x, y, xlab = "Hourly Avg. Ambient Temperature (AT)", 
     ylab = "Energy Output (PE)", 
     main = paste("Correlation=", cor(x, y)))

#The data list with all the predictors
dataList1 <-list(      y  = power_df$PE,
                       x  = (power_df$AT - mean(power_df$AT))/sd(power_df$AT),
                       n  = length(power_df$PE))

# BAYESIAN MODELS ----------------------------------------------------------


model1 = function() {
  
  #Likelihood
  for (i in 1:n){
          y[i] ~ dnorm(mu[i], 1/sigma^2)
          mu[i] <- b0 + b1*x[i]
              
  }
  
  #Prior for beta
  b0 ~ dnorm(0, 0.001)
  b1 ~ dnorm(0, 0.001)
  
  #Prior for sigma
  sigma ~ dexp(1/1.5)      

}

#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")

# mcmc estimates
jags_model1 = jags(data = dataList1,
                  #inits = initsList,
                  model.file = model1,
                  parameters.to.save = params,
                  n.chains = 5,n.iter = 1e4,
                  n.burnin = 2e3)

## module glm loaded

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1000
##    Unobserved stochastic nodes: 3
##    Total graph size: 3688
## 
## Initializing model

print(jags_model1)

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac7f9b2d0c", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.253   0.169  454.926  455.139  455.254  455.367  455.585 1.001
## b1        -16.060   0.172  -16.395  -16.176  -16.063  -15.943  -15.728 1.001
## sigma       5.393   0.120    5.163    5.312    5.389    5.470    5.634 1.001
## deviance 6210.114   2.490 6207.319 6208.296 6209.449 6211.226 6216.473 1.001
##          n.eff
## b0        5000
## b1        5000
## sigma     5000
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 6213.2
## DIC is an estimate of expected predictive error (lower deviance is better).

plot(jags_model1)

# summary
jags_model1

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac7f9b2d0c", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.253   0.169  454.926  455.139  455.254  455.367  455.585 1.001
## b1        -16.060   0.172  -16.395  -16.176  -16.063  -15.943  -15.728 1.001
## sigma       5.393   0.120    5.163    5.312    5.389    5.470    5.634 1.001
## deviance 6210.114   2.490 6207.319 6208.296 6209.449 6211.226 6216.473 1.001
##          n.eff
## b0        5000
## b1        5000
## sigma     5000
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 6213.2
## DIC is an estimate of expected predictive error (lower deviance is better).

jags_model1$BUGSoutput$summary

##                 mean        sd        2.5%         25%        50%         75%
## b0        455.252781 0.1689331  454.926256  455.138543  455.25378  455.366970
## b1        -16.060059 0.1724330  -16.394823  -16.175893  -16.06265  -15.943373
## deviance 6210.113947 2.4896243 6207.318588 6208.296218 6209.44949 6211.225934
## sigma       5.392668 0.1202535    5.163111    5.311609    5.38851    5.469684
##                97.5%     Rhat n.eff
## b0        455.584866 1.000624  5000
## b1        -15.727924 1.000900  5000
## deviance 6216.472752 1.000612  5000
## sigma       5.634289 1.000601  5000

# diagnostic plots
ggs_model_1 = ggs(as.mcmc(jags_model1))
ggs_density(ggs_model_1) # visualize densities

ggs_traceplot(ggs_model_1)

traceplot(jags_model1, ask = FALSE, mfrow = c(2, 2))

ggs_running(ggs_model_1)

ggs_autocorrelation(ggs_model_1)

#The data list with all the predictors
#xc = x - mean(x)

dataList2 <-list(      y  = power_df$PE,
                       x  = (power_df$V - mean(power_df$V))/sd(power_df$V),
                       n  = length(power_df$PE))

# BAYESIAN MODELS ----------------------------------------------------------


model2 = function() {
  
  #Likelihood
  for (i in 1:n){
          y[i] ~ dnorm(mu[i], 1/sigma^2)
          mu[i] <- b0 + b1*x[i]
              
  }
  
  #Prior for beta
  b0 ~ dnorm(0, 0.001)
  b1 ~ dnorm(0, 0.001)
  
  #Prior for sigma
  sigma ~ dexp(1/1.5)      

}

#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")

# mcmc estimates
jags_model2 = jags(data = dataList2,
                  #inits = initsList,
                  model.file = model2,
                  parameters.to.save = params,
                  n.chains = 5,n.iter = 1e4,
                  n.burnin = 2e3)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1000
##    Unobserved stochastic nodes: 3
##    Total graph size: 2874
## 
## Initializing model

print(jags_model2)

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4c923268", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.239   0.262  454.726  455.064  455.233  455.421  455.740 1.001
## b1        -14.763   0.263  -15.275  -14.941  -14.766  -14.590  -14.242 1.002
## sigma       8.308   0.187    7.952    8.179    8.307    8.431    8.678 1.001
## deviance 7076.497   2.446 7073.681 7074.704 7075.891 7077.677 7082.731 1.001
##          n.eff
## b0        4800
## b1        2400
## sigma     4200
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.0 and DIC = 7079.5
## DIC is an estimate of expected predictive error (lower deviance is better).

plot(jags_model2)

# summary
jags_model2

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4c923268", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.239   0.262  454.726  455.064  455.233  455.421  455.740 1.001
## b1        -14.763   0.263  -15.275  -14.941  -14.766  -14.590  -14.242 1.002
## sigma       8.308   0.187    7.952    8.179    8.307    8.431    8.678 1.001
## deviance 7076.497   2.446 7073.681 7074.704 7075.891 7077.677 7082.731 1.001
##          n.eff
## b0        4800
## b1        2400
## sigma     4200
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.0 and DIC = 7079.5
## DIC is an estimate of expected predictive error (lower deviance is better).

jags_model2$BUGSoutput$summary

##                 mean        sd       2.5%         25%         50%        75%
## b0        455.238674 0.2619321  454.72650  455.063764  455.233471  455.42144
## b1        -14.762805 0.2634963  -15.27471  -14.941408  -14.765526  -14.58988
## deviance 7076.497469 2.4457223 7073.68102 7074.704431 7075.890575 7077.67660
## sigma       8.307896 0.1868884    7.95195    8.178914    8.306819    8.43092
##                97.5%     Rhat n.eff
## b0        455.739579 1.001117  4800
## b1        -14.241550 1.001756  2400
## deviance 7082.730546 1.000796  5000
## sigma       8.678062 1.001218  4200

# diagnostic plots
ggs_model_2 = ggs(as.mcmc(jags_model2))
ggs_density(ggs_model_2) # visualize densities

ggs_traceplot(ggs_model_2)

traceplot(jags_model2, ask = FALSE, mfrow = c(2, 2))

ggs_running(ggs_model_2)

ggs_autocorrelation(ggs_model_2)

#The data list with all the predictors
#xc = x - mean(x)

dataList3 <-list(      y  = power_df$PE,
                       x  = (power_df$AP - mean(power_df$AP))/sd(power_df$AP),
                       n  = length(power_df$PE))

# BAYESIAN MODELS ----------------------------------------------------------


model3 = function() {
  
  #Likelihood
  for (i in 1:n){
          y[i] ~ dnorm(mu[i], 1/sigma^2)
          mu[i] <- b0 + b1*x[i]
              
  }
  
  #Prior for beta
  b0 ~ dnorm(0, 0.001)
  b1 ~ dnorm(0, 0.001)
  
  #Prior for sigma
  sigma ~ dexp(1/1.5)      

}

#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")

# mcmc estimates
jags_model3 = jags(data = dataList3,
                  #inits = initsList,
                  model.file = model3,
                  parameters.to.save = params,
                  n.chains = 5,n.iter = 1e4,
                  n.burnin = 2e3)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1000
##    Unobserved stochastic nodes: 3
##    Total graph size: 3620
## 
## Initializing model

print(jags_model3)

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ace963de2", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.177   0.462  454.302  454.863  455.181  455.484  456.092 1.002
## b1          8.616   0.460    7.709    8.310    8.617    8.921    9.523 1.001
## sigma      14.535   0.323   13.918   14.317   14.523   14.749   15.183 1.001
## deviance 8199.512   2.534 8196.684 8197.672 8198.875 8200.596 8206.045 1.002
##          n.eff
## b0        1800
## b1        5000
## sigma     5000
## deviance  2700
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.2 and DIC = 8202.7
## DIC is an estimate of expected predictive error (lower deviance is better).

plot(jags_model3)

# summary
jags_model3

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ace963de2", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.177   0.462  454.302  454.863  455.181  455.484  456.092 1.002
## b1          8.616   0.460    7.709    8.310    8.617    8.921    9.523 1.001
## sigma      14.535   0.323   13.918   14.317   14.523   14.749   15.183 1.001
## deviance 8199.512   2.534 8196.684 8197.672 8198.875 8200.596 8206.045 1.002
##          n.eff
## b0        1800
## b1        5000
## sigma     5000
## deviance  2700
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.2 and DIC = 8202.7
## DIC is an estimate of expected predictive error (lower deviance is better).

jags_model3$BUGSoutput$summary

##                 mean        sd        2.5%         25%         50%         75%
## b0        455.177312 0.4622819  454.301540  454.863416  455.180644  455.484491
## b1          8.616282 0.4595163    7.709244    8.309739    8.616883    8.921291
## deviance 8199.512346 2.5344726 8196.683704 8197.671757 8198.874911 8200.596458
## sigma      14.535262 0.3233232   13.917635   14.316743   14.523403   14.748731
##               97.5%     Rhat n.eff
## b0        456.09196 1.002200  1800
## b1          9.52305 1.000689  5000
## deviance 8206.04527 1.001611  2700
## sigma      15.18332 1.000819  5000

# diagnostic plots
ggs_model_3 = ggs(as.mcmc(jags_model3))
ggs_density(ggs_model_3) # visualize densities

ggs_traceplot(ggs_model_3)

traceplot(jags_model3, ask = FALSE, mfrow = c(2, 2))

ggs_running(ggs_model_3)

ggs_autocorrelation(ggs_model_3)

#The data list with all the predictors
#xc = x - mean(x)

dataList4 <-list(      y  = power_df$PE,
                       x  = (power_df$RH - mean(power_df$RH))/sd(power_df$RH),
                       n  = length(power_df$PE))

# BAYESIAN MODELS ----------------------------------------------------------


model4 = function() {
  
  #Likelihood
  for (i in 1:n){
          y[i] ~ dnorm(mu[i], 1/sigma^2)
          mu[i] <- b0 + b1*x[i]
              
  }
  
  #Prior for beta
  b0 ~ dnorm(0, 0.001)
  b1 ~ dnorm(0, 0.001)
  
  #Prior for sigma
  sigma ~ dexp(1/1.5)      

}

#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","sigma")

# mcmc estimates
jags_model4 = jags(data = dataList4,
                  #inits = initsList,
                  model.file = model4,
                  parameters.to.save = params,
                  n.chains = 5,n.iter = 1e4,
                  n.burnin = 2e3)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1000
##    Unobserved stochastic nodes: 3
##    Total graph size: 3838
## 
## Initializing model

print(jags_model4)

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac72e93af5", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.142   0.492  454.167  454.806  455.148  455.475  456.098 1.001
## b1          6.421   0.498    5.451    6.089    6.422    6.753    7.403 1.001
## sigma      15.633   0.350   14.968   15.395   15.624   15.867   16.345 1.001
## deviance 8345.111   2.494 8342.256 8343.256 8344.460 8346.284 8351.565 1.001
##          n.eff
## b0        5000
## b1        5000
## sigma     5000
## deviance  3300
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 8348.2
## DIC is an estimate of expected predictive error (lower deviance is better).

plot(jags_model4)

# summary
jags_model4

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac72e93af5", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.142   0.492  454.167  454.806  455.148  455.475  456.098 1.001
## b1          6.421   0.498    5.451    6.089    6.422    6.753    7.403 1.001
## sigma      15.633   0.350   14.968   15.395   15.624   15.867   16.345 1.001
## deviance 8345.111   2.494 8342.256 8343.256 8344.460 8346.284 8351.565 1.001
##          n.eff
## b0        5000
## b1        5000
## sigma     5000
## deviance  3300
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 3.1 and DIC = 8348.2
## DIC is an estimate of expected predictive error (lower deviance is better).

jags_model4$BUGSoutput$summary

##                 mean        sd        2.5%         25%        50%         75%
## b0        455.141905 0.4924199  454.166983  454.805863  455.14849  455.474883
## b1          6.420655 0.4976479    5.451137    6.088664    6.42197    6.752661
## deviance 8345.110648 2.4944358 8342.255946 8343.256200 8344.46015 8346.284374
## sigma      15.633351 0.3503549   14.968269   15.394833   15.62376   15.867222
##                97.5%     Rhat n.eff
## b0        456.097888 1.000810  5000
## b1          7.402542 1.000737  5000
## deviance 8351.565222 1.001421  3300
## sigma      16.344577 1.000864  5000

# diagnostic plots
ggs_model_4 = ggs(as.mcmc(jags_model4))
ggs_density(ggs_model_4) # visualize densities

ggs_traceplot(ggs_model_4)

traceplot(jags_model4, ask = FALSE, mfrow = c(2, 2))

ggs_running(ggs_model_4)

ggs_autocorrelation(ggs_model_4)

#The data list with all the predictors
#xc = x - mean(x)

dataList5 <-list(      y   = power_df$PE,
                       AT  = (power_df$AT - mean(power_df$AT))/sd(power_df$AT),
                       V   = (power_df$V - mean(power_df$V))/sd(power_df$V),
                       AP  = (power_df$AP - mean(power_df$AP))/sd(power_df$AP),
                       RH  = (power_df$RH - mean(power_df$RH))/sd(power_df$RH),
                       n   = length(power_df$PE))

# BAYESIAN MODELS ----------------------------------------------------------


model5 = function() {
  
  #Likelihood
  for (i in 1:n){
          y[i] ~ dnorm(mu[i], 1/sigma^2)
          mu[i] <- b0 + b1*AT[i] + b2*V[i] + b3*AP[i] + b4*RH[i]
              
  }
  
  #Prior for beta
  b0 ~ dnorm(0, 0.001) #the intercept
  b1 ~ dnorm(0, 0.001) #slope of Temperature (AT) 
  b2 ~ dnorm(0, 0.001) #slope of Exhaust Vacuum (V) 
  b3 ~ dnorm(0, 0.001) #slope of Ambient Pressure (AP)
  b4 ~ dnorm(0, 0.001) #slope of Relative Humidity (RH)
  
  #Prior for sigma
  sigma ~ dexp(1/1.5)      

}

#parameters to observe
#params = c(paste0("b", 0:4), "sigma")
params = c("b0","b1","b2","b3","b4","sigma")

# mcmc estimates
jags_model5 = jags(data = dataList5,
                  #inits = initsList,
                  model.file = model5,
                  parameters.to.save = params,
                  n.chains = 5,n.iter = 1e4,
                  n.burnin = 2e3)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1000
##    Unobserved stochastic nodes: 6
##    Total graph size: 9001
## 
## Initializing model

print(jags_model5)

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4ee81f4e", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.252   0.143  454.961  455.156  455.250  455.350  455.531 1.001
## b1        -14.801   0.345  -15.472  -15.030  -14.806  -14.569  -14.129 1.001
## b2         -2.796   0.285   -3.354   -2.987   -2.795   -2.605   -2.219 1.001
## b3          0.440   0.170    0.099    0.327    0.441    0.552    0.775 1.001
## b4         -2.454   0.186   -2.820   -2.583   -2.451   -2.327   -2.093 1.001
## sigma       4.480   0.099    4.290    4.413    4.478    4.547    4.679 1.001
## deviance 5838.366   3.451 5833.635 5835.825 5837.689 5840.242 5846.734 1.001
##          n.eff
## b0        5000
## b1        4400
## b2        5000
## b3        5000
## b4        4000
## sigma     4400
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 6.0 and DIC = 5844.3
## DIC is an estimate of expected predictive error (lower deviance is better).

plot(jags_model5)

# summary
jags_model5

## Inference for Bugs model at "C:/Users/panne/AppData/Local/Temp/Rtmp0WgI56/model24ac4ee81f4e", fit using jags,
##  5 chains, each with 10000 iterations (first 2000 discarded), n.thin = 8
##  n.sims = 5000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5%  Rhat
## b0        455.252   0.143  454.961  455.156  455.250  455.350  455.531 1.001
## b1        -14.801   0.345  -15.472  -15.030  -14.806  -14.569  -14.129 1.001
## b2         -2.796   0.285   -3.354   -2.987   -2.795   -2.605   -2.219 1.001
## b3          0.440   0.170    0.099    0.327    0.441    0.552    0.775 1.001
## b4         -2.454   0.186   -2.820   -2.583   -2.451   -2.327   -2.093 1.001
## sigma       4.480   0.099    4.290    4.413    4.478    4.547    4.679 1.001
## deviance 5838.366   3.451 5833.635 5835.825 5837.689 5840.242 5846.734 1.001
##          n.eff
## b0        5000
## b1        4400
## b2        5000
## b3        5000
## b4        4000
## sigma     4400
## deviance  5000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 6.0 and DIC = 5844.3
## DIC is an estimate of expected predictive error (lower deviance is better).

jags_model5$BUGSoutput$summary

##                  mean         sd          2.5%          25%          50%
## b0        455.2515888 0.14267619  454.96134381  455.1556887  455.2495713
## b1        -14.8010127 0.34493598  -15.47222944  -15.0298845  -14.8055004
## b2         -2.7959705 0.28538015   -3.35367785   -2.9870088   -2.7954063
## b3          0.4398322 0.17020483    0.09914486    0.3274215    0.4410526
## b4         -2.4537975 0.18579882   -2.82033534   -2.5825010   -2.4512534
## deviance 5838.3663480 3.45143501 5833.63504093 5835.8251387 5837.6888110
## sigma       4.4804887 0.09922034    4.29019799    4.4127510    4.4778430
##                   75%        97.5%     Rhat n.eff
## b0        455.3501648  455.5310487 1.000807  5000
## b1        -14.5692496  -14.1293787 1.001186  4400
## b2         -2.6048753   -2.2192213 1.000914  5000
## b3          0.5517485    0.7752951 1.000783  5000
## b4         -2.3273537   -2.0925050 1.001250  4000
## deviance 5840.2418610 5846.7338837 1.000907  5000
## sigma       4.5470740    4.6789008 1.001179  4400

# diagnostic plots
ggs_model_5 = ggs(as.mcmc(jags_model5))
ggs_density(ggs_model_5) # visualize densities

ggs_traceplot(ggs_model_5)

traceplot(jags_model5, ask = FALSE, mfrow = c(2, 2))

ggs_running(ggs_model_5)

ggs_autocorrelation(ggs_model_5)

WAIC in Jags

# calculate wAIC with JAGS
# https://sourceforge.net/p/mcmc-jags/discussion/610036/thread/8211df61/#ea5c
samples <- jags.samples(jags_model1$model,c("WAIC","deviance"), type = "mean", 
                        n.iter = 6000,
                        n.burnin = 2000,
                        n.thin = 4)

samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m1 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m1

##   waic p_waic 
## 6213.4    3.3

samples <- jags.samples(jags_model2$model,c("WAIC","deviance"), type = "mean", 
                        n.iter = 6000,
                        n.burnin = 2000,
                        n.thin = 4)

samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m2 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m2

##   waic p_waic 
## 7079.6    3.1

samples <- jags.samples(jags_model3$model,c("WAIC","deviance"), type = "mean", 
                        n.iter = 6000,
                        n.burnin = 2000,
                        n.thin = 4)

samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m3 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m3

##   waic p_waic 
## 8202.4    2.9

samples <- jags.samples(jags_model4$model,c("WAIC","deviance"), type = "mean", 
                        n.iter = 6000,
                        n.burnin = 2000,
                        n.thin = 4)

samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m4 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m4

##   waic p_waic 
## 8347.6    2.5

samples <- jags.samples(jags_model5$model,c("WAIC","deviance"), type = "mean", 
                        n.iter = 6000,
                        n.burnin = 2000,
                        n.thin = 4)

samples$p_waic <- samples$WAIC
samples$waic <- samples$deviance + samples$p_waic
tmp <- sapply(samples, sum)
waic_m5 <- round(c(waic = tmp[["waic"]], p_waic = tmp[["p_waic"]]),1)
waic_m5

##   waic p_waic 
## 5844.6    6.2

data.frame(model = c('jags_model1','jags_model2','jags_model3',
                     'jags_model4','jags_model5'),
           
waic = c(waic_m1[1], waic_m2[1], waic_m3[1],waic_m4[1], waic_m5[1]),

p_waic = c(waic_m1[2], waic_m2[2], waic_m3[2], waic_m4[2], waic_m5[2])) %>%
  
arrange(waic)

##         model   waic p_waic
## 1 jags_model5 5844.6    6.2
## 2 jags_model1 6213.4    3.3
## 3 jags_model2 7079.6    3.1
## 4 jags_model3 8202.4    2.9
## 5 jags_model4 8347.6    2.5

Diagnostics look better for model5, DIC is smaller: mode5 1 seems preferable

chainMat <- jags_model5$BUGSoutput$sims.matrix

# Intervals
cred <- 0.95
(beta.ET.jags <- apply(chainMat, 2, quantile, prob=c((1-cred)/2, 1-(1-cred)/2)))

##             b0        b1        b2         b3        b4 deviance    sigma
## 2.5%  454.9613 -15.47223 -3.353678 0.09914486 -2.820335 5833.635 4.290198
## 97.5% 455.5310 -14.12938 -2.219221 0.77529509 -2.092505 5846.734 4.678901

# Point estimates
(beta.hat.jags <- colMeans(chainMat))

##           b0           b1           b2           b3           b4     deviance 
##  455.2515888  -14.8010127   -2.7959705    0.4398322   -2.4537975 5838.3663480 
##        sigma 
##    4.4804887

# What about the HPD?
(beta.HPD.jags <- coda::HPDinterval(as.mcmc(chainMat)))

##                 lower        upper
## b0        454.9718287  455.5389418
## b1        -15.4701505  -14.1254741
## b2         -3.3948655   -2.2659968
## b3          0.1007402    0.7769558
## b4         -2.8091080   -2.0822216
## deviance 5833.1354173 5845.3504672
## sigma       4.2800273    4.6665026
## attr(,"Probability")
## [1] 0.95

# FREQUENTIST MODEL
#with all data
mod_0 = lm(PE ~ ., data=power_df)
summary(mod_0)

## 
## Call:
## lm(formula = PE ~ ., data = power_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.3885  -3.3483  -0.1442   3.1255  16.9664 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 445.91458   28.51897  15.636  < 2e-16 ***
## AT           -2.00992    0.04765 -42.177  < 2e-16 ***
## V            -0.22669    0.02310  -9.814  < 2e-16 ***
## AP            0.07157    0.02764   2.590  0.00975 ** 
## RH           -0.16684    0.01271 -13.124  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.481 on 995 degrees of freedom
## Multiple R-squared:  0.9303, Adjusted R-squared:  0.9301 
## F-statistic:  3322 on 4 and 995 DF,  p-value: < 2.2e-16

fm1rmse<- sqrt(mean(mod_0$residuals^2))
fm1rmse

## [1] 4.469364

plot(mod_0, 1) # looks ok

plot(mod_0, 2) # looks ok

SDSII_Final Project

Ousainou Panneh

2025-02-05

WAIC in Jags