ECON565-Assignment 2-reproduce results from Rust & Rothwell (1995 & 1996)

Kun Wang (86756137) March28-2014

Problem 1. Summary statistics and figures

Table I Replication

The replicatd Table I summarizes the changes in NPP operations that occured over sample periods. The replicated results are almost consistent with Rust & Rothwell (1995), as I did not identify significant differences in the reported numbers.

df <- plantData
df$percent.time.operating <- df$hrs.operating/df$hrs.total
df$percent.time.refueling <- df$hrs.refuel/df$hrs.total
df$percent.time.forced.out <- df$hrs.forced.out/df$hrs.total
df$percent.time.planned.out <- df$hrs.plan.out/df$hrs.total
df$era <- "1975-79"
df$era[df$year >= 80] <- "1980-83"
df$era[df$year >= 84] <- "1984-93"
tab1 <- aggregate(. ~ era, df[, c("era", "percent.time.operating", "percent.time.refueling", 
    "percent.time.forced.out", "percent.time.planned.out")], FUN = mean)
tab1 <- cbind(tab1, aggregate(. ~ era, df[, c("era", "hrs.total")], FUN = sum)[, 
    2], aggregate(. ~ era, df[df$spell.type != "exit", c("era", "hrs.total")], 
    FUN = length)[, 2])
colnames(tab1)[6:7] <- c("hours", "months")

tab1

##       era percent.time.operating percent.time.refueling
## 1 1975-79                 0.7189                 0.1223
## 2 1980-83                 0.6371                 0.1714
## 3 1984-93                 0.6806                 0.1503
##   percent.time.forced.out percent.time.planned.out    hours months
## 1                 0.09234                  0.06644  2526096   3402
## 2                 0.08898                  0.07464  2510784   3341
## 3                 0.09916                  0.03303 10203048  13412

Figure 3 Replication

Replicated Figure 3 is almost the same as Rust & Rothwell 1995. The number of forced outages per month has been reduced steadily with time. This is due to two reasons. First, the NRC initiated stricter safety policy to regulate the NPP. Second, there is a plant learning-by-doing and progress of technology. The increases from 1979 to 1981 was due to NRC required forced outrages for safety concern due to TMI accident.

library(ggplot2)
fig3 <- ggplot(data = aggregate(. ~ year, plantData, mean), aes(x = year, y = num.forced.out)) + 
    geom_line() + xlab("Figure 3. Trend in mean number of forced outages per reactor-month") + 
    ylab("Mean number of Forced outages per month")
fig3

plot of chunk unnamed-chunk-3

Figure 4-replication

Replicated Figure 4 is almost identical to Rust & Rothwell (1995) for the pre-TMI period. While the trend for post-TMI has similar pattern as Rust and Rothwell (1995), the magnitude of this availiablity factor is lower. However, this difference is not significant and should not result in big difference in estimation.

fig4 <- ggplot(data = aggregate(percent.time.operating ~ year, df, mean), aes(x = year, 
    y = percent.time.operating)) + geom_line() + xlab("Figure 4. Trend in NPP availability factors") + 
    ylab("Mean availiablity Factor")
fig4

plot of chunk unnamed-chunk-4

Figure 7-replication

For the top panel of of Figure 7, the trend is almost consistent with Rust & Rothwell 1995. However, the level from “1975 to 1978” is lower, and level for “1979 to 1984” are higher than Rust & Rothwell 1995. But overall, the replicated figure is almost consistent with Rust & Rothwell (except 1975-1978).

However, for the bottom panel of Figure 7, replicated “pecent of time spent in Forced Outages” replicated for “1985-1990” are much higher than that in Rust and Rothwell (1995). This is a strange observation because Rust and Rothwell observe that there is a steady decrease of forced outage post-TMI accident.

This unusually high forced outage in our data is likely to increase the transitional probablity of observing forced outages and thus resulting in the difference of our parameter estimation. We will see this effect later in the estimation part.

fig7a <- ggplot(data = aggregate(percent.time.planned.out ~ year, df, mean), 
    aes(x = year, y = percent.time.planned.out)) + geom_line() + xlab("Figure 7a. Trend in Planned Outage Rate Between Refuelings") + 
    ylab("percent of time spent in planned outages")

fig7a

plot of chunk unnamed-chunk-5


fig7b <- ggplot(data = aggregate(percent.time.forced.out ~ year, df, mean), 
    aes(x = year, y = percent.time.forced.out)) + geom_line() + xlab("Figure 7b. trend in time lost from forced outages between refueling") + 
    ylab("percent of time spent in forced outages")
fig7b

plot of chunk unnamed-chunk-5

##################################################################################################################################################

Problem 3 Estimate transitional probabilities

## censor durations at 24 months (unclear what R&R did. Also unclear why we
## have some very long durations.)  censor durations at 24 months (unclear
## what R&R did. Also unclear why we have some very long durations.)
plantData$dur.cens <- plantData$duration
max.duration <- 24
plantData$dur.cens[plantData$dur.cens > max.duration] <- max.duration

Estimate P (forced outage | duration)

pof.glm <- glm((npp.signal == "forced.outage") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, npp.signal != "cont.refuel"))
# plot estimated probability
md <- max(subset(plantData, npp.signal != "cont.refuel")$dur.cens)
df <- data.frame(dur.cens = 1:md)
tmp <- predict(pof.glm, newdata = df, type = "response", se.fit = TRUE)
df$p <- tmp$fit
df$se.p <- tmp$se.fit
df$lo <- df$p - df$se.p * 1.96
df$hi <- df$p + df$se.p * 1.96
fig10.of <- ggplot(data = df, aes(x = dur.cens, y = p)) + geom_line() + geom_line(aes(y = lo), 
    linetype = "dashed") + geom_line(aes(y = hi), linetype = "dashed")
rm(df, tmp)

Problem 3.1. estimate P(refueling ends | duration)

pro.glm <- glm( (npp.signal!="cont.refuel") ~
                  #dur.cens,
                  I(dur.cens==1) +
                  I(dur.cens==2) +
                  I(dur.cens==3) +
                  I(dur.cens==4) +
                  I(dur.cens==5) +
                  I(dur.cens==6) +
                  I((dur.cens>=7)*(dur.cens-6)),
                family=binomial(link=logit) ,
                data=subset(plantData,spell.type=="refueling"))
# plot estimated probability
md <- max(subset(plantData,spell.type=="refueling")$dur.cens)
df <- data.frame(dur.cens=1:md)
tmp <- predict(pro.glm,newdata=df,type="response", se.fit=TRUE)
df$p <- tmp$fit
df$se.p <- tmp$se.fit
df$lo <- df$p - df$se.p*1.96
df$hi <- df$p + df$se.p*1.96
fig10.ro <- ggplot(data=df, aes(x=dur.cens, y=p)) +
  geom_line() + geom_line(aes(y=lo),linetype="dashed") +
  geom_line(aes(y=hi),linetype="dashed")
rm(df,tmp)

Estimate the transitional Probablity

# create function that given array of states (or a single state) returns
# predicted probabilities
pof.fn <- function(state) {
    p <- predict(pof.glm, newdata = state, type = "response")
    p[state$npp.signal == "cont.refuel"] <- 0
    p[state$spell.type == "exit"] <- 0
    return(p)
}
pro.fn <- function(state) {
    p <- predict(pro.glm, newdata = state, type = "response")
    p[state$spell.type != "refueling"] <- 0
    return(p)
}


action.vars <- c("action")
state.vars <- c("spell.type", "npp.signal", "dur.cens")
# Find out how many states and actions there are
n.states <- prod(apply(plantData[, state.vars], 2, function(x) {
    sum(!is.na(unique(x)))
}))
n.actions <- sum(!is.na(unique(plantData[, action.vars])))

# Given vector of variables that define a state, create a function that
# returns an index for each unique combination of them, and a function that
# given an index returns a state vector.  The two resulting functions are
# inverses of one another
vector.index.converter <- function(data, state.vars) {
    nv <- rep(NA, length(state.vars))
    state.levels <- list()
    for (s in 1:length(state.vars)) {
        state.levels[[s]] <- sort(unique(data[, state.vars[s]]))
        nv[s] <- length(state.levels[[s]])
    }
    si <- function(state) {
        stopifnot(length(state) == length(nv))
        sn <- as.numeric(state)
        fac <- 1
        index <- 0
        for (i in 1:length(nv)) {
            index <- index + (sn[i] - 1) * fac
            fac <- fac * nv[i]
        }
        return(index + 1)
    }

    sv <- function(index) {
        stopifnot(index <= n.states)
        state <- data[1, state.vars]
        li <- rep(NA, length(state.vars))
        fac <- prod(nv)
        index <- index - 1
        for (i in 1:length(nv)) {
            li[i] <- index%%nv[i] + 1
            index <- index%/%nv[i]
            state[[i]] <- state.levels[[i]][li[i]]
        }
        state[[1]] <- state.levels[[1]][li[1]]
        return(state)
    }
    return(list(index = si, vector = sv))
}

state.fn <- vector.index.converter(plantData, state.vars)
action.fn <- vector.index.converter(plantData, action.vars)
for (s in 1:n.states) {
    stopifnot(state.fn$index(state.fn$vector(s)) == s)
}

# Given functions pof and pro return an X by X by A array of transition
# probabilities. pof(state) is the probability of receiving a forced outage
# signal. pro(state) is the probability of not receiving a cont.refuel
# signal while refueling.
transition.prob <- function(pof, pro) {
    P <- array(0, dim = c(n.states, n.states, n.actions))
    state <- plantData[1, state.vars]
    action <- plantData$action[1]
    for (i in 1:n.states) {
        old.state <- state.fn$vector(i)
        new.state <- old.state

        # choose to exit
        new.state$spell.type <- factor("exit", levels = levels(old.state$spell.type))
        action <- factor("exit", levels = levels(action))
        ia <- action.fn$index(action)
        P[state.fn$index(new.state), i, ia] <- 1

        if (old.state$spell.type != "exit") {
            # choose to refuel
            if (old.state$spell.type == "refueling") {
                new.state$dur.cens <- min(old.state$dur.cens + 1, max.duration)
            } else {
                new.state$dur.cens <- 1
            }
            new.state$spell.type <- factor("refueling", levels = levels(old.state$spell.type))
            action <- factor("refuel", levels = levels(action))
            ia <- action.fn$index(action)
            new.state$npp.signal <- factor("none", levels = levels(old.state$npp.signal))
            P[state.fn$index(new.state), i, ia] <- pro(old.state) * (1 - pof(old.state))
            new.state$npp.signal <- factor("forced.outage", levels = levels(old.state$npp.signal))
            P[state.fn$index(new.state), i, ia] <- pro(old.state) * pof(old.state)
            new.state$npp.signal <- factor("cont.refuel", levels = levels(old.state$npp.signal))
            P[state.fn$index(new.state), i, ia] <- (1 - pro(old.state))

            # choose to operate
            if (old.state$npp.signal != "cont.refuel") {
                new.state <- old.state
                new.state$spell.type <- factor("operating", levels = levels(old.state$spell.type))
                if (old.state$spell.type == "refueling") {
                  new.state$dur.cens <- 1
                } else {
                  new.state$dur.cens <- min(old.state$dur.cens + 1, max.duration)
                }
                for (a in c("shutdown", "run1.25", "run26.50", "run51.75", "run76.99", 
                  "run100")) {
                  a <- factor(a, levels = levels(action))
                  ia <- action.fn$index(a)
                  new.state$npp.signal <- factor("none", levels = levels(old.state$npp.signal))
                  P[state.fn$index(new.state), i, ia] <- (1 - pof(old.state))
                  new.state$npp.signal <- factor("forced.outage", levels = levels(old.state$npp.signal))
                  P[state.fn$index(new.state), i, ia] <- pof(old.state)
                }
            }
        }
        cat(".")
        if (i%%50 == 0) 
            cat(" ", i, " of ", n.states, " transitions completed\n")
    }
    return(P)
}
P <- transition.prob(pof.fn, pro.fn)

## ..................................................  50  of  216  transitions completed
## ..................................................  100  of  216  transitions completed
## ..................................................  150  of  216  transitions completed
## ..................................................  200  of  216  transitions completed
## ................

################################################################################

Problem 4. Calculating the likelihood

feasible.action <- matrix(TRUE, nrow = n.states, ncol = n.actions)
for (s in 1:n.states) {
    state <- state.fn$vector(s)
    if (state$npp.signal == "cont.refuel") {
        feasible.action[s, ] <- FALSE
        feasible.action[s, 2] <- TRUE
    }
    if (state$spell.type == "exit") {
        feasible.action[s, ] <- FALSE
        feasible.action[s, 1] <- TRUE
    }
}


## flow utility
phi.names <- c("exit", "refuel", "f.r", "duration", "shutdown", "run1.25", "run26.50", 
    "run51.75", "run76.99", "run100", "f.s", "f.100")
# parameter estimates from Rust & Rothwell (1995)
phi.pre <- c(0, -1.82, -2.33, -0.05, -0.04, -1.82, -0.96, -0.15, 1.52, 2.93, 
    -4.03, -3.44)
names(phi.pre) <- phi.names
phi.post <- c(0, -3.44, -3.09, -0.06, -0.54, -2.12, -1.58, -0.74, 0.54, 2.93, 
    -4.04, -5.89)
names(phi.post) <- phi.names

flow.utility <- function(phi) {
    u <- matrix(0, nrow = n.states, ncol = n.actions)
    colnames(u) <- levels(plantData$action)
    if (is.null(names(phi))) 
        names(phi) <- phi.names
    for (a in c("exit", "refuel", "run1.25", "run26.50", "run51.75", "run76.99", 
        "run100")) {
        u[, a] <- phi[a]
    }
    for (s in 1:n.states) {
        state <- state.fn$vector(s)
        if (state$spell.type == "exit") {
            u[s, ] <- phi["exit"]
        } else {
            if (state$spell.type == "operating") {
                for (a in c("run1.25", "run26.50", "run51.75", "run76.99", "run100")) {
                  u[s, a] <- u[s, a] + state$dur.cens * phi["duration"]
                }
            }
            if (state$npp.signal == "forced.outage") {
                u[s, "refuel"] <- u[s, "refuel"] + phi["f.r"]
                u[s, "shutdown"] <- u[s, "shutdown"] + phi["f.s"]
                u[s, "run100"] <- u[s, "run100"] + phi["f.100"]
            }
        }
    }
    return(u)
}

## Code for estimating a dynamic discrete decision problem Each period agents
## choose a \in {1, ..., A}.  There is a state x \in {1, ..., X }.  Flow
## utility is u(x,a) + \epsilon(a), where e(a) ~ Type-I extreme value x
## follows a controlled Markov process with transition probability P[newx,
## oldx, a] u is represented by a X by A matrix

## Calculate choice specific value function v(t,x,a) = u(x,a) +
## beta*E[max_{a'} v(t+1,x',a') + e(a') | x, a]
choice.value <- function(u, discount, p.x, T) {
    v <- array(dim = c(T, nrow(u), ncol(u)))
    v[T, , ] <- u[, ]
    vtp1 <- v[T, , ]
    for (t in (T - 1):1) {
        for (a in 1:ncol(u)) {
            v[t, , a] <- u[, a] + discount * log(rowSums(exp(vtp1) * feasible.action)) %*% 
                p.x[, , a]
        }
        vtp1 <- v[t, , ]
    }
    return(v)
}


discount <- 0.999  # monthly discount factor

Problem 4.1 Figures 13 & 14 replication to check the sensibility of choice specific value

From the replicated Figure 13, the estimated choice specific value pattern for action “u0”, “u13”, “u88” and “u100” are almost consistent with Rust & Rothwell. However, there is one fundenmental difference from that in Rust & Rothwell (1995). It is noted that the refuel line does not cross the other action lines. This implies that the planned optimal strategy under our choice value is to always run 100% to maximum operating duration (24 months). While this result is not intuitive because Rust & Routhwell (1995) found the “refuel” line crosses “run100” line at 11 months for pre-TMI period when t=366 and f=0.

There are many reasons to cause this inconsistence in our replication.

(1) To simplify our estimation, we did not distinguish pre or post-TMI accident, and we did not consider monthly dummies for seasonal variation concern. This may cause fundemental difference of our replicated results.
(2) Our data set might not be the same as the one used by Rust & Routhwell. As it was noticed there are some very long spells. For “exit”, the duration can be as long as over “100 months”. To reduce the space for estimation, the maximum duration was set as 24 months. But it is unclear how Rust & Rothwell (1995) dealt with this problem. This may cause the estimation difference.
(3) As noticed earlier, it was observed that the forced outage between “1980-1985” is unusually high. This may cause our transitional probability is quite different from Rust & Rothwell 1995.
(4) When estimating the transitional probability, for example for P(refueling ends|duration) we consider duration >= 7 months as one single category. It is unclear how Rust & Rothwell specifiy this estimation.

For the replicated Figure 14, our result is more consistent to the pre-TIM period in Rust & Rothwell 1995. The choice value decreases with the age of the NPP, and the choice values are similar among different actions. We did not observe the concave shape for post-TMI period. This shows that our dynamic programming result does not reflect significant impact of the TMI accident on NPP decision making.

u <- flow.utility(phi.post)
T <- 480
v <- choice.value(u, discount, P, T)
df <- data.frame(dur.cens = 1:max.duration, refuel = NA, u0 = NA, u13 = NA, 
    u88 = NA, u100 = NA)
state <- state.fn$vector(1)
state$npp.signal <- factor("none", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
t <- 366
dimnames(v)[[3]] <- levels(plantData$action)  # so we can index by action name
for (d in df$dur.cens) {
    state$dur.cens <- d
    si <- state.fn$index(state)
    df$refuel[d] <- v[t, si, "refuel"]
    df$u0[d] <- v[t, si, "shutdown"]
    df$u13[d] <- v[t, si, "run1.25"]
    df$u88[d] <- v[t, si, "run76.99"]
    df$u100[d] <- v[t, si, "run100"]
}
df <- melt(df, id.vars = "dur.cens")
fig13 <- ggplot(data = df, aes(x = dur.cens, y = value, colour = variable)) + 
    geom_line() + theme_bw()
fig13

plot of chunk unnamed-chunk-11


df <- data.frame(age = 1:480, refuel = NA, u0 = NA, u13 = NA, u88 = NA, u100 = NA, 
    exit = NA)
state <- state.fn$vector(1)
state$npp.signal <- factor("none", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
state$dur.cens <- 5
si <- state.fn$index(state)
df$refuel <- v[, si, "refuel"]
df$u0 <- v[, si, "shutdown"]
df$u13 <- v[, si, "run1.25"]
df$u88 <- v[, si, "run76.99"]
df$u100 <- v[, si, "run100"]
df$exit <- v[, si, "exit"]
df <- melt(df, id.vars = "age")
fig14 <- ggplot(data = df, aes(x = age, y = value, colour = variable)) + geom_line() + 
    theme_bw()
fig14

plot of chunk unnamed-chunk-11

Problem 4.2 and 4.3 Speed of the likelihood function calculation and its bottleneck

The speed to calculate the likelihood is critical because we need to iterate this likelihood cacluation many times (sometimes over 1000 times) to do optimization. However, the R code to calculate the likelihood needs several seconds, making the optimization to be very slow.

The report shows that calculation of “Choice.value” accounted for the largest proportion of the time when cacluating the likelihood. It took “5.90” seconds, equal to “79.09%” of the total time. Using the C++ code, calculation speed increases a lot for the choice value. The choice value calculation using C+++ code (“ChoiceValue”) took only “0.48” seconds. This may make our optimization feasible. And the likelihood.cpp only took 0.94 seconds.

The likelihood estimated by R code and C++ code is the same as “-29264.91” when we use the estimated post-TMI parameters from Rust and Rothwell (1995).

Faster likelihood calcualtion using C++ version

Because I cannot call the Rcpp package under this markdown file, I cannot run the likelihood.cpp function and also cannot show the optimization for Question 5.1.

I therefore, report the the code and results seperately for Problem 4.2, 4.3 and 5.1 sperately.


library(Rcpp)

## Warning: package 'Rcpp' was built under R version 3.0.3

Sys.setenv(PKG_CXXFLAGS = "-O3")
sourceCpp("choiceValue.cpp", showOutput = TRUE, verbose = TRUE, rebuild = TRUE)

## 
## Generated extern "C" functions 
## --------------------------------------------------------
## 
## 
## #include <Rcpp.h>
## 
## RcppExport SEXP sourceCpp_31703_choiceValue(SEXP uSEXP, SEXP feasibleSEXP, SEXP discountSEXP, SEXP pSEXP, SEXP TSEXP) {
## BEGIN_RCPP
##     SEXP __sexp_result;
##     {
##         Rcpp::RNGScope __rngScope;
##         Rcpp::traits::input_parameter< NumericMatrix >::type u(uSEXP );
##         Rcpp::traits::input_parameter< IntegerMatrix >::type feasible(feasibleSEXP );
##         Rcpp::traits::input_parameter< double >::type discount(discountSEXP );
##         Rcpp::traits::input_parameter< NumericVector >::type p(pSEXP );
##         Rcpp::traits::input_parameter< int >::type T(TSEXP );
##         NumericVector __result = choiceValue(u, feasible, discount, p, T);
##         PROTECT(__sexp_result = Rcpp::wrap(__result));
##     }
##     UNPROTECT(1);
##     return __sexp_result;
## END_RCPP
## }
## 
## Generated R functions 
## -------------------------------------------------------
## 
## `.sourceCpp_31703_DLLInfo` <- dyn.load('C:/Users/WANGKU~1/AppData/Local/Temp/RtmpqUUM2N/sourcecpp_18bc60fc5657/sourceCpp_31949.dll')
## 
## choiceValue <- Rcpp:::sourceCppFunction(function(u, feasible, discount, p, T) {}, FALSE, `.sourceCpp_31703_DLLInfo`, 'sourceCpp_31703_choiceValue')
## 
## rm(`.sourceCpp_31703_DLLInfo`)
## 
## Building shared library
## --------------------------------------------------------
## 
## DIR: C:/Users/WANGKU~1/AppData/Local/Temp/RtmpqUUM2N/sourcecpp_18bc60fc5657
## 
## C:/PROGRA~1/R/R-30~1.2/bin/x64/R CMD SHLIB -o "sourceCpp_31949.dll" --preclean "choiceValue.cpp"

## Error: Error 1 occurred building shared library.

likelihood.cpp <- function(phi) {
    u <- flow.utility(phi)
    v <- choiceValue(u, feasible.action, discount, P, 480)  # only difference
    sumExpV <- matrix(NA, nrow = dim(v)[1], ncol = dim(v)[2])  # T by S
    for (t in 1:dim(v)[1]) {
        sumExpV[t, ] <- rowSums(exp(v[t, , ]) * feasible.action)
    }
    ccp <- exp(v[cbind(plantData$age, plantData$state.index, plantData$action.index)])/sumExpV[cbind(plantData$age, 
        plantData$state.index)]
    return(sum(log(ccp), na.rm = TRUE))
}

save(list = ls(), file = "tmp.Rdata")  # in case we crash R with buggy c++ code

################################################################################

Problem 5. Estimation

First, the unknown parameters are estimated using “nloptr” package with “NLOPT_LN_NELDERMEAD” algorithm.

The estimation result has some distinct differences compared to Rust and Rothwell (1995). This is mainly due to the reasons we discussd in Problem 4.1 that we combined pre and post-TMI periods, and we used different parametric form to estimate transitional probability etc. Therefore, the difference observed in our replication is understandable.

The major differences in replicated estimation are

(1) For estimated “refuel” cost, our estimated value is “-0.368”, which is much lower than “-1.82” of Rust and Rothwell for pre-TMI and “-3.44” for post-TMI.
(2) For estimated “shutdown” cost, our estimated value is “2.244”, while it is “-0.04” from Rust and Rothwell for pre-TMI and “-0.54” post-TMI period.
(3) For estimated “run51.75” profit, our estimated value is “2.14”, while it is “-0.15” from Rust and Rothwell for pre-TMI and “-0.74” for post-TMI.

I do not have clear explanation on what exactly caused the estimation difference. But I suspect that first, there are more “forced outages” in our data from 1985 to 1990 as I mentioned in Figure 7 replication. This can cause more shutdown and refuel in response, thus making the corresponding cost estimation for “refuel” and “shutdown” to have a higher benefit under dynamic programming perspective.

But, another reason is that the estimation is sensitive to the optimization methods used (as will be shown in Problem 5.1). Rust & Rothwell may adopt different estimation strategies. Therefore, we need to try out some other methods to test the sensitivity of our estimation.

Problem 5.1 Test the sensitivity of the estimation by using different initial value and different estimation methods

In order to test the sensitivity of the estimation, first I used initial value as phi.post “post-TMI” from Rust & Rothwell (1995), but still using the “nloptr” with “NLOPT_LN_NELDERMEAD” algorithm. The estimation result is almost identical. So, I do not think that the estimation result is sensitive to initial values chosen. To save space, I did not report this result.

However, our estimation is quite sensitive to the optimization methods adopted.I first tried other algoritims under “nloptr” packages, including “NLOPT_LN_BOBYQA” and “NLOPT_LN_SBPLX”. In addition, I also adopted “nlm” package using a Newton-type algorithm to do the estimation.

(1). For the “NLOPT_LN_BOBYQA” algorithm, it peforms derevative-free, bound-constrained optimization using an iteratively constructed quadratic approximation. This algoritm results in much similar results as that in Rust & Rothwell (1995). The “refuel” cost is “-1.633”, “shutdown” cost is “-0.025” and “run51.75” is “-0.15”. These estimates are very close to Rust & Rothwell (1995). But it should be noted that the likelihood is -22190.61, which is smaller than that of “NLOPT_LN_NELDERMEAD”.
(2). For the “NLOPT_LN_SBPLX” algoritm, this is a re-implementation of Tom Rowan's “subplex” algorithm. The estimation results are quite close to “NLOPT_LN_NELDERMEAD”. We still observed quite different “refuel”, “shutdown” and “run51.75” estimates compared to Rust & Rothwell.
(3) Last, I adopted “nlm” package which uses “Newton-type” algorithm to do the estimation. The estimation results are somewhat different from the “nloptr” packages' algorithm. But both the “refuel” and “shutdown” have negative cost. The sign of the estimates are almost consistent with Rust & Rothwell (1995), but the magnitude varies. This might be understandable because the use of somewhat different data and transitional probablity.

Therefore, to summary, the estimation can be sensitive to the optimization methods. “NLOPT_LN_SBPLX” and “NLOPT_LN_NELDERMEAD” performs well in terms of their estimated likelihood (-16155.64 and -8422.76). But “NLOPT_LN_BOBYQA” and “nlm” produce more similar estimates with Rust & Rothwell.

Problem 5.2 Figure 11 and 12 Replication

Figure 11 shows probability of refueling by duration of operating spell.My replicated Figure 11 is different from that in Rust and Rothwell (1995), especially for the dynamic programming estimated probability. The NP-f0 (non-dynamic programming when signal is none) and NP-f1 (non-dynamic programming when signal is forced outage) are comparable to Rust& Rothwell. When operating duration is small (below 10 month), the probablity to initiate a refuelling is very low. However, the refuelling probablity increases fast when operating duration increases but drops again when duration is very long (larger than 20 months).

However, for the dynamic programming estimates (both DP-f0 and DP-f1), the probablity to initiate the refuelling is very low and close to 0. This is a very strange pattern observed. But it can be explained by my replicated Figure 13. The replicated Figure 13 indicates that from the dynamic programming perspective, the optimal operating duration is to the maximum duration because the refuel choice value line is always below the operating lines. Coming back to this replicated Figure 11, the probability to initial refueling is thus close to 0. The reason to cause this strange result can be our data inconsistency, combination of pre and post-TIM periods and the different method to caclulate the transitional probability.

Remark: when calculating the refuelling probability using dynamic programming parameters, I used t=366 as the age for NPP. I do not know how Rust& Rothwell did, and it seemed that they calculate the average for different NPP ages.

# Estimate of NP_f1
NP_f1.glm <- glm((action == "refuel") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df <- data.frame(dur.cens = 1:md)
tmp <- predict(NP_f1.glm, newdata = df, type = "response", se.fit = TRUE)
df$NP_f1 <- tmp$fit

# Estimate of DP_f1
u <- flow.utility(phi.post)
v <- choice.value(u, discount, P, 480)
df1 <- data.frame(dur.cens = 1:24, refuel = NA)
t <- 366
dimnames(v)[[3]] <- levels(plantData$action)
state <- state.fn$vector(1)
state$npp.signal <- factor("forced.outage", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
for (d in 1:24) {
    state$dur.cens <- d
    si <- state.fn$index(state)
    sumExpV <- matrix(NA, nrow = dim(v)[1], ncol = dim(v)[2])  # T by S
    sumExpV[t, ] <- rowSums(exp(v[t, , ]) * feasible.action)
    df1$refuel[d] <- exp(v[t, si, "refuel"])/sumExpV[t, si]
}

df$DP_f1 <- df1$refuel

# Estimate of NP_f0
NP_f0.glm <- glm((action == "refuel") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df2 <- data.frame(dur.cens = 1:md)
tmp2 <- predict(NP_f0.glm, newdata = df2, type = "response", se.fit = TRUE)
df$NP_f0 <- tmp2$fit

# Estimate of DP_f0
u <- flow.utility(phi.post)
v <- choice.value(u, discount, P, 480)
df3 <- data.frame(dur.cens = 1:24, refuel = NA)
t <- 366
dimnames(v)[[3]] <- levels(plantData$action)
state <- state.fn$vector(1)
state$npp.signal <- factor("none", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
for (d in 1:24) {
    state$dur.cens <- d
    si <- state.fn$index(state)
    sumExpV <- matrix(NA, nrow = dim(v)[1], ncol = dim(v)[2])  # T by S
    sumExpV[t, ] <- rowSums(exp(v[t, , ]) * feasible.action)
    df3$refuel[d] <- exp(v[t, si, "refuel"])/sumExpV[t, si]
}
df$DP_f0 <- df3$refuel
df <- melt(df, id.vars = "dur.cens")
fig11 <- ggplot(data = df, aes(dur.cens, value, colour = variable)) + geom_line() + 
    theme_bw() + xlab("Figure 11 Probablity of refueling by duration since last refueling") + 
    ylab("Probablity of initiating Refueling")
fig11

plot of chunk unnamed-chunk-14

Figure 12 shows the NPP availability factor as function of duration since the last refueling. My replicated result has similar pattern as that in Rust& Rothwell. Overall, the availability factor decreases with the operating spell durations, and the NPP availability factor when signal is “none” is higher than that when the signal is “forced outage”.

Sepcficially, my estimated pattern for non-dynamic NP-f1 and NP-f0 is very comparable to Rust & Rothwell (1995). However, for the DP (dynamic programming) estimates, the availability factor decreases much slower than NP estimates, especially for DP-f1. This is different from Rust & Rothwell where the DP demonstrates similar decreasing pattern as NP. This slowly decaying DP estimates is consistent with the replicated Figure 11 and Figure 13, where the refueling probablity does not increase with duration of operating spells. Thus the availability factor does not decreases significantly with operating durations for our DP estimates.

# Availability Factor-NP_f1 'run100'
NP_run100_f1.glm <- glm((action == "run100") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df <- data.frame(dur.cens = 1:md)
tmp <- predict(NP_run100_f1.glm, newdata = df, type = "response", se.fit = TRUE)
df$NP_run100_f1 <- tmp$fit

#'run76.99'
NP_run76.99_f1.glm <- glm((action == "run76.99") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df1 <- data.frame(dur.cens = 1:md)
tmp1 <- predict(NP_run76.99_f1.glm, newdata = df1, type = "response", se.fit = TRUE)
df$NP_run76.99_f1 <- tmp1$fit

# 'run51.75'
NP_run51.75_f1.glm <- glm((action == "run51.75") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df2 <- data.frame(dur.cens = 1:md)
tmp2 <- predict(NP_run51.75_f1.glm, newdata = df2, type = "response", se.fit = TRUE)
df$NP_run51.75_f1 <- tmp2$fit

#'run26.50'
NP_run26.50_f1.glm <- glm((action == "run26.50") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df3 <- data.frame(dur.cens = 1:md)
tmp3 <- predict(NP_run26.50_f1.glm, newdata = df3, type = "response", se.fit = TRUE)
df$NP_run26.50_f1 <- tmp3$fit

"run1.25"

## [1] "run1.25"

NP_run1.25_f1.glm <- glm((action == "run1.25") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "forced.outage"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df4 <- data.frame(dur.cens = 1:md)
tmp4 <- predict(NP_run1.25_f1.glm, newdata = df4, type = "response", se.fit = TRUE)
df$NP_run1.25_f1 <- tmp4$fit

df$NP_f1 <- 100 * df$NP_run100_f1 + 88 * df$NP_run76.99_f1 + 63 * df$NP_run51.75_f1 + 
    38 * df$NP_run26.50_f1 + 13 * df$NP_run1.25_f1
rm(df1, df2, df3, df4)


# Availability Factor-NP_f0 'run100'
NP_run100_f0.glm <- glm((action == "run100") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df5 <- data.frame(dur.cens = 1:md)
tmp5 <- predict(NP_run100_f0.glm, newdata = df5, type = "response", se.fit = TRUE)
df$NP_run100_f0 <- tmp5$fit

#'run76.99'
NP_run76.99_f0.glm <- glm((action == "run76.99") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df1 <- data.frame(dur.cens = 1:md)
tmp1 <- predict(NP_run76.99_f0.glm, newdata = df1, type = "response", se.fit = TRUE)
df$NP_run76.99_f0 <- tmp1$fit

# 'run51.75'
NP_run51.75_f0.glm <- glm((action == "run51.75") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df2 <- data.frame(dur.cens = 1:md)
tmp2 <- predict(NP_run51.75_f0.glm, newdata = df2, type = "response", se.fit = TRUE)
df$NP_run51.75_f0 <- tmp2$fit

#'run26.50'
NP_run26.50_f0.glm <- glm((action == "run26.50") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df3 <- data.frame(dur.cens = 1:md)
tmp3 <- predict(NP_run26.50_f0.glm, newdata = df3, type = "response", se.fit = TRUE)
df$NP_run26.50_f0 <- tmp3$fit

"run1.25"

## [1] "run1.25"

NP_run1.25_f0.glm <- glm((action == "run1.25") ~ as.factor(dur.cens), family = binomial(link = logit), 
    data = subset(plantData, spell.type == "operating" & npp.signal == "none"))
md <- max(subset(plantData, spell.type == "operating")$dur.cens)
df4 <- data.frame(dur.cens = 1:md)
tmp4 <- predict(NP_run1.25_f0.glm, newdata = df4, type = "response", se.fit = TRUE)
df$NP_run1.25_f0 <- tmp4$fit

df$NP_f0 <- 100 * df$NP_run100_f0 + 88 * df$NP_run76.99_f0 + 63 * df$NP_run51.75_f0 + 
    38 * df$NP_run26.50_f0 + 13 * df$NP_run1.25_f0
rm(df1, df2, df3, df4)

# Availability Factor-DP_f1
u <- flow.utility(phi.post)
v <- choice.value(u, discount, P, 480)
df1 <- data.frame(dur.cens = 1:24, refuel = NA)
t <- 366
dimnames(v)[[3]] <- levels(plantData$action)
state <- state.fn$vector(1)
state$npp.signal <- factor("forced.outage", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
for (d in 1:24) {
    state$dur.cens <- d
    si <- state.fn$index(state)
    sumExpV <- matrix(NA, nrow = dim(v)[1], ncol = dim(v)[2])  # T by S
    sumExpV[t, ] <- rowSums(exp(v[t, , ]) * feasible.action)
    df1$run100[d] <- exp(v[t, si, "run100"])/sumExpV[t, si]
    df1$run76.99[d] <- exp(v[t, si, "run76.99"])/sumExpV[t, si]
    df1$run51.75[d] <- exp(v[t, si, "run51.75"])/sumExpV[t, si]
    df1$run26.50[d] <- exp(v[t, si, "run26.50"])/sumExpV[t, si]
    df1$run1.25[d] <- exp(v[t, si, "run1.25"])/sumExpV[t, si]
    df1$DP_f1_avail[d] <- 100 * df1$run100[d] + 88 * df1$run76.99[d] + 63 * 
        df1$run51.75[d] + 38 * df1$run26.50[d] + 13 * df1$run1.25[d]
}

df$DP_f1 <- df1$DP_f1_avail
rm(df1)

# Availability Factor-DP_f0
u <- flow.utility(phi.post)
v <- choice.value(u, discount, P, 480)
df1 <- data.frame(dur.cens = 1:24, refuel = NA)
t <- 366
dimnames(v)[[3]] <- levels(plantData$action)
state <- state.fn$vector(1)
state$npp.signal <- factor("none", levels = levels(state$npp.signal))
state$spell.type <- factor("operating", levels = levels(state$spell.type))
for (d in 1:24) {
    state$dur.cens <- d
    si <- state.fn$index(state)
    sumExpV <- matrix(NA, nrow = dim(v)[1], ncol = dim(v)[2])  # T by S
    sumExpV[t, ] <- rowSums(exp(v[t, , ]) * feasible.action)
    df1$run100[d] <- exp(v[t, si, "run100"])/sumExpV[t, si]
    df1$run76.99[d] <- exp(v[t, si, "run76.99"])/sumExpV[t, si]
    df1$run51.75[d] <- exp(v[t, si, "run51.75"])/sumExpV[t, si]
    df1$run26.50[d] <- exp(v[t, si, "run26.50"])/sumExpV[t, si]
    df1$run1.25[d] <- exp(v[t, si, "run1.25"])/sumExpV[t, si]
    df1$DP_f0_avail[d] <- 100 * df1$run100[d] + 88 * df1$run76.99[d] + 63 * 
        df1$run51.75[d] + 38 * df1$run26.50[d] + 13 * df1$run1.25[d]
}
df$DP_f0 <- df1$DP_f0_avail
dfa <- data.frame(df$dur.cens, df$NP_f1, df$NP_f0, df$DP_f0, df$DP_f1)
dfa <- melt(dfa, id.vars = "df.dur.cens")
fig12 <- ggplot(data = dfa, aes(x = df.dur.cens, y = value, colour = variable)) + 
    geom_line() + theme_bw() + xlab("Figure 12 Time since Last Refueling ") + 
    ylab("Availability Factor (%)")
fig12