Gap <- read.csv("Book1.csv")
attach(Gap)
names(Gap)
## [1] "Year"  "MPL"   "Wages"
load("USA_data.RData")
attach(USA_data)
W<-ts(Gap$Wages, start= 1947, frequency=4)
MPL<-ts(Gap$MPL, start= 1947, frequency=4)
ts.plot(W,MPL)

U<-(MPL-W)
lu<-log(W/MPL)
ts.plot(U)

length(U)
## [1] 310
Imp <- read.csv("M.csv")
Ex <- read.csv("X.csv")
names(Ex)
## [1] "observation_date" "EXPGSC1"
names(Imp)
## [1] "observation_date" "IMPGSC1"
X<-ts(Ex$EXPGSC1, start= 1947, frequency=4)
M<-ts(Imp$IMPGSC1, start= 1947, frequency=4)
B<-log(X/M)
ts.plot(B)

length(B)
## [1] 310
plot(B,U)

lm(U~B)
## 
## Call:
## lm(formula = U ~ B)
## 
## Coefficients:
## (Intercept)            B  
##       30.01      -148.17
du<-diff(lu)
t <- 1:length(U)
d<-lm(U~t)
m_trend <- lm(U ~ poly(t, 2))
length(du)
## [1] 309
ts.plot(du)

length(U)
## [1] 310
length(lu)
## [1] 310
library(car)
## Loading required package: carData
library(bootUR)
q <- adf(U, deterministics = "trend")
q
## 
##  Two-step ADF test (with trend) on a single time series
## 
## data: U
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##   estimate largest root statistic p-value
## U                 0.977    -2.402  0.3778
w <- adf(U, deterministics = "intercept")
w
## 
##  Two-step ADF test (with intercept) on a single time series
## 
## data: U
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##   estimate largest root statistic p-value
## U                 1.001    0.5903  0.9893
ts.plot(d$residuals)

## --- 1. Create time index
t <- 1:length(U)

## --- 2. Fit nonlinear deterministic trend: quadratic in time
mod_quad <- lm(U ~ poly(t, 2, raw = TRUE))

## Fitted nonlinear trend
trend_quad <- fitted(mod_quad)

## Residuals after removing nonlinear deterministic trend
resid_quad <- resid(mod_quad)

## --- 3. Plot series with fitted nonlinear trend
par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))

plot(t, U, type = "l",
     main = "Series u with quadratic deterministic trend",
     xlab = "time", ylab = "u")
lines(t, trend_quad, lwd = 2)

## --- 4. Plot residuals
plot(t, resid_quad, type = "l",
     main = "Residuals from u ~ quadratic(t)",
     xlab = "time", ylab = "residuals")
abline(h = 0, lty = 2)

 ## Nonparametric deterministic trend (smooth curve)
mod_loess <- loess(U ~ t, span = 0.3)  # adjust span for more/less smooth
trend_loess <- predict(mod_loess)

resid_loess <- U - trend_loess

par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))

plot(t, U, type = "l",
     main = "Series u with loess deterministic trend",
     xlab = "time", ylab = "U")
lines(t, trend_loess, lwd = 2)

plot(t, resid_loess, type = "l",
     main = "Residuals from u ~ loess(t)",
     xlab = "time", ylab = "residuals")
abline(h = 0, lty = 2)

## ===================== 1. DATA =====================

## Your series:
## U must be a numeric vector from 1948Q1 to 2025Q2
## U <- d$U

N <- length(U)

## ===================== 2. TIME INDEX =====================

## Build years and quarters from length(U)
idx <- 0:(N - 1)
yrs <- 1948 + idx %/% 4
qtr <- 1 + idx %% 4

## continuous time for plotting (e.g. 1975.25 = 1975Q2)
time_cont <- yrs + (qtr - 1) / 4

## ===================== 3. BREAK AT 1975Q1 =====================

Tb <- which(yrs == 1975 & qtr == 1)[1]   # index of 1975Q1

## ===================== 4. PRE-BREAK QUADRATIC TREND =====================

mod_pre <- lm(U[1:Tb] ~ poly(time_cont[1:Tb], 2, raw = TRUE))
trend_pre <- predict(mod_pre,
                     newdata = data.frame(time_cont = time_cont[1:Tb]))

## ===================== 5. POST-BREAK LINE THROUGH ENDPOINTS ==============

start_idx <- Tb + 1
end_idx   <- N

x1 <- time_cont[start_idx]
x2 <- time_cont[end_idx]
y1 <- U[start_idx]
y2 <- U[end_idx]

## slope and intercept of line through (x1,y1) and (x2,y2)
slope_post <- (y2 - y1) / (x2 - x1)
intercept_post <- y1 - slope_post * x1

trend_post <- intercept_post + slope_post * time_cont[start_idx:end_idx]

## choose width of channel (here: half of SD of deviations from this line)
dev_post   <- U[start_idx:end_idx] - trend_post
w_post     <- 0.5 * sd(dev_post)

upper_post <- trend_post + w_post
lower_post <- trend_post - w_post

## ===================== 6. PLOT =====================

par(mar = c(5, 5, 3, 2))

plot(time_cont, U, type = "l",
     xlab = "Time", ylab = "U",
     main = "U with deterministic trend and break at 1975Q1")

## pre-break curved trend
lines(time_cont[1:Tb], trend_pre, col = "red", lwd = 2)

## post-break straight trend (dashed)
lines(time_cont[start_idx:end_idx], trend_post,
      col = "red", lwd = 2, lty = 2)

## post-break parallel channel
lines(time_cont[start_idx:end_idx], upper_post, col = "red", lwd = 1)
lines(time_cont[start_idx:end_idx], lower_post, col = "red", lwd = 1)

## plot full series
plot(time_cont, U, type = "l",
     xlab = "Time", ylab = "U")

## labels to show when clicking
labs <- paste0(yrs, " Q", qtr)

## click (for example) 4 points: 2 peaks, 2 dips
## after each click, R will print index and label in the console
idx_sel <- identify(time_cont, U,
                    labels = labs,
                    n = 4)

idx_sel
## integer(0)
yrs[idx_sel]
## numeric(0)
qtr[idx_sel]
## numeric(0)
U[idx_sel]
## numeric(0)
up1  <- idx_sel[1]   # first blue peak
up2  <- idx_sel[2]   # second blue peak
low1 <- idx_sel[3]   # first purple dip
low2 <- idx_sel[4]   # second purple dip
## ================== 0. DATA AND TIME INDEX ==================

## U : your series from 1948Q1 to 2025Q2
## U <- d$U

N   <- length(U)
idx <- 0:(N - 1)

yrs <- 1948 + idx %/% 4        # 1948, 1948, 1948, 1948, 1949, ...
qtr <- 1 + idx %% 4            # 1,2,3,4,1,2,...

time_cont <- yrs + (qtr - 1)/4 # 1948.00, 1948.25, ...

## break at 1975 Q1 (Nixon shock)
Tb <- which(yrs == 1975 & qtr == 1)[1]
start_idx <- Tb + 1
end_idx   <- N

## ================== 1. INDICES OF YOUR FOUR POINTS ==================

idx_1975Q4 <- which(yrs == 1975 & qtr == 4)
idx_1982Q3 <- which(yrs == 1982 & qtr == 3)
idx_2020Q2 <- which(yrs == 2020 & qtr == 2)
idx_2021Q4 <- which(yrs == 2021 & qtr == 4)

## upper (blue) peaks
up1  <- idx_1982Q3
up2  <- idx_2020Q2

## lower (purple) dips
low1 <- idx_1975Q4
low2 <- idx_2021Q4

## ================== 2. UPPER AND LOWER LINES ==================

## ---- upper line through 1982Q3 and 2020Q2 ----
x_up <- time_cont[c(up1, up2)]
y_up <- U[c(up1, up2)]

slope_up     <- (y_up[2] - y_up[1]) / (x_up[2] - x_up[1])
intercept_up <- y_up[1] - slope_up * x_up[1]

trend_up <- intercept_up + slope_up * time_cont[start_idx:end_idx]

## ---- lower line through 1976Q4 and 2021Q4 ----
x_low <- time_cont[c(low1, low2)]
y_low <- U[c(low1, low2)]

slope_low     <- (y_low[2] - y_low[1]) / (x_low[2] - x_low[1])
intercept_low <- y_low[1] - slope_low * x_low[1]

trend_low <- intercept_low + slope_low * time_cont[start_idx:end_idx]

## ================== 3. MIDDLE DETRENDING LINE (BISECTOR) ==================

slope_mid     <- (slope_up + slope_low) / 2
intercept_mid <- (intercept_up + intercept_low) / 2

trend_mid <- intercept_mid + slope_mid * time_cont[start_idx:end_idx]

## ================== 4. DETRENDED POST-BREAK SERIES ==================

U_post_det <- U[start_idx:end_idx] - trend_mid

## ================== 5. PLOTS ==================

par(mfrow = c(2,1), mar = c(4,4,2,1))

## (a) original with channel and midline
plot(time_cont, U, type = "l",
     xlab = "Time", ylab = "U",
     main = "Channel and midline (post-1975)")

lines(time_cont[start_idx:end_idx], trend_up,  col = "red", lwd = 1)
lines(time_cont[start_idx:end_idx], trend_low, col = "red", lwd = 1)
lines(time_cont[start_idx:end_idx], trend_mid, col = "red", lwd = 2, lty = 2)

points(time_cont[c(up1, up2, low1, low2)],
       U[c(up1, up2, low1, low2)],
       pch = 5, col = c("blue","blue","purple","purple"))

## (b) detrended post-break part
plot(time_cont[start_idx:end_idx], U_post_det, type = "l",
     xlab = "Time", ylab = "Detrended U",
     main = "Detrended series (post-1975, midline removed)")
abline(h = 0, lty = 2)

## ============================================================
## 0. DATA AND TIME INDEX (1948Q1 – 2025Q2)
## ============================================================

## U <- your series vector

N   <- length(U)
idx <- 0:(N - 1)

yrs <- 1948 + idx %/% 4
qtr <- 1 + idx %% 4

time_cont <- yrs + (qtr - 1)/4

## break at 1975 Q1
Tb <- which(yrs == 1975 & qtr == 1)[1]

start_pre  <- 1
end_pre    <- Tb

start_post <- Tb + 1
end_post   <- N


## ============================================================
## 1. PRE-BREAK NONLINEAR (QUADRATIC) DETERMINISTIC TREND
## ============================================================

mod_pre <- lm(U[start_pre:end_pre] ~ 
                poly(time_cont[start_pre:end_pre], 2, raw = TRUE))

trend_pre <- predict(mod_pre,
                     newdata = data.frame(
                       time_cont = time_cont[start_pre:end_pre]
                     ))

U_pre_det <- U[start_pre:end_pre] - trend_pre



## ============================================================
## 2. POST-BREAK CHANNEL (UPPER, LOWER, MIDLINE)
## ============================================================

## --- four identified points ---
idx_1975Q4 <- which(yrs == 1975 & qtr == 4)
idx_1982Q3 <- which(yrs == 1982 & qtr == 3)
idx_2020Q2 <- which(yrs == 2020 & qtr == 2)
idx_2021Q4 <- which(yrs == 2021 & qtr == 4)

## upper peaks
up1  <- idx_1982Q3
up2  <- idx_2020Q2

## lower dips
low1 <- idx_1975Q4
low2 <- idx_2021Q4


## --- upper line ---
x_up <- time_cont[c(up1, up2)]
y_up <- U[c(up1, up2)]

slope_up     <- (y_up[2] - y_up[1]) / (x_up[2] - x_up[1])
intercept_up <- y_up[1] - slope_up * x_up[1]

trend_up <- intercept_up + slope_up * time_cont[start_post:end_post]


## --- lower line ---
x_low <- time_cont[c(low1, low2)]
y_low <- U[c(low1, low2)]

slope_low     <- (y_low[2] - y_low[1]) / (x_low[2] - x_low[1])
intercept_low <- y_low[1] - slope_low * x_low[1]

trend_low <- intercept_low + slope_low * time_cont[start_post:end_post]


## --- midline (detrending trend) ---
slope_mid     <- (slope_up + slope_low) / 2
intercept_mid <- (intercept_up + intercept_low) / 2

trend_mid <- intercept_mid + slope_mid * time_cont[start_post:end_post]

U_post_det <- U[start_post:end_post] - trend_mid



## ============================================================
## 3. FULL COMBINED DETRENDING (PRE + POST)
## ============================================================

fitted_full <- c(trend_pre, trend_mid)

U_full_det <- U - fitted_full


## ============================================================
## 4. PLOTS
## ============================================================

par(mfrow = c(3,1), mar = c(4,4,2,1))

## --- pre-break trend ---
plot(time_cont[start_pre:end_pre], U[start_pre:end_pre], type = "l",
     xlab = "Time", ylab = "U",
     main = "Pre-break (nonlinear deterministic trend)")
lines(time_cont[start_pre:end_pre], trend_pre, col = "red", lwd = 2)


## --- post-break channel + midline ---
plot(time_cont[start_post:end_post], U[start_post:end_post], type = "l",
     xlab = "Time", ylab = "U",
     main = "Post-break channel and midline")

lines(time_cont[start_post:end_post], trend_up,  col = "red", lwd = 1)
lines(time_cont[start_post:end_post], trend_low, col = "red", lwd = 1)
lines(time_cont[start_post:end_post], trend_mid, col = "red", lwd = 2, lty = 2)

points(time_cont[c(up1, up2)],  U[c(up1, up2)],  pch = 19, col = "blue")
points(time_cont[c(low1, low2)],U[c(low1, low2)],pch = 19, col = "purple")


## --- full detrended series ---
plot(time_cont, U_full_det, type = "l",
     xlab = "Time", ylab = "Detrended U",
     main = "Full detrended series (curve pre-1975, midline post-1975)")
abline(h = 0, lty = 2)

## ================================
## 0. BUILD TIME INDEX + BREAK
## ================================

N   <- length(U)
idx <- 0:(N - 1)

yrs <- 1948 + idx %/% 4
qtr <- 1 + idx %% 4
time_cont <- yrs + (qtr - 1)/4

## break index at 1975 Q1
Tb <- which(yrs == 1975 & qtr == 1)[1]

start_pre <- 1
end_pre   <- Tb


## ================================
## 1. FIT NONLINEAR DETERMINISTIC TREND (QUADRATIC)
## ================================

mod_pre <- lm(U[start_pre:end_pre] ~ 
                poly(time_cont[start_pre:end_pre], 2, raw = TRUE))

trend_pre <- predict(mod_pre,
                     newdata = data.frame(time_cont = time_cont[start_pre:end_pre]))


## detrended pre-break series
U_pre_det <- U[start_pre:end_pre] - trend_pre


## ================================
## 2. PLOT DETRENDED PRE-BREAK SERIES ONLY
## ================================

plot(time_cont[start_pre:end_pre], U_pre_det, type = "l",
     xlab = "Time", ylab = "Detrended U",
     main = "Detrended series (Pre-1975 only)")
abline(h = 0, lty = 2)

library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(bootUR)

## ---------- ADF ----------
adf_pre  <- ur.df(U_pre_det,  type = "drift", lags = 4)
adf_post <- ur.df(U_post_det, type = "drift", lags = 4)

summary(adf_pre)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3745 -0.7256 -0.0287  0.7423  3.3417 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.01502    0.12358  -0.122  0.90351   
## z.lag.1     -0.19490    0.07360  -2.648  0.00943 **
## z.diff.lag1  0.04702    0.10442   0.450  0.65350   
## z.diff.lag2  0.25840    0.09649   2.678  0.00869 **
## z.diff.lag3 -0.20140    0.10028  -2.008  0.04736 * 
## z.diff.lag4 -0.11801    0.10166  -1.161  0.24853   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.256 on 98 degrees of freedom
## Multiple R-squared:  0.2151, Adjusted R-squared:  0.1751 
## F-statistic: 5.372 on 5 and 98 DF,  p-value: 0.0002086
## 
## 
## Value of test-statistic is: -2.6482 3.5219 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
summary(adf_post)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0788 -1.0679  0.0114  1.0334 10.8717 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.11463    0.12944  -0.886 0.376950    
## z.lag.1     -0.11152    0.03198  -3.487 0.000607 ***
## z.diff.lag1 -0.02724    0.07149  -0.381 0.703600    
## z.diff.lag2  0.04150    0.07195   0.577 0.564777    
## z.diff.lag3  0.08897    0.07194   1.237 0.217706    
## z.diff.lag4 -0.01881    0.07185  -0.262 0.793770    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.79 on 190 degrees of freedom
## Multiple R-squared:  0.07221,    Adjusted R-squared:  0.04779 
## F-statistic: 2.957 on 5 and 190 DF,  p-value: 0.01353
## 
## 
## Value of test-statistic is: -3.487 6.1491 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
## ---------- Ljung–Box ----------
Box.test(U_pre_det,  lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  U_pre_det
## X-squared = 188.06, df = 12, p-value < 2.2e-16
Box.test(U_post_det, lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  U_post_det
## X-squared = 725.69, df = 12, p-value < 2.2e-16
## ---------- Bootstrapped Union Test (correct for your version) ----------
boot_union(U_pre_det,  B = 999)
## Progress: |------------------| 
##           ********************
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_det
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##           estimate largest root statistic p-value
## U_pre_det                    NA   -0.6362  0.7437
boot_union(U_post_det, B = 999)
## Progress: |------------------| 
##           ********************
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_post_det
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##            estimate largest root statistic p-value
## U_post_det                    NA    -1.157 0.05005
## ===============================
##  PRE-BREAK DETRENDING COMPARISON
## ===============================

library(urca)
library(splines)

## ---------------------------
## 1. 3rd-degree polynomial
## ---------------------------

mod_pre_poly3 <- lm(U[start_pre:end_pre] ~ 
                    poly(time_cont[start_pre:end_pre], 3, raw = TRUE))

trend_pre_poly3 <- predict(mod_pre_poly3)
U_pre_det_poly3 <- U[start_pre:end_pre] - trend_pre_poly3


## ---------------------------
## 2. Spline detrending (df=4)
## ---------------------------

mod_pre_spline <- lm(U[start_pre:end_pre] ~ 
                     ns(time_cont[start_pre:end_pre], df = 4))

trend_pre_spline <- predict(mod_pre_spline)
U_pre_det_spline <- U[start_pre:end_pre] - trend_pre_spline


## ===============================
## 3. PLOT BOTH DETRENDED SERIES
## ===============================

par(mfrow = c(2,1), mar = c(4,4,2,1))

plot(time_cont[start_pre:end_pre], U_pre_det_poly3, type = "l",
     main = "Detrended Pre-Break Series (3rd-degree Polynomial)",
     xlab = "Time", ylab = "Residuals")
abline(h = 0, lty = 2)

plot(time_cont[start_pre:end_pre], U_pre_det_spline, type = "l",
     main = "Detrended Pre-Break Series (Spline, df = 4)",
     xlab = "Time", ylab = "Residuals")
abline(h = 0, lty = 2)

## ===============================
## 4. ADF TESTS
## ===============================

adf_poly3  <- ur.df(U_pre_det_poly3,  type = "drift", lags = 4)
adf_spline <- ur.df(U_pre_det_spline, type = "drift", lags = 4)

summary(adf_poly3)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3745 -0.7256 -0.0287  0.7423  3.3417 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.01502    0.12358  -0.122  0.90351   
## z.lag.1     -0.19490    0.07360  -2.648  0.00943 **
## z.diff.lag1  0.04702    0.10442   0.450  0.65350   
## z.diff.lag2  0.25840    0.09649   2.678  0.00869 **
## z.diff.lag3 -0.20140    0.10028  -2.008  0.04736 * 
## z.diff.lag4 -0.11801    0.10166  -1.161  0.24853   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.256 on 98 degrees of freedom
## Multiple R-squared:  0.2151, Adjusted R-squared:  0.1751 
## F-statistic: 5.372 on 5 and 98 DF,  p-value: 0.0002086
## 
## 
## Value of test-statistic is: -2.6482 3.5219 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
summary(adf_spline)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.90932 -0.60164 -0.06423  0.63978  2.59314 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02261    0.11761   0.192 0.847910    
## z.lag.1     -0.42343    0.10519  -4.025 0.000112 ***
## z.diff.lag1  0.16560    0.10986   1.507 0.134933    
## z.diff.lag2  0.34112    0.09807   3.478 0.000754 ***
## z.diff.lag3 -0.08416    0.10507  -0.801 0.425100    
## z.diff.lag4 -0.04008    0.10243  -0.391 0.696422    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.198 on 98 degrees of freedom
## Multiple R-squared:  0.2833, Adjusted R-squared:  0.2468 
## F-statistic: 7.749 on 5 and 98 DF,  p-value: 3.546e-06
## 
## 
## Value of test-statistic is: -4.0254 8.1512 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
## ===============================
## 5. BOOTSTRAP UNIT ROOT TEST
## ===============================

library(bootUR)

boot_union(U_pre_det_poly3,  B = 999)
## Progress: |------------------| 
##           ********************
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_det_poly3
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##                 estimate largest root statistic p-value
## U_pre_det_poly3                    NA   -0.6391  0.7508
boot_union(U_pre_det_spline, B = 999)
## Progress: |------------------| 
##           ********************
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_det_spline
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##                  estimate largest root statistic p-value
## U_pre_det_spline                    NA    -2.135       0
## ===========================
##   PRE-BREAK SEGMENT SETUP
## ===========================

start_pre <- 1
end_pre   <- Tb   # 1975 Q1 index

t_pre <- time_cont[start_pre:end_pre]
y_pre <- U[start_pre:end_pre]


## ===========================
## 1. QUADRATIC TREND (FIRST VERSION)
## ===========================

mod_quad <- lm(y_pre ~ poly(t_pre, 2, raw = TRUE))
trend_quad <- predict(mod_quad)
res_quad <- y_pre - trend_quad


## ===========================
## 2. 3rd-DEGREE POLYNOMIAL TREND
## ===========================

mod_poly3 <- lm(y_pre ~ poly(t_pre, 3, raw = TRUE))
trend_poly3 <- predict(mod_poly3)
res_poly3 <- y_pre - trend_poly3


## ===========================
## 3. SPLINE TREND (df = 4)
## ===========================

library(splines)
mod_spline <- lm(y_pre ~ ns(t_pre, df = 4))
trend_spline <- predict(mod_spline)
res_spline <- y_pre - trend_spline


## ===========================
##       SIDE-BY-SIDE PLOTS
## ===========================

par(mfrow = c(3,2), mar = c(4,4,2,1))

## ---- Quadratic ----
plot(t_pre, y_pre, type="l", main="Quadratic Trend (Original Fit)",
     xlab="Time", ylab="U (pre-break)")
lines(t_pre, trend_quad, col="red", lwd=2)

plot(t_pre, res_quad, type="l", main="Quadratic Residuals",
     xlab="Time", ylab="Detrended U")
abline(h=0, lty=2)

## ---- Polynomial(3) ----
plot(t_pre, y_pre, type="l", main="3rd Degree Polynomial Trend",
     xlab="Time", ylab="U (pre-break)")
lines(t_pre, trend_poly3, col="red", lwd=2)

plot(t_pre, res_poly3, type="l", main="Poly(3) Residuals",
     xlab="Time", ylab="Detrended U")
abline(h=0, lty=2)

## ---- Spline (df = 4) ----
plot(t_pre, y_pre, type="l", main="Spline Trend (df=4)",
     xlab="Time", ylab="U (pre-break)")
lines(t_pre, trend_spline, col="red", lwd=2)

plot(t_pre, res_spline, type="l", main="Spline Residuals",
     xlab="Time", ylab="Detrended U")
abline(h=0, lty=2)

## ==========================
##  SPLINE TREND FOR FULL SERIES
## ==========================

library(splines)

## choose df (flexibility); e.g. 5 or 6
df_all <- 5

## 1. fit spline trend to whole sample
mod_all_spline <- lm(U ~ ns(time_cont, df = df_all))

## 2. fitted trend over full sample
trend_all_spline <- fitted(mod_all_spline)

## 3. detrended series
U_all_det_spline <- U - trend_all_spline


## ==========================
##  PLOTS
## ==========================

par(mfrow = c(2,1), mar = c(4,4,2,1))

## (a) original series + spline trend
plot(time_cont, U, type = "l",
     xlab = "Time", ylab = "U",
     main = paste0("Full series with spline trend (df = ", df_all, ")"))
lines(time_cont, trend_all_spline, col = "red", lwd = 2)

## (b) detrended series
plot(time_cont, U_all_det_spline, type = "l",
     xlab = "Time", ylab = "Detrended U",
     main = "Detrended series (spline over full sample)")
abline(h = 0, lty = 2)

## =================================
##   POST-BREAK LINEAR DETRENDING
## =================================

Nend <- length(U)              # total number of observations

## correct post-break sample
U_post <- U[(Tb + 1):Nend]
t_post <- time_cont[(Tb + 1):Nend]

## 1. linear trend
lin_fit_post <- lm(U_post ~ t_post)
summary(lin_fit_post)
## 
## Call:
## lm(formula = U_post ~ t_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1983 -3.2474 -0.3645  2.7933 11.4727 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.381e+03  4.313e+01  -124.8   <2e-16 ***
## t_post       2.732e+00  2.156e-02   126.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.434 on 199 degrees of freedom
## Multiple R-squared:  0.9878, Adjusted R-squared:  0.9877 
## F-statistic: 1.606e+04 on 1 and 199 DF,  p-value: < 2.2e-16
## 2. residuals
post_resid <- resid(lin_fit_post)

plot(t_post, post_resid, type = "l",
     main = "Residuals from post-break linear trend",
     xlab = "Time", ylab = "Residuals")
abline(h = 0, lty = 2)

## 3. ADF on detrended residuals (no drift, no trend)
library(urca)
adf_post_resid <- ur.df(post_resid, type = "none", lags = 4)
summary(adf_post_resid)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1783 -1.1245  0.0097  0.8880 10.6919 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.11707    0.03221  -3.635 0.000357 ***
## z.diff.lag1 -0.02521    0.07108  -0.355 0.723284    
## z.diff.lag2  0.04274    0.07152   0.598 0.550841    
## z.diff.lag3  0.09019    0.07151   1.261 0.208802    
## z.diff.lag4 -0.01753    0.07146  -0.245 0.806452    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.782 on 191 degrees of freedom
## Multiple R-squared:  0.07655,    Adjusted R-squared:  0.05238 
## F-statistic: 3.167 on 5 and 191 DF,  p-value: 0.009044
## 
## 
## Value of test-statistic is: -3.6349 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## ============================================================
## PRE-BREAK DETRENDING USING SPLINE (df = 4)
## ============================================================

library(urca)
library(bootUR)

## 1. Extract pre-break sample
Tb    <- 109                   # 1975Q1
U_pre <- U[1:Tb]
t_pre <- time_cont[1:Tb]

## 2. Fit spline trend and detrend
spline_fit_pre      <- smooth.spline(t_pre, U_pre, df = 4)
U_pre_trend         <- predict(spline_fit_pre, t_pre)$y
U_pre_det_spline    <- U_pre - U_pre_trend

## 3. Plot detrended pre-break series
plot(t_pre, U_pre_det_spline, type = "l",
     main = "Detrended series (Pre-1975, spline df=4)",
     xlab = "Time", ylab = "Detrended U")
abline(h = 0, lty = 2)

## 4. ADF test on detrended residuals (no drift, no trend)
adf_pre_resid <- ur.df(U_pre_det_spline, type = "none", lags = 4)
summary(adf_pre_resid)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2281 -0.7006 -0.0155  0.6923  2.9879 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.37136    0.09787  -3.794 0.000255 ***
## z.diff.lag1  0.13349    0.10741   1.243 0.216891    
## z.diff.lag2  0.32103    0.09678   3.317 0.001273 ** 
## z.diff.lag3 -0.11546    0.10267  -1.125 0.263507    
## z.diff.lag4 -0.06267    0.10111  -0.620 0.536820    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.2 on 99 degrees of freedom
## Multiple R-squared:  0.2721, Adjusted R-squared:  0.2354 
## F-statistic: 7.402 on 5 and 99 DF,  p-value: 6.203e-06
## 
## 
## Value of test-statistic is: -3.7945 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## 5. Ljung–Box autocorrelation test
Box.test(U_pre_det_spline, lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  U_pre_det_spline
## X-squared = 89.779, df = 12, p-value = 5.451e-14
## 6. Bootstrap union unit root test
boot_pre <- boot_union(U_pre_det_spline, B = 999)
## Progress: |------------------| 
##           ********************
boot_pre
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_det_spline
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##                  estimate largest root statistic p-value
## U_pre_det_spline                    NA   -0.9294  0.2172
## ==========================================
## PRE-1975: STRONGER SPLINE vs DIFFERENCES
## ==========================================

library(urca)
library(bootUR)

Tb    <- 109                 # 1975Q1
U_pre <- U[1:Tb]
t_pre <- time_cont[1:Tb]

## -------------------------------
## 1) STRONGER SPLINE (df = 6)
## -------------------------------

spline_strong <- smooth.spline(t_pre, U_pre, df = 6)
U_pre_trend6  <- predict(spline_strong, t_pre)$y
U_pre_det6    <- U_pre - U_pre_trend6

# ADF (no trend, no drift)
adf_pre_det6 <- ur.df(U_pre_det6, type = "none", lags = 4)
summary(adf_pre_det6)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.77196 -0.65297  0.04364  0.53127  2.45733 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.606110   0.120884  -5.014 2.34e-06 ***
## z.diff.lag1  0.263723   0.112292   2.349   0.0208 *  
## z.diff.lag2  0.418446   0.098439   4.251 4.83e-05 ***
## z.diff.lag3  0.006797   0.106467   0.064   0.9492    
## z.diff.lag4  0.023796   0.101375   0.235   0.8149    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.14 on 99 degrees of freedom
## Multiple R-squared:  0.3395, Adjusted R-squared:  0.3061 
## F-statistic: 10.18 on 5 and 99 DF,  p-value: 6.908e-08
## 
## 
## Value of test-statistic is: -5.014 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
# Ljung–Box
Box.test(U_pre_det6, lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  U_pre_det6
## X-squared = 94.65, df = 12, p-value = 6.217e-15
# Bootstrap union test
boot_pre_det6 <- boot_union(U_pre_det6, B = 999)
## Progress: |------------------| 
##           ********************
boot_pre_det6
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_det6
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##            estimate largest root statistic p-value
## U_pre_det6                    NA        -2       0
## -------------------------------
## 2) FIRST DIFFERENCE of U_pre
## -------------------------------

U_pre_diff <- diff(U_pre)

# ADF on differences (no trend, no drift)
adf_pre_diff <- ur.df(U_pre_diff, type = "none", lags = 4)
summary(adf_pre_diff)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0868 -0.3960  0.2584  0.9696  4.0351 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -1.32734    0.22669  -5.855 6.35e-08 ***
## z.diff.lag1  0.27352    0.19091   1.433  0.15511    
## z.diff.lag2  0.45829    0.15852   2.891  0.00473 ** 
## z.diff.lag3  0.19022    0.14221   1.338  0.18413    
## z.diff.lag4  0.03239    0.10177   0.318  0.75099    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.328 on 98 degrees of freedom
## Multiple R-squared:  0.5957, Adjusted R-squared:  0.5751 
## F-statistic: 28.88 on 5 and 98 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -5.8554 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
# Ljung–Box on differences
Box.test(U_pre_diff, lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  U_pre_diff
## X-squared = 26.16, df = 12, p-value = 0.01019
# Bootstrap union on differences
boot_pre_diff <- boot_union(U_pre_diff, B = 999)
## Progress: |------------------| 
##           ********************
boot_pre_diff
## 
##  AWB bootstrap union test on a single time series
## 
## data: U_pre_diff
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##            estimate largest root statistic p-value
## U_pre_diff                    NA    -2.714       0
library(urca)
library(bootUR)

df_list <- 4:7
results <- list()

for (k in df_list) {

    fit <- smooth.spline(t_pre, U_pre, df = k)
    trend <- predict(fit, t_pre)$y
    resid <- U_pre - trend
    
    # ADF
    adf <- summary(ur.df(resid, type="none", lags=4))
    
    # Bootstrap union
    boot <- boot_union(resid, B=999)
    
    # RSS
    RSS <- sum(resid^2)
    
    # AIC / BIC (effective parameters = df)
    n <- length(U_pre)
    AIC <- n*log(RSS/n) + 2*k
    BIC <- n*log(RSS/n) + k*log(n)
    
    results[[paste0("df",k)]] <- list(
        ADF_stat = adf@teststat,
        ADF_crit = adf@cval,
        Boot_p = boot$p.value,
        RSS = RSS,
        AIC = AIC,
        BIC = BIC
    )
}
## Progress: |------------------| 
##           ********************
## Progress: |------------------| 
##           ********************
## Progress: |------------------| 
##           ********************
## Progress: |------------------| 
##           ********************
results
## $df4
## $df4$ADF_stat
##                tau1
## statistic -3.794488
## 
## $df4$ADF_crit
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## 
## $df4$Boot_p
##   p-value 
## 0.2472472 
## 
## $df4$RSS
## [1] 344.5679
## 
## $df4$AIC
## [1] 133.4528
## 
## $df4$BIC
## [1] 144.2182
## 
## 
## $df5
## $df5$ADF_stat
##                tau1
## statistic -4.500663
## 
## $df5$ADF_crit
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## 
## $df5$Boot_p
##     p-value 
## 0.001001001 
## 
## $df5$RSS
## [1] 297.6145
## 
## $df5$AIC
## [1] 119.4852
## 
## $df5$BIC
## [1] 132.9419
## 
## 
## $df6
## $df6$ADF_stat
##                tau1
## statistic -5.013989
## 
## $df6$ADF_crit
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## 
## $df6$Boot_p
## p-value 
##       0 
## 
## $df6$RSS
## [1] 275.4133
## 
## $df6$AIC
## [1] 113.0348
## 
## $df6$BIC
## [1] 129.1829
## 
## 
## $df7
## $df7$ADF_stat
##                tau1
## statistic -5.386972
## 
## $df7$ADF_crit
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## 
## $df7$Boot_p
## p-value 
##       0 
## 
## $df7$RSS
## [1] 261.928
## 
## $df7$AIC
## [1] 109.5627
## 
## $df7$BIC
## [1] 128.4021
## ============================================================
##  PRE-BREAK SPLINE DETRENDING WITH df = 7 (WITH VISUALS)
## ============================================================

library(splines)

## 1. Pre-break sample (up to 1975Q1)
Tb    <- 109
U_pre <- U[1:Tb]
t_pre <- time_cont[1:Tb]

## 2. Fit spline with df = 7
fit_df7   <- smooth.spline(t_pre, U_pre, df = 7)
trend_df7 <- predict(fit_df7, t_pre)$y
det_df7   <- U_pre - trend_df7   # detrended values (use these later)

## 3. Visuals: original + trend, and detrended series

par(mfrow = c(2,1), mar = c(4,4,2,1))

## (a) Original series with spline(df=7) trend
plot(t_pre, U_pre, type = "l",
     main = "Pre-1975 series with spline trend (df = 7)",
     xlab = "Time", ylab = "U (pre-break)")
lines(t_pre, trend_df7, col = "red", lwd = 2)

## (b) Detrended pre-break series (df=7)
plot(t_pre, det_df7, type = "l",
     main = "Detrended pre-1975 series (spline df = 7)",
     xlab = "Time", ylab = "Detrended U")
abline(h = 0, lty = 2)

## 4. Return detrended values (det_df7) so you can reuse them
det_df7
##   [1]  1.55935241  1.94921532  1.53806729  1.12359488 -1.69696124 -3.82670909
##   [7] -2.86842064 -4.22264272 -1.58736958 -1.05960975  1.56544152  1.89294714
##  [13]  2.32735834  1.37170774  1.82832969  1.49808886  0.78122605 -0.02306855
##  [19]  0.38403987  1.10125922  1.22688873  0.85892387 -0.60549606 -1.86963244
##  [25] -2.33644663 -1.60750094 -0.48369627  0.13531296  1.55022298  2.16158174
##  [31]  1.86875929  0.67048289 -0.63587444 -1.05327028 -1.38441293 -0.43113271
##  [37]  0.50505204 -0.57699932 -0.67836225 -1.30006188 -1.94248290 -1.30551075
##  [43]  1.01229653  1.91271698  1.19725734  0.76627515  1.01964347  0.25645076
##  [49]  2.47549996 -1.22495913 -1.84754795 -3.79461254 -3.56661558 -0.26297226
##  [55]  0.91920883  0.88327614  0.43199695 -0.73239188 -0.10770196 -0.09152786
##  [61] -0.18140820 -0.67480121  1.33113067  0.53917493  1.25162496 -0.22951712
##  [67]  0.19710100 -1.26725148 -0.52115039 -1.46263613  1.01062877  2.40167468
##  [73]  2.51264388  0.74497176  0.69839827  0.97216848  0.26511029 -0.12455191
##  [79] -0.79866230 -0.75880682  1.59352818  2.15699668  1.12996017  0.30935588
##  [85] -0.40866288 -1.52794681 -1.25190619 -3.28365585 -2.22511622 -1.07620985
##  [91]  0.26381020 -1.70344377  1.92400562  1.44845214  2.17122424 -0.10691590
##  [97] -2.98635695 -0.76748254 -0.74935498  0.86985737  1.79198789  2.21827584
## [103]  0.04949736  1.88507048 -1.07630035 -0.33600130 -2.29538220 -0.65548178
## [109]  1.08386901

X and M

ts.plot(X, M)

ts.plot(B)

## ============================================
##  Make B stationary with same structure as U
##  (spline df=7 before 1975, linear after)
## ============================================

library(splines)
library(bootUR)

## 1. Build B as you already did
X <- ts(Ex$EXPGSC1, start = 1947, frequency = 4)
M <- ts(Imp$IMPGSC1, start = 1947, frequency = 4)
B <- log(X / M)

## 2. Define break and time indices
Tb   <- 109                 # 1975Q1, same as for U
Nend <- length(B)

B_pre  <- B[1:Tb]
B_post <- B[(Tb + 1):Nend]

t_pre  <- 1:Tb
t_post <- (Tb + 1):Nend     # simple time index; you can also use time(B)

## 3. Pre-break: spline(df = 7)
fit_B_pre   <- smooth.spline(t_pre, B_pre, df = 7)
B_pre_trend <- predict(fit_B_pre, t_pre)$y
B_pre_det   <- B_pre - B_pre_trend

## 4. Post-break: linear trend
fit_B_post   <- lm(B_post ~ t_post)
B_post_trend <- fitted(fit_B_post)
B_post_det   <- B_post - B_post_trend

## 5. Combine detrended B
B_det <- ts(c(B_pre_det, B_post_det),
            start = start(B),
            frequency = frequency(B))

## 6. Visual check
par(mfrow = c(2,1), mar = c(4,4,2,1))

# detrended
plot(time(B_det), B_det, type = "l",
     main = "Detrended B_t ",
     xlab = "Time", ylab = "B_det")
abline(h = 0, lty = 2)

## 7. Stationarity tests on detrended B

# ADF with no deterministics (trend removed)
adf_B_det <- adf(B_det, deterministics = "none")
adf_B_det
## 
##  Two-step ADF test (with none) on a single time series
## 
## data: B_det
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##       estimate largest root statistic   p-value
## B_det                0.9103    -4.182 3.646e-05
# Bootstrap union test
boot_B_det <- boot_union(B_det, B = 999)
## Progress: |------------------| 
##           ********************
boot_B_det
## 
##  AWB bootstrap union test on a single time series
## 
## data: B_det
## null hypothesis: Series has a unit root
## alternative hypothesis: Series is stationary
## 
##       estimate largest root statistic  p-value
## B_det                    NA    -1.591 0.004004
# Ljung–Box for remaining autocorrelation
Box.test(B_det, lag = 12, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  B_det
## X-squared = 1064.4, df = 12, p-value < 2.2e-16