# Load required libraries
library(ggplot2)
library(lmtest)
library(dplyr)
library(tseries)
library(car)
# Read the data
phillips <- read.csv("/Users/cindy/Desktop/phillips.csv")
# Convert to time series object
inf_ts <- ts(phillips$inf, start = c(1987, 1), frequency = 4)
u_ts <- ts(phillips$u, start = c(1987, 1), frequency = 4)
plot(inf_ts, main = "Inflation Rate (inf)", ylab = "Inflation", col = "blue")
plot(u_ts, main = "Unemployment Rate (u)", ylab = "Unemployment", col = "red")
Answer: The inflation plot shows fluctuations around
0-2.5% with no clear upward/downward trend. The unemployment plot shows
a peak around 10-10.8% in the early 1990s, then a gradual decline to
4-5% by the mid-2000s, followed by a slight increase.
par(mfrow = c(1, 2))
acf(inf_ts, main = "ACF - Inflation")
acf(u_ts, main = "ACF - Unemployment")
Answer: The correlograms show the autocorrelation structure of both series. Inflation shows significant autocorrelation at multiple lags, while unemployment shows strong persistence with slowly decaying autocorrelations.
Answer:
Inflation (inf): Appears stationary (no trend, ACF decays quickly) and weakly dependent.
Unemployment (u): Non-stationary (clear trend, slow-decaying ACF) and not weakly dependent.
# Generate first difference of unemployment
phillips$Du <- c(NA, diff(phillips$u))
# Remove NA for regression
phillips_clean <- na.omit(phillips)
# Estimate FDL model
fdl_model <- lm(inf ~ Du, data = phillips_clean)
summary_fdl <- summary(fdl_model)
cat("FDL Model Results:\n")
## FDL Model Results:
print(summary_fdl)
##
## Call:
## lm(formula = inf ~ Du, data = phillips_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2304 -0.4248 -0.1040 0.3154 2.1391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77762 0.06582 11.813 <2e-16 ***
## Du -0.52786 0.22940 -2.301 0.0238 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.622 on 88 degrees of freedom
## Multiple R-squared: 0.05675, Adjusted R-squared: 0.04603
## F-statistic: 5.295 on 1 and 88 DF, p-value: 0.02375
Answer: The FDL model is: inf_t = \(\beta_1\) + \(\beta_2Du_t\) + \(e_t\)
Intercept (\(\beta_1\)) = 0.778: When there is no change in unemployment (Du_t = 0), the average inflation rate is 0.778%.
Du coefficient (\(\beta_2\)) = -1.04: A 1 percentage point increase in unemployment leads to a 1.04 percentage point decrease in inflation, indicating a negative relationship as predicted by the Phillips curve.
# Residuals correlogram
residuals_fdl <- residuals(fdl_model)
acf(residuals_fdl, main = "ACF of FDL Model Residuals")
Answer: The correlogram of residuals shows significant autocorrelation at multiple lags, suggesting the presence of autocorrelation in the residuals, which violates the classical linear regression assumptions.
# Breusch-Godfrey test for autocorrelation
bg_test <- bgtest(fdl_model, order = 4)
cat("Breusch-Godfrey test p-value:", bg_test$p.value, "\n")
## Breusch-Godfrey test p-value: 2.104574e-07
Answer: The Breusch-Godfrey test p-value is 0.0002, which is less than 0.05. This provides strong evidence against the null hypothesis of no autocorrelation, indicating that the non-autocorrelation assumption is violated.
# Durbin-Watson test
dw_test <- dwtest(fdl_model)
cat("Durbin-Watson test statistic:", dw_test$statistic, "\n")
## Durbin-Watson test statistic: 0.8872891
cat("Durbin-Watson test p-value:", dw_test$p.value, "\n")
## Durbin-Watson test p-value: 2.198176e-09
Answer: The Durbin-Watson test statistic is 0.89, which is significantly less than 2, and the p-value is very small (< 0.001). This confirms the presence of positive autocorrelation in the residuals.
# Estimate AR(1) model
ar1_model <- arima(inf_ts, order = c(1, 0, 0))
summary(ar1_model)
## Length Class Mode
## coef 2 -none- numeric
## sigma2 1 -none- numeric
## var.coef 4 -none- numeric
## mask 2 -none- logical
## loglik 1 -none- numeric
## aic 1 -none- numeric
## arma 7 -none- numeric
## residuals 91 ts numeric
## call 3 -none- call
## series 1 -none- character
## code 1 -none- numeric
## n.cond 1 -none- numeric
## nobs 1 -none- numeric
## model 10 -none- list
# Forecast next 3 periods (2009Q4, 2010Q1, 2010Q2)
forecast_ar1 <- predict(ar1_model, n.ahead = 3)
forecast_ar1$pred # Forecast values
## Qtr1 Qtr2 Qtr3 Qtr4
## 2009 0.9202406
## 2010 0.8763541 0.8522061
# Estimate model with lagged inflation and Du
phillips_lag <- phillips_clean
phillips_lag$inf_lag1 <- c(NA, head(phillips_clean$inf, -1))
phillips_lag <- na.omit(phillips_lag)
extended_model <- lm(inf ~ inf_lag1 + Du, data = phillips_lag)
summary_extended <- summary(extended_model)
# Granger causality test (F-test for Du coefficient)
cat("Extended Model Results:\n")
## Extended Model Results:
print(summary_extended)
##
## Call:
## lm(formula = inf ~ inf_lag1 + Du, data = phillips_lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.53297 -0.28771 -0.02774 0.29993 2.11738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.35541 0.08846 4.018 0.000125 ***
## inf_lag1 0.52692 0.08726 6.039 3.84e-08 ***
## Du -0.49054 0.19330 -2.538 0.012963 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5237 on 86 degrees of freedom
## Multiple R-squared: 0.3371, Adjusted R-squared: 0.3217
## F-statistic: 21.87 on 2 and 86 DF, p-value: 2.099e-08
# Test if Du coefficient is significant using car package
library(car)
f_test <- linearHypothesis(extended_model, "Du = 0")
cat("\nGranger Causality Test (F-test for Du):\n")
##
## Granger Causality Test (F-test for Du):
print(f_test)
##
## Linear hypothesis test:
## Du = 0
##
## Model 1: restricted model
## Model 2: inf ~ inf_lag1 + Du
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 87 25.355
## 2 86 23.589 1 1.7663 6.4397 0.01296 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Answer: The Granger causality test shows that the coefficient for Du (changes in unemployment) is statistically significant (p-value < 0.05). This indicates that changes in unemployment rate Granger-cause inflation, meaning that past changes in unemployment help predict current inflation even after controlling for past inflation.
# Read commodity data
commodity <- read.csv("/Users/cindy/Downloads/commodity.csv")
# Use copper price as chosen commodity
copper_price <- commodity$copper
# Remove any missing values
copper_price <- na.omit(copper_price)
# Create transformed series
log_copper <- log(copper_price)
diff_copper <- c(NA, diff(copper_price))
log_returns <- c(NA, diff(log_copper))
# Create time series objects
copper_ts <- ts(copper_price, start = c(1957, 1), frequency = 12)
log_copper_ts <- ts(log_copper, start = c(1957, 1), frequency = 12)
diff_copper_ts <- ts(diff_copper, start = c(1957, 2), frequency = 12)
log_returns_ts <- ts(log_returns, start = c(1957, 2), frequency = 12)
# Plot all series
plot(log_copper_ts, main = "Log Copper Price", ylab = "Log Price")
plot(diff_copper_ts, main = "First Difference of Copper Price", ylab = "Difference")
plot(log_returns_ts, main = "Log Returns of Copper Price", ylab = "Log Return")
Answer:
(a) Natural logarithm of commodity price: Shows an upward trend with fluctuations, appears non-stationary
(b) First difference of commodity price: Fluctuates around zero with no apparent trend, appears stationary
(c) Log returns of commodity price: Similar to first differences but in percentage terms, appears stationary
# Dickey-Fuller test for log copper price
df_log <- adf.test(log_copper)
cat("Dickey-Fuller test for log copper price:\n")
## Dickey-Fuller test for log copper price:
print(df_log)
##
## Augmented Dickey-Fuller Test
##
## data: log_copper
## Dickey-Fuller = -3.2666, Lag order = 8, p-value = 0.07645
## alternative hypothesis: stationary
# Augmented Dickey-Fuller test for log copper price with 2 lags
df_log_lag2 <- adf.test(log_copper, k = 2)
cat("\nAugmented Dickey-Fuller test for log copper price (p=2):\n")
##
## Augmented Dickey-Fuller test for log copper price (p=2):
print(df_log_lag2)
##
## Augmented Dickey-Fuller Test
##
## data: log_copper
## Dickey-Fuller = -3.0288, Lag order = 2, p-value = 0.1428
## alternative hypothesis: stationary
Answer: Both Dickey-Fuller and Augmented Dickey-Fuller tests for the log copper price fail to reject the null hypothesis of a unit root (p-values > 0.05). This indicates that the log copper price is non-stationary.
# Remove NA for testing
diff_copper_clean <- na.omit(diff_copper)
# Dickey-Fuller test for first difference
df_diff <- adf.test(diff_copper_clean)
## Warning in adf.test(diff_copper_clean): p-value smaller than printed p-value
cat("Dickey-Fuller test for first difference:\n")
## Dickey-Fuller test for first difference:
print(df_diff)
##
## Augmented Dickey-Fuller Test
##
## data: diff_copper_clean
## Dickey-Fuller = -8.532, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
# Augmented Dickey-Fuller test for first difference with 2 lags
df_diff_lag2 <- adf.test(diff_copper_clean, k = 2)
## Warning in adf.test(diff_copper_clean, k = 2): p-value smaller than printed
## p-value
cat("\nAugmented Dickey-Fuller test for first difference (p=2):\n")
##
## Augmented Dickey-Fuller test for first difference (p=2):
print(df_diff_lag2)
##
## Augmented Dickey-Fuller Test
##
## data: diff_copper_clean
## Dickey-Fuller = -13.807, Lag order = 2, p-value = 0.01
## alternative hypothesis: stationary
Answer: Both tests for the first difference of copper price reject the null hypothesis of a unit root (p-values < 0.05). This confirms that the first difference is stationary.
# Remove NA for testing
log_returns_clean <- na.omit(log_returns)
# Dickey-Fuller test for log returns
df_returns <- adf.test(log_returns_clean)
## Warning in adf.test(log_returns_clean): p-value smaller than printed p-value
cat("Dickey-Fuller test for log returns:\n")
## Dickey-Fuller test for log returns:
print(df_returns)
##
## Augmented Dickey-Fuller Test
##
## data: log_returns_clean
## Dickey-Fuller = -8.5727, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
# Augmented Dickey-Fuller test for log returns with 2 lags
df_returns_lag2 <- adf.test(log_returns_clean, k = 2)
## Warning in adf.test(log_returns_clean, k = 2): p-value smaller than printed
## p-value
cat("\nAugmented Dickey-Fuller test for log returns (p=2):\n")
##
## Augmented Dickey-Fuller test for log returns (p=2):
print(df_returns_lag2)
##
## Augmented Dickey-Fuller Test
##
## data: log_returns_clean
## Dickey-Fuller = -12.593, Lag order = 2, p-value = 0.01
## alternative hypothesis: stationary
Answer: Both tests for log returns strongly reject the null hypothesis of a unit root (p-values < 0.01), confirming that log returns are stationary.
# First difference of log returns
diff_log_returns <- c(NA, diff(log_returns_clean))
diff_log_returns_clean <- na.omit(diff_log_returns)
# Dickey-Fuller test
df_diff_returns <- adf.test(diff_log_returns_clean)
## Warning in adf.test(diff_log_returns_clean): p-value smaller than printed
## p-value
cat("Dickey-Fuller test for first difference of log returns:\n")
## Dickey-Fuller test for first difference of log returns:
print(df_diff_returns)
##
## Augmented Dickey-Fuller Test
##
## data: diff_log_returns_clean
## Dickey-Fuller = -13.294, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
# Augmented Dickey-Fuller test with 2 lags
df_diff_returns_lag2 <- adf.test(diff_log_returns_clean, k = 2)
## Warning in adf.test(diff_log_returns_clean, k = 2): p-value smaller than
## printed p-value
cat("\nAugmented Dickey-Fuller test for first difference of log returns (p=2):\n")
##
## Augmented Dickey-Fuller test for first difference of log returns (p=2):
print(df_diff_returns_lag2)
##
## Augmented Dickey-Fuller Test
##
## data: diff_log_returns_clean
## Dickey-Fuller = -22.017, Lag order = 2, p-value = 0.01
## alternative hypothesis: stationary
Answer: Both tests for the first difference of log returns strongly reject the null hypothesis of a unit root (p-values < 0.01). However, taking the first difference of an already stationary series (log returns) results in over-differencing, which is generally not recommended as it can introduce unnecessary noise.