suppressWarnings({
suppressPackageStartupMessages({
library(tidyverse)
library(quantmod)
library(tsbox)
library(zoo)
})
})
options(digits = 3, scipen = 99999)
graphics.off()
What it does:
Loads required libraries: tidyverse for data manipulation and visualization, quantmod for downloading economic data, tsbox for time-series manipulation, and zoo for rolling averages.
Suppresses warnings and messages to declutter the output.
Sets options:
digits = 3: Restricts numerical output to 3 decimal places.
scipen = 99999: Prevents scientific notation in numerical output.
Closes any open graphics devices (graphics.off()).
getSymbols(c("PAYEMS", "INDPRO", "HOUST"),
src = "FRED", return.class = 'xts',
from = "2010-01-01", to = Sys.Date())
## [1] "PAYEMS" "INDPRO" "HOUST"
What it does: Downloads three economic variables from the FRED database:
PAYEMS: Nonfarm Payroll Employment (employment data).
INDPRO: Industrial Production Index.
HOUST: Housing Starts.
Data is stored as xts objects for easy time-series manipulation.
employment_ss <- PAYEMS["2010-01-31/2024-09-01"] |> ts_ts()
industrial_ss <- INDPRO["2010-01-31/2024-09-01"] |> ts_ts()
housing_ss <- HOUST["2010-01-31/2024-09-01"] |> ts_ts()
mydata <- cbind.data.frame(employment_ss, industrial_ss, housing_ss)
What it does:
Subsets the data for the range 2010-01-31 to 2024-09-01.
Converts each variable (employment, industrial_production, and housing) to a ts object using ts_ts() for compatibility.
Combines the three variables into a single data frame (mydata) for processing.
mydf <- mydata %>%
mutate(
emp_diff = tsibble::difference(employment_ss, differences = 1),
ind_diff = tsibble::difference(industrial_ss, differences = 1),
house_diff = tsibble::difference(housing_ss, differences = 1)
) %>%
dplyr::select(emp_diff, ind_diff, house_diff) %>%
na.omit()
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
What it does:
Uses the difference() function from the tsibble package to calculate first differences for each variable:
emp_diff: Employment differences.
ind_diff: Industrial production differences.
house_diff: Housing starts differences.
Retains only the calculated differences and removes rows with NA values using na.omit().
mydf_mat <- apply(mydf, 2, sign)
pos <- apply(mydf_mat, 1, function(row) sum(row > 0))
neg <- apply(mydf_mat, 1, function(row) sum(row < 0))
tot <- pos + neg
index <- (pos / tot - neg / tot) * 100
ma_index <- rollmean(index, 7, align = "right", na.pad = TRUE)
`What it does:
Converts mydf to a matrix of signs (-1, 0, 1) using apply() and sign(): 1: Positive differences. -1: Negative differences. 0: No change.
Date <- seq.Date(from = as.Date("2010-05-01"), length.out = length(index), by = "month")
diffusion_df <- cbind.data.frame(Date, index, ma_index)
What it does:
Creates a sequence of dates (Date) from 2010-05-01 with a monthly frequency.
Combines Date, raw index, and smoothed ma_index into a data frame (diffusion_df)
## Visualize the Diffusion Index
```r
ggplot(diffusion_df, aes(x = Date, y = index)) +
geom_line(color = "navyblue", size = 0.8) +
geom_smooth(color = "red", fill = "lightblue", alpha = 0.3, size = 1.2) +
geom_text(data = diffusion_df[c(1, nrow(diffusion_df)), ],
aes(label = paste0(round(index, 2), "%")),
vjust = -1, size = 3.5, fontface = "bold", color = "darkred") +
geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.8) +
geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dotted", color = "darkgreen", size = 1) +
annotate("text", x = as.Date("2020-03-01"), y = -90,
label = "COVID-19", color = "darkgreen", size = 4, hjust = 0, angle = 90) +
labs(
title = "U.S. Economic Diffusion Index Over Time",
subtitle = "Analyzing economic trends",
x = "Year",
y = "Diffusion Index (%)"
) +
scale_y_continuous(limits = c(-120, 120)) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(size = 20, face = "bold", hjust = 0.5, color = "darkblue"),
plot.subtitle = element_text(size = 14, face = "italic", hjust = 0.5, color = "orange"),
axis.title.x = element_text(size = 14, face = "bold", color = "yellow"),
axis.title.y = element_text(size = 14, face = "bold", color = "yellow"),
axis.text = element_text(size = 12, color = "yellow"),
panel.grid.major = element_line(color = "gray85", size = 0.5),
panel.grid.minor = element_blank(),
legend.position = "none",
plot.background = element_rect(fill = "white", color = "white")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
What it does:
geom_line: Plots the diffusion index (index) as a navy-blue line.
geom_smooth: Adds a smoothed trend line (red) with a light blue shaded area.
geom_text: Labels the start and end points of the index with percentages.
geom_hline: Adds a horizontal dashed line at y = 0.
geom_vline: Adds a vertical dotted line at the COVID-19 event (2020-03-01).
annotate: Annotates the COVID-19 event on the plot.
labs: Adds a title, subtitle, and axis labels.
scale_y_continuous: Adjusts the y-axis limits to [-120, 120].
theme_minimal: Applies a minimal theme and customizes fonts, colors, and spacing.
getSymbols("CFNAIDIFF", src = "FRED", return.class = 'xts', from = "2010-01-01")
## [1] "CFNAIDIFF"
cfnaidiff_ss <- CFNAIDIFF["2010-05-01/2024-09-01"] |> ts_ts()
min_length <- min(length(ma_index), length(cfnaidiff_ss))
ma_index <- ma_index[1:min_length]
cfnaidiff_ss <- cfnaidiff_ss[1:min_length]
Date <- Date[1:min_length]
What it does:
Extracts data for the specified date range (2010-05-01 to 2024-09-01) and converts it into a ts object using ts_ts() for easier time-series manipulation.
compare_df <- cbind.data.frame(Date, diffusion_index = ma_index, CFNAIDIFF = cfnaidiff_ss)
```What it does:
Combines the aligned Date, ma_index, and cfnaidiff_ss into a single data frame called compare_df. This structure is suitable for further analysis and plotting.
correlation <- cor(compare_df$diffusion_index, compare_df$CFNAIDIFF, use = "complete.obs")
What it does:
Computes the Pearson correlation coefficient between the diffusion_index and CFNAIDIFF columns of compare_df.
The use = “complete.obs” argument ensures that missing values (NA) are ignored in the calculation.
plotly::ggplotly(
ggplot() +
geom_line(data = compare_df, aes(x = Date, y = diffusion_index, color = "Diffusion Index"), size = 1.2) +
geom_line(data = compare_df, aes(x = Date, y = CFNAIDIFF * 100, color = "CFNAIDIFF"), size = 1.2, linetype = "dashed") +
scale_color_manual(
values = c("Diffusion Index" = "blue", "CFNAIDIFF" = "green")
) +
labs(
title = "Comparison of Diffusion Index and CFNAIDIFF",
x = "Year",
y = "Index Value",
color = "Legend"
) +
theme_minimal(base_size = 8) +
theme(
plot.title = element_text(size = 10, face = "bold", hjust = 0.5),
legend.position = "bottom"
)
)
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```r
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2
## 1st Qu.:12.0 1st Qu.: 26
## Median :15.0 Median : 36
## Mean :15.4 Mean : 43
## 3rd Qu.:19.0 3rd Qu.: 56
## Max. :25.0 Max. :120
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.