The purpose of this analysis is to develop a diffusion index for selected economic variables, compare it to the Chicago Fed National Activity Index: Diffusion Index (CFNAIDIFF), and provide insights into the current state of the U.S. economy. The diffusion index is an indicator of economic activity and trends, constructed from unemployment rate, industrial production, and GDP.
options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages(c("tidyverse", "ggplot2", "quantmod"))
## Installing packages into 'C:/Users/Admin/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
## package 'ggplot2' successfully unpacked and MD5 sums checked
## package 'quantmod' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Admin\AppData\Local\Temp\Rtmpc3Ic2d\downloaded_packages
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Load the economic data from FRED using quantmod
# using GDP, unemployment rate, and industrial production as economic variables
getSymbols(c("GDP", "UNRATE", "INDPRO"), src = "FRED")
## [1] "GDP" "UNRATE" "INDPRO"
# Combine the data into one data frame
econ_data <- na.omit(merge(UNRATE, INDPRO, GDP))
colnames(econ_data) <- c("Unemployment_Rate", "Industrial_Production_Index", "GDP")
# Calculate the Diffusion Index using a custom function if available
# Fallback to manual calculation if the function is not available
diffusion_calc <- function(data) {
return(ifelse(diff(data) > 0, 1, -1))
}
unemployment_diffusion <- diffusion_calc(econ_data$Unemployment_Rate)
indpro_diffusion <- diffusion_calc(econ_data$Industrial_Production_Index)
gdp_diffusion <- diffusion_calc(econ_data$GDP)
# Ensure the diffusion index aligns in length with econ_data
diffusion_index <- rowMeans(cbind(unemployment_diffusion, indpro_diffusion, gdp_diffusion), na.rm = TRUE)
diffusion_index <- c(NA, diffusion_index) # Align length with original data
diffusion_index <- diffusion_index[1:nrow(econ_data)] # Ensure same length as econ_data
econ_data$Diffusion_Index <- diffusion_index
# Step 4: Load the CFNAIDIFF data from the uploaded CSV file
# Set working directory to the path where the CSV file is located (update the path below)
setwd("C:/Users/Admin/Desktop/econ final project")
cfnaidiff_data <- read.csv("cfnai-data-series-xlsx.csv")
# Convert the Date column to a proper Date format
cfnaidiff_data$Date <- as.Date(paste0(cfnaidiff_data$Date, "-01"), format = "%Y:%m-%d")
# Extract only the Date and DIFFUSION columns for comparison
cfnaidiff_data <- cfnaidiff_data %>% select(Date, DIFFUSION)
colnames(cfnaidiff_data) <- c("Date", "CFNAIDIFF")
# Convert the FRED data to a data frame and add a date column
econ_df <- data.frame(Date = index(econ_data), coredata(econ_data))
# Merge CFNAIDIFF data with the diffusion index data for comparison
combined_data <- merge(econ_df, cfnaidiff_data, by = "Date", all.x = TRUE)
# Calculate correlation coefficient between the Diffusion Index and CFNAIDIFF
cor_coefficient <- cor(combined_data$Diffusion_Index, combined_data$CFNAIDIFF, use = "complete.obs")
print(paste("Correlation Coefficient:", cor_coefficient))
## [1] "Correlation Coefficient: 0.0886380120422671"
# Plot the Diffusion Index using ggplot
ggplot(econ_df, aes(x = Date, y = Diffusion_Index)) +
geom_line(color = "blue") +
geom_smooth(method = "loess", color = "red") +
labs(title = "Diffusion Index for Selected Economic Variables",
x = "Date", y = "Diffusion Index") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Plot the Diffusion Index and CFNAIDIFF side by side using ggplot
ggplot(combined_data, aes(x = Date)) +
geom_line(aes(y = Diffusion_Index, color = "Diffusion Index")) +
geom_line(aes(y = CFNAIDIFF, color = "CFNAIDIFF")) +
labs(title = "Comparison of Diffusion Index and CFNAIDIFF",
x = "Date", y = "Index Value") +
scale_color_manual("",
breaks = c("Diffusion Index", "CFNAIDIFF"),
values = c("Diffusion Index" = "blue", "CFNAIDIFF" = "green")) +
theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_line()`).
Based on the analysis, the correlation between the custom diffusion index and CFNAIDIFF is approximately 0.0886, indicating a weak but positive linear relationship. While the correlation is low, the trend analysis shows that the diffusion index captures certain directional patterns, providing valuable insights into economic activity. The comparison highlights the utility of customized indices in tracking economic trends.