### Loading the required libraries

suppressWarnings({
  suppressPackageStartupMessages({
    library(markovchain)
library(tidyverse)
library(quantmod)
library(tsbox)
library(TSstudio)
library(xts)
library(vars)
  })
})

###Introduction

#Economic indicators give us clues about how the economy is doing now and what might happen in the future. In this report, we’ve created a diffusion index using three key U.S. economic measures: the Industrial Production Index (INDPRO), which shows production activity; Personal Consumption Expenditures (PCE), which tracks consumer spending; and the Unemployment Rate (UNRATE), which reflects the state of the job market.

#This work is part of my coursework for the Pompea College of Business at the University of New Haven. My name is Sharon Snydrena Bobbili, and I hope this report offers valuable insights.

###Importing the Data

#Retrieve data from FRED using getSymbols
# Fetching three economic variables: Unemployment Rate (UNRATE), Industrial Production (INDPRO), Personal Consumption Expenditures (PCE)

# Download the three indicator series from FRED, from January 2010 up to
# today. Because auto.assign = TRUE, the call's visible result is the vector
# of symbol names and the series themselves are referenced below as the
# objects UNRATE, INDPRO and PCE.
getSymbols(
  Symbols = c("UNRATE", "INDPRO", "PCE"),
  src = "FRED",
  from = "2010-01-01",
  to = Sys.Date(),
  return.class = "xts",
  auto.assign = TRUE
)
## [1] "UNRATE" "INDPRO" "PCE"

## Combine all three series into a single xts object

# merge() aligns the three series on their date index and returns one column
# per input, in argument order. The labels therefore have to follow that same
# order (INDPRO, PCE, UNRATE); labelling them c("UNRATE", "INDPRO", "PCE")
# attaches every name to the wrong series (e.g. values near 9.8 -- the
# unemployment rate -- end up under "PCE").
us_data <- merge(INDPRO, PCE, UNRATE)
colnames(us_data) <- c("INDPRO", "PCE", "UNRATE")
print(us_data)
##              INDPRO     PCE UNRATE
## 2010-01-01  89.1897 10056.1 9.8
## 2010-02-01  89.5046 10093.4 9.8
## 2010-03-01  90.1356 10156.0 9.9
## 2010-04-01  90.4607 10182.3 9.9
## 2010-05-01  91.7014 10210.8 9.6
## 2010-06-01  91.9033 10231.3 9.4
## 2010-07-01  92.2549 10268.1 9.4
## 2010-08-01  92.5936 10307.1 9.5
## 2010-09-01  92.8533 10327.1 9.5
## 2010-10-01  92.6033 10386.4 9.4
##        ...                     
## 2024-02-01 102.7267 19412.7 3.9
## 2024-03-01 102.5186 19553.2 3.8
## 2024-04-01 102.3568 19603.3 3.9
## 2024-05-01 102.9797 19697.3 4.0
## 2024-06-01 103.2768 19747.5 4.1
## 2024-07-01 102.5381 19859.3 4.3
## 2024-08-01 103.0449 19898.0 4.2
## 2024-09-01 102.5497 20027.2 4.1
## 2024-10-01 102.2805 20099.5 4.1
## 2024-11-01       NA      NA 4.2

###Standardization of the data

### Standardizing the indicators to create the diffusion index

# Drop every date with a missing indicator, standardize each column with
# scale() (default centering and scaling), and convert the resulting matrix
# back into an xts series.
us_data_scaled <- as.xts(scale(na.omit(us_data)))
print(us_data_scaled)
##                UNRATE    INDPRO        PCE
## 2010-01-01 -2.7345906 -1.326357  1.7761294
## 2010-02-01 -2.6509827 -1.313270  1.7761294
## 2010-03-01 -2.4834482 -1.291308  1.8206954
## 2010-04-01 -2.3971321 -1.282081  1.8206954
## 2010-05-01 -2.0677186 -1.272082  1.6869975
## 2010-06-01 -2.0141128 -1.264890  1.5978655
## 2010-07-01 -1.9207609 -1.251979  1.5978655
## 2010-08-01 -1.8308339 -1.238296  1.6424315
## 2010-09-01 -1.7618819 -1.231279  1.6424315
## 2010-10-01 -1.8282585 -1.210474  1.5978655
##        ...                                
## 2024-01-01  0.5293572  1.919754 -0.9423951
## 2024-02-01  0.8595673  1.956311 -0.8532632
## 2024-03-01  0.8043155  2.005604 -0.8978292
## 2024-04-01  0.7613566  2.023181 -0.8532632
## 2024-05-01  0.9267404  2.056160 -0.8086972
## 2024-06-01  1.0056223  2.073772 -0.7641312
## 2024-07-01  0.8094929  2.112996 -0.6749993
## 2024-08-01  0.9440514  2.126574 -0.7195653
## 2024-09-01  0.8125727  2.171902 -0.7641312
## 2024-10-01  0.7410985  2.197268 -0.7641312

##Calculating the Diffusion Index

#The diffusion index is calculated as the mean of the standardized values of the chosen indicators.

# Average the standardized indicators row by row (one value per date) and
# rebuild an xts series on the same date index.
# NOTE(review): the unemployment-rate series enters this average with a
# positive sign, so rising unemployment pushes the index up. Consider negating
# that column before averaging if the index is meant to rise with economic
# activity -- confirm the intended definition.
us_data_diffusion <- xts(
  rowMeans(us_data_scaled, na.rm = TRUE),
  order.by = index(us_data_scaled)
)
# Give the single column an explicit name; without one, print() falls back to
# an auto-generated header built from an internal expression.
colnames(us_data_diffusion) <- "Diffusion_Index"
print(us_data_diffusion)
##            Diffusion_Index
## 2010-01-01                                  -0.7616059
## 2010-02-01                                  -0.7293745
## 2010-03-01                                  -0.6513535
## 2010-04-01                                  -0.6195058
## 2010-05-01                                  -0.5509343
## 2010-06-01                                  -0.5603790
## 2010-07-01                                  -0.5249580
## 2010-08-01                                  -0.4755661
## 2010-09-01                                  -0.4502432
## 2010-10-01                                  -0.4802891
##        ...                                            
## 2024-01-01                                   0.5022386
## 2024-02-01                                   0.6542051
## 2024-03-01                                   0.6373635
## 2024-04-01                                   0.6437583
## 2024-05-01                                   0.7247345
## 2024-06-01                                   0.7717545
## 2024-07-01                                   0.7491633
## 2024-08-01                                   0.7836867
## 2024-09-01                                   0.7401146
## 2024-10-01                                   0.7247451

### Plotting the diffusion index

#The following plot visualizes the diffusion index over time.

# Flatten the xts series into a plain data frame (dates plus index values)
# so that ggplot2 can consume it.
us_data_df <- data.frame(
  Date = index(us_data_diffusion),
  Diffusion_Index = coredata(us_data_diffusion)
)

# Blue line: the index itself. Red curve: a LOESS smooth of the same series.
ggplot(data = us_data_df, mapping = aes(x = Date, y = Diffusion_Index)) +
  geom_line(colour = "blue") +
  geom_smooth(method = "loess", colour = "red") +
  ggtitle("Diffusion Index of U.S. Economic Indicators") +
  xlab("Date") +
  ylab("Diffusion Index") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

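## Retrieve the Chicago Fed National Activity Index (CFNAI) from FRED
# NOTE(review): the series requested below is "CFNAI" (the headline index), not "CFNAIDIFF" (the CFNAI Diffusion Index) -- confirm which series was intended.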

# Download the FRED series "CFNAI" over the same period as the indicators.
getSymbols(
  Symbols = "CFNAI",
  src = "FRED",
  from = "2010-01-01",
  to = Sys.Date(),
  return.class = "xts",
  auto.assign = TRUE
)
## [1] "CFNAI"

# Restrict both series to the date range they have in common: the later of
# the two start dates through the earlier of the two end dates.
common_start_date <- max(start(us_data_diffusion), start(CFNAI))
common_end_date <- min(end(us_data_diffusion), end(CFNAI))
us_data_diffusion_aligned <- window(
  us_data_diffusion,
  start = common_start_date,
  end = common_end_date
)
CFNAI_aligned <- window(
  CFNAI,
  start = common_start_date,
  end = common_end_date
)

# Inner-join the two series by date and give the columns descriptive labels
# for the comparison that follows.
combined_data <- merge(
  us_data_diffusion_aligned,
  CFNAI_aligned,
  join = "inner"
)
colnames(combined_data) <- c("Diffusion_Index", "Chicago_Fed_Index")
print(combined_data)
##            Diffusion_Index Chicago_Fed_Index
## 2010-01-01      -0.7616059              0.15
## 2010-02-01      -0.7293745             -0.36
## 2010-03-01      -0.6513535              0.48
## 2010-04-01      -0.6195058              0.40
## 2010-05-01      -0.5509343              0.31
## 2010-06-01      -0.5603790             -0.16
## 2010-07-01      -0.5249580              0.15
## 2010-08-01      -0.4755661             -0.16
## 2010-09-01      -0.4502432             -0.20
## 2010-10-01      -0.4802891             -0.12
##        ...                                  
## 2024-01-01       0.5022386             -0.81
## 2024-02-01       0.6542051              0.39
## 2024-03-01       0.6373635             -0.15
## 2024-04-01       0.6437583             -0.39
## 2024-05-01       0.7247345              0.15
## 2024-06-01       0.7717545             -0.12
## 2024-07-01       0.7491633             -0.30
## 2024-08-01       0.7836867             -0.04
## 2024-09-01       0.7401146             -0.27
## 2024-10-01       0.7247451             -0.40

### Calculate the correlation coefficient between the two indices

# Correlation between the two index columns (cor()'s default method), using
# only dates on which both values are present.
correlation <- cor(
  combined_data[, "Diffusion_Index"],
  combined_data[, "Chicago_Fed_Index"],
  use = "complete.obs"
)
print(paste("Correlation Coefficient: ", round(correlation, 3)))
## [1] "Correlation Coefficient:  0.065"

###Plot of both indexes

# Convert the merged xts object to a data frame: the date index becomes the
# Date column and the two series keep their existing column names
# (Diffusion_Index, Chicago_Fed_Index).
combined_data_df <- data.frame(
  Date = index(combined_data),
  coredata(combined_data)
)

# Draw both indexes against the shared date axis; the colour labels inside
# aes() are mapped to fixed colours by the manual scale.
ggplot(combined_data_df, aes(x = Date)) +
  geom_line(aes(y = Diffusion_Index, colour = "Diffusion Index")) +
  geom_line(aes(y = Chicago_Fed_Index, colour = "Chicago Fed Index")) +
  ggtitle("Comparison of Diffusion Indexes") +
  xlab("Date") +
  ylab("Index Value") +
  theme_minimal() +
  scale_colour_manual(
    values = c("Diffusion Index" = "blue", "Chicago Fed Index" = "green")
  )

###Conclusion

#The diffusion index created using Industrial Production, Personal Consumption Expenditures, and the Unemployment Rate shows only a very weak correlation with the Chicago Fed National Activity Index (a coefficient of about 0.065 in the output above), so the two measures do not move closely together over this sample. The three indicators still give a broad picture of U.S. economic activity, but the index built here should not be treated as a substitute for the Chicago Fed index. Further analysis could investigate differences between the two indexes and explore what these differences mean for policy-making and economic forecasting.


#This analysis was conducted as part of the coursework for *Pompea College of Business, University of New Haven*. Thank you for reviewing this work.