# NOTE(review): this script calls %>%, left_join(), mutate(), filter(),
# na.locf() and rmse() but no library() call is visible before this point --
# confirm the providing packages (presumably dplyr, zoo and Metrics) are
# attached earlier.

# Directory holding the downloaded series CSVs. Change this one value to run
# the script against a different download location.
data_dir <- "C:/Users/marsh/Downloads"

# One CSV per series. The rest of the script relies on each file having an
# `observation_date` column plus a value column named after the series.
DEXBZUS <- read.csv(file.path(data_dir, "DEXBZUS.csv"))
DCOILWTICO <- read.csv(file.path(data_dir, "DCOILWTICO.csv"))
DGS10 <- read.csv(file.path(data_dir, "DGS10.csv"))
SP500 <- read.csv(file.path(data_dir, "SP500.csv"))

# Join the three predictor series onto the exchange-rate series, one after
# another, matching on observation_date. DEXBZUS is the left table, so its
# dates define the rows of the result.
merged_df <- Reduce(
  function(joined, series) left_join(joined, series, by = "observation_date"),
  list(DGS10, DCOILWTICO, SP500),
  DEXBZUS
)

# Convert 'observation_date' to Date (a no-op if it already is one) so the
# train/test split further down compares real dates.
merged_df$observation_date <- as.Date(merged_df$observation_date)

# Forward-fill gaps: each missing value takes the last observed value above
# it in the same column (rows are assumed to be in chronological order).
# na.rm = FALSE is required: by default na.locf() drops leading NAs and
# returns a shorter vector, which mutate() rejects because every column must
# keep the same number of rows. With na.rm = FALSE, leading NAs (which have
# no earlier value to carry forward) are simply left as NA.
merged_df <- merged_df %>%
  mutate(across(everything(), ~ na.locf(.x, na.rm = FALSE)))



# Chronological hold-out split: observations dated up to and including
# 2023-12-31 are used for fitting, everything after that date for testing.
train_data <- filter(merged_df, observation_date <= as.Date("2023-12-31"))
test_data <- filter(merged_df, observation_date > as.Date("2023-12-31"))

# Fit three nested linear regressions of DEXBZUS on the training data,
# from the smallest specification to the largest:
#   modelll: DGS10 only
#   modell:  DGS10 + DCOILWTICO
#   model:   DGS10 + DCOILWTICO + SP500
modelll <- lm(formula = DEXBZUS ~ DGS10, data = train_data)
modell <- lm(formula = DEXBZUS ~ DGS10 + DCOILWTICO, data = train_data)
model <- lm(formula = DEXBZUS ~ DGS10 + DCOILWTICO + SP500, data = train_data)

# Inspect the full three-predictor fit (DGS10 + DCOILWTICO + SP500):
# coefficient estimates with their significance, residual standard error
# and R-squared, to judge how well the predictors are performing.
summary(model)
## 
## Call:
## lm(formula = DEXBZUS ~ DGS10 + DCOILWTICO + SP500, data = train_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55520 -0.14108 -0.01836  0.15864  0.48975 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.613e+00  6.050e-02  92.776   <2e-16 ***
## DGS10       -1.020e-01  6.575e-03 -15.512   <2e-16 ***
## DCOILWTICO  -1.099e-03  4.671e-04  -2.353   0.0188 *  
## SP500       -1.702e-05  1.867e-05  -0.912   0.3622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2085 on 975 degrees of freedom
## Multiple R-squared:  0.3564, Adjusted R-squared:  0.3545 
## F-statistic:   180 on 3 and 975 DF,  p-value: < 2.2e-16
# Same diagnostics for the two-predictor fit (DGS10 + DCOILWTICO, no SP500).
summary(modell)
## 
## Call:
## lm(formula = DEXBZUS ~ DGS10 + DCOILWTICO, data = train_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55725 -0.14273 -0.01624  0.16113  0.49032 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.5619714  0.0226946 245.079  < 2e-16 ***
## DGS10       -0.1023153  0.0065656 -15.584  < 2e-16 ***
## DCOILWTICO  -0.0013324  0.0003909  -3.408  0.00068 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2085 on 976 degrees of freedom
## Multiple R-squared:  0.3559, Adjusted R-squared:  0.3546 
## F-statistic: 269.6 on 2 and 976 DF,  p-value: < 2.2e-16
# Same diagnostics for the single-predictor fit (DGS10 only).
summary(modelll)
## 
## Call:
## lm(formula = DEXBZUS ~ DGS10, data = train_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59895 -0.14141 -0.01035  0.16508  0.49454 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.50043    0.01382  397.94   <2e-16 ***
## DGS10       -0.11652    0.00510  -22.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2096 on 977 degrees of freedom
## Multiple R-squared:  0.3482, Adjusted R-squared:  0.3476 
## F-statistic:   522 on 1 and 977 DF,  p-value: < 2.2e-16
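## modell (DGS10 + DCOILWTICO) is the best model: it has the highest
## adjusted R-squared (0.3546, versus 0.3545 for the full model and 0.3476
## for DGS10 alone) and both of its predictors are significant, whereas
## SP500 is not significant in the full model (p = 0.36).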


# Score the hold-out period with the two-predictor model and store the
# fitted exchange rate next to the observed one.
test_data[["predicted_DEXBZUS"]] <- predict(object = modell, newdata = test_data)

# Show the first six test rows: date, observed rate and predicted rate
# side by side.
head(test_data[c("observation_date", "DEXBZUS", "predicted_DEXBZUS")], n = 6L)
##   observation_date DEXBZUS predicted_DEXBZUS
## 1       2024-01-01  4.8521          5.069203
## 2       2024-01-02  4.8943          5.063733
## 3       2024-01-03  4.9239          5.064695
## 4       2024-01-04  4.9143          5.057295
## 5       2024-01-05  4.8744          5.048998
## 6       2024-01-08  4.8745          5.057008
# Out-of-sample accuracy: root-mean-squared error between the observed and
# predicted exchange rate over the test period. Compare it with the residual
# standard error reported by summary(modell) to see how much worse the model
# does on unseen data than on the training data.
rmse_value <- rmse(
  test_data[["DEXBZUS"]],
  test_data[["predicted_DEXBZUS"]]
)
rmse_value
## [1] 0.5947066
# Single-row scenario for 30 April 2025. The predictor values below are
# illustrative assumptions (examples), not observed data.
forecast_april_2025 <- data.frame(
  observation_date = as.Date("2025-04-30"),
  DGS10 = 4,          # assumed U.S. 10-year Treasury yield
  DCOILWTICO = 80,    # assumed crude oil price
  SP500 = 4500        # assumed S&P 500 index level
)

# Point forecast of the exchange rate for the 30 April 2025 scenario.
# modell uses only DGS10 and DCOILWTICO, so the SP500 value in the scenario
# has no effect on this number.
predicted_brl_april_2025 <- predict(
  object = modell,
  newdata = forecast_april_2025
)

# Display the point forecast
predicted_brl_april_2025
##        1 
## 5.046119
# 95% confidence interval for the *mean* exchange rate at the scenario's
# predictor values. This only reflects uncertainty in the estimated
# regression line, so it is much narrower than the uncertainty of the actual
# rate on a single future day; it must not be read as the range the rate is
# expected to fall in.
forecast_with_ci <- predict(
  modell,
  newdata = forecast_april_2025,
  interval = "confidence",
  level = 0.95
)

# View the fitted value and the confidence interval for the mean
forecast_with_ci
##        fit      lwr     upr
## 1 5.046119 5.024209 5.06803

# 95% prediction interval for a single new observation. This adds the
# residual variance to the uncertainty above and is the appropriate interval
# to quote for the forecasted value itself.
forecast_with_pi <- predict(
  modell,
  newdata = forecast_april_2025,
  interval = "prediction",
  level = 0.95
)

# View the fitted value and its prediction interval
forecast_with_pi

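So, the predicted value for April 30th, 2025 is about 5.05, meaning that 1 U.S. dollar is forecast to trade for roughly 5.05 Brazilian reais on that date, under the assumed inputs of a 10-year Treasury yield of 4.0 and a crude oil price of 80.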