library(tidyverse)
library(readr)
library(ggplot2)

2. Scatterplot with regression line

# Scatterplot of the John Hancock index (y) against the Lincoln Financial
# index (x), overlaid with a fitted linear-regression line and its
# confidence band (se = TRUE).
ggplot(
  data = trends,
  mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
) +
  # Semi-transparent points so overlapping observations remain visible.
  geom_point(color = "#0072B2", alpha = 0.7) +
  geom_smooth(method = "lm", color = "#D55E00", se = TRUE) +
  theme_minimal() +
  labs(
    x = "Lincoln Financial (Index 0–100)",
    y = "John Hancock (Index 0–100)",
    title = "Lincoln Financial vs John Hancock (Google Trends)"
  )
## `geom_smooth()` using formula = 'y ~ x'


3. Correlation coefficient

# Correlation between the two index columns of `trends`.
# use = "complete.obs" restricts the computation to rows where both
# values are present.
cor_val <- cor(
  x = trends[["Lincoln_Financial_Index"]],
  y = trends[["John_Hancock_Index"]],
  use = "complete.obs"
)

# Print the coefficient.
cor_val
## [1] 0.549074

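Interpretation:

- r ≈ 1: Strong positive relationship
- r ≈ 0: No linear relationship
- r ≈ −1: Strong negative relationship

Here r ≈ 0.55, which indicates a moderate positive linear relationship between the two indices.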


4. Simple linear regression

Model: \[ \text{John Hancock} = \beta_0 + \beta_1(\text{Lincoln Financial}) + \varepsilon \]

# Simple linear regression: John Hancock index as the response,
# Lincoln Financial index as the single predictor.
model <- lm(
  formula = John_Hancock_Index ~ Lincoln_Financial_Index,
  data = trends
)

# Print coefficients, residual summary, R-squared and the F-test.
summary(model)
## 
## Call:
## lm(formula = John_Hancock_Index ~ Lincoln_Financial_Index, data = trends)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.383  -4.335  -0.335   3.445  30.665 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             58.33478    0.74785   78.00   <2e-16 ***
## Lincoln_Financial_Index  0.13643    0.01288   10.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.659 on 260 degrees of freedom
## Multiple R-squared:  0.3015, Adjusted R-squared:  0.2988 
## F-statistic: 112.2 on 1 and 260 DF,  p-value: < 2.2e-16

5. Visualize regression with R²

# Pull the (unadjusted) R-squared out of the fitted model's summary.
r2_val <- summary(model)[["r.squared"]]

# Scatterplot with the fitted regression line (no confidence band) and
# the R-squared value printed as a left-aligned text label at (10, 90).
ggplot(
  data = trends,
  mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "#D55E00", se = FALSE) +
  annotate(
    "text",
    x = 10,
    y = 90,
    hjust = 0,
    label = paste0("R² = ", round(r2_val, 3))
  ) +
  theme_minimal() +
  labs(
    x = "Lincoln Financial (Index)",
    y = "John Hancock (Index)",
    title = "Regression: John Hancock ~ Lincoln Financial"
  )
## `geom_smooth()` using formula = 'y ~ x'