library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(ggplot2)

# Read the file and clean it

# Load the Google Trends export, skipping the first two lines of the file
# before the header row is read, then tidy it up:
#   * rename the first three columns by position, and
#   * coerce the date column to class Date.
trends <- read_csv(
  file = "multiTimeline.csv",
  skip = 2,
  show_col_types = FALSE
) |>
  rename(
    Date = 1,
    Lincoln_Financial_Index = 2,
    John_Hancock_Index = 3
  ) |>
  mutate(Date = as.Date(Date))

# Inspect the cleaned data: row/column counts, column types, leading values

trends |>
  glimpse()
## Rows: 262
## Columns: 3
## $ Date                    <date> 2020-10-25, 2020-11-01, 2020-11-08, 2020-11-1…
## $ Lincoln_Financial_Index <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ John_Hancock_Index      <dbl> 51, 49, 58, 58, 54, 59, 60, 61, 48, 61, 71, 66…
trends |>
  head()
## # A tibble: 6 × 3
##   Date       Lincoln_Financial_Index John_Hancock_Index
##   <date>                       <dbl>              <dbl>
## 1 2020-10-25                       0                 51
## 2 2020-11-01                       0                 49
## 3 2020-11-08                       0                 58
## 4 2020-11-15                       0                 58
## 5 2020-11-22                       0                 54
## 6 2020-11-29                       0                 59
# Scatterplot of the two interest indices, overlaid with a straight-line fit
# (method = "lm") and its confidence band (se = TRUE).
ggplot(
  data = trends,
  mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
) +
  geom_point(colour = "#0072B2", alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, colour = "#D55E00") +
  theme_minimal() +
  labs(
    title = "Scatterplot: Lincoln Financial vs John Hancock (Google Trends)",
    x = "Lincoln Financial (Interest Index 0–100)",
    y = "John Hancock (Interest Index 0–100)"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between the two indices; rows with a missing value in either
# series are excluded (use = "complete.obs").
correlation <- with(
  trends,
  cor(Lincoln_Financial_Index, John_Hancock_Index, use = "complete.obs")
)
correlation
## [1] 0.549074
# Simple linear regression: John Hancock index as the response,
# Lincoln Financial index as the single predictor.
model <- lm(
  formula = John_Hancock_Index ~ Lincoln_Financial_Index,
  data = trends
)
summary(model)
## 
## Call:
## lm(formula = John_Hancock_Index ~ Lincoln_Financial_Index, data = trends)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.383  -4.335  -0.335   3.445  30.665 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             58.33478    0.74785   78.00   <2e-16 ***
## Lincoln_Financial_Index  0.13643    0.01288   10.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.659 on 260 degrees of freedom
## Multiple R-squared:  0.3015, Adjusted R-squared:  0.2988 
## F-statistic: 112.2 on 1 and 260 DF,  p-value: < 2.2e-16
# Pull R-squared out of the fitted model so it can be shown on the plot.
r2_val <- summary(model)$r.squared

# Regression plot: the raw points, the fitted straight line (method = "lm",
# no confidence band), and a text label reporting the model's R-squared.
ggplot(trends, aes(Lincoln_Financial_Index, John_Hancock_Index)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "#D55E00") +
  annotate(
    "text",
    x = 10, y = 90,
    # sprintf() always prints exactly three decimals; pasting a round()ed
    # number would drop trailing zeros (e.g. "0.3" instead of "0.300").
    label = paste("R² =", sprintf("%.3f", r2_val)),
    hjust = 0 # left-align the label so it starts at x
  ) +
  labs(
    title = "Regression: John Hancock ~ Lincoln Financial",
    x = "Lincoln Financial (Index)",
    y = "John Hancock (Index)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'