# Read the file and clean it
# Build `trends`: load the CSV export, give the three columns descriptive
# names, and make sure `Date` is of class Date.
trends <-
  read_csv(
    file = "multiTimeline.csv",
    skip = 2,               # ignore the first two lines of the file
    show_col_types = FALSE  # do not print readr's column specification
  ) |>
  # Columns are renamed by position (new_name = column index).
  rename(Date = 1, Lincoln_Financial_Index = 2, John_Hancock_Index = 3) |>
  mutate(Date = as.Date(Date))
# Inspect the cleaned data: one line per column with its type and first values
trends |>
  glimpse()
## Rows: 262
## Columns: 3
## $ Date <date> 2020-10-25, 2020-11-01, 2020-11-08, 2020-11-1…
## $ Lincoln_Financial_Index <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ John_Hancock_Index <dbl> 51, 49, 58, 58, 54, 59, 60, 61, 48, 61, 71, 66…
# Print the first six rows of the cleaned data
trends |>
  head()
## # A tibble: 6 × 3
## Date Lincoln_Financial_Index John_Hancock_Index
## <date> <dbl> <dbl>
## 1 2020-10-25 0 51
## 2 2020-11-01 0 49
## 3 2020-11-08 0 58
## 4 2020-11-15 0 58
## 5 2020-11-22 0 54
## 6 2020-11-29 0 59
# Scatterplot of the two interest indices, overlaid with a fitted straight
# line (method = "lm") and its confidence band (se = TRUE).
ggplot(
  data = trends,
  mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
) +
  geom_point(alpha = 0.7, color = "#0072B2") +
  geom_smooth(method = "lm", color = "#D55E00", se = TRUE) +
  labs(
    x = "Lincoln Financial (Interest Index 0–100)",
    y = "John Hancock (Interest Index 0–100)",
    title = "Scatterplot: Lincoln Financial vs John Hancock (Google Trends)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between the two indices (cor()'s default method is Pearson).
# use = "complete.obs" drops any row where either value is missing.
correlation <- with(
  trends,
  cor(Lincoln_Financial_Index, John_Hancock_Index, use = "complete.obs")
)
correlation
## [1] 0.549074
# Simple linear regression: John Hancock index as the response, Lincoln
# Financial index as the single predictor. The formula is written inline so
# the "Call:" line of the printed summary shows it in full.
model <- lm(
  formula = John_Hancock_Index ~ Lincoln_Financial_Index,
  data = trends
)
summary(model)
##
## Call:
## lm(formula = John_Hancock_Index ~ Lincoln_Financial_Index, data = trends)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.383 -4.335 -0.335 3.445 30.665
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.33478 0.74785 78.00 <2e-16 ***
## Lincoln_Financial_Index 0.13643 0.01288 10.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.659 on 260 degrees of freedom
## Multiple R-squared: 0.3015, Adjusted R-squared: 0.2988
## F-statistic: 112.2 on 1 and 260 DF, p-value: < 2.2e-16
# Pull R-squared out of the fitted model's summary so it can be shown on the
# plot.
r2_val <- summary(model)[["r.squared"]]

# Regression plot: points, the fitted line without a confidence band
# (se = FALSE), and an R-squared label placed at a fixed position
# (x = 10, y = 90 in data units), left-aligned via hjust = 0.
ggplot(
  data = trends,
  mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "#D55E00", se = FALSE) +
  annotate(
    "text",
    x = 10,
    y = 90,
    hjust = 0,
    label = paste("R² =", round(r2_val, 3))
  ) +
  labs(
    x = "Lincoln Financial (Index)",
    y = "John Hancock (Index)",
    title = "Regression: John Hancock ~ Lincoln Financial"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
