library(tidyverse)
library(readr)
library(ggplot2)
# Read the export; the first two lines of the file are metadata, so skip them.
raw <- read_csv(
  file = "multiTimeline.csv",
  skip = 2,
  show_col_types = FALSE
)
# Name the three columns by position and make sure Date is stored as a Date.
trends <- raw |>
  rename(Date = 1, Lincoln_Financial_Index = 2, John_Hancock_Index = 3) |>
  mutate(Date = as.Date(Date))
# Check column types and the first few values of the cleaned table.
glimpse(trends)
## Rows: 262
## Columns: 3
## $ Date <date> 2020-10-25, 2020-11-01, 2020-11-08, 2020-11-1…
## $ Lincoln_Financial_Index <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ John_Hancock_Index <dbl> 51, 49, 58, 58, 54, 59, 60, 61, 48, 61, 71, 66…
# Preview the first six rows of the cleaned data.
trends |>
  head(n = 6L)
## # A tibble: 6 × 3
## Date Lincoln_Financial_Index John_Hancock_Index
## <date> <dbl> <dbl>
## 1 2020-10-25 0 51
## 2 2020-11-01 0 49
## 3 2020-11-08 0 58
## 4 2020-11-15 0 58
## 5 2020-11-22 0 54
## 6 2020-11-29 0 59
# Scatter plot of the two indices with a fitted straight line (method = "lm")
# and its standard-error band (se = TRUE).
trends |>
  ggplot(
    mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
  ) +
  geom_point(colour = "#0072B2", alpha = 0.7) +
  geom_smooth(method = "lm", colour = "#D55E00", se = TRUE) +
  theme_minimal() +
  labs(
    x = "Lincoln Financial (Index 0–100)",
    y = "John Hancock (Index 0–100)",
    title = "Lincoln Financial vs John Hancock (Google Trends)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Correlation between the two index columns, computed only on rows where
# both values are present (use = "complete.obs").
cor_val <- with(
  trends,
  cor(Lincoln_Financial_Index, John_Hancock_Index, use = "complete.obs")
)
# Print the coefficient.
cor_val
## [1] 0.549074
Interpretation:
- r ≈ 1: Strong positive relationship
- r ≈ 0: No relationship
- r ≈ −1: Strong negative relationship

Here r ≈ 0.55, which indicates a moderate positive relationship between the two search indices.
Model: \[ \text{John Hancock} = \beta_0 + \beta_1(\text{Lincoln Financial}) + \varepsilon \]
# Simple linear regression: John Hancock index as the response,
# Lincoln Financial index as the single predictor.
model <- lm(
  formula = John_Hancock_Index ~ Lincoln_Financial_Index,
  data = trends
)
# Print coefficients, residual quantiles and fit statistics.
summary(model)
##
## Call:
## lm(formula = John_Hancock_Index ~ Lincoln_Financial_Index, data = trends)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.383 -4.335 -0.335 3.445 30.665
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.33478 0.74785 78.00 <2e-16 ***
## Lincoln_Financial_Index 0.13643 0.01288 10.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.659 on 260 degrees of freedom
## Multiple R-squared: 0.3015, Adjusted R-squared: 0.2988
## F-statistic: 112.2 on 1 and 260 DF, p-value: < 2.2e-16
# Extract R-squared from the fitted model so it can be shown on the plot.
r2_val <- summary(model)[["r.squared"]]
# Same scatter with the lm line but no error band (se = FALSE); the R²
# value, rounded to three decimals, is written at a fixed position.
trends |>
  ggplot(
    mapping = aes(x = Lincoln_Financial_Index, y = John_Hancock_Index)
  ) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", colour = "#D55E00", se = FALSE) +
  annotate(
    "text",
    x = 10,
    y = 90,
    label = paste("R² =", round(r2_val, 3)),
    hjust = 0
  ) +
  labs(
    title = "Regression: John Hancock ~ Lincoln Financial",
    x = "Lincoln Financial (Index)",
    y = "John Hancock (Index)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'