---
title: "Chilean Travel Search Terms Analysis"
format:
  # All HTML rendering options must be nested under format -> html,
  # otherwise Quarto treats them as unrelated top-level keys.
  html:
    embed-resources: true
    toc: true
    toc-depth: 4
    toc-location: left
    number-sections: true
    code-overflow: wrap
    code-tools: true
    code-block-bg: false
    code-block-border-left: "#31BAE9"
    code-fold: show
    css: styles.css
    page-layout: full
editor: visual
editor_options:
  chunk_output_type: console
---
```{css, echo=FALSE}
/* Every tab pane stays in the layout but is collapsed to zero height and
   clipped (presumably so widgets in inactive tabs can still size themselves
   - confirm against the rendered report). */
.tab-content > .tab-pane {
  display: block;
  overflow: hidden;
  height: 0;
  padding: 0;
}

/* The active pane gets its natural height and inner spacing back. */
.tab-content > .active {
  display: block;
  height: auto;
  padding: 12px;
}
```
```{r, include = FALSE}
library(tidyverse)
library(ggplot2)
library(plotly)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
library(crosstalk)
library(knitr)
```
```{r setup, include=FALSE}
# Location of the Google Keyword Planner export. Set the KEYWORD_STATS_XLSX
# environment variable to render the report on another machine; when it is
# unset the original Google Drive path is used.
keyword_stats_path <- Sys.getenv(
  "KEYWORD_STATS_XLSX",
  unset = "~/Library/CloudStorage/GoogleDrive-aoife.ryan@sharecreative.com/.shortcut-targets-by-id/0BwEyzS8OvJgreXdPNGZKV2tyRjg/Share_Clients/data_science_project_work/google_keywords_analysis/data/USA - Keyword Stats 2024-01-16 at 09_12_04.xlsx"
)

# Read the export and build the analysis table `df`:
#   * the real header sits on row 36 of the sheet,
#   * numeric columns arrive as character and are converted,
#   * the two growth columns are turned from "12%" strings into fractions,
#   * `total2023` is the sum of the twelve monthly search columns,
#   * monthly columns are renamed searches_jan_2023 -> jan, etc.,
#   * rows with any missing value are dropped.
df <- readxl::read_xlsx(keyword_stats_path) %>%
  janitor::row_to_names(row_number = 36, remove_rows_above = FALSE) %>% # column names
  janitor::clean_names() %>%
  mutate(
    # monthly searches are stored as character; select them by name rather
    # than by position so a shifted column layout fails loudly
    across(searches_jan_2023:searches_dec_2023, as.numeric),
    yo_y_change = as.numeric(str_remove_all(yo_y_change, "%")) / 100,
    three_month_change =
      as.numeric(str_remove_all(three_month_change, "%")) / 100,
    avg_monthly_searches = as.numeric(avg_monthly_searches)
  ) %>%
  dplyr::select(-c(competition:in_plan)) %>%
  mutate(
    # total searches over the year; `.` is the table flowing through the pipe
    total2023 = rowSums(
      dplyr::select(., searches_jan_2023:searches_dec_2023),
      na.rm = TRUE
    )
  ) %>%
  rename_with(
    function(nm) sub("searches_(\\w+)_2023", "\\1", nm),
    starts_with("searches_")
  ) %>%
  drop_na()
```
```{r, eval = FALSE, echo = FALSE}
# Static bubble chart: three-month growth against average monthly searches.
# Bubble size encodes year-on-year growth; fill encodes average monthly
# searches on a log10 colour scale. The columns are already numeric after the
# setup chunk, so no extra coercion is needed here.
growth_plot <- ggplot(
  df,
  aes(
    x = avg_monthly_searches,
    y = three_month_change,
    size = yo_y_change,
    fill = avg_monthly_searches,
    text = keyword # not used by geom_point; presumably kept for a plotly conversion
  )
) +
  geom_point(alpha = 0.5, shape = 21, color = "black") +
  scale_size_continuous(
    range = c(.1, 10),
    name = "Year-on-year growth",
    guide = guide_legend(title.position = "top"),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_fill_viridis(
    trans = "log10",
    discrete = FALSE,
    name = "Average monthly searches",
    guide = guide_legend(title.position = "top"),
    option = "A",
    labels = scales::number_format()
  ) +
  scale_x_log10() +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  theme(legend.position = "bottom") +
  ylab("Growth") +
  xlab("Average monthly searches")

growth_plot

# Save the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave("viz/growth_total_search_viz.png", plot = growth_plot)
```
```{r, eval = FALSE, echo = FALSE}
# Exploratory plotly version of the growth chart: one scatter trace per month
# column, year-on-year change on the y axis, log-scaled x axis.
months <- tolower(month.abb)

# Marker colour and size come from the same month-independent quantity,
# so it is computed once up front.
log_avg_searches <- log10(as.numeric(df$avg_monthly_searches))

fig <- layout(plot_ly(), xaxis = list(type = "log"))
# (a custom viridis palette per month was considered here but never wired in)
for (month in months) {
  df_alt <- data.frame(
    y = df$yo_y_change,
    x = df[[month]],
    log_monthly_searches = log_avg_searches
  )
  fig <- add_trace(
    fig,
    data = df_alt, x = ~x, y = ~y,
    type = "scatter", mode = "markers",
    name = stringr::str_to_title(month),
    marker = list(
      color = ~log_monthly_searches,
      colorscale = "Viridis",
      size = ~log_monthly_searches
    )
  )
}
fig

# Single-month (January) variant that maps colour and size through plotly's
# own `color` / `size` arguments instead of a hand-built marker list.
df_test <- drop_na(
  mutate(df, log_monthly_searches = log10(as.numeric(avg_monthly_searches)))
)

jan_base <- plot_ly(type = "scatter", mode = "markers") %>%
  layout(xaxis = list(type = "log"))

jan_base %>%
  add_trace(
    data = df_test, x = ~.data[["jan"]], y = ~yo_y_change,
    size = ~as.numeric(avg_monthly_searches),
    color = ~log_monthly_searches
  )
```
```{r, eval = FALSE, echo = FALSE}
# Helper intended to make the graph render empty on load by pre-setting the
# crosstalk filter of a shared data group.
library(htmltools)

#' Wrap a widget with a script that sets its crosstalk filter after rendering
#'
#' @param w The widget (or tag) to display.
#' @param sharedData A crosstalk SharedData object; only its group name is used.
#' @param values Values to store in the group's "filter" variable; serialised
#'   to JSON and embedded in the generated script.
#' @return A browsable tag list holding the script (inside a head tag) and `w`.
set_values <- function(w, sharedData, values) {
  group_name <- sharedData$groupName()
  values_json <- jsonlite::toJSON(values)
  handler_js <- sprintf(
    "HTMLWidgets.addPostRenderHandler(function() { return crosstalk.group('%s').var('%s').set(%s); })",
    group_name, "filter", values_json
  )
  # TODO: attach htmlwidgets dependency to w?
  script_head <- tags$head(tags$script(handler_js))
  browsable(tagList(script_head, w))
}
```
```{r, eval = FALSE, echo = FALSE}
# Custom pseudo-log transform

#' Build a signed pseudo-logarithmic scale transformation
#'
#' The forward map is asinh(x / (2 * sigma)) / log(base) and the inverse is
#' 2 * sigma * sinh(x * log(base)); both derivatives are supplied as well.
#'
#' @param sigma Scaling factor applied inside the asinh.
#' @param base Logarithm base used to rescale the transformed values.
#' @return A transform object created by `scales::new_transform()`.
transform_pseudo_log_ar <- function(sigma = 1, base = exp(1)) {
  log_base <- log(base)

  forward <- function(x) asinh(x / (2 * sigma)) / log_base
  backward <- function(x) 2 * sigma * sinh(x * log_base)
  forward_slope <- function(x) {
    1 / (sqrt(4 + x^2 / sigma^2) * sigma * log_base)
  }
  backward_slope <- function(x) 2 * sigma * cosh(x * log_base) * log_base

  scales::new_transform(
    "pseudo_log",
    transform = forward,
    inverse = backward,
    d_transform = forward_slope,
    d_inverse = backward_slope
  )
}
```
```{r, echo = FALSE, warning = FALSE, fig.width = 10, fig.height = 6}
# Long-format data behind the interactive chart. Each keyword appears once per
# x-axis timeframe ("Average monthly searches" plus the twelve months) and per
# growth metric (three-month / yearly), so the crosstalk filters can pick one
# combination at a time.
plot_data <- df %>%
  filter(avg_monthly_searches != 0) %>%
  # pivot_longer() consumes avg_monthly_searches, so keep a copy to restore
  # afterwards; this avoids a self-join on keyword, which would duplicate rows
  # if a keyword ever appeared more than once
  mutate(avg_searches_keep = avg_monthly_searches) %>%
  pivot_longer(
    c(avg_monthly_searches, jan:dec),
    names_to = "xaxis", values_to = "posts"
  ) %>%
  rename(avg_monthly_searches = avg_searches_keep) %>%
  mutate(
    xaxis = factor(
      xaxis,
      levels = c("avg_monthly_searches", tolower(month.abb)),
      labels = c("Average monthly searches", month.name)
    ),
    keyword = str_replace_all(keyword, "^&", "and "),
    # drives both marker colour and marker size in the plot
    log_avg_posts = log(avg_monthly_searches)
  ) %>%
  pivot_longer(
    c(three_month_change, yo_y_change),
    names_to = "change_time", values_to = "change"
  ) %>%
  mutate(
    change_time = if_else(
      change_time == "three_month_change",
      "Three month growth", "Yearly growth"
    )
  )

# Shared between the plot and the filter widgets.
sd <- SharedData$new(plot_data)

# Extent of the x values; xmax carries 200000 of headroom beyond the largest
# value.
posts_range <- range(plot_data$posts)
xmin <- posts_range[1]
xmax <- posts_range[2] + 200000
#' Build a plotly layout shape for a dashed horizontal reference line
#'
#' The line is positioned in "paper" coordinates on the x axis (0 to 1), so it
#' always spans the full plot width, regardless of axis type (the chart uses a
#' log x axis) or of how the data are filtered.
#'
#' @param y Height of the line in y-axis data units.
#' @param color Line colour.
#' @return A list describing the shape, for use in `layout(shapes = list(...))`.
hline <- function(y = 0, color = "#4d4d4d") {
  list(
    type = "line",
    xref = "paper",
    x0 = 0,
    x1 = 1,
    y0 = y,
    y1 = y,
    # width is a property of the line itself, not of the shape
    line = list(color = color, dash = "dash", width = 0.5)
  )
}
# Interactive scatter plot driven by the shared data `sd`: x is the number of
# searches for the selected timeframe (log axis), y is the selected growth
# metric; colour and size both follow log(avg_monthly_searches).
log_scale_note <- paste0(
  "<span style='font-style:italic;'>*Note that this is a logarithmic scale, ",
  "values 1, 10, 100, 1000 etc. are equally spaced on the graph.\n",
  "A log scale means that keywords with 0 posts will not be displayed as ",
  "points on the graph</span>"
)

p <- sd %>%
  plot_ly() %>%
  # the trace inherits its data from plot_ly(sd); no need to pass sd again
  add_trace(
    x = ~posts, y = ~change,
    color = ~log_avg_posts,
    size = ~log_avg_posts, sizes = c(10, 30),
    colors = viridis_pal(option = "A")(nrow(sd$data())),
    type = "scatter", mode = "markers",
    hoverinfo = "text",
    text = ~paste(
      "Keyword:", keyword,
      "<br>Avg. Monthly Searches:", avg_monthly_searches
    ),
    marker = list(
      opacity = 0.5,
      showscale = FALSE,
      sizemode = "diameter",
      line = list(color = "black", width = 0.5)
    )
  ) %>%
  colorbar(title = list(text = "Log of Average Posts")) %>%
  layout(
    shapes = list(hline(0)),
    margin = list(b = 100), # room for the footnote below the axis
    # The x range is left to plotly's autorange: the previous scalar `range`
    # entries were not a valid [min, max] pair.
    xaxis = list(
      type = "log",
      title = "Number of Posts*",
      zeroline = FALSE, showline = TRUE, mirror = TRUE
    ),
    # y shows whichever growth metric is selected; values are fractions, so
    # format the ticks as percentages
    yaxis = list(
      zeroline = FALSE, showline = TRUE, mirror = TRUE,
      title = "Growth",
      tickformat = ".0%"
    ),
    showlegend = FALSE
  ) %>%
  group_by(xaxis, change_time) %>%
  add_annotations(
    text = log_scale_note,
    xref = "paper", yref = "paper",
    x = 0.5, y = -0.35, showarrow = FALSE,
    font = list(size = 10)
  )
# Filter controls shown in the left-hand column. The ids "select_xaxis" and
# "select_change" are looked up by the JavaScript chunk that sets the default
# selections, so they must not be renamed.
filter_panel <- list(
  # single-choice selectors
  filter_select(
    id = "select_xaxis", label = "x-axis timeframe",
    sharedData = sd, group = ~xaxis, multiple = FALSE
  ),
  filter_select(
    id = "select_change", label = "Growth Metric",
    sharedData = sd, group = ~change_time, multiple = FALSE
  ),
  # free keyword search (multiple selection allowed)
  filter_select(
    id = "select_keyword", label = "Keyword",
    sharedData = sd, group = ~keyword
  ),
  # numeric range sliders
  filter_slider(
    id = "slider_monthly_posts", label = "Monthly Posts",
    sharedData = sd, column = ~posts
  ),
  filter_slider(
    id = "slider_change", label = "Growth (%)",
    sharedData = sd, column = ~change
  )
)

# Three-column-wide filter panel next to a nine-column-wide plot.
p2 <- bscols(widths = c(3, 9), filter_panel, p)
p2
```
```{js, echo = FALSE}
/**
 * Select `value` in the selectize control rendered inside the element with
 * the given id. Does nothing if the element or its selectize instance is
 * missing, so one absent filter cannot stop the others from being set.
 */
function set_filter_value(filterId, value) {
  var container = document.getElementById(filterId);
  if (!container) {
    return;
  }
  var inputs = container.getElementsByClassName("selectized");
  if (inputs.length === 0 || !inputs[0].selectize) {
    return;
  }
  inputs[0].selectize.setValue(value, false);
}

/**
 * Apply the default filter selections. The values must match the option
 * labels exactly (case-sensitive): "Yearly growth", not "Yearly Growth".
 */
function filter_default() {
  set_filter_value("select_xaxis", "Average monthly searches");
  set_filter_value("select_change", "Yearly growth");
}

// Register as a listener instead of assigning window.onload, so other load
// handlers on the page are not overwritten.
if (typeof window !== "undefined") {
  window.addEventListener("load", filter_default);
}
```