Pinterest Trends (Growing + Top Yearly)

library(ggplot2)
library(dplyr)
library(tidyr)

# 1. Load the 'Growing Trends' export.
#    The header row must be the first line read after the skipped preamble:
#    use skip = 10 when the header is on line 11, skip = 9 when it is on line 10.
df_growing <- read.csv(
  file = "Pinterest_Trends_Data.csv",
  header = TRUE,
  skip = 10,                # or 9, depending on the file
  row.names = NULL,
  check.names = FALSE,      # keep headers such as "Weekly change" as written
  stringsAsFactors = FALSE,
  fill = TRUE,
  comment.char = ""
)

# Show which column headers were picked up from the CSV
writeLines("GROWING TRENDS - Column names:")

## GROWING TRENDS - Column names:

print(colnames(df_growing))

##  [1] "Rank"           "Trend"          "Weekly change"  "Monthly change"
##  [5] "Yearly change"  "2024-12-16"     "2024-12-23"     "2024-12-30"    
##  [9] "2025-01-06"     "2025-01-13"     "2025-01-20"     "2025-01-27"    
## [13] "2025-02-03"     "2025-02-10"     "2025-02-17"     "2025-02-24"    
## [17] "2025-03-03"     "2025-03-10"     "2025-03-17"     "2025-03-24"    
## [21] "2025-03-31"     "2025-04-07"     "2025-04-14"     "2025-04-21"    
## [25] "2025-04-28"     "2025-05-05"     "2025-05-12"     "2025-05-19"    
## [29] "2025-05-26"     "2025-06-02"     "2025-06-09"

# Turn the Weekly/Monthly/Yearly change columns into numbers.
# Only columns actually present in the data are touched; thousands separators
# (",") and percent signs ("%") are stripped before the numeric conversion.
for (col in intersect(
  c("Weekly change", "Monthly change", "Yearly change"),
  names(df_growing)
)) {
  df_growing[[col]] <- as.numeric(gsub("[,%]", "", df_growing[[col]]))
}

# Preview the first six rows
head(df_growing, n = 6L)

##   Rank                Trend Weekly change Monthly change Yearly change
## 1    1         spring nails            20            900             0
## 2    2 future wedding plans            30            100          7000
## 3    3   home aesthetic diy            40            100            NA
## 4    4    winter hair braid            40             30            NA
## 5    5  date planning inspo            50             20            NA
## 6    6   cherry nails inspo            40             20            NA
##   2024-12-16 2024-12-23 2024-12-30 2025-01-06 2025-01-13 2025-01-20 2025-01-27
## 1          1          1          1          2          2          4          7
## 2         13         16         45         51         37         38         63
## 3         11          7         36         50         36         36         61
## 4         34         60         50         48         33         36         59
## 5         53         56         70         45         30         37         57
## 6         51         61         50         48         33         34         59
##   2025-02-03 2025-02-10 2025-02-17 2025-02-24 2025-03-03 2025-03-10 2025-03-17
## 1         14         22         39         48         55         66         71
## 2         74        100         NA         NA         NA         NA         NA
## 3         72        100         NA         NA         NA         NA         NA
## 4         70        100         NA         NA         NA         NA         NA
## 5         69        100         NA         NA         NA         NA         NA
## 6         70        100         NA         NA         NA         NA         NA
##   2025-03-24 2025-03-31 2025-04-07 2025-04-14 2025-04-21 2025-04-28 2025-05-05
## 1         58         46         35         29         25         21         18
## 2         NA         NA         NA         NA         NA         NA         NA
## 3         NA         NA         NA         NA         NA         NA         NA
## 4         NA         NA         NA         NA         NA         NA         NA
## 5         NA         NA         NA         NA         NA         NA         NA
## 6         NA         NA         NA         NA         NA         NA         NA
##   2025-05-12 2025-05-19 2025-05-26 2025-06-02 2025-06-09
## 1         12          7          4         NA         NA
## 2         NA         NA         NA         NA         NA
## 3         NA         NA         NA         NA         NA
## 4         NA         NA         NA         NA         NA
## 5         NA         NA         NA         NA         NA
## 6         NA         NA         NA         NA         NA

# Example: Plot "Yearly change" by "Trend" for the Growing Trends data.
# Trends without a "Yearly change" value are dropped first: passing them to
# the plot makes ggplot2 warn about removed rows, and they have no bar to
# draw. This matches the NA filtering done for the Top Yearly Trends charts.
growing_with_yearly <- df_growing[!is.na(df_growing[["Yearly change"]]), ]

ggplot(
  growing_with_yearly,
  aes(x = reorder(Trend, `Yearly change`), y = `Yearly change`)
) +
  geom_col(fill = "turquoise") +  # bar height is taken from y as-is
  coord_flip() +                  # horizontal bars keep trend names readable
  labs(
    title = "Growing Trends: Yearly Change by Trend",
    x = "Trend",
    y = "Yearly Change"
  ) +
  theme_minimal()

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_bar()`).

2. Top Yearly Trends CSV

library(readr)
library(dplyr)
library(ggplot2)

# Step 1: load the Top Yearly Trends export, skipping the 10 metadata lines
df_yearly <- read_csv(file = "Pinterest_Top_Yearly_Trends.csv", skip = 10)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 50 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Trend, Weekly change, Monthly change, Yearly change
## dbl (54): Rank, Normalized volume, 2024-03-11, 2024-03-18, 2024-03-25, 2024-...
## num  (1): 2025-03-10
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Step 2: keep only the first column for each distinct column name
df_yearly <- df_yearly[!duplicated(names(df_yearly))]

# Step 3: add numeric versions of the change and volume columns
#         (values go through as.character() so numeric input is accepted too)
df_yearly <- df_yearly %>%
  mutate(yearly_change = parse_number(as.character(`Yearly change`))) %>%
  mutate(normalized_volume = parse_number(as.character(`Normalized volume`)))

# Step 4: keep only the weekly columns dated today or earlier
date_cols <- names(df_yearly)[
  grepl("^202[4-5]-\\d{2}-\\d{2}$", names(df_yearly))
]
valid_dates <- date_cols[as.Date(date_cols) <= Sys.Date()]
df_yearly <- df_yearly %>%
  select(Trend, yearly_change, normalized_volume, all_of(valid_dates))

# Step 5: prefer rows with a positive yearly change
#         (filter() keeps only rows where the condition is TRUE, so NA rows
#         are dropped as well)
df_plot <- df_yearly %>% filter(yearly_change > 0)

if (nrow(df_plot) > 0) {
  aes_map    <- aes(x = reorder(Trend, yearly_change), y = yearly_change)
  plot_title <- "Top Yearly Trends: Yearly Change by Trend"
  y_label    <- "Yearly Change (%)"
} else {
  # Nothing usable in yearly_change: fall back to the normalized volume
  message("No valid yearly_change → using normalized_volume")
  df_plot    <- df_yearly %>% filter(!is.na(normalized_volume))
  aes_map    <- aes(x = reorder(Trend, normalized_volume), y = normalized_volume)
  plot_title <- "Top Yearly Trends: Normalized Volume by Trend"
  y_label    <- "Normalized Volume"
}

# Step 6: horizontal bar chart of the chosen metric
ggplot(data = df_plot, mapping = aes_map) +
  geom_col(fill = "mediumpurple1") +
  coord_flip() +
  labs(title = plot_title, x = NULL, y = y_label) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

library(readr)
library(dplyr)
library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(ggplot2)

# Re-read the export, normalise the column names with janitor::clean_names(),
# then make sure rank, yearly_change and normalized_volume are numeric
df_yearly <- read_csv("Pinterest_Top_Yearly_Trends.csv", skip = 10)
df_yearly <- clean_names(df_yearly)
df_yearly <- mutate(
  df_yearly,
  rank              = parse_number(as.character(rank)),
  yearly_change     = parse_number(as.character(yearly_change)),
  normalized_volume = parse_number(as.character(normalized_volume))
)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 50 Columns: 59

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Trend, Weekly change, Monthly change, Yearly change
## dbl (54): Rank, Normalized volume, 2024-03-11, 2024-03-18, 2024-03-25, 2024-...
## num  (1): 2025-03-10
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Drop future-date columns.
# clean_names() above rewrites the weekly headers from "2024-03-11" to
# "x2024_03_11", so the columns are matched and parsed in that cleaned form.
# Matching the raw "YYYY-MM-DD" pattern finds nothing here, which would make
# select() silently discard every weekly column instead of only future ones.
date_cols <- grep("^x202[4-5]_\\d{2}_\\d{2}$", names(df_yearly), value = TRUE)
keep_dates <- date_cols[
  as.Date(date_cols, format = "x%Y_%m_%d") <= Sys.Date()
]
df_yearly <- select(
  df_yearly,
  trend, rank, yearly_change, normalized_volume,
  all_of(keep_dates)
)

# Ten best-ranked trends (smallest rank first)
df_top10 <- head(arrange(df_yearly, rank), n = 10)

# Pick the metric: yearly_change when at least one top-10 value is positive,
# otherwise fall back to normalized_volume
if (any(df_top10$yearly_change > 0, na.rm = TRUE)) {
  aes_map    <- aes(x = reorder(trend, yearly_change), y = yearly_change)
  plot_title <- "Top 10 Yearly Trends: Yearly Change by Trend"
  y_label    <- "Yearly Change (%)"
} else {
  message("No valid yearly_change in top 10 → plotting normalized_volume")
  aes_map    <- aes(x = reorder(trend, normalized_volume), y = normalized_volume)
  plot_title <- "Top 10 Yearly Trends: Normalized Volume by Trend"
  y_label    <- "Normalized Volume"
}

# Horizontal bar chart of the chosen metric
ggplot(data = df_top10, mapping = aes_map) +
  geom_col(fill = "magenta") +
  coord_flip() +
  labs(title = plot_title, x = NULL, y = y_label) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

# Scatter plot of yearly change against normalized volume, overlaid with a
# linear-model fit line (red, no confidence band)
ggplot(
  data = df_yearly,
  mapping = aes(x = normalized_volume, y = yearly_change)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  ggtitle("Top Yearly Trends: Volume vs Yearly Change") +
  xlab("Normalized Volume") +
  ylab("Yearly Change (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## `geom_smooth()` using formula = 'y ~ x'

Hypothesis: Certain trends appear in all three lists (Growing, Monthly, Yearly), suggesting universal, sustained popularity

library(ggvenn)

## Loading required package: grid

# Hand-entered example terms for each list; "nails" is the only term that
# occurs in all three sets
trend_sets <- list(
  Growing = c("nails", "spring nails", "outfit ideas"),
  Yearly  = c("nails", "hairstyles", "wallpaper"),
  Monthly = c("nails", "outfit ideas", "wallpaper backgrounds")
)

# Three-set Venn diagram showing counts only (percentages switched off)
ggvenn(
  data = trend_sets,
  fill_color = c("#CAB2D6", "#B2DFEE", "#FDB9C8"),  # light purple, light blue, pink
  set_name_color = "black",
  stroke_size = 0.5,
  show_percentage = FALSE
)