library(ggplot2)
library(dplyr)
library(tidyr)
# 1. Load the 'Growing Trends' CSV
# `skip` drops the lines that precede the header row: keep 10 when the header
# is on line 11 of the file, use 9 when it is on line 10.
# `check.names = FALSE` leaves headers such as "Weekly change" and the weekly
# date columns exactly as written in the file.
df_growing <- read.csv(
  file = "Pinterest_Trends_Data.csv",
  header = TRUE,
  skip = 10, # or 9, depending on the file
  row.names = NULL,
  check.names = FALSE,
  stringsAsFactors = FALSE,
  fill = TRUE,
  comment.char = ""
)
# Show which columns were read, so the header row / skip value can be checked.
cat("GROWING TRENDS - Column names:", "\n", sep = "")
## GROWING TRENDS - Column names:
print(colnames(df_growing))
## [1] "Rank" "Trend" "Weekly change" "Monthly change"
## [5] "Yearly change" "2024-12-16" "2024-12-23" "2024-12-30"
## [9] "2025-01-06" "2025-01-13" "2025-01-20" "2025-01-27"
## [13] "2025-02-03" "2025-02-10" "2025-02-17" "2025-02-24"
## [17] "2025-03-03" "2025-03-10" "2025-03-17" "2025-03-24"
## [21] "2025-03-31" "2025-04-07" "2025-04-14" "2025-04-21"
## [25] "2025-04-28" "2025-05-05" "2025-05-12" "2025-05-19"
## [29] "2025-05-26" "2025-06-02" "2025-06-09"
# Turn the Weekly/Monthly/Yearly change columns into numbers.
# Only the columns actually present in the data are touched; commas and
# percent signs are stripped before coercion, and anything that still is not
# a number becomes NA.
for (change_col in intersect(
  c("Weekly change", "Monthly change", "Yearly change"),
  names(df_growing)
)) {
  df_growing[[change_col]] <- as.numeric(
    gsub("[,%]", "", df_growing[[change_col]])
  )
}
# Peek at the first six rows to sanity-check the parsed columns
head(df_growing, n = 6L)
## Rank Trend Weekly change Monthly change Yearly change
## 1 1 spring nails 20 900 0
## 2 2 future wedding plans 30 100 7000
## 3 3 home aesthetic diy 40 100 NA
## 4 4 winter hair braid 40 30 NA
## 5 5 date planning inspo 50 20 NA
## 6 6 cherry nails inspo 40 20 NA
## 2024-12-16 2024-12-23 2024-12-30 2025-01-06 2025-01-13 2025-01-20 2025-01-27
## 1 1 1 1 2 2 4 7
## 2 13 16 45 51 37 38 63
## 3 11 7 36 50 36 36 61
## 4 34 60 50 48 33 36 59
## 5 53 56 70 45 30 37 57
## 6 51 61 50 48 33 34 59
## 2025-02-03 2025-02-10 2025-02-17 2025-02-24 2025-03-03 2025-03-10 2025-03-17
## 1 14 22 39 48 55 66 71
## 2 74 100 NA NA NA NA NA
## 3 72 100 NA NA NA NA NA
## 4 70 100 NA NA NA NA NA
## 5 69 100 NA NA NA NA NA
## 6 70 100 NA NA NA NA NA
## 2025-03-24 2025-03-31 2025-04-07 2025-04-14 2025-04-21 2025-04-28 2025-05-05
## 1 58 46 35 29 25 21 18
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## 2025-05-12 2025-05-19 2025-05-26 2025-06-02 2025-06-09
## 1 12 7 4 NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
# Example: plot "Yearly change" by "Trend" for the Growing Trends data.
# Rows without a yearly figure are filtered out first; if they are passed to
# ggplot they stay on the axis as empty categories and trigger the
# "Removed rows containing missing values" warning.
df_growing %>%
  dplyr::filter(!is.na(`Yearly change`)) %>%
  ggplot(aes(x = reorder(Trend, `Yearly change`), y = `Yearly change`)) +
  geom_col(fill = "turquoise") + # bar height is the value itself
  coord_flip() + # horizontal bars keep the trend names readable
  labs(
    title = "Growing Trends: Yearly Change by Trend",
    x = "Trend",
    y = "Yearly Change"
  ) +
  theme_minimal()
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# 2. Top Yearly Trends CSV
library(readr)
library(dplyr)
library(ggplot2)
# Step 1: load the yearly export, skipping the first 10 lines (metadata rows)
df_yearly <- read_csv(file = "Pinterest_Top_Yearly_Trends.csv", skip = 10)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 50 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Trend, Weekly change, Monthly change, Yearly change
## dbl (54): Rank, Normalized volume, 2024-03-11, 2024-03-18, 2024-03-25, 2024-...
## num (1): 2025-03-10
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 2: keep only the first column for any repeated column name
df_yearly <- df_yearly[!duplicated(names(df_yearly))]
# Step 3: derive numeric versions of the change and volume columns.
# Going through as.character() lets parse_number() accept columns that were
# read either as text or as numbers.
df_yearly <- mutate(
  df_yearly,
  yearly_change = parse_number(as.character(.data[["Yearly change"]])),
  normalized_volume = parse_number(as.character(.data[["Normalized volume"]]))
)
# Step 4: keep the trend, the two derived metrics, and only those weekly
# columns (named YYYY-MM-DD, years 2024-2025) dated today or earlier.
yearly_names <- names(df_yearly)
date_cols <- yearly_names[grepl("^202[4-5]-\\d{2}-\\d{2}$", yearly_names)]
valid_dates <- date_cols[as.Date(date_cols) <= Sys.Date()]
df_yearly <- df_yearly %>%
  select(Trend, yearly_change, normalized_volume, all_of(valid_dates))
# Step 5: decide what to plot.
# Prefer rows with a positive yearly change; when there are none, fall back
# to every row that has a normalized volume and plot that metric instead.
df_plot <- filter(df_yearly, !is.na(yearly_change), yearly_change > 0)
if (nrow(df_plot) > 0) {
  aes_map <- aes(x = reorder(Trend, yearly_change), y = yearly_change)
  plot_title <- "Top Yearly Trends: Yearly Change by Trend"
  y_label <- "Yearly Change (%)"
} else {
  message("No valid yearly_change → using normalized_volume")
  df_plot <- filter(df_yearly, !is.na(normalized_volume))
  aes_map <- aes(x = reorder(Trend, normalized_volume), y = normalized_volume)
  plot_title <- "Top Yearly Trends: Normalized Volume by Trend"
  y_label <- "Normalized Volume"
}
# Step 6: horizontal bar chart of the chosen metric, one bar per trend.
# The mapping, title and axis label come from the metric choice made above.
ggplot(data = df_plot, mapping = aes_map) +
  geom_col(fill = "mediumpurple1") +
  labs(title = plot_title, x = NULL, y = y_label) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8)) # small labels for long names

library(readr)
library(dplyr)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
# Read the yearly export (first 10 lines skipped) and pass it through
# clean_names(), so the columns can be referenced as rank, yearly_change and
# normalized_volume below.
df_yearly <- clean_names(
  read_csv("Pinterest_Top_Yearly_Trends.csv", skip = 10)
)
# Coerce rank and both metrics to numbers; as.character() makes this work
# whether a column was read as text or as numeric.
df_yearly <- mutate(
  df_yearly,
  rank = parse_number(as.character(rank)),
  yearly_change = parse_number(as.character(yearly_change)),
  normalized_volume = parse_number(as.character(normalized_volume))
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 50 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Trend, Weekly change, Monthly change, Yearly change
## dbl (54): Rank, Normalized volume, 2024-03-11, 2024-03-18, 2024-03-25, 2024-...
## num (1): 2025-03-10
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop future-date columns.
# clean_names() has already rewritten the weekly headers (a name such as
# "2024-03-11" comes out as "x2024_03_11"), so the pattern accepts both the
# raw and the cleaned spelling. Matching only the raw YYYY-MM-DD form found
# nothing here and silently discarded every weekly column.
date_cols <- grep(
  "^x?202[4-5][-_]\\d{2}[-_]\\d{2}$",
  names(df_yearly),
  value = TRUE
)
# Strip the leading "x" and restore the dashes so as.Date() can parse the
# column name, then keep only the weeks dated today or earlier.
keep_dates <- date_cols[
  as.Date(gsub("_", "-", sub("^x", "", date_cols))) <= Sys.Date()
]
df_yearly <- select(
  df_yearly,
  trend, rank, yearly_change, normalized_volume, all_of(keep_dates)
)
# Top 10: order the trends by rank (ascending) and keep the first ten rows
df_top10 <- head(arrange(df_yearly, rank), n = 10)
# Pick the metric for the Top 10 chart.
# Yearly change is used as soon as at least one of the ten rows has a
# positive, non-missing value; otherwise the chart shows normalized volume.
if (any(!is.na(df_top10$yearly_change) & df_top10$yearly_change > 0)) {
  aes_map <- aes(x = reorder(trend, yearly_change), y = yearly_change)
  plot_title <- "Top 10 Yearly Trends: Yearly Change by Trend"
  y_label <- "Yearly Change (%)"
} else {
  message("No valid yearly_change in top 10 → plotting normalized_volume")
  aes_map <- aes(x = reorder(trend, normalized_volume), y = normalized_volume)
  plot_title <- "Top 10 Yearly Trends: Normalized Volume by Trend"
  y_label <- "Normalized Volume"
}
# Horizontal bar chart of the Top 10 trends using the mapping, title and
# axis label selected above.
ggplot(data = df_top10, mapping = aes_map) +
  geom_col(fill = "magenta") +
  labs(title = plot_title, x = NULL, y = y_label) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

# Scatter plot of yearly change against normalized volume for every trend in
# df_yearly, with a straight-line (lm) fit drawn in red and no confidence band.
ggplot(
  data = df_yearly,
  mapping = aes(x = normalized_volume, y = yearly_change)
) +
  geom_point() +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top Yearly Trends: Volume vs Yearly Change",
    x = "Normalized Volume",
    y = "Yearly Change (%)"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Hypothesis: Certain Trends Appear in All Three Lists (Weekly,
# Monthly, Yearly), Suggesting Universal, Sustained Popularity
library(ggvenn)
## Loading required package: grid
# Hard-coded sets of trend names, one per list being compared.
trend_sets <- list(
  Growing = c("nails", "spring nails", "outfit ideas"),
  Yearly = c("nails", "hairstyles", "wallpaper"),
  Monthly = c("nails", "outfit ideas", "wallpaper backgrounds")
)
# Three-way Venn diagram of the sets, with percentages switched off.
ggvenn(
  data = trend_sets,
  set_name_color = "black",
  stroke_size = 0.5,
  fill_color = c("#CAB2D6", "#B2DFEE", "#FDB9C8"), # light purple, light blue, pink
  show_percentage = FALSE
)
