# Read the job-postings CSV into a tibble
df <- read_csv(file = "linkedin_jobs_20250424_marketing.csv")
## Rows: 539 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): title, company, location, link, description
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact, column-by-column overview of the loaded data
df %>%
  glimpse()
## Rows: 539
## Columns: 5
## $ title       <chr> "Wholesale Brand Manager", "Events Associate", "Marketing …
## $ company     <chr> "GIVENCHY", "Cedar", "Sears", "Nike", "The Coca-Cola Compa…
## $ location    <chr> "New York, United States", "United States", "Hoffman Estat…
## $ link        <chr> "https://www.linkedin.com/jobs/view/wholesale-brand-manage…
## $ description <chr> "Position Description: The position plays a key role in gr…
# Skills (keywords and phrases) to search for in each job description.
# The order here determines the order in which matches are reported.
skills_list <- c(
  "SEO", "SEM", "content marketing",
  "email marketing", "Google Analytics", "social media",
  "brand management", "paid media", "CRM",
  "HubSpot", "Salesforce", "PPC",
  "campaign management", "analytics", "market research",
  "copywriting", "digital marketing", "strategic planning",
  "event planning", "B2B", "B2C",
  "media buying", "Adobe Creative Suite", "Facebook Ads",
  "Instagram marketing", "LinkedIn marketing", "marketing automation"
)

# Find which skills are mentioned in a single job description.
#
# Matching is case-insensitive and requires the skill to appear as a standalone
# token: it must not be directly preceded or followed by a letter or digit.
# This stops short acronyms from matching inside unrelated words (e.g. "SEM"
# inside "disseminate" or "assemble").
#
# Arguments:
#   description  A single string (one job description). May be NA.
#   skills       Character vector of skills to look for; defaults to the
#                file-level `skills_list`.
#
# Returns:
#   The subset of `skills` found in `description`, in the order they appear in
#   `skills`; character(0) when nothing matches or `description` is NA.
extract_skills <- function(description, skills = skills_list) {
  stopifnot(length(description) == 1L)

  # A missing description mentions no skills (rather than yielding NA "skills").
  if (is.na(description)) {
    return(character(0))
  }

  # \Q...\E quotes the skill literally, so regex metacharacters in a skill name
  # are harmless; the lookarounds enforce the standalone-token rule.
  patterns <- paste0("(?<![[:alnum:]])\\Q", skills, "\\E(?![[:alnum:]])")

  is_mentioned <- vapply(
    patterns,
    function(pattern) {
      grepl(pattern, description, ignore.case = TRUE, perl = TRUE)
    },
    logical(1),
    USE.NAMES = FALSE
  )

  skills[is_mentioned]
}

# Add a list-column with the skills detected in each posting's description
df <- mutate(df, skills = map(description, extract_skills))

# Expand the list-column to one row per (posting, skill) pair, then tally how
# many postings mention each skill, most frequent first
skills_df <- count(unnest(df, cols = skills), skills, sort = TRUE)

# Preview the most frequently mentioned skills
head(skills_df, n = 6L)
## # A tibble: 6 × 2
##   skills                n
##   <chr>             <int>
## 1 social media        192
## 2 analytics           167
## 3 digital marketing   125
## 4 SEM                  86
## 5 CRM                  78
## 6 paid media           67
# Plot the 10 most frequently mentioned skills as a horizontal bar chart.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# more than 10 bars can appear when counts are tied at the cutoff.
skills_df %>%
  slice_max(order_by = n, n = 10) %>%
  ggplot(aes(x = reorder(skills, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Top 10 Most In-Demand Marketing Skills",
       x = "Skills", y = "Number of Mentions")

# Take the last comma-separated component of `location` as the country.
# A location without a comma is used as-is.
df <- mutate(df, country = word(location, start = -1, sep = ", "))

# Skill mentions per country, limited to the United States, Canada and the
# United Kingdom. Rows are filtered before counting so only the countries of
# interest are tallied, and count(country, skills) returns an ungrouped result
# (no lingering group_by() state).
df %>%
  unnest(skills) %>%
  filter(country %in% c("United States", "Canada", "United Kingdom")) %>%
  count(country, skills, sort = TRUE) %>%
  ggplot(aes(x = reorder(skills, n), y = n, fill = country)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(title = "Top Skills by Country",
       x = "Skills", y = "Count")

# Install wordcloud2 only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(tidyverse)
library(wordcloud2)

# Fall back to example data only when no skill counts have been computed yet.
# Building this unconditionally would overwrite the real `skills_df` and make
# the word cloud show made-up numbers. The example uses the same column names
# (`skills`, `n`) as the computed table so the code below works for both.
if (!exists("skills_df")) {
  skills_df <- data.frame(
    skills = c("SEO", "Analytics", "Social Media", "Digital Marketing"),
    n = c(45, 60, 55, 50)
  )
}

# Prepare for wordcloud2, which expects `word` and `freq` columns
skills_for_wordcloud <- skills_df %>%
  rename(word = skills, freq = n)

# Create interactive word cloud
wordcloud2(skills_for_wordcloud, size = 0.7, shape = "circle")