library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Load the raw dataset from the working directory.
data <- read.csv("social_media_entertainment_data.csv")

# Rows whose Country is the USA. The comparison is case-insensitive because
# the previous literal "UsA" looked like a capitalisation typo and an exact
# match on it would silently return zero rows.
# NOTE(review): confirm the spelling actually used in the Country column.
df_1 <- data %>%
  filter(toupper(Country) == "USA")

# Mean daily social media time and number of rows for each primary platform.
platform_time <- data %>%
  group_by(Primary.Platform) %>%
  summarise(
    Average_Time_Per_Platform = mean(Daily.Social.Media.Time..hrs.),
    User_Count = n()
  )

platform_time
## # A tibble: 5 × 3
##   Primary.Platform Average_Time_Per_Platform User_Count
##   <chr>                                <dbl>      <int>
## 1 Facebook                              4.26      59936
## 2 Instagram                             4.25      59721
## 3 TikTok                                4.25      60301
## 4 Twitter                               4.25      60285
## 5 YouTube                               4.27      59757
# Bar chart of the mean daily social media time per primary platform, with
# the bars ordered from the highest average to the lowest.
platform_time %>%
  ggplot(
    aes(
      x = reorder(Primary.Platform, -Average_Time_Per_Platform),
      y = Average_Time_Per_Platform,
      fill = Primary.Platform
    )
  ) +
  geom_col() +
  ggtitle("Average Daily Social Media Time by Platform") +
  xlab("Social Media Platform") +
  ylab("Average Time Spent (hrs)") +
  # Tilt the platform names so they do not collide on the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Analysis:

# Mean daily social media time and number of rows for each occupation.
Average_Social_Media_Time_Per_Occupation <- summarise(
  group_by(data, Occupation),
  Average_Social_Media_Time = mean(Daily.Social.Media.Time..hrs.),
  Employee_Count = n()
)

Average_Social_Media_Time_Per_Occupation
## # A tibble: 4 × 3
##   Occupation   Average_Social_Media_Time Employee_Count
##   <chr>                            <dbl>          <int>
## 1 Professional                      4.26          75332
## 2 Retired                           4.25          74835
## 3 Student                           4.27          74875
## 4 Unemployed                        4.25          74958
# Bar chart of the mean social media time per occupation, with the bars
# ordered from the highest average to the lowest.
Average_Social_Media_Time_Per_Occupation %>%
  ggplot(
    aes(
      x = reorder(Occupation, -Average_Social_Media_Time),
      y = Average_Social_Media_Time,
      fill = Occupation
    )
  ) +
  geom_col() +
  ggtitle("Average Social Media Time Per Occupation") +
  xlab("Occupation") +
  ylab("Average Social Media Time (hrs)") +
  # Tilt the occupation names so they do not collide on the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Insights:

# Bucket every row's internet speed into four equal-width bands between 0 and
# 100; include.lowest = TRUE puts a speed of exactly 0 into the first band.
data <- mutate(
  data,
  Category = cut(
    Internet.Speed..Mbps.,
    breaks = seq(0, 100, by = 25),
    labels = c("Very Slow", "Slow", "Moderate", "Fast"),
    include.lowest = TRUE
  )
)

# Mean time spent in online communities (ignoring missing values) and number
# of rows for each speed band.
internet_speed_groups <- summarise(
  group_by(data, Category),
  AverageTime = mean(Time.Spent.in.Online.Communities..hrs., na.rm = TRUE),
  Users = n()
)

internet_speed_groups
## # A tibble: 4 × 3
##   Category  AverageTime Users
##   <fct>           <dbl> <int>
## 1 Very Slow        2.00 63128
## 2 Slow             2.00 79082
## 3 Moderate         2.01 79022
## 4 Fast             2.00 78768
# Bar chart of the mean time spent in online communities for each internet
# speed band; bars follow the factor order of Category.
internet_speed_groups %>%
  ggplot(aes(x = Category, y = AverageTime, fill = Category)) +
  geom_col() +
  ggtitle("Time Spent in Online Communities by Internet Speed Category") +
  xlab("Internet Speed Category") +
  ylab("Average Time Spent (hrs)")

Insights:

Data Frame

# Count the rows for every (Occupation, Primary.Platform) pair that occurs in
# the data. `.groups = "drop"` returns an ungrouped tibble directly, so no
# trailing ungroup() is needed and summarise() no longer emits its
# "has grouped output by 'Occupation'" message.
combination_groups <- data %>%
  group_by(Occupation, Primary.Platform) %>%
  summarise(n = n(), .groups = "drop")

# Every pair that could occur: the cross product of the distinct occupations
# and distinct platforms. stringsAsFactors = FALSE keeps the key columns the
# same type as in `data`, so the join below does not have to reconcile
# factor and character keys.
all_combinations <- expand.grid(
  Occupation = unique(data$Occupation),
  Primary.Platform = unique(data$Primary.Platform),
  stringsAsFactors = FALSE
)

# Pairs from the cross product that have no rows in the data.
missing_combinations <- anti_join(
  all_combinations,
  combination_groups,
  by = c("Occupation", "Primary.Platform")
)

print(missing_combinations)
## [1] Occupation       Primary.Platform
## <0 rows> (or 0-length row.names)
# Show the row count for each (Occupation, Primary.Platform) pair.
print(combination_groups)
## # A tibble: 20 × 3
##    Occupation   Primary.Platform     n
##    <chr>        <chr>            <int>
##  1 Professional Facebook         15191
##  2 Professional Instagram        14899
##  3 Professional TikTok           15097
##  4 Professional Twitter          15205
##  5 Professional YouTube          14940
##  6 Retired      Facebook         14947
##  7 Retired      Instagram        14767
##  8 Retired      TikTok           15002
##  9 Retired      Twitter          15044
## 10 Retired      YouTube          15075
## 11 Student      Facebook         14906
## 12 Student      Instagram        15231
## 13 Student      TikTok           15063
## 14 Student      Twitter          14957
## 15 Student      YouTube          14718
## 16 Unemployed   Facebook         14892
## 17 Unemployed   Instagram        14824
## 18 Unemployed   TikTok           15139
## 19 Unemployed   Twitter          15079
## 20 Unemployed   YouTube          15024
# Heatmap of the row count for each occupation/platform pair: one tile per
# pair, shaded from white (fewest rows) to blue (most rows).
combination_groups %>%
  ggplot(aes(x = Occupation, y = Primary.Platform, fill = n)) +
  geom_tile() +
  labs(
    title = "Heatmap of Occupation and Primary.Platform Combinations",
    x = "Occupation",
    y = "Primary Platform",
    fill = "Count"
  ) +
  scale_fill_gradient(low = "white", high = "blue")

Insights:

  • The most common combination was “Student” and “Instagram” (15,231 users), indicating that students may prefer Instagram for its social and entertainment content.

  • The least common combination was “Student” and “YouTube” (14,718 users), which indicates that students don’t tend to use YouTube as commonly as other platforms. This could be due to YouTube not being as social or entertaining as, say, Instagram or TikTok.

  • Instagram and Facebook both show relatively high usage across most occupations, indicating that they are popular for personal engagement.

  • TikTok shows high usage across all occupations, suggesting a growing influence.