Prodi: Informatika
Mata Kuliah: Probabilitas dan Statistika
Kelas E25
Kelompok: 5
Media sosial menjadi salah satu sarana utama dalam penyebaran informasi dan interaksi antara pengguna di era digital. Platform seperti LinkedIn, Instagram, YouTube, dan Medium menyediakan berbagai metrik performa seperti jumlah pengikut (followers), tingkat keterlibatan (engagement rate), serta interaksi pengguna seperti likes, comments, dan shares. Analisis terhadap metrik tersebut dapat membantu memahami pola performa dan tingkat keterlibatan pengguna pada berbagai platform media sosial.
Dataset Social Media Performance and Engagement Data diperoleh dari platform Kaggle dan berisi data simulasi mengenai performa serta tingkat keterlibatan pengguna. Platform yang terdapat pada dataset ini meliputi LinkedIn, Instagram, YouTube, dan Medium.
Dataset ini merupakan data simulasi yang dibuat untuk keperluan analisis dan visualisasi, sehingga tidak merepresentasikan waktu atau tahun tertentu.
Sumber data:
https://www.kaggle.com/datasets/svthejaswini/social-media-performance-and-engagement-data
Mengimpor library yang diperlukan untuk analisis data dan visualisasi. Seperti:
- ggplot2 untuk visualisasi
- dplyr untuk manipulasi data
- readr untuk membaca data
- scales untuk format angka pada visualisasi
library(ggplot2)
library(dplyr)
library(readr)
library(scales)
Memuat dataset yang berisi informasi tentang performa media sosial.
Dataset ini mungkin berisi kolom seperti platform,
content_type, views, likes, dan
sebagainya.
# Read the social media performance CSV (path is relative to the working
# directory) into a tibble, then preview its first six rows.
data <- read_csv(file = "social_media_performance.csv")
head(data, n = 6)
## # A tibble: 6 × 15
## post_id platform content_type topic language region post_datetime
## <dbl> <chr> <chr> <chr> <chr> <chr> <dttm>
## 1 1 LinkedIn article Technology UR BR 2025-04-25 09:47:00
## 2 2 LinkedIn poll Health FR JP 2025-10-29 09:44:00
## 3 3 LinkedIn article Travel HI FR 2025-02-10 14:12:00
## 4 4 LinkedIn image Sports DE DE 2025-04-18 22:41:00
## 5 5 LinkedIn poll Business DE US 2025-04-28 10:17:00
## 6 6 LinkedIn image Sports FR AU 2025-10-03 15:25:00
## # ℹ 8 more variables: hashtags <chr>, sentiment_score <dbl>, views <dbl>,
## # likes <dbl>, comments <dbl>, shares <dbl>, engagement_rate <dbl>,
## # is_viral <dbl>
Analisis dilakukan pada variabel views.
# Descriptive statistics for the `views` column.
#
# Each statistic is computed exactly once and then reused when building the
# summary table. Missing values are dropped (`na.rm = TRUE`), matching the
# later plotting chunks, so a single NA cannot turn the summary into NA or
# make `quantile()` stop with an error.
views <- data$views

# Central tendency
mean_views <- mean(views, na.rm = TRUE)
median_views <- median(views, na.rm = TRUE)
# Mode: value with the highest frequency count (`table()` skips NA by default).
mode_views <- as.numeric(names(sort(table(views), decreasing = TRUE)[1]))

# Position and spread
q1_views <- quantile(views, 0.25, na.rm = TRUE)
q3_views <- quantile(views, 0.75, na.rm = TRUE)
range_views <- max(views, na.rm = TRUE) - min(views, na.rm = TRUE)
var_views <- var(views, na.rm = TRUE)
sd_views <- sd(views, na.rm = TRUE)

# Collect the results into a one-row table. `as.numeric()` strips the
# "25%" / "75%" names that `quantile()` attaches to its result.
statistik <- data.frame(
  Mean = mean_views,
  Median = median_views,
  Modus = mode_views,
  Q1 = as.numeric(q1_views),
  Q3 = as.numeric(q3_views),
  Range = range_views,
  Varians = var_views,
  Standard_Deviation = sd_views
)

# Render the table
knitr::kable(
  statistik,
  digits = 2,
  caption = "Statistik Deskriptif Variabel Views"
)
| Mean | Median | Modus | Q1 | Q3 | Range | Varians | Standard_Deviation |
|---|---|---|---|---|---|---|---|
| 212142.2 | 78193 | 1459 | 30936.5 | 343206.8 | 998906 | 64783933052 | 254526.9 |
likes Berdasarkan
# platform  (tail of the section heading that starts on the previous line)
#
# Pie chart of total likes per platform.
# The heading text used to be glued to the assignment below, which stored the
# summary under the wrong name and left `likes_platform` undefined.

# Total likes per platform and each platform's share of the overall total.
likes_platform <- data %>%
  group_by(platform) %>%
  summarise(total_likes = sum(likes, na.rm = TRUE)) %>%
  mutate(percentage = total_likes / sum(total_likes) * 100)

ggplot(likes_platform, aes(x = "", y = total_likes, fill = platform)) +
  geom_col(width = 1) +
  # Wrapping the single stacked bar around the y axis turns it into a pie.
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c("#00F5D4", "#9B5DE5", "#F15BB5", "#00BBF9"),
    # `breaks` and `labels` come from the same rows, so every percentage
    # label is attached to its own platform in the legend.
    breaks = likes_platform$platform,
    labels = paste0(
      likes_platform$platform,
      " (", round(likes_platform$percentage, 1), "%)"
    )
  ) +
  labs(
    title = "Distribusi Jumlah Likes Berdasarkan Platform",
    fill = "platform"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
views Berdasarkan
# content_type  (tail of the section heading that starts on the previous line)
#
# Bar chart of average views per content type.
# The heading text used to be glued to the assignment below, which stored the
# summary under the wrong name and left `views_content` undefined.

# Mean views for every content type, ignoring missing values.
views_content <- data %>%
  group_by(content_type) %>%
  summarise(avg_views = mean(views, na.rm = TRUE))

ggplot(
  views_content,
  aes(x = content_type, y = avg_views, fill = content_type)
) +
  geom_col() +
  # One colour per content type; the palette provides eight colours.
  scale_fill_manual(values = c(
    "#00F5D4",
    "#9B5DE5",
    "#F15BB5",
    "#00BBF9",
    "#FEE440",
    "#FB5607",
    "#3A86FF",
    "#8338EC"
  )) +
  # Show the y axis in thousands, e.g. 250000 is printed as "250K".
  scale_y_continuous(labels = label_number(scale = 1 / 1000, suffix = "K")) +
  labs(
    title = "Rata-rata Views Berdasarkan Tipe Konten",
    x = "Tipe Konten",
    y = "Rata-rata Views (Ribu)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
# Views  (section heading text that was fused onto the first statement)
#
# Histogram of `views` with dashed reference lines at the mean and median.
# The heading text used to be glued to the assignment below, so the NA-safe
# mean was stored under the wrong name instead of in `mean_views`.
mean_views <- mean(data$views, na.rm = TRUE)
median_views <- median(data$views, na.rm = TRUE)

ggplot(data, aes(x = views)) +
  geom_histogram(
    bins = 30,
    fill = "#00BBF9",
    color = "white",
    alpha = 0.9
  ) +
  # Dashed vertical markers: pink for the mean, orange for the median.
  geom_vline(
    xintercept = mean_views,
    color = "#F15BB5",
    linetype = "dashed",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = median_views,
    color = "#FB5607",
    linetype = "dashed",
    linewidth = 1
  ) +
  # Text labels anchored at the top of the panel (y = Inf); the two labels use
  # different `vjust` values so they sit at different heights.
  annotate(
    "text",
    x = mean_views,
    y = Inf,
    label = paste0("Mean: ", round(mean_views / 1000, 2), "K"),
    vjust = 2,
    color = "#F15BB5",
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = median_views,
    y = Inf,
    label = paste0("Median: ", round(median_views / 1000, 2), "K"),
    vjust = 4,
    color = "#FB5607",
    fontface = "bold"
  ) +
  # Show the x axis in thousands with a "K" suffix.
  scale_x_continuous(labels = label_number(scale = 1 / 1000, suffix = "K")) +
  labs(
    title = "Distribusi Views",
    x = "Views (Ribu)",
    y = "Frekuensi"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
# views  (section heading text that was fused onto the first statement)
#
# Density plot of `views` with dashed reference lines at the mean and median.
# The heading text used to be glued to the assignment below, so the NA-safe
# mean was stored under the wrong name instead of in `mean_views`.
mean_views <- mean(data$views, na.rm = TRUE)
median_views <- median(data$views, na.rm = TRUE)

ggplot(data, aes(x = views)) +
  geom_density(
    fill = "#00F5D4",
    alpha = 0.6
  ) +
  # Dashed vertical markers: pink for the mean, orange for the median.
  geom_vline(
    xintercept = mean_views,
    color = "#F15BB5",
    linetype = "dashed",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = median_views,
    color = "#FB5607",
    linetype = "dashed",
    linewidth = 1
  ) +
  # Text labels anchored at the top of the panel (y = Inf); the two labels use
  # different `vjust` values so they sit at different heights.
  annotate(
    "text",
    x = mean_views,
    y = Inf,
    label = paste0("Mean: ", round(mean_views / 1000, 2), "K"),
    vjust = 2,
    color = "#F15BB5",
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = median_views,
    y = Inf,
    label = paste0("Median: ", round(median_views / 1000, 2), "K"),
    vjust = 4,
    color = "#FB5607",
    fontface = "bold"
  ) +
  # Show the x axis in thousands with a "K" suffix.
  scale_x_continuous(labels = label_number(scale = 1 / 1000, suffix = "K")) +
  labs(
    title = "Density Plot Views",
    x = "Views (Ribu)",
    y = "Density"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
views berdasarkan
# platform  (tail of the section heading that starts on the previous line)
#
# Box plot of `views` for each platform.
# The heading text used to be glued to the `ggplot()` call below, turning it
# into a call to a function that does not exist.
ggplot(data, aes(x = platform, y = views, fill = platform)) +
  geom_boxplot(
    alpha = 0.85,
    outlier.color = "#FB5607",
    outlier.size = 2
  ) +
  # One fill colour per platform.
  scale_fill_manual(values = c(
    "#00F5D4",
    "#9B5DE5",
    "#F15BB5",
    "#00BBF9"
  )) +
  # Show the y axis in thousands with a "K" suffix.
  scale_y_continuous(labels = label_number(scale = 1 / 1000, suffix = "K")) +
  labs(
    title = "Distribusi Views Berdasarkan Platform",
    x = "Platform",
    y = "Views (Ribu)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    # The x axis already names each platform, so the fill legend is hidden.
    legend.position = "none"
  )
Kesimpulan Analisis Performa Media Sosial