Selain laporan ini, saya juga membuat dashboard interaktif dengan Shiny. Akses dashboardnya di sini:
https://hairulysin.shinyapps.io/Netflix/
# Load the Netflix catalogue from the project's data folder.
netflix <- read.csv("data_IP/netflix.csv")
# Display the loaded data frame.
netflix
# Inspect column types and sample values (glimpse() comes from dplyr, not
# base R).
glimpse(netflix)
#> Rows: 8,790
#> Columns: 10
#> $ show_id <chr> "s1", "s3", "s6", "s14", "s8", "s9", "s10", "s939", "s13"…
#> $ type <chr> "Movie", "TV Show", "TV Show", "Movie", "Movie", "TV Show…
#> $ title <chr> "Dick Johnson Is Dead", "Ganglands", "Midnight Mass", "Co…
#> $ director <chr> "Kirsten Johnson", "Julien Leclercq", "Mike Flanagan", "B…
#> $ country <chr> "United States", "France", "United States", "Brazil", "Un…
#> $ date_added <chr> "9/25/2021", "9/24/2021", "9/24/2021", "9/22/2021", "9/24…
#> $ release_year <int> 2020, 2021, 2021, 2021, 1993, 2021, 2021, 2019, 2021, 201…
#> $ rating <chr> "PG-13", "TV-MA", "TV-MA", "TV-PG", "TV-MA", "TV-14", "PG…
#> $ duration <chr> "90 min", "1 Season", "1 Season", "91 min", "125 min", "9…
#> $ listed_in <chr> "Documentaries", "Crime TV Shows, International TV Shows,…
Berikut merupakan deskripsi dari variabel pada dataset ‘netflix.csv’ :
Kelompokkan data berdasarkan `type` dan buat kolom baru `count`
dari type.
# Shared dark theme added to every plot in this report: dark plot background,
# light grey panel, dashed horizontal grid lines only, and white text.
theme_algoritma <- theme(
  legend.key = element_rect(fill = "black"),
  legend.background = element_rect(color = "white", fill = "#263238"),
  plot.subtitle = element_text(size = 6, color = "white"),
  panel.background = element_rect(fill = "#dddddd"),
  panel.border = element_rect(fill = NA),
  panel.grid.minor.x = element_blank(),
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_line(color = "darkgrey", linetype = 2),
  panel.grid.minor.y = element_blank(),
  plot.background = element_rect(fill = "#263238"),
  text = element_text(color = "white"),
  axis.text = element_text(color = "white")
)

# Two-colour fill palette (red and black) used by the content-type bar chart.
palet_warna <- c("#db0000", "black")
# Bar chart of the number of titles per content type.
# `netflix1` is defined elsewhere; this block reads its `type` and `count`
# columns. The `text` aesthetic only feeds the plotly tooltip.
plot_bar <- ggplot(
  netflix1,
  aes(
    x = type,
    y = count,
    fill = type,
    text = glue("Jenis Konten : {type}
                 Jumlah: {count}
                 Distribusi : {round(count/sum(count)*100,2)} %")
  )
) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    x = "Jenis Konten",
    y = "Jumlah",
    fill = "Jenis Konten"
  ) +
  ggtitle("Jenis Konten di Netflix") +
  scale_fill_manual(values = palet_warna) +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme_algoritma +
  # The x axis already names each bar, so hide the redundant fill legend.
  # `guides(fill = FALSE)` is deprecated; "none" is the supported spelling.
  guides(fill = "none")

# Interactive version; show only the custom tooltip text.
ggplotly(plot_bar, tooltip = "text")
library(maps)

# World polygons (one row per polygon vertex) used as the base map.
world_map <- map_data("world")

# Number of titles per country, largest first.
netflix2 <- netflix %>%
  group_by(country) %>%
  summarise(contributors = n()) %>%
  arrange(desc(contributors))

# Rename countries in place so they match the `region` names used by the
# `maps` world data; unmatched names get no fill on the map.
netflix2 <- netflix2 %>%
  mutate(country = case_when(
    country == "United States" ~ "USA",
    country == "United Kingdom" ~ "UK",
    # Add further renaming rules here as needed.
    TRUE ~ country # Leave every other country unchanged.
  ))

# Attach the per-country counts to every polygon vertex.
# NOTE(review): this data frame shares its name with ggplot2::map_data();
# the name is kept so any later code that reads it keeps working.
map_data <- left_join(
  world_map,
  netflix2,
  by = c("region" = "country")
)

# Choropleth of contributions per country; `text` feeds the plotly tooltip.
plot_map <- ggplot(
  map_data,
  aes(
    x = long,
    y = lat,
    group = group,
    fill = contributors,
    text = glue("Negara: {region}<br>Kontribusi: {contributors}")
  )
) +
  geom_polygon(color = "black") +
  scale_fill_gradient(
    low = "#db0000",
    high = "black",
    limits = c(0, max(netflix2$contributors))
  ) +
  labs(
    title = "Kontribusi Negara dalam Konten Netflix",
    x = NULL,
    y = NULL,
    fill = "Kontribusi"
  ) +
  theme_algoritma +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major.x = element_line(color = "darkgrey", linetype = 2)
  )

ggplotly(plot_map, tooltip = "text")
# Number of titles per release year, from 1980 onwards.
netflix3 <- netflix %>%
  group_by(release_year) %>%
  summarise(count = n()) %>%
  arrange(release_year) %>%
  filter(release_year >= 1980)

# Year drawn in red on top of the grey bars.
# NOTE(review): presumably the peak year -- confirm against the data.
tahun_sorotan <- 2018

# NOTE(review): the title and y label talk about content being *added*, but
# the data is grouped by `release_year`, not `date_added` -- confirm which
# one is intended.
plot_trend <- ggplot(
  netflix3,
  aes(
    x = release_year,
    y = count,
    text = glue("Tahun Rilis : {release_year}
                 Jumlah konten : {count}")
  )
) +
  geom_col(fill = "#564d4d", width = 0.8) +
  # Overdraw the highlighted year with a wider red bar.
  geom_col(
    data = netflix3 %>% filter(release_year == tahun_sorotan),
    fill = "#db0000",
    width = 1
  ) +
  labs(
    title = "Tren Penambahan Konten ke Netflix Tiap Tahun",
    x = "Tahun Rilis",
    y = "Jumlah Konten Ditambahkan"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_blank()
  ) +
  theme_algoritma

ggplotly(plot_trend, tooltip = "text")
# Keep only the rating column of the Netflix data.
netflix4 <- select(netflix, rating)

# Frequency of each rating, converted to a two-column data frame.
rating_counts <- table(netflix4$rating)
rating_data <- data.frame(
  rating = names(rating_counts),
  count = as.numeric(rating_counts)
)

# Bar chart ordered by ascending count, filled with a black-to-red gradient.
plot_rating <- ggplot(
  rating_data,
  aes(
    x = reorder(rating, count),
    y = count,
    text = glue("Rating: {rating}
                 Jumlah: {count}")
  )
) +
  geom_col(aes(fill = count)) +
  labs(
    title = "Distribusi Rating Konten di Netflix",
    x = "Rating",
    y = "Jumlah",
    fill = " Jumlah"
  ) +
  # coord_flip() +
  scale_fill_gradient(low = "black", high = "#db0000") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme_algoritma

ggplotly(plot_rating, tooltip = "text")
# Keep only rows of type 'Movie'.
netflix_5 <- netflix %>%
  filter(type == 'Movie')

# Add a numeric `Durasi` column by stripping every non-digit character from
# `duration` (e.g. "90 min" -> 90). Values without digits become NA.
netflix_5 <- netflix_5 %>%
  mutate(Durasi = as.numeric(gsub("[^0-9]", "", duration)))

# Mean duration; na.rm = TRUE so one unparseable duration cannot turn the
# mean (and the reference line below) into NA.
mean_duration <- mean(netflix_5$Durasi, na.rm = TRUE)

# Histogram of movie durations with a dashed line at the mean.
plot_movie_duration <- ggplot(netflix_5, aes(x = Durasi)) +
  geom_histogram(binwidth = 10, fill = "#831010", color = "black") +
  geom_vline(
    xintercept = mean_duration,
    color = "black",
    linetype = "dashed",
    size = 1
  ) +
  # The y label is set once here; the original set it in labs() and then
  # overrode it with scale_y_continuous(name = "Jumlah Film").
  labs(
    title = "Distribusi Durasi Film di Netflix",
    x = "Durasi (Menit)",
    y = "Jumlah Film"
  ) +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would draw one copy per data row.
  annotate(
    "text",
    x = mean_duration,
    y = 140,
    label = "Rata-rata",
    color = "white",
    vjust = -2,
    hjust = -0.2
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme_algoritma

ggplotly(plot_movie_duration)
# Keep titles released in 2000 or later.
# NOTE(review): the object name says "20th century" but the filter keeps
# release_year >= 2000; the name is kept so later code that reads it still
# works.
netflix_20th_century <- netflix %>%
  filter(release_year >= 2000)

# Number of titles per release year and type.
trend_counts <- netflix_20th_century %>%
  group_by(release_year, type) %>%
  summarise(Jumlah = n(), .groups = "drop")

# Share of each type within its release year.
trend_counts <- trend_counts %>%
  group_by(release_year) %>%
  mutate(proportion = Jumlah / sum(Jumlah)) %>%
  ungroup()

# Stacked bar chart of titles per year, split by type.
# The title previously read "(Abad ke-20)", which contradicted the >= 2000
# filter above.
plot_trend <- trend_counts %>%
  ggplot(mapping = aes(
    x = release_year,
    y = Jumlah,
    fill = type,
    text = paste(
      "Tahun Rilis: ", release_year,
      "<br>Tipe: ", type,
      "<br>Jumlah: ", Jumlah
    )
  )) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    title = "Tren Jumlah Film dan Serial TV di Netflix (Sejak Tahun 2000)",
    x = "Tahun Rilis",
    y = "Jumlah",
    fill = "Tipe"
  ) +
  scale_fill_manual(values = c("#831010", "black")) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  theme_algoritma

# Convert to an interactive plot that shows only the custom tooltip.
plotly_plot <- ggplotly(plot_trend, tooltip = "text")

# Display the interactive plot.
plotly_plot
library(plotly)

# Titles from Indonesia released in 2000 or later. filter() drops rows where
# the condition is NA; `[`-subsetting would keep them as all-NA rows.
filtered_data_indonesia <- netflix %>%
  filter(country == "Indonesia", release_year >= 2000)

# Titles from Thailand released in 2000 or later.
filtered_data_thailand <- netflix %>%
  filter(country == "Thailand", release_year >= 2000)

# Number of titles released each year in Indonesia.
yearly_counts_indonesia <- table(filtered_data_indonesia$release_year)

# Number of titles released each year in Thailand.
yearly_counts_thailand <- table(filtered_data_thailand$release_year)

# Convert the frequency tables to data frames.
df_indonesia <- data.frame(
  tahun = as.numeric(names(yearly_counts_indonesia)),
  jumlah = as.numeric(yearly_counts_indonesia)
)
df_thailand <- data.frame(
  tahun = as.numeric(names(yearly_counts_thailand)),
  jumlah = as.numeric(yearly_counts_thailand)
)

# Stack both countries into one data frame so a single layer can carry the
# colour and the tooltip text.
df_negara <- bind_rows(
  Indonesia = df_indonesia,
  Thailand = df_thailand,
  .id = "negara"
)

# Line chart comparing yearly releases. `group = negara` is required: the
# per-row `text` aesthetic would otherwise put every point in its own group
# and no line would be drawn.
line_plot <- ggplot(
  df_negara,
  aes(
    x = tahun,
    y = jumlah,
    color = negara,
    group = negara,
    text = glue(
      "Tahun Rilis: {tahun}<br>Total: {jumlah}<br>Negara: {negara}"
    )
  )
) +
  geom_line(size = 0.8) +
  labs(
    title = "Perbandingan Trend Rilis Film (Indonesia vs Thailand)",
    x = NULL,
    y = "Jumlah Film",
    color = "Negara"
  ) +
  scale_color_manual(
    values = c("Indonesia" = "#db0000", "Thailand" = "black")
  ) +
  theme_algoritma +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major.x = element_line(color = "darkgrey", linetype = 2)
  )

# Convert to an interactive plotly plot. The tooltip comes from the `text`
# aesthetic: `hovertemplate` is a trace attribute, not a layout attribute,
# and `%{group}` is not a hovertemplate variable.
interactive_plot <- ggplotly(line_plot, tooltip = "text") %>%
  layout(
    hoverlabel = list(namelength = -1),
    legend = list(orientation = "h", x = 0.35, y = -0.1)
  )

# Display the interactive plot.
interactive_plot
# Number of titles per director, largest first.
director_counts <- netflix %>%
  count(director, sort = TRUE)

# Take the first 15 rows, then drop the "Not Given" placeholder if it is
# among them.
top_directors <- head(director_counts, n = 15) %>%
  filter(director != "Not Given") %>%
  # Percentages are relative to the total of the retained directors only,
  # not to the whole catalogue.
  mutate(percentage = n / sum(n) * 100) %>%
  arrange(desc(percentage))

# Horizontal bar chart ordered by percentage with a grey-to-red gradient.
# `fill` is mapped in geom_col(); the original also passed `fill = percentage`
# inside glue(), where it was only an unused template variable.
p <- ggplot(
  top_directors,
  aes(
    x = percentage,
    y = reorder(director, percentage),
    text = glue("Sutradara: {director}
                 Persentase: {round(percentage, 2)} %")
  )
) +
  geom_col(aes(fill = percentage)) +
  labs(
    title = "Sutradara dengan Jumlah Konten Terbanyak di Netflix",
    x = "Jumlah Persentase",
    y = "",
    fill = "Persentase"
  ) +
  theme_algoritma +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major.x = element_line(color = "darkgrey", linetype = 2),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  scale_x_continuous(labels = scales::number_format(suffix = "%")) +
  scale_fill_gradient(low = "#474747", high = "#c40815")

# Convert the plot to an interactive one with ggplotly.
ggplotly(p, tooltip = "text")
# Titles per country and rating group, restricted to "Remaja" and "Dewasa",
# with each group's rounded percentage share within its country.
# `netflix_new` and its `rating_type` column are defined elsewhere.
perbandingan_rating <- netflix_new %>%
  group_by(country, rating_type) %>%
  # Keep the grouping by country so the share below is computed per country.
  summarize(jumlah_film = n(), .groups = "drop_last") %>%
  filter(rating_type %in% c("Remaja", "Dewasa")) %>%
  filter(country != "Not Given") %>%
  mutate(proporsi = round((jumlah_film / sum(jumlah_film)) * 100)) %>%
  ungroup()

# Ten countries with the most titles (ties kept), joined back to their
# per-rating rows and ordered by title count for the y axis.
top_10_countries <- perbandingan_rating %>%
  group_by(country) %>%
  summarize(total_jumlah_film = sum(jumlah_film)) %>%
  slice_max(total_jumlah_film, n = 10) %>%
  inner_join(perbandingan_rating, by = "country") %>%
  # Equivalent to the original `desc(-jumlah_film)` double negation.
  mutate(country = reorder(country, jumlah_film))

# Stacked horizontal bars of the rating split, with a white dashed line at
# 50%.
plot_composition <- top_10_countries %>%
  ggplot(mapping = aes(
    x = proporsi,
    y = country,
    fill = factor(rating_type, levels = c("Dewasa", "Remaja")),
    text = glue("Rating: {rating_type}\nPersentase: {proporsi}%")
  )) +
  geom_col(position = position_stack(reverse = TRUE)) +
  geom_vline(xintercept = 50, lty = 2, lwd = 1.5, col = "white") +
  scale_fill_manual(
    values = c("Dewasa" = "#db0000", "Remaja" = "#474747"),
    drop = FALSE
  ) +
  scale_x_continuous(labels = scales::number_format(suffix = "%")) +
  labs(
    title = "Rating Dewasa vs Remaja",
    x = NULL,
    y = NULL
  ) +
  theme_algoritma +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major.x = element_line(color = "darkgrey", linetype = 2),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(hjust = 0.5, size = 10),
    legend.position = "none"
  )

ggplotly(plot_composition, tooltip = c("text"))
# Countries compared in the stacked bar chart below.
asia_countries <- c(
  "Indonesia", "Malaysia", "Thailand", "Singapore", "Philippines", "Vietnam"
)

# Stacked bars of Movie vs TV Show counts for the selected countries.
# `netflix_new` is defined elsewhere.
asia_plot <- netflix_new %>%
  filter(country %in% asia_countries) %>%
  group_by(country, type) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = country,
    y = count,
    fill = type,
    text = glue("Negara: {country}\nJumlah: {count}\nTipe: {type}")
  )) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    title = "Perbedaan Jumlah Film dan TV Show di beberapa Negara",
    x = "",
    y = "Jumlah",
    fill = "Tipe :"
  ) +
  scale_fill_manual(values = c("Movie" = "#971400", "TV Show" = "black")) +
  # A complete theme replaces every earlier theme() tweak, so it goes first.
  # The original set axis.text.x before theme_minimal(), which discarded that
  # setting; the dead call is dropped and the rendered plot is unchanged.
  theme_minimal() +
  theme(plot.title = element_text(size = 14)) +
  theme_algoritma

# Interactive version with a horizontal legend below the plot.
asia_plot_interaktif <- ggplotly(asia_plot, tooltip = "text") %>%
  layout(legend = list(orientation = "h", x = 0.35, y = -0.1))

asia_plot_interaktif
# Drop the "Lainnya" rating group and rows without a duration. filter()
# removes rows where the condition is NA; `[`-subsetting with `!=` would
# keep them as all-NA rows.
# `netflix_new`, `rating_type` and `durasi_menit` are defined elsewhere.
netflix_new <- netflix_new %>%
  filter(rating_type != "Lainnya", !is.na(durasi_menit))

# Mean duration across the remaining rows.
rata_durasi <- mean(netflix_new$durasi_menit, na.rm = TRUE)

# Overlaid histograms of duration per rating group, with a dashed line at
# the mean.
# NOTE(review): `text` is a discrete per-row aesthetic, so ggplot2 presumably
# bins each distinct tooltip string as its own group; with
# position = "identity" the bars may overlap instead of adding up -- verify
# the bar heights.
durasi_rating_plot <- ggplot(
  netflix_new,
  aes(
    x = durasi_menit,
    fill = rating_type,
    text = paste(
      "Durasi:", durasi_menit, "menit",
      "\nKategori :", rating_type
    )
  )
) +
  geom_histogram(binwidth = 10, position = "identity", alpha = 0.7) +
  # Dashed reference line at the mean duration.
  geom_vline(xintercept = rata_durasi, linetype = "dashed", color = "white") +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would draw one copy per data row.
  annotate(
    "text",
    x = rata_durasi,
    y = 10,
    label = "Rata-rata",
    vjust = -1,
    color = "white",
    size = 3,
    angle = 90
  ) +
  labs(
    title = "Perbedaan Durasi antara Rating Dewasa & Remaja",
    x = "Durasi (menit)",
    y = "Jumlah Film"
  ) +
  scale_fill_manual(
    values = c("Dewasa" = "#db0000", "Remaja" = "black"),
    labels = c("Dewasa", "Remaja"),
    breaks = c("Dewasa", "Remaja"),
    drop = FALSE
  ) +
  theme_algoritma +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    panel.grid.major.x = element_line(color = "darkgrey", linetype = 2)
  )

# Convert to an interactive plot; the name is reused, so from here on
# `durasi_rating_plot` holds the plotly object.
durasi_rating_plot <- ggplotly(durasi_rating_plot, tooltip = "text") %>%
  layout(legend = list(orientation = "h", x = 0.35, y = -0.2))

durasi_rating_plot