library(readr)
# Load the IMDB movie export; readr guesses the column types from the file.
movie_data <- read_csv(file = "~/Downloads/FILMI.csv")
## Rows: 2000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Movie Name, Release Year, Genre, Director, Cast, Gross
## dbl (3): Duration, IMDB Rating, Metascore
## num (1): Votes
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the raw data
head(movie_data, n = 6L)
## # A tibble: 6 × 10
## `Movie Name` `Release Year` Duration `IMDB Rating` Metascore Votes Genre
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 The Godfather 1972 175 9.2 100 2.00e6 Crim…
## 2 The Godfather Pa… 1974 202 9 90 1.36e6 Crim…
## 3 Ordinary People 1980 124 7.7 86 5.65e4 Drama
## 4 Lawrence of Arab… 1962 218 8.3 100 3.13e5 Adve…
## 5 Straw Dogs 1971 113 7.4 73 6.43e4 Crim…
## 6 Close Encounters… 1977 138 7.6 90 2.16e5 Dram…
## # ℹ 3 more variables: Director <chr>, Cast <chr>, Gross <chr>
Here’s a brief description of the variables in the IMDB Top 2000 Movies dataset:
I remove the variables Release Year, Director, and Cast. I keep Genre, which I will use later for the criterion validity test.
# Drop the variables that are not used in the analysis (Release Year, Director,
# Cast). The columns are removed by name rather than by position, so the step
# keeps removing the intended variables even if the column order of the CSV
# changes.
movie_data <- movie_data[
  ,
  !(names(movie_data) %in% c("Release Year", "Director", "Cast"))
]
# Preview the remaining columns
head(movie_data)
## # A tibble: 6 × 7
## `Movie Name` Duration `IMDB Rating` Metascore Votes Genre Gross
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 The Godfather 175 9.2 100 2.00e6 Crim… $134…
## 2 The Godfather Part II 202 9 90 1.36e6 Crim… $57.…
## 3 Ordinary People 124 7.7 86 5.65e4 Drama $54.…
## 4 Lawrence of Arabia 218 8.3 100 3.13e5 Adve… $44.…
## 5 Straw Dogs 113 7.4 73 6.43e4 Crim… <NA>
## 6 Close Encounters of the T… 138 7.6 90 2.16e5 Dram… $132…
# Keep complete cases only: a movie with a missing value in any column is dropped
movie_data_clean <- stats::na.omit(object = movie_data)
# Preview the first rows of the cleaned dataset
head(movie_data_clean, n = 6L)
## # A tibble: 6 × 7
## `Movie Name` Duration `IMDB Rating` Metascore Votes Genre Gross
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 The Godfather 175 9.2 100 2.00e6 Crim… $134…
## 2 The Godfather Part II 202 9 90 1.36e6 Crim… $57.…
## 3 Ordinary People 124 7.7 86 5.65e4 Drama $54.…
## 4 Lawrence of Arabia 218 8.3 100 3.13e5 Adve… $44.…
## 5 Close Encounters of the T… 138 7.6 90 2.16e5 Dram… $132…
## 6 Once Upon a Time in the W… 166 8.5 82 3.48e5 West… $5.3…
library(psych)
# Descriptive statistics for every column of the cleaned data.
# NOTE(review): the rows marked with "*" in the output are the character
# columns (Movie Name, Genre, and Gross, which is still text at this point);
# their statistics presumably describe internal category codes rather than the
# original values, so they should not be interpreted.
describe(x = movie_data_clean)
## vars n mean sd median trimmed mad min
## Movie Name* 1 1870 926.34 535.07 926.5 926.21 687.19 1.0
## Duration 2 1870 114.02 22.11 110.0 111.58 19.27 64.0
## IMDB Rating 3 1870 6.92 0.91 7.0 6.97 0.89 2.4
## Metascore 4 1870 60.60 17.81 61.0 60.73 19.27 9.0
## Votes 5 1870 232492.92 283288.33 140924.5 173044.78 109108.98 7442.0
## Genre* 6 1870 121.64 76.22 122.0 121.01 94.89 1.0
## Gross* 7 1870 836.58 493.12 837.5 836.10 624.92 1.0
## max range skew kurtosis se
## Movie Name* 1854.0 1853.0 0.00 -1.20 12.37
## Duration 238.0 174.0 1.32 2.84 0.51
## IMDB Rating 9.3 6.9 -0.80 1.85 0.02
## Metascore 100.0 91.0 -0.07 -0.50 0.41
## Votes 2875249.0 2867807.0 3.85 20.95 6551.00
## Genre* 258.0 257.0 0.00 -1.15 1.76
## Gross* 1698.0 1697.0 0.01 -1.18 11.40
Here are three interesting statistics and their interpretations for the entire dataset:
These three statistics together show that while most movies in the dataset are well-rated, their popularity and financial success vary significantly, with a few standout films driving up the numbers.
For the clustering analysis, I selected five variables: Duration, IMDB Rating, Metascore, Votes, and Gross Revenue. These features capture distinct and critical aspects of movie characteristics, providing a comprehensive view of how films differ in terms of popularity, critical reception, and commercial success.
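I chose to exclude Genre from the clustering process and to use it later for the criterion validity test. Since genres tend to differ in length, critical reception, and commercial performance, genre can serve as an external benchmark to assess whether the formed clusters reflect meaningful differences between types of movies. (Release Year cannot be used for this purpose, because it was removed from the dataset earlier.)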
# Gross is stored as text; strip the dollar sign and the "M" suffix and store
# the remaining number
movie_data_clean$Gross <- as.numeric(
  gsub(
    pattern = "\\$|M",
    replacement = "",
    x = movie_data_clean$Gross
  )
)
head(movie_data_clean, n = 6L)
## # A tibble: 6 × 7
## `Movie Name` Duration `IMDB Rating` Metascore Votes Genre Gross
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 The Godfather 175 9.2 100 2.00e6 Crim… 135.
## 2 The Godfather Part II 202 9 90 1.36e6 Crim… 57.3
## 3 Ordinary People 124 7.7 86 5.65e4 Drama 54.8
## 4 Lawrence of Arabia 218 8.3 100 3.13e5 Adve… 44.8
## 5 Close Encounters of the … 138 7.6 90 2.16e5 Dram… 132.
## 6 Once Upon a Time in the … 166 8.5 82 3.48e5 West… 5.32
I used ChatGPT for the code above.
# Put the five clustering variables on a common scale (z-scores) and keep the
# result as a plain data frame
movie_data_clu_std <- as.data.frame(
  scale(
    movie_data_clean[, c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross")],
    center = TRUE,
    scale = TRUE
  )
)
head(movie_data_clu_std, n = 6L)
## Duration IMDB Rating Metascore Votes Gross
## 1 2.7576972 2.5066733 2.211785 6.24862347 0.9289781
## 2 3.9787562 2.2870112 1.650349 3.97515527 -0.1334506
## 3 0.4512525 0.8592076 1.425775 -0.62133489 -0.1676475
## 4 4.7023467 1.5181939 2.211785 0.28434310 -0.3041615
## 5 1.0843942 0.7493766 1.650349 -0.05804306 0.8895833
## 6 2.3506776 1.7378560 1.201201 0.40812511 -0.8444722
# Dissimilarity of each movie: the Euclidean norm of its standardized values on
# the five clustering variables (square root of the sum of squared z-scores).
# The result is stored only on movie_data_clean. movie_data_clu_std is left
# with exactly the five clustering variables, so a derived column can never be
# picked up as an additional clustering variable by a later step.
clustering_vars <- c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross")
movie_data_clean$Dissimilarity <- unname(
  sqrt(rowSums(as.matrix(movie_data_clu_std[, clustering_vars])^2))
)
# Display the top movies with the highest dissimilarity
head(movie_data_clean[order(-movie_data_clean$Dissimilarity), c("Movie Name", "Dissimilarity")])
## # A tibble: 6 × 2
## `Movie Name` Dissimilarity
## <chr> <dbl>
## 1 The Dark Knight 11.7
## 2 Avatar 10.7
## 3 The Shawshank Redemption 9.86
## 4 Titanic 9.70
## 5 Inception 9.11
## 6 The Lord of the Rings: The Return of the King 8.94
# Row positions of the four movies with the highest dissimilarity
which(
  is.element(
    movie_data_clean$`Movie Name`,
    c("The Dark Knight", "Avatar", "The Shawshank Redemption", "Titanic")
  )
)
## [1] 450 455 1415 1652
These are the row positions of the first four potential outliers.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remove the four most extreme movies identified above. They are matched by
# title instead of by hard-coded row position, so the step cannot silently drop
# the wrong rows if the upstream data or cleaning changes.
outlier_titles <- c("The Dark Knight", "Avatar", "The Shawshank Redemption", "Titanic")
movie_data_clean <- movie_data_clean %>%
  filter(!(`Movie Name` %in% outlier_titles))
# Standardize the clustering variables without the outliers
clustering_vars <- c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross")
movie_data_clu_std <- as.data.frame(scale(movie_data_clean[, clustering_vars]))
# Recalculate dissimilarity for the filtered dataset (Euclidean norm of the
# standardized values). It is deliberately NOT added to movie_data_clu_std:
# that data frame is passed as-is to the distance, Hopkins, NbClust and k-means
# steps, and an extra, unstandardized Dissimilarity column there would be
# treated as a sixth clustering variable.
movie_data_clean$Dissimilarity <- unname(
  sqrt(rowSums(as.matrix(movie_data_clu_std[, clustering_vars])^2))
)
# Display the top movies with the highest dissimilarity
head(movie_data_clean[order(-movie_data_clean$Dissimilarity), c("Movie Name", "Dissimilarity")])
## # A tibble: 6 × 2
## `Movie Name` Dissimilarity
## <chr> <dbl>
## 1 Inception 9.61
## 2 The Lord of the Rings: The Return of the King 9.33
## 3 Forrest Gump 8.87
## 4 The Lord of the Rings: The Fellowship of the Ring 8.55
## 5 Pulp Fiction 8.18
## 6 Fight Club 8.15
Euclidean distances
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Pairwise Euclidean distances between movies in the standardized dataset
Distances <- get_dist(x = movie_data_clu_std, method = "euclidean")
# Plot the distance matrix with a dark-red-to-white colour gradient
fviz_dist(
  dist.obj = Distances,
  gradient = list(low = "darkred", mid = "grey95", high = "white")
)
library(factoextra)
# Hopkins statistic for clustering tendency; only the statistic is requested,
# the accompanying plot is switched off
get_clust_tendency(
  data = movie_data_clu_std,
  n = nrow(movie_data_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.90236
##
## $plot
## NULL
Number of clusters
library(factoextra)
library(NbClust)
# Elbow method: within-cluster sum of squares across candidate numbers of clusters
fviz_nbclust(x = movie_data_clu_std, FUNcluster = kmeans, method = "wss") +
  labs(subtitle = "Elbow method")
# Silhouette analysis across candidate numbers of clusters
fviz_nbclust(x = movie_data_clu_std, FUNcluster = kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette analysis")
library(NbClust)
# Let NbClust evaluate all of its indices for 2 to 10 k-means clusters
nc <- NbClust(
  data = movie_data_clu_std,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 5 proposed 2 as the best number of clusters
## * 10 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 3 proposed 5 as the best number of clusters
## * 4 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Perform k-means clustering with 3 clusters (best of 25 random starts).
# k-means starts from randomly chosen centers, so the seed is fixed: without it
# the numbering of the clusters (1/2/3) can change from run to run and would no
# longer match the written interpretation of each cluster.
set.seed(123)
Clustering <- kmeans(movie_data_clu_std,
                     centers = 3,
                     nstart = 25)
Clustering
## K-means clustering with 3 clusters of sizes 142, 941, 783
##
## Cluster means:
## Duration IMDB Rating Metascore Votes Gross Dissimilarity
## 1 1.0445714 1.1686868 0.8572351 2.5328781 1.93669508 4.526250
## 2 0.1794850 0.5389765 0.6012805 -0.1024449 -0.27564365 1.653932
## 3 -0.4051399 -0.8596812 -0.8780745 -0.3362300 -0.01996172 1.839719
##
## Clustering vector:
## [1] 1 1 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 3 2 2 2 3 2 2 2 2 1 1 1 2 2 1 2 2 2 1 2
## [38] 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2
## [75] 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2
## [112] 2 2 2 1 1 2 1 2 2 2 2 2 2 3 2 2 2 3 2 2 3 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [149] 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 3
## [186] 2 2 3 3 2 3 2 2 2 2 3 3 2 1 3 2 3 2 2 3 2 2 2 2 2 2 2 3 2 2 2 2 3 1 1 2 2
## [223] 1 2 3 2 2 2 2 2 3 2 2 2 2 2 2 3 2 2 3 2 2 2 2 2 1 2 1 1 3 2 2 2 3 2 2 2 3
## [260] 1 2 3 1 2 2 2 2 2 2 3 2 2 2 3 2 2 1 3 2 1 2 1 3 2 2 1 2 1 2 2 3 3 2 2 2 1
## [297] 2 2 3 2 2 2 2 3 2 2 2 2 2 2 3 2 3 3 3 3 3 3 2 2 2 2 1 3 3 2 2 3 2 1 2 2 3
## [334] 2 2 3 2 2 2 2 2 2 3 3 2 3 3 3 2 2 3 2 3 2 2 3 2 3 2 3 2 2 2 3 3 3 3 3 3 3
## [371] 3 1 2 2 2 2 3 2 2 2 2 2 2 2 2 3 2 2 2 2 3 3 2 2 2 3 2 2 2 2 2 2 3 3 2 2 2
## [408] 3 2 3 2 2 3 3 2 2 2 3 3 3 3 3 2 3 3 2 3 2 2 3 3 2 2 3 3 2 3 3 3 2 2 2 2 2
## [445] 2 1 3 2 2 2 2 2 2 2 1 2 1 1 2 2 2 2 1 2 2 2 3 2 2 2 1 1 3 2 2 2 1 2 2 2 3
## [482] 2 1 2 3 2 3 2 2 3 3 3 3 1 2 3 2 3 2 2 2 2 3 2 2 3 3 1 2 2 3 3 2 2 2 2 2 1
## [519] 3 3 2 2 2 2 3 3 3 2 1 3 2 1 2 3 1 2 2 3 3 3 2 2 3 1 2 3 3 3 2 2 2 3 3 2 3
## [556] 2 3 2 2 2 3 3 2 2 2 3 3 2 2 3 2 2 3 3 3 2 2 2 3 3 2 2 2 2 3 2 2 2 3 3 3 2
## [593] 3 2 3 3 3 3 3 3 2 2 2 2 3 2 3 3 2 3 2 2 3 2 3 2 2 3 2 2 2 2 2 2 3 2 2 3 2
## [630] 3 2 2 2 3 2 3 2 2 2 2 3 3 3 3 2 2 3 2 3 2 2 2 2 2 3 2 3 3 3 3 3 3 3 3 2 3
## [667] 3 2 3 2 2 2 2 3 3 2 3 3 2 2 1 2 2 1 3 2 1 1 2 2 1 1 3 3 1 2 3 3 2 2 2 3 1
## [704] 2 2 3 2 3 2 3 2 3 2 2 1 2 2 2 3 3 3 2 3 3 2 2 2 3 3 2 2 2 3 3 2 3 3 2 2 3
## [741] 2 2 2 3 2 2 2 2 2 3 3 2 3 2 3 2 2 1 2 2 2 3 2 3 3 2 3 2 2 2 3 3 2 2 2 2 2
## [778] 2 2 2 3 3 2 3 3 2 3 2 1 2 3 2 2 2 3 3 3 3 2 3 2 3 1 2 3 2 2 3 3 3 3 2 2 2
## [815] 3 3 2 2 3 2 2 3 3 2 3 1 3 2 3 3 2 2 2 2 2 2 2 2 2 3 1 3 2 1 2 2 2 2 3 3 2
## [852] 2 3 2 3 3 3 2 2 3 2 3 2 3 2 2 3 2 2 3 3 2 3 2 2 3 3 3 2 3 2 3 3 2 2 2 3 2
## [889] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 2 2 3 3 3 3 1 2 1
## [926] 1 2 1 2 2 2 2 1 1 1 1 2 3 1 3 1 1 1 3 3 3 2 2 1 2 3 2 2 3 1 2 3 1 3 3 2 2
## [963] 3 1 2 2 2 2 3 3 1 2 2 3 2 2 3 2 2 3 3 1 2 2 2 2 2 2 2 3 1 1 2 2 1 3 2 3 3
## [1000] 3 3 3 2 2 3 3 2 3 3 2 3 2 2 2 3 3 3 3 2 3 2 3 3 2 2 2 2 3 2 2 3 2 3 3 3 3
## [1037] 3 2 2 3 3 2 3 2 3 3 3 1 3 3 2 2 3 2 3 3 2 3 2 2 3 2 2 2 3 3 2 2 2 3 2 3 3
## [1074] 2 3 2 3 2 3 2 3 3 2 3 3 3 2 2 3 3 2 3 2 3 2 2 2 2 3 3 2 3 3 3 3 3 3 3 3 3
## [1111] 3 3 3 3 3 3 3 3 3 3 2 2 3 3 2 2 2 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 3 3 3 3 3
## [1148] 3 3 3 3 3 3 3 3 3 3 3 2 2 2 3 3 3 1 2 1 3 1 1 2 3 2 2 1 2 1 2 3 2 3 2 2 3
## [1185] 2 1 3 2 1 2 2 2 2 2 2 3 2 3 1 3 2 2 2 2 2 3 3 2 3 1 3 3 3 2 3 1 3 2 2 2 2
## [1222] 3 2 2 2 1 2 2 1 2 3 3 2 2 1 3 2 3 2 3 2 3 3 2 3 3 3 2 2 2 3 3 3 2 3 3 2 2
## [1259] 2 2 2 2 2 2 2 3 3 3 3 3 3 2 2 3 2 2 3 3 2 2 3 3 3 3 3 3 3 2 3 3 3 3 2 3 2
## [1296] 3 2 2 2 3 3 2 2 3 3 3 3 2 2 3 2 3 3 3 2 3 2 2 2 3 2 2 2 3 3 2 3 3 3 2 2 2
## [1333] 3 2 3 3 3 2 2 2 3 3 2 2 3 3 2 3 3 3 2 3 3 3 2 2 3 2 3 3 2 3 3 2 3 2 2 2 3
## [1370] 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 3 2 2 3
## [1407] 1 1 1 1 3 1 1 2 2 2 2 2 1 3 2 2 2 2 2 2 2 2 2 1 1 2 3 2 1 1 2 2 3 2 2 2 3
## [1444] 2 1 2 1 2 2 3 2 2 3 2 2 2 2 2 3 3 2 2 3 2 1 2 2 2 3 2 3 1 2 2 3 3 3 2 2 3
## [1481] 2 2 3 3 2 2 2 2 3 3 2 3 3 3 2 3 2 2 3 3 3 3 3 3 3 3 3 1 2 2 3 3 3 3 3 2 3
## [1518] 2 2 3 3 3 3 3 3 3 3 2 2 3 2 2 3 3 2 3 3 3 3 3 3 2 3 2 2 2 3 3 3 2 2 2 3 3
## [1555] 3 2 3 2 2 3 2 2 3 3 2 2 2 2 2 3 3 1 2 3 3 3 2 3 2 3 3 2 3 3 2 2 2 2 3 3 3
## [1592] 2 3 2 2 2 3 3 2 3 2 3 2 3 3 3 3 2 3 2 2 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 3 3
## [1629] 3 2 3 3 3 3 3 3 2 2 2 3 1 1 1 3 2 2 1 2 3 2 1 2 2 3 3 2 2 2 2 3 3 2 2 3 1
## [1666] 3 3 3 3 3 1 2 2 3 3 2 2 1 3 3 1 1 3 3 3 2 3 2 3 1 3 3 2 2 2 2 3 2 3 2 2 2
## [1703] 2 2 2 1 3 3 3 3 2 2 2 2 3 2 3 2 3 3 3 3 3 2 2 3 2 3 2 3 3 3 3 1 2 3 1 3 3
## [1740] 2 3 3 3 3 2 3 2 2 2 3 3 3 2 1 2 3 3 3 3 2 2 3 3 2 3 2 3 3 1 2 3 2 3 3 1 3
## [1777] 2 3 3 3 3 3 3 3 3 2 3 2 2 2 2 3 2 3 3 3 3 3 2 2 3 2 2 3 2 3 3 3 2 2 2 2 3
## [1814] 3 3 3 3 2 3 2 2 3 2 2 3 2 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1851] 2 3 2 3 3 3 2 3 2 3 3 2 3 2 3 3
##
## Within cluster sum of squares by cluster:
## [1] 1613.372 2710.070 2171.372
## (between_SS / total_SS = 43.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
Cluster 1 (142 movies): - High IMDB ratings, good metascores, and large gross revenues. - Movies are popular, long, and distinct from others (high dissimilarity).
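Cluster 2 (941 movies): - Above-average IMDB ratings and Metascores, but slightly below-average votes and gross revenue. - These are well-reviewed movies of roughly average length that attract fewer votes and earn less at the box office than the movies in Cluster 1; they are the closest to the overall average (lowest dissimilarity).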
Cluster 3 (783 movies): - Low ratings, poor critical reception, and average box office earnings. - These movies are less popular but more similar to each other (moderate dissimilarity).
In summary, Cluster 1 has blockbusters, Cluster 2 contains well-reviewed but commercially less successful movies, and Cluster 3 has poorly rated films.
library(factoextra)
# Plot the three-cluster solution for the standardized data
fviz_cluster(
  object = Clustering,
  data = movie_data_clu_std,
  palette = "Set1",
  ggtheme = theme_bw(),
  repel = FALSE
)
library(dplyr)
# Remove twelve further outlier movies by row position.
# NOTE(review): these positions presumably come from the point labels in the
# cluster plot above; they are only valid for the dataset exactly as it stands
# at this point - confirm before re-running on changed data.
movie_data_clean <- movie_data_clean %>%
  filter(!(row_number() %in% c(1641, 508, 935, 934, 971, 1, 455, 2, 134, 4, 1750, 113)))
# Standardize the clustering variables without the outliers
movie_data_clu_std <- as.data.frame(scale(movie_data_clean[, c("Duration",
                                                               "IMDB Rating",
                                                               "Metascore",
                                                               "Votes",
                                                               "Gross")]))
# Perform k-means clustering with 3 clusters (best of 25 random starts).
# The seed is fixed because k-means uses random starting centers; otherwise the
# cluster numbering can differ between runs, while the profile plot, ANOVA and
# chi-square table below are all read by cluster number.
set.seed(123)
Clustering <- kmeans(movie_data_clu_std,
                     centers = 3,
                     nstart = 25)
Clustering
## K-means clustering with 3 clusters of sizes 185, 826, 843
##
## Cluster means:
## Duration IMDB Rating Metascore Votes Gross
## 1 0.5674822 0.9389795 0.6124444 2.2653258 1.80019513
## 2 -0.4230055 -0.8066720 -0.8201808 -0.3620828 -0.07501013
## 3 0.2899387 0.5843415 0.6692374 -0.1423545 -0.32156314
##
## Clustering vector:
## [1] 3 3 3 3 3 3 3 3 3 3 1 3 3 3 2 3 3 3 2 3 3 3 3 1 1 1 3 3 1 3 3 3 1 3 3 3 3
## [38] 3 1 3 1 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 2 1 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3
## [75] 2 3 1 3 1 3 3 3 3 3 3 3 2 3 3 3 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 1
## [112] 1 3 3 3 3 3 3 3 2 2 3 3 3 2 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3
## [149] 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 2 3 3 2 2 3
## [186] 2 3 3 2 3 2 2 3 1 2 3 2 3 3 2 3 3 3 1 3 3 3 2 3 3 3 3 2 1 1 3 3 1 1 2 2 3
## [223] 1 3 3 2 3 3 3 2 3 3 1 3 3 2 2 3 3 3 1 1 3 1 1 2 3 3 3 1 3 3 3 2 1 3 1 1 3
## [260] 3 3 3 1 3 2 3 3 3 2 3 3 1 2 3 1 3 1 2 3 3 1 3 1 3 3 2 2 3 3 3 1 3 3 2 3 3
## [297] 3 3 2 1 3 3 3 3 3 2 3 2 2 2 2 2 2 3 3 3 3 3 2 2 3 3 2 3 1 3 3 2 3 3 2 3 3
## [334] 3 3 3 3 2 2 3 2 2 2 3 3 2 3 2 3 3 2 3 2 3 2 3 3 3 2 2 2 2 2 2 2 2 1 3 3 3
## [371] 1 2 3 2 3 3 3 3 2 3 2 3 3 3 3 2 2 3 3 3 2 3 3 3 3 2 3 2 2 3 3 3 2 3 2 3 3
## [408] 2 2 2 3 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 3 3 2 2 3 2 2 2 3 3 3 3 3 3 1 2 1 1
## [445] 3 3 3 1 3 3 1 1 3 3 1 3 3 2 3 3 2 3 3 3 1 1 2 3 3 1 1 3 3 3 2 3 1 2 2 3 2
## [482] 3 3 2 2 2 1 1 3 2 3 2 3 3 3 3 2 3 3 2 2 3 3 2 1 3 3 3 3 3 1 2 2 3 3 3 3 2
## [519] 2 2 3 1 2 3 1 3 2 1 3 3 2 2 2 3 3 2 1 3 2 2 2 3 3 3 2 2 3 2 3 2 3 3 3 1 2
## [556] 3 3 3 2 2 3 3 2 3 3 2 2 2 3 1 3 2 2 3 2 3 3 2 2 3 3 2 2 2 3 2 3 2 2 2 2 2
## [593] 2 3 3 3 3 2 3 2 2 3 2 2 3 2 3 2 3 3 2 3 3 3 3 3 3 2 3 2 2 3 2 3 3 3 2 3 2
## [630] 3 3 3 3 2 2 2 2 3 3 2 3 2 3 3 3 2 2 2 3 2 2 2 2 2 2 2 2 3 2 2 3 2 3 3 3 3
## [667] 2 2 3 2 2 3 3 1 3 1 1 2 1 1 1 3 1 1 1 2 2 1 3 2 2 3 3 3 2 1 3 3 2 3 2 3 2
## [704] 3 2 3 2 1 3 3 1 2 2 2 3 1 1 3 3 1 2 2 3 3 3 1 2 3 2 2 3 3 2 3 3 3 2 3 3 3
## [741] 2 3 2 2 3 2 3 2 3 3 1 3 3 3 2 2 2 2 2 2 3 3 3 2 2 3 3 3 3 3 3 3 3 2 2 3 2
## [778] 2 3 2 3 1 3 2 1 3 3 2 2 2 2 3 2 3 2 1 3 2 3 3 2 2 2 2 3 3 3 2 2 3 3 2 3 2
## [815] 2 2 3 2 1 2 3 2 2 3 3 3 3 3 3 3 3 3 2 1 2 1 3 3 3 3 3 2 2 3 3 2 3 2 2 2 3
## [852] 3 2 3 2 3 2 2 3 2 3 3 2 2 3 2 3 2 2 2 2 3 2 3 2 2 3 3 3 2 2 2 3 2 2 2 2 2
## [889] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3 3 2 2 2 2 1 3 1 1 1 1 3 3 1 3
## [926] 1 1 1 2 1 2 1 1 1 2 2 1 3 3 1 3 2 3 3 2 1 3 1 1 2 2 3 3 2 1 3 3 1 3 2 2 1
## [963] 3 2 3 3 2 3 3 2 2 1 1 3 3 3 3 3 3 2 1 1 3 3 1 2 3 2 2 2 1 2 2 3 2 2 3 2 2
## [1000] 3 2 3 3 3 2 1 2 2 3 2 3 2 2 3 3 3 3 2 3 3 2 3 2 2 2 2 2 3 3 2 2 3 2 3 2 2
## [1037] 2 1 2 2 3 3 2 3 2 2 3 2 3 3 2 3 3 3 2 2 3 3 3 2 3 2 2 3 2 3 2 3 2 3 2 2 3
## [1074] 2 2 2 1 3 2 2 2 2 3 2 3 3 3 3 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1111] 2 3 2 2 3 3 3 2 2 2 2 2 2 2 2 2 2 3 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1148] 2 3 3 3 2 2 2 1 3 1 2 1 1 3 2 1 3 1 3 1 3 2 3 2 1 2 2 3 1 2 3 1 3 3 3 3 2
## [1185] 3 2 3 2 1 2 3 3 3 3 3 3 2 3 2 1 2 2 2 1 2 1 2 3 3 3 3 2 3 3 3 1 3 3 1 3 1
## [1222] 2 3 3 1 2 3 2 3 2 3 2 2 3 2 2 2 3 3 3 2 2 2 1 2 2 3 2 3 3 3 2 3 3 3 2 2 2
## [1259] 2 1 2 3 3 2 2 3 2 2 3 3 2 2 2 2 2 2 2 3 2 2 1 2 3 2 3 2 3 3 3 2 2 3 2 2 2
## [1296] 2 2 3 3 2 3 2 2 2 3 2 3 3 3 2 2 3 3 2 2 3 2 2 2 3 3 3 2 3 2 2 2 3 3 3 2 2
## [1333] 3 3 2 2 3 2 2 2 3 2 2 2 3 3 2 3 2 2 3 2 2 3 2 3 3 3 2 3 3 2 2 2 2 2 2 2 2
## [1370] 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 2 1 1 1 1 2 1 1 3 3 3
## [1407] 3 1 1 2 3 3 3 3 3 3 3 3 3 1 3 1 2 3 1 1 3 3 2 3 1 3 2 3 1 3 1 3 3 2 3 3 2
## [1444] 3 1 3 1 3 1 2 2 1 2 3 1 3 3 3 2 3 2 1 3 3 2 2 2 3 3 2 2 3 2 2 2 3 3 3 2 2
## [1481] 3 2 2 2 3 2 3 3 2 2 2 2 2 2 2 2 2 1 3 3 2 2 2 2 2 3 2 3 3 2 2 2 2 2 2 2 2
## [1518] 2 3 2 3 3 2 2 2 1 2 2 2 2 2 3 2 3 3 3 2 2 2 3 3 3 2 2 2 3 2 3 3 2 3 3 2 2
## [1555] 3 3 3 3 2 2 2 1 2 2 2 2 3 2 3 2 2 3 2 2 2 3 3 3 2 2 2 2 2 3 3 3 2 2 3 2 2
## [1592] 2 2 2 2 2 2 3 2 3 3 2 2 2 2 2 2 2 2 2 2 3 2 3 2 2 2 2 2 3 2 2 2 2 2 2 3 3
## [1629] 3 2 1 1 2 3 3 1 3 2 3 1 3 3 2 2 3 3 2 3 2 2 3 3 2 1 2 2 2 2 2 1 3 3 2 1 3
## [1666] 3 1 2 2 1 1 2 2 2 1 2 3 2 1 2 2 3 2 3 3 2 2 2 3 3 3 3 3 3 1 2 2 2 2 2 3 3
## [1703] 3 2 3 2 3 2 2 2 2 2 3 3 2 3 2 2 2 2 2 2 1 1 2 1 2 2 3 2 2 2 2 3 2 3 3 2 2
## [1740] 2 3 1 3 2 2 2 2 3 3 2 2 3 2 1 2 2 1 3 2 3 2 2 1 2 3 2 2 2 2 2 2 2 2 3 2 3
## [1777] 3 3 2 2 3 2 2 2 2 2 3 3 2 3 3 2 3 2 2 2 3 3 3 3 2 2 2 2 2 3 2 3 3 2 3 3 2
## [1814] 3 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 2 2 2 3 2 2 2 2 3
## [1851] 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 1327.182 1777.982 2124.456
## (between_SS / total_SS = 43.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the three-cluster solution refitted without the additional outliers
fviz_cluster(
  object = Clustering,
  data = movie_data_clu_std,
  palette = "Set1",
  ggtheme = theme_bw(),
  repel = FALSE
)
# Cluster centers of the k-means fit (means of the standardized variables)
Averages <- Clustering[["centers"]]
Averages
## Duration IMDB Rating Metascore Votes Gross
## 1 0.5674822 0.9389795 0.6124444 2.2653258 1.80019513
## 2 -0.4230055 -0.8066720 -0.8201808 -0.3620828 -0.07501013
## 3 0.2899387 0.5843415 0.6692374 -0.1423545 -0.32156314
Each cluster represents a distinct group of movies with different characteristics regarding ratings, popularity, and box office performance.
# Profile plot of the cluster centers: one line per cluster across the five
# standardized clustering variables. Uses 'Averages' (the k-means centers).
Figure <- as.data.frame(Averages)
# Add a column for cluster IDs; seq_len() is safe even if there are no rows
Figure$ID <- seq_len(nrow(Figure))
# Transform the data to long format (one row per cluster/variable pair);
# pivot_longer() stores the variable in 'name' and the center in 'value'
library(tidyr)
Figure <- pivot_longer(Figure, cols = c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross"))
# Create a factor for the cluster group
Figure$Group <- factor(Figure$ID,
                       levels = c(1, 2, 3),
                       labels = c("1", "2", "3"))
# Create a factor for the variable names so the x-axis keeps this order
Figure$NameF <- factor(Figure$name,
                       levels = c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross"),
                       labels = c("Duration", "IMDB Rating", "Metascore", "Votes", "Gross"))
# Visualizing with ggplot2
library(ggplot2)
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, color = Group), size = 3) +
  # 'linewidth' replaces 'size' for lines, which is deprecated since ggplot2 3.4.0
  geom_line(aes(group = ID), linewidth = 1) +
  ylab("Averages") +
  xlab("Cluster Variables") +
  ylim(-2, 3.5) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Attach each movie's cluster label (from 'Clustering$cluster') to the
# unstandardized dataset 'movie_data_clean'
movie_data_clean$Group <- Clustering$cluster
# ANOVA of the five clustering variables on cluster membership; cbind() on the
# left-hand side fits all five responses in a single call
fit <- aov(
  formula = cbind(Duration, `IMDB Rating`, Metascore, Votes, Gross) ~ as.factor(Group),
  data = movie_data_clean
)
# Print one ANOVA table per response variable
summary(fit)
## Response Duration :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 127169 63584 163.53 < 2.2e-16 ***
## Residuals 1851 719732 389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response IMDB Rating :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 796.37 398.18 1058.1 < 2.2e-16 ***
## Residuals 1851 696.54 0.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Metascore :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 314170 157085 1091.1 < 2.2e-16 ***
## Residuals 1851 266477 144
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Votes :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 6.3606e+13 3.1803e+13 1278.1 < 2.2e-16 ***
## Residuals 1851 4.6060e+13 2.4884e+10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Gross :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 3175128 1587564 550.8 < 2.2e-16 ***
## Residuals 1851 5335099 2882
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The extremely low p-values (< 0.001) for all five clustering variables confirm that the clustering successfully differentiates movies based on their characteristics. Each cluster represents a distinct profile, with meaningful differences in Duration, IMDB Rating, Metascore, Votes, and Gross. This supports the validity of the clustering approach in identifying movie groups with varying attributes and performances.
# Chi-square test of association between Genre and cluster membership.
# Genre has a very large number of combinations, many of them with only one or
# two movies, so the expected counts are too small for the asymptotic
# chi-square approximation (R warns "Chi-squared approximation may be
# incorrect"). The p-value is therefore obtained by Monte Carlo simulation;
# the seed makes the simulated p-value reproducible. The observed and expected
# tables in the result are unaffected by this choice.
set.seed(123)
chi_square <- chisq.test(movie_data_clean$Genre,
                         as.factor(movie_data_clean$Group),
                         simulate.p.value = TRUE)
## Warning in chisq.test(movie_data_clean$Genre,
## as.factor(movie_data_clean$Group)): Chi-squared approximation may be incorrect
# Show the test result
print(chi_square)
##
## Pearson's Chi-squared test
##
## data: movie_data_clean$Genre and as.factor(movie_data_clean$Group)
## X-squared = 1003.4, df = 514, p-value < 2.2e-16
# Observed Genre-by-cluster counts with row and column totals
addmargins(A = chi_square$observed)
##
## movie_data_clean$Genre 1 2 3 Sum
## Action, Adventure 5 1 0 6
## Action, Adventure, Biography 0 0 1 1
## Action, Adventure, Comedy 1 36 9 46
## Action, Adventure, Crime 0 7 0 7
## Action, Adventure, Drama 1 10 18 29
## Action, Adventure, Family 1 8 2 11
## Action, Adventure, Fantasy 10 19 2 31
## Action, Adventure, History 0 1 0 1
## Action, Adventure, Horror 0 2 1 3
## Action, Adventure, Mystery 2 4 1 7
## Action, Adventure, Romance 1 1 1 3
## Action, Adventure, Sci-Fi 19 24 10 53
## Action, Adventure, Thriller 4 18 18 40
## Action, Adventure, War 0 0 1 1
## Action, Biography, Crime 0 1 2 3
## Action, Biography, Drama 0 1 1 2
## Action, Comedy 0 5 2 7
## Action, Comedy, Crime 2 14 4 20
## Action, Comedy, Drama 1 0 0 1
## Action, Comedy, Fantasy 1 4 1 6
## Action, Comedy, Horror 0 0 1 1
## Action, Comedy, Mystery 0 0 1 1
## Action, Comedy, Romance 0 2 0 2
## Action, Comedy, Sci-Fi 0 3 0 3
## Action, Comedy, Thriller 0 1 1 2
## Action, Comedy, War 0 1 1 2
## Action, Crime 0 1 0 1
## Action, Crime, Drama 4 16 21 41
## Action, Crime, Fantasy 0 2 1 3
## Action, Crime, Horror 0 1 0 1
## Action, Crime, Mystery 1 0 0 1
## Action, Crime, Romance 0 1 1 2
## Action, Crime, Sci-Fi 0 1 4 5
## Action, Crime, Thriller 3 24 13 40
## Action, Drama 3 0 0 3
## Action, Drama, Family 0 1 1 2
## Action, Drama, Fantasy 0 1 0 1
## Action, Drama, History 0 0 6 6
## Action, Drama, Horror 1 0 0 1
## Action, Drama, Music 0 1 0 1
## Action, Drama, Mystery 1 2 1 4
## Action, Drama, Romance 1 1 1 3
## Action, Drama, Sci-Fi 2 3 2 7
## Action, Drama, Sport 0 3 2 5
## Action, Drama, Thriller 0 7 4 11
## Action, Drama, War 0 1 2 3
## Action, Drama, Western 0 1 1 2
## Action, Family, Sport 0 1 0 1
## Action, Fantasy, Horror 0 3 0 3
## Action, Fantasy, Thriller 0 4 0 4
## Action, Horror 0 1 1 2
## Action, Horror, Mystery 0 1 0 1
## Action, Horror, Sci-Fi 0 16 0 16
## Action, Horror, Thriller 0 1 1 2
## Action, Mystery, Sci-Fi 1 2 0 3
## Action, Mystery, Thriller 3 2 1 6
## Action, Sci-Fi 4 5 0 9
## Action, Sci-Fi, Thriller 2 10 1 13
## Action, Sport, Thriller 0 1 0 1
## Action, Thriller 1 3 4 8
## Action, War 0 1 0 1
## Adventure, Biography, Drama 0 0 7 7
## Adventure, Biography, War 0 0 1 1
## Adventure, Comedy 0 5 1 6
## Adventure, Comedy, Crime 0 6 1 7
## Adventure, Comedy, Drama 0 5 8 13
## Adventure, Comedy, Family 2 14 5 21
## Adventure, Comedy, Fantasy 0 1 4 5
## Adventure, Comedy, Music 0 2 0 2
## Adventure, Comedy, Musical 0 1 0 1
## Adventure, Comedy, Sci-Fi 2 2 3 7
## Adventure, Crime, Drama 0 1 1 2
## Adventure, Drama 0 1 2 3
## Adventure, Drama, Family 0 2 2 4
## Adventure, Drama, Fantasy 0 0 4 4
## Adventure, Drama, History 1 0 1 2
## Adventure, Drama, Mystery 0 0 1 1
## Adventure, Drama, Romance 1 4 3 8
## Adventure, Drama, Sci-Fi 0 2 1 3
## Adventure, Drama, Thriller 0 2 3 5
## Adventure, Drama, War 1 0 1 2
## Adventure, Drama, Western 1 0 2 3
## Adventure, Family, Fantasy 6 4 5 15
## Adventure, Family, Romance 0 1 0 1
## Adventure, Family, Sci-Fi 1 0 0 1
## Adventure, Fantasy, Romance 0 1 0 1
## Adventure, Horror 0 0 1 1
## Adventure, Horror, Thriller 0 2 1 3
## Adventure, Mystery, Sci-Fi 0 1 0 1
## Adventure, Mystery, Thriller 1 0 0 1
## Adventure, Sci-Fi 1 0 1 2
## Adventure, Sci-Fi, Thriller 0 1 0 1
## Adventure, War, Western 0 0 1 1
## Adventure, Western 1 0 0 1
## Animation, Action, Adventure 3 8 2 13
## Animation, Action, Crime 0 0 1 1
## Animation, Action, Drama 0 0 1 1
## Animation, Adventure, Comedy 16 24 18 58
## Animation, Adventure, Drama 1 4 3 8
## Animation, Adventure, Family 1 0 4 5
## Animation, Adventure, Sci-Fi 0 0 1 1
## Animation, Comedy, Crime 0 0 1 1
## Animation, Comedy, Family 0 1 1 2
## Animation, Comedy, Fantasy 0 0 1 1
## Animation, Drama, Family 0 0 3 3
## Animation, Drama, Fantasy 0 0 1 1
## Animation, Family, Fantasy 1 0 1 2
## Biography, Comedy, Crime 0 2 1 3
## Biography, Comedy, Drama 0 2 6 8
## Biography, Crime, Drama 3 1 19 23
## Biography, Drama 1 1 11 13
## Biography, Drama, Family 1 0 2 3
## Biography, Drama, History 0 2 20 22
## Biography, Drama, Music 1 1 11 13
## Biography, Drama, Mystery 1 0 0 1
## Biography, Drama, Romance 0 0 8 8
## Biography, Drama, Sport 1 1 10 12
## Biography, Drama, Thriller 0 0 1 1
## Biography, Drama, War 1 0 3 4
## Comedy 2 43 15 60
## Comedy, Crime 2 7 6 15
## Comedy, Crime, Drama 0 4 13 17
## Comedy, Crime, Fantasy 0 1 0 1
## Comedy, Crime, Music 0 2 1 3
## Comedy, Crime, Musical 0 1 1 2
## Comedy, Crime, Mystery 0 2 1 3
## Comedy, Crime, Romance 0 2 1 3
## Comedy, Crime, Sport 0 1 0 1
## Comedy, Crime, Thriller 0 1 0 1
## Comedy, Drama 4 10 28 42
## Comedy, Drama, Family 0 3 3 6
## Comedy, Drama, Fantasy 1 8 7 16
## Comedy, Drama, History 0 0 1 1
## Comedy, Drama, Music 0 3 5 8
## Comedy, Drama, Musical 0 1 2 3
## Comedy, Drama, Mystery 0 0 2 2
## Comedy, Drama, Romance 1 48 43 92
## Comedy, Drama, Sport 0 0 1 1
## Comedy, Drama, Thriller 0 1 1 2
## Comedy, Drama, War 0 0 1 1
## Comedy, Family 1 3 1 5
## Comedy, Family, Fantasy 0 9 1 10
## Comedy, Family, Music 0 2 0 2
## Comedy, Family, Romance 0 6 0 6
## Comedy, Family, Sci-Fi 0 1 0 1
## Comedy, Family, Sport 0 2 0 2
## Comedy, Fantasy 1 6 1 8
## Comedy, Fantasy, Horror 0 3 0 3
## Comedy, Fantasy, Romance 0 6 1 7
## Comedy, History, Musical 0 1 0 1
## Comedy, Horror 0 2 7 9
## Comedy, Horror, Musical 0 0 2 2
## Comedy, Horror, Sci-Fi 0 2 0 2
## Comedy, Horror, Thriller 0 0 1 1
## Comedy, Music 0 3 3 6
## Comedy, Music, Musical 0 1 0 1
## Comedy, Music, Romance 0 2 1 3
## Comedy, Musical 0 1 1 2
## Comedy, Musical, Romance 0 1 1 2
## Comedy, Mystery, Romance 0 0 1 1
## Comedy, Mystery, Sci-Fi 0 1 0 1
## Comedy, Mystery, Thriller 0 1 0 1
## Comedy, Romance 3 40 12 55
## Comedy, Romance, Sci-Fi 0 2 0 2
## Comedy, Romance, Sport 0 3 1 4
## Comedy, Romance, Thriller 0 1 0 1
## Comedy, Sci-Fi 0 4 0 4
## Comedy, Sport 0 9 1 10
## Comedy, War 0 1 1 2
## Comedy, Western 0 1 2 3
## Crime, Drama 5 1 18 24
## Crime, Drama, Fantasy 1 0 1 2
## Crime, Drama, Film-Noir 0 0 4 4
## Crime, Drama, Horror 0 2 1 3
## Crime, Drama, Music 0 1 1 2
## Crime, Drama, Musical 0 0 2 2
## Crime, Drama, Mystery 3 15 23 41
## Crime, Drama, Romance 1 1 4 6
## Crime, Drama, Sci-Fi 0 0 1 1
## Crime, Drama, Sport 0 0 1 1
## Crime, Drama, Thriller 3 7 28 38
## Crime, Film-Noir, Mystery 0 0 1 1
## Crime, Horror, Sci-Fi 0 1 0 1
## Crime, Horror, Thriller 0 1 0 1
## Crime, Mystery, Sci-Fi 0 0 1 1
## Crime, Mystery, Thriller 0 4 2 6
## Crime, Romance, Thriller 0 0 1 1
## Crime, Sci-Fi 1 0 0 1
## Crime, Thriller 2 2 6 10
## Documentary 0 0 1 1
## Documentary, Biography, Crime 0 0 1 1
## Drama 7 5 41 53
## Drama, Family 0 2 0 2
## Drama, Family, Fantasy 0 1 3 4
## Drama, Family, Musical 0 0 3 3
## Drama, Family, Romance 0 0 1 1
## Drama, Family, Sport 0 0 1 1
## Drama, Fantasy 0 0 1 1
## Drama, Fantasy, History 0 0 1 1
## Drama, Fantasy, Horror 0 5 4 9
## Drama, Fantasy, Music 0 0 1 1
## Drama, Fantasy, Mystery 0 3 3 6
## Drama, Fantasy, Romance 2 5 3 10
## Drama, Fantasy, Sport 0 1 0 1
## Drama, Fantasy, Thriller 0 1 0 1
## Drama, Fantasy, War 0 0 1 1
## Drama, Film-Noir, Romance 0 0 1 1
## Drama, History 0 0 2 2
## Drama, History, Thriller 0 1 5 6
## Drama, History, War 0 0 1 1
## Drama, Horror 1 0 1 2
## Drama, Horror, Mystery 0 2 5 7
## Drama, Horror, Romance 0 1 0 1
## Drama, Horror, Sci-Fi 0 2 4 6
## Drama, Horror, Thriller 0 2 0 2
## Drama, Music 0 5 3 8
## Drama, Music, Musical 0 0 2 2
## Drama, Music, Romance 0 8 5 13
## Drama, Musical, Romance 0 1 3 4
## Drama, Musical, Sport 0 0 1 1
## Drama, Mystery 0 0 4 4
## Drama, Mystery, Romance 0 4 7 11
## Drama, Mystery, Sci-Fi 3 0 6 9
## Drama, Mystery, Thriller 2 11 9 22
## Drama, Mystery, War 1 0 0 1
## Drama, Romance 2 12 38 52
## Drama, Romance, Sci-Fi 1 1 1 3
## Drama, Romance, Sport 0 2 1 3
## Drama, Romance, Thriller 0 1 3 4
## Drama, Romance, War 1 1 5 7
## Drama, Romance, Western 0 0 1 1
## Drama, Sci-Fi 0 0 4 4
## Drama, Sci-Fi, Thriller 0 3 2 5
## Drama, Sport 2 3 5 10
## Drama, Thriller 0 2 10 12
## Drama, Thriller, War 0 0 3 3
## Drama, Thriller, Western 0 0 1 1
## Drama, War 3 1 5 9
## Drama, Western 0 0 4 4
## Family, Fantasy, Musical 0 0 1 1
## Fantasy, Horror 0 2 1 3
## Fantasy, Horror, Mystery 0 3 2 5
## Fantasy, Mystery, Romance 0 1 0 1
## Fantasy, Mystery, Sci-Fi 0 0 1 1
## Film-Noir, Mystery, Thriller 0 0 1 1
## Horror 1 11 5 17
## Horror, Mystery 0 9 5 14
## Horror, Mystery, Sci-Fi 0 1 2 3
## Horror, Mystery, Thriller 0 15 4 19
## Horror, Sci-Fi 1 1 2 4
## Horror, Sci-Fi, Thriller 0 4 1 5
## Horror, Thriller 0 16 4 20
## Mystery, Romance, Thriller 0 0 1 1
## Mystery, Sci-Fi, Thriller 1 0 0 1
## Mystery, Thriller 2 2 2 6
## Sci-Fi, Thriller 0 1 1 2
## Thriller 0 1 0 1
## Western 0 0 2 2
## Sum 185 826 843 1854
# Expected cell counts stored in the chi-square object, rounded to whole
# numbers, with a "Sum" column of row totals appended (margin = 2).
# NOTE(review): rounding is applied before the totals are added, so "Sum" is
# the total of the rounded cells, not the rounded true row total - confirm
# this is the intended display.
addmargins(round(chi_square$expected, digits = 0), margin = 2)
##
## movie_data_clean$Genre 1 2 3 Sum
## Action, Adventure 1 3 3 7
## Action, Adventure, Biography 0 0 0 0
## Action, Adventure, Comedy 5 20 21 46
## Action, Adventure, Crime 1 3 3 7
## Action, Adventure, Drama 3 13 13 29
## Action, Adventure, Family 1 5 5 11
## Action, Adventure, Fantasy 3 14 14 31
## Action, Adventure, History 0 0 0 0
## Action, Adventure, Horror 0 1 1 2
## Action, Adventure, Mystery 1 3 3 7
## Action, Adventure, Romance 0 1 1 2
## Action, Adventure, Sci-Fi 5 24 24 53
## Action, Adventure, Thriller 4 18 18 40
## Action, Adventure, War 0 0 0 0
## Action, Biography, Crime 0 1 1 2
## Action, Biography, Drama 0 1 1 2
## Action, Comedy 1 3 3 7
## Action, Comedy, Crime 2 9 9 20
## Action, Comedy, Drama 0 0 0 0
## Action, Comedy, Fantasy 1 3 3 7
## Action, Comedy, Horror 0 0 0 0
## Action, Comedy, Mystery 0 0 0 0
## Action, Comedy, Romance 0 1 1 2
## Action, Comedy, Sci-Fi 0 1 1 2
## Action, Comedy, Thriller 0 1 1 2
## Action, Comedy, War 0 1 1 2
## Action, Crime 0 0 0 0
## Action, Crime, Drama 4 18 19 41
## Action, Crime, Fantasy 0 1 1 2
## Action, Crime, Horror 0 0 0 0
## Action, Crime, Mystery 0 0 0 0
## Action, Crime, Romance 0 1 1 2
## Action, Crime, Sci-Fi 0 2 2 4
## Action, Crime, Thriller 4 18 18 40
## Action, Drama 0 1 1 2
## Action, Drama, Family 0 1 1 2
## Action, Drama, Fantasy 0 0 0 0
## Action, Drama, History 1 3 3 7
## Action, Drama, Horror 0 0 0 0
## Action, Drama, Music 0 0 0 0
## Action, Drama, Mystery 0 2 2 4
## Action, Drama, Romance 0 1 1 2
## Action, Drama, Sci-Fi 1 3 3 7
## Action, Drama, Sport 0 2 2 4
## Action, Drama, Thriller 1 5 5 11
## Action, Drama, War 0 1 1 2
## Action, Drama, Western 0 1 1 2
## Action, Family, Sport 0 0 0 0
## Action, Fantasy, Horror 0 1 1 2
## Action, Fantasy, Thriller 0 2 2 4
## Action, Horror 0 1 1 2
## Action, Horror, Mystery 0 0 0 0
## Action, Horror, Sci-Fi 2 7 7 16
## Action, Horror, Thriller 0 1 1 2
## Action, Mystery, Sci-Fi 0 1 1 2
## Action, Mystery, Thriller 1 3 3 7
## Action, Sci-Fi 1 4 4 9
## Action, Sci-Fi, Thriller 1 6 6 13
## Action, Sport, Thriller 0 0 0 0
## Action, Thriller 1 4 4 9
## Action, War 0 0 0 0
## Adventure, Biography, Drama 1 3 3 7
## Adventure, Biography, War 0 0 0 0
## Adventure, Comedy 1 3 3 7
## Adventure, Comedy, Crime 1 3 3 7
## Adventure, Comedy, Drama 1 6 6 13
## Adventure, Comedy, Family 2 9 10 21
## Adventure, Comedy, Fantasy 0 2 2 4
## Adventure, Comedy, Music 0 1 1 2
## Adventure, Comedy, Musical 0 0 0 0
## Adventure, Comedy, Sci-Fi 1 3 3 7
## Adventure, Crime, Drama 0 1 1 2
## Adventure, Drama 0 1 1 2
## Adventure, Drama, Family 0 2 2 4
## Adventure, Drama, Fantasy 0 2 2 4
## Adventure, Drama, History 0 1 1 2
## Adventure, Drama, Mystery 0 0 0 0
## Adventure, Drama, Romance 1 4 4 9
## Adventure, Drama, Sci-Fi 0 1 1 2
## Adventure, Drama, Thriller 0 2 2 4
## Adventure, Drama, War 0 1 1 2
## Adventure, Drama, Western 0 1 1 2
## Adventure, Family, Fantasy 1 7 7 15
## Adventure, Family, Romance 0 0 0 0
## Adventure, Family, Sci-Fi 0 0 0 0
## Adventure, Fantasy, Romance 0 0 0 0
## Adventure, Horror 0 0 0 0
## Adventure, Horror, Thriller 0 1 1 2
## Adventure, Mystery, Sci-Fi 0 0 0 0
## Adventure, Mystery, Thriller 0 0 0 0
## Adventure, Sci-Fi 0 1 1 2
## Adventure, Sci-Fi, Thriller 0 0 0 0
## Adventure, War, Western 0 0 0 0
## Adventure, Western 0 0 0 0
## Animation, Action, Adventure 1 6 6 13
## Animation, Action, Crime 0 0 0 0
## Animation, Action, Drama 0 0 0 0
## Animation, Adventure, Comedy 6 26 26 58
## Animation, Adventure, Drama 1 4 4 9
## Animation, Adventure, Family 0 2 2 4
## Animation, Adventure, Sci-Fi 0 0 0 0
## Animation, Comedy, Crime 0 0 0 0
## Animation, Comedy, Family 0 1 1 2
## Animation, Comedy, Fantasy 0 0 0 0
## Animation, Drama, Family 0 1 1 2
## Animation, Drama, Fantasy 0 0 0 0
## Animation, Family, Fantasy 0 1 1 2
## Biography, Comedy, Crime 0 1 1 2
## Biography, Comedy, Drama 1 4 4 9
## Biography, Crime, Drama 2 10 10 22
## Biography, Drama 1 6 6 13
## Biography, Drama, Family 0 1 1 2
## Biography, Drama, History 2 10 10 22
## Biography, Drama, Music 1 6 6 13
## Biography, Drama, Mystery 0 0 0 0
## Biography, Drama, Romance 1 4 4 9
## Biography, Drama, Sport 1 5 5 11
## Biography, Drama, Thriller 0 0 0 0
## Biography, Drama, War 0 2 2 4
## Comedy 6 27 27 60
## Comedy, Crime 1 7 7 15
## Comedy, Crime, Drama 2 8 8 18
## Comedy, Crime, Fantasy 0 0 0 0
## Comedy, Crime, Music 0 1 1 2
## Comedy, Crime, Musical 0 1 1 2
## Comedy, Crime, Mystery 0 1 1 2
## Comedy, Crime, Romance 0 1 1 2
## Comedy, Crime, Sport 0 0 0 0
## Comedy, Crime, Thriller 0 0 0 0
## Comedy, Drama 4 19 19 42
## Comedy, Drama, Family 1 3 3 7
## Comedy, Drama, Fantasy 2 7 7 16
## Comedy, Drama, History 0 0 0 0
## Comedy, Drama, Music 1 4 4 9
## Comedy, Drama, Musical 0 1 1 2
## Comedy, Drama, Mystery 0 1 1 2
## Comedy, Drama, Romance 9 41 42 92
## Comedy, Drama, Sport 0 0 0 0
## Comedy, Drama, Thriller 0 1 1 2
## Comedy, Drama, War 0 0 0 0
## Comedy, Family 0 2 2 4
## Comedy, Family, Fantasy 1 4 5 10
## Comedy, Family, Music 0 1 1 2
## Comedy, Family, Romance 1 3 3 7
## Comedy, Family, Sci-Fi 0 0 0 0
## Comedy, Family, Sport 0 1 1 2
## Comedy, Fantasy 1 4 4 9
## Comedy, Fantasy, Horror 0 1 1 2
## Comedy, Fantasy, Romance 1 3 3 7
## Comedy, History, Musical 0 0 0 0
## Comedy, Horror 1 4 4 9
## Comedy, Horror, Musical 0 1 1 2
## Comedy, Horror, Sci-Fi 0 1 1 2
## Comedy, Horror, Thriller 0 0 0 0
## Comedy, Music 1 3 3 7
## Comedy, Music, Musical 0 0 0 0
## Comedy, Music, Romance 0 1 1 2
## Comedy, Musical 0 1 1 2
## Comedy, Musical, Romance 0 1 1 2
## Comedy, Mystery, Romance 0 0 0 0
## Comedy, Mystery, Sci-Fi 0 0 0 0
## Comedy, Mystery, Thriller 0 0 0 0
## Comedy, Romance 5 25 25 55
## Comedy, Romance, Sci-Fi 0 1 1 2
## Comedy, Romance, Sport 0 2 2 4
## Comedy, Romance, Thriller 0 0 0 0
## Comedy, Sci-Fi 0 2 2 4
## Comedy, Sport 1 4 5 10
## Comedy, War 0 1 1 2
## Comedy, Western 0 1 1 2
## Crime, Drama 2 11 11 24
## Crime, Drama, Fantasy 0 1 1 2
## Crime, Drama, Film-Noir 0 2 2 4
## Crime, Drama, Horror 0 1 1 2
## Crime, Drama, Music 0 1 1 2
## Crime, Drama, Musical 0 1 1 2
## Crime, Drama, Mystery 4 18 19 41
## Crime, Drama, Romance 1 3 3 7
## Crime, Drama, Sci-Fi 0 0 0 0
## Crime, Drama, Sport 0 0 0 0
## Crime, Drama, Thriller 4 17 17 38
## Crime, Film-Noir, Mystery 0 0 0 0
## Crime, Horror, Sci-Fi 0 0 0 0
## Crime, Horror, Thriller 0 0 0 0
## Crime, Mystery, Sci-Fi 0 0 0 0
## Crime, Mystery, Thriller 1 3 3 7
## Crime, Romance, Thriller 0 0 0 0
## Crime, Sci-Fi 0 0 0 0
## Crime, Thriller 1 4 5 10
## Documentary 0 0 0 0
## Documentary, Biography, Crime 0 0 0 0
## Drama 5 24 24 53
## Drama, Family 0 1 1 2
## Drama, Family, Fantasy 0 2 2 4
## Drama, Family, Musical 0 1 1 2
## Drama, Family, Romance 0 0 0 0
## Drama, Family, Sport 0 0 0 0
## Drama, Fantasy 0 0 0 0
## Drama, Fantasy, History 0 0 0 0
## Drama, Fantasy, Horror 1 4 4 9
## Drama, Fantasy, Music 0 0 0 0
## Drama, Fantasy, Mystery 1 3 3 7
## Drama, Fantasy, Romance 1 4 5 10
## Drama, Fantasy, Sport 0 0 0 0
## Drama, Fantasy, Thriller 0 0 0 0
## Drama, Fantasy, War 0 0 0 0
## Drama, Film-Noir, Romance 0 0 0 0
## Drama, History 0 1 1 2
## Drama, History, Thriller 1 3 3 7
## Drama, History, War 0 0 0 0
## Drama, Horror 0 1 1 2
## Drama, Horror, Mystery 1 3 3 7
## Drama, Horror, Romance 0 0 0 0
## Drama, Horror, Sci-Fi 1 3 3 7
## Drama, Horror, Thriller 0 1 1 2
## Drama, Music 1 4 4 9
## Drama, Music, Musical 0 1 1 2
## Drama, Music, Romance 1 6 6 13
## Drama, Musical, Romance 0 2 2 4
## Drama, Musical, Sport 0 0 0 0
## Drama, Mystery 0 2 2 4
## Drama, Mystery, Romance 1 5 5 11
## Drama, Mystery, Sci-Fi 1 4 4 9
## Drama, Mystery, Thriller 2 10 10 22
## Drama, Mystery, War 0 0 0 0
## Drama, Romance 5 23 24 52
## Drama, Romance, Sci-Fi 0 1 1 2
## Drama, Romance, Sport 0 1 1 2
## Drama, Romance, Thriller 0 2 2 4
## Drama, Romance, War 1 3 3 7
## Drama, Romance, Western 0 0 0 0
## Drama, Sci-Fi 0 2 2 4
## Drama, Sci-Fi, Thriller 0 2 2 4
## Drama, Sport 1 4 5 10
## Drama, Thriller 1 5 5 11
## Drama, Thriller, War 0 1 1 2
## Drama, Thriller, Western 0 0 0 0
## Drama, War 1 4 4 9
## Drama, Western 0 2 2 4
## Family, Fantasy, Musical 0 0 0 0
## Fantasy, Horror 0 1 1 2
## Fantasy, Horror, Mystery 0 2 2 4
## Fantasy, Mystery, Romance 0 0 0 0
## Fantasy, Mystery, Sci-Fi 0 0 0 0
## Film-Noir, Mystery, Thriller 0 0 0 0
## Horror 2 8 8 18
## Horror, Mystery 1 6 6 13
## Horror, Mystery, Sci-Fi 0 1 1 2
## Horror, Mystery, Thriller 2 8 9 19
## Horror, Sci-Fi 0 2 2 4
## Horror, Sci-Fi, Thriller 0 2 2 4
## Horror, Thriller 2 9 9 20
## Mystery, Romance, Thriller 0 0 0 0
## Mystery, Sci-Fi, Thriller 0 0 0 0
## Mystery, Thriller 1 3 3 7
## Sci-Fi, Thriller 0 1 1 2
## Thriller 0 0 0 0
## Western 0 1 1 2
# Residuals stored in the chi-square object, rounded to two decimals.
# The component is spelled out in full: `$res` only resolved through partial
# matching of list names, which is fragile (it breaks silently if another
# component starting with "res" is added, and is not supported by tibbles).
# NOTE(review): if chi_square was produced by chisq.test(), `residuals` holds
# the Pearson residuals, (observed - expected) / sqrt(expected) - confirm
# against where chi_square is created.
round(chi_square$residuals, digits = 2)
##
## movie_data_clean$Genre 1 2 3
## Action, Adventure 5.69 -1.02 -1.65
## Action, Adventure, Biography -0.32 -0.67 0.81
## Action, Adventure, Comedy -1.68 3.43 -2.61
## Action, Adventure, Crime -0.84 2.20 -1.78
## Action, Adventure, Drama -1.11 -0.81 1.33
## Action, Adventure, Family -0.09 1.40 -1.34
## Action, Adventure, Fantasy 3.93 1.40 -3.22
## Action, Adventure, History -0.32 0.83 -0.67
## Action, Adventure, Horror -0.55 0.57 -0.31
## Action, Adventure, Mystery 1.56 0.50 -1.22
## Action, Adventure, Romance 1.28 -0.29 -0.31
## Action, Adventure, Sci-Fi 5.96 0.08 -2.87
## Action, Adventure, Thriller 0.00 0.04 -0.04
## Action, Adventure, War -0.32 -0.67 0.81
## Action, Biography, Crime -0.55 -0.29 0.54
## Action, Biography, Drama -0.45 0.12 0.10
## Action, Comedy -0.84 1.07 -0.66
## Action, Comedy, Crime 0.00 1.71 -1.69
## Action, Comedy, Drama 2.85 -0.67 -0.67
## Action, Comedy, Fantasy 0.52 0.81 -1.05
## Action, Comedy, Horror -0.32 -0.67 0.81
## Action, Comedy, Mystery -0.32 -0.67 0.81
## Action, Comedy, Romance -0.45 1.17 -0.95
## Action, Comedy, Sci-Fi -0.55 1.44 -1.17
## Action, Comedy, Thriller -0.45 0.12 0.10
## Action, Comedy, War -0.45 0.12 0.10
## Action, Crime -0.32 0.83 -0.67
## Action, Crime, Drama -0.05 -0.53 0.55
## Action, Crime, Fantasy -0.55 0.57 -0.31
## Action, Crime, Horror -0.32 0.83 -0.67
## Action, Crime, Mystery 2.85 -0.67 -0.67
## Action, Crime, Romance -0.45 0.12 0.10
## Action, Crime, Sci-Fi -0.71 -0.82 1.15
## Action, Crime, Thriller -0.50 1.46 -1.22
## Action, Drama 4.94 -1.16 -1.17
## Action, Drama, Family -0.45 0.12 0.10
## Action, Drama, Fantasy -0.32 0.83 -0.67
## Action, Drama, History -0.77 -1.63 1.98
## Action, Drama, Horror 2.85 -0.67 -0.67
## Action, Drama, Music -0.32 0.83 -0.67
## Action, Drama, Mystery 0.95 0.16 -0.61
## Action, Drama, Romance 1.28 -0.29 -0.31
## Action, Drama, Sci-Fi 1.56 -0.07 -0.66
## Action, Drama, Sport -0.71 0.52 -0.18
## Action, Drama, Thriller -1.05 0.95 -0.45
## Action, Drama, War -0.55 -0.29 0.54
## Action, Drama, Western -0.45 0.12 0.10
## Action, Family, Sport -0.32 0.83 -0.67
## Action, Fantasy, Horror -0.55 1.44 -1.17
## Action, Fantasy, Thriller -0.63 1.66 -1.35
## Action, Horror -0.45 0.12 0.10
## Action, Horror, Mystery -0.32 0.83 -0.67
## Action, Horror, Sci-Fi -1.26 3.32 -2.70
## Action, Horror, Thriller -0.45 0.12 0.10
## Action, Mystery, Sci-Fi 1.28 0.57 -1.17
## Action, Mystery, Thriller 3.10 -0.41 -1.05
## Action, Sci-Fi 3.27 0.49 -2.02
## Action, Sci-Fi, Thriller 0.62 1.75 -2.02
## Action, Sport, Thriller -0.32 0.83 -0.67
## Action, Thriller 0.23 -0.30 0.19
## Action, War -0.32 0.83 -0.67
## Adventure, Biography, Drama -0.84 -1.77 2.14
## Adventure, Biography, War -0.32 -0.67 0.81
## Adventure, Comedy -0.77 1.42 -1.05
## Adventure, Comedy, Crime -0.84 1.63 -1.22
## Adventure, Comedy, Drama -1.14 -0.33 0.86
## Adventure, Comedy, Family -0.07 1.52 -1.47
## Adventure, Comedy, Fantasy -0.71 -0.82 1.15
## Adventure, Comedy, Music -0.45 1.17 -0.95
## Adventure, Comedy, Musical -0.32 0.83 -0.67
## Adventure, Comedy, Sci-Fi 1.56 -0.63 -0.10
## Adventure, Crime, Drama -0.45 0.12 0.10
## Adventure, Drama -0.55 -0.29 0.54
## Adventure, Drama, Family -0.63 0.16 0.13
## Adventure, Drama, Fantasy -0.63 -1.33 1.62
## Adventure, Drama, History 1.79 -0.94 0.10
## Adventure, Drama, Mystery -0.32 -0.67 0.81
## Adventure, Drama, Romance 0.23 0.23 -0.33
## Adventure, Drama, Sci-Fi -0.55 0.57 -0.31
## Adventure, Drama, Thriller -0.71 -0.15 0.48
## Adventure, Drama, War 1.79 -0.94 0.10
## Adventure, Drama, Western 1.28 -1.16 0.54
## Adventure, Family, Fantasy 3.68 -1.04 -0.70
## Adventure, Family, Romance -0.32 0.83 -0.67
## Adventure, Family, Sci-Fi 2.85 -0.67 -0.67
## Adventure, Fantasy, Romance -0.32 0.83 -0.67
## Adventure, Horror -0.32 -0.67 0.81
## Adventure, Horror, Thriller -0.55 0.57 -0.31
## Adventure, Mystery, Sci-Fi -0.32 0.83 -0.67
## Adventure, Mystery, Thriller 2.85 -0.67 -0.67
## Adventure, Sci-Fi 1.79 -0.94 0.10
## Adventure, Sci-Fi, Thriller -0.32 0.83 -0.67
## Adventure, War, Western -0.32 -0.67 0.81
## Adventure, Western 2.85 -0.67 -0.67
## Animation, Action, Adventure 1.50 0.92 -1.61
## Animation, Action, Crime -0.32 -0.67 0.81
## Animation, Action, Drama -0.32 -0.67 0.81
## Animation, Adventure, Comedy 4.25 -0.36 -1.63
## Animation, Adventure, Drama 0.23 0.23 -0.33
## Animation, Adventure, Family 0.71 -1.49 1.15
## Animation, Adventure, Sci-Fi -0.32 -0.67 0.81
## Animation, Comedy, Crime -0.32 -0.67 0.81
## Animation, Comedy, Family -0.45 0.12 0.10
## Animation, Comedy, Fantasy -0.32 -0.67 0.81
## Animation, Drama, Family -0.55 -1.16 1.40
## Animation, Drama, Fantasy -0.32 -0.67 0.81
## Animation, Family, Fantasy 1.79 -0.94 0.10
## Biography, Comedy, Crime -0.55 0.57 -0.31
## Biography, Comedy, Drama -0.89 -0.83 1.24
## Biography, Crime, Drama 0.47 -2.89 2.64
## Biography, Drama -0.26 -1.99 2.09
## Biography, Drama, Family 1.28 -1.16 0.54
## Biography, Drama, History -1.48 -2.49 3.16
## Biography, Drama, Music -0.26 -1.99 2.09
## Biography, Drama, Mystery 2.85 -0.67 -0.67
## Biography, Drama, Romance -0.89 -1.89 2.29
## Biography, Drama, Sport -0.18 -1.88 1.95
## Biography, Drama, Thriller -0.32 -0.67 0.81
## Biography, Drama, War 0.95 -1.33 0.88
## Comedy -1.63 3.15 -2.35
## Comedy, Crime 0.41 0.12 -0.31
## Comedy, Crime, Drama -1.30 -1.30 1.90
## Comedy, Crime, Fantasy -0.32 0.83 -0.67
## Comedy, Crime, Music -0.55 0.57 -0.31
## Comedy, Crime, Musical -0.45 0.12 0.10
## Comedy, Crime, Mystery -0.55 0.57 -0.31
## Comedy, Crime, Romance -0.55 0.57 -0.31
## Comedy, Crime, Sport -0.32 0.83 -0.67
## Comedy, Crime, Thriller -0.32 0.83 -0.67
## Comedy, Drama -0.09 -2.01 2.04
## Comedy, Drama, Family -0.77 0.20 0.16
## Comedy, Drama, Fantasy -0.47 0.33 -0.10
## Comedy, Drama, History -0.32 -0.67 0.81
## Comedy, Drama, Music -0.89 -0.30 0.71
## Comedy, Drama, Musical -0.55 -0.29 0.54
## Comedy, Drama, Mystery -0.45 -0.94 1.14
## Comedy, Drama, Romance -2.70 1.10 0.18
## Comedy, Drama, Sport -0.32 -0.67 0.81
## Comedy, Drama, Thriller -0.45 0.12 0.10
## Comedy, Drama, War -0.32 -0.67 0.81
## Comedy, Family 0.71 0.52 -0.84
## Comedy, Family, Fantasy -1.00 2.15 -1.66
## Comedy, Family, Music -0.45 1.17 -0.95
## Comedy, Family, Romance -0.77 2.03 -1.65
## Comedy, Family, Sci-Fi -0.32 0.83 -0.67
## Comedy, Family, Sport -0.45 1.17 -0.95
## Comedy, Fantasy 0.23 1.29 -1.38
## Comedy, Fantasy, Horror -0.55 1.44 -1.17
## Comedy, Fantasy, Romance -0.84 1.63 -1.22
## Comedy, History, Musical -0.32 0.83 -0.67
## Comedy, Horror -0.95 -1.00 1.44
## Comedy, Horror, Musical -0.45 -0.94 1.14
## Comedy, Horror, Sci-Fi -0.45 1.17 -0.95
## Comedy, Horror, Thriller -0.32 -0.67 0.81
## Comedy, Music -0.77 0.20 0.16
## Comedy, Music, Musical -0.32 0.83 -0.67
## Comedy, Music, Romance -0.55 0.57 -0.31
## Comedy, Musical -0.45 0.12 0.10
## Comedy, Musical, Romance -0.45 0.12 0.10
## Comedy, Mystery, Romance -0.32 -0.67 0.81
## Comedy, Mystery, Sci-Fi -0.32 0.83 -0.67
## Comedy, Mystery, Thriller -0.32 0.83 -0.67
## Comedy, Romance -1.06 3.13 -2.60
## Comedy, Romance, Sci-Fi -0.45 1.17 -0.95
## Comedy, Romance, Sport -0.63 0.91 -0.61
## Comedy, Romance, Thriller -0.32 0.83 -0.67
## Comedy, Sci-Fi -0.63 1.66 -1.35
## Comedy, Sport -1.00 2.15 -1.66
## Comedy, War -0.45 0.12 0.10
## Comedy, Western -0.55 -0.29 0.54
## Crime, Drama 1.68 -2.96 2.15
## Crime, Drama, Fantasy 1.79 -0.94 0.10
## Crime, Drama, Film-Noir -0.63 -1.33 1.62
## Crime, Drama, Horror -0.55 0.57 -0.31
## Crime, Drama, Music -0.45 0.12 0.10
## Crime, Drama, Musical -0.45 -0.94 1.14
## Crime, Drama, Mystery -0.54 -0.76 1.01
## Crime, Drama, Romance 0.52 -1.02 0.77
## Crime, Drama, Sci-Fi -0.32 -0.67 0.81
## Crime, Drama, Sport -0.32 -0.67 0.81
## Crime, Drama, Thriller -0.41 -2.41 2.58
## Crime, Film-Noir, Mystery -0.32 -0.67 0.81
## Crime, Horror, Sci-Fi -0.32 0.83 -0.67
## Crime, Horror, Thriller -0.32 0.83 -0.67
## Crime, Mystery, Sci-Fi -0.32 -0.67 0.81
## Crime, Mystery, Thriller -0.77 0.81 -0.44
## Crime, Romance, Thriller -0.32 -0.67 0.81
## Crime, Sci-Fi 2.85 -0.67 -0.67
## Crime, Thriller 1.00 -1.16 0.68
## Documentary -0.32 -0.67 0.81
## Documentary, Biography, Crime -0.32 -0.67 0.81
## Drama 0.74 -3.83 3.44
## Drama, Family -0.45 1.17 -0.95
## Drama, Family, Fantasy -0.63 -0.59 0.88
## Drama, Family, Musical -0.55 -1.16 1.40
## Drama, Family, Romance -0.32 -0.67 0.81
## Drama, Family, Sport -0.32 -0.67 0.81
## Drama, Fantasy -0.32 -0.67 0.81
## Drama, Fantasy, History -0.32 -0.67 0.81
## Drama, Fantasy, Horror -0.95 0.49 -0.05
## Drama, Fantasy, Music -0.32 -0.67 0.81
## Drama, Fantasy, Mystery -0.77 0.20 0.16
## Drama, Fantasy, Romance 1.00 0.26 -0.73
## Drama, Fantasy, Sport -0.32 0.83 -0.67
## Drama, Fantasy, Thriller -0.32 0.83 -0.67
## Drama, Fantasy, War -0.32 -0.67 0.81
## Drama, Film-Noir, Romance -0.32 -0.67 0.81
## Drama, History -0.45 -0.94 1.14
## Drama, History, Thriller -0.77 -1.02 1.38
## Drama, History, War -0.32 -0.67 0.81
## Drama, Horror 1.79 -0.94 0.10
## Drama, Horror, Mystery -0.84 -0.63 1.02
## Drama, Horror, Romance -0.32 0.83 -0.67
## Drama, Horror, Sci-Fi -0.77 -0.41 0.77
## Drama, Horror, Thriller -0.45 1.17 -0.95
## Drama, Music -0.89 0.76 -0.33
## Drama, Music, Musical -0.45 -0.94 1.14
## Drama, Music, Romance -1.14 0.92 -0.37
## Drama, Musical, Romance -0.63 -0.59 0.88
## Drama, Musical, Sport -0.32 -0.67 0.81
## Drama, Mystery -0.63 -1.33 1.62
## Drama, Mystery, Romance -1.05 -0.41 0.89
## Drama, Mystery, Sci-Fi 2.22 -2.00 0.94
## Drama, Mystery, Thriller -0.13 0.38 -0.32
## Drama, Mystery, War 2.85 -0.67 -0.67
## Drama, Romance -1.40 -2.32 2.95
## Drama, Romance, Sci-Fi 1.28 -0.29 -0.31
## Drama, Romance, Sport -0.55 0.57 -0.31
## Drama, Romance, Thriller -0.63 -0.59 0.88
## Drama, Romance, War 0.36 -1.20 1.02
## Drama, Romance, Western -0.32 -0.67 0.81
## Drama, Sci-Fi -0.63 -1.33 1.62
## Drama, Sci-Fi, Thriller -0.71 0.52 -0.18
## Drama, Sport 1.00 -0.69 0.21
## Drama, Thriller -1.09 -1.45 1.95
## Drama, Thriller, War -0.55 -1.16 1.40
## Drama, Thriller, Western -0.32 -0.67 0.81
## Drama, War 2.22 -1.50 0.45
## Drama, Western -0.63 -1.33 1.62
## Family, Fantasy, Musical -0.32 -0.67 0.81
## Fantasy, Horror -0.55 0.57 -0.31
## Fantasy, Horror, Mystery -0.71 0.52 -0.18
## Fantasy, Mystery, Romance -0.32 0.83 -0.67
## Fantasy, Mystery, Sci-Fi -0.32 -0.67 0.81
## Film-Noir, Mystery, Thriller -0.32 -0.67 0.81
## Horror -0.53 1.24 -0.98
## Horror, Mystery -1.18 1.11 -0.54
## Horror, Mystery, Sci-Fi -0.55 -0.29 0.54
## Horror, Mystery, Thriller -1.38 2.25 -1.58
## Horror, Sci-Fi 0.95 -0.59 0.13
## Horror, Sci-Fi, Thriller -0.71 1.19 -0.84
## Horror, Thriller -1.41 2.38 -1.69
## Mystery, Romance, Thriller -0.32 -0.67 0.81
## Mystery, Sci-Fi, Thriller 2.85 -0.67 -0.67
## Mystery, Thriller 1.81 -0.41 -0.44
## Sci-Fi, Thriller -0.45 0.12 0.10
## Thriller -0.32 0.83 -0.67
## Western -0.45 -0.94 1.14
library(effectsize)
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
# Cramer's V for the association between Genre and Group in the cleaned data.
# The call stays namespace-qualified so it is unambiguous which package's
# implementation is used.
effectsize::cramers_v(
  x = movie_data_clean$Genre,
  y = movie_data_clean$Group
)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.36 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].