# Load the Netflix titles data set; the relative path means the CSV has to be
# in the current working directory. Then show per-column summary statistics
# as a first look at the data.
data <- read.csv(file = "Netflix TV Shows and Movies.csv")
summary(object = data)
## index id title type
## Min. : 0 Length:5283 Length:5283 Length:5283
## 1st Qu.:1320 Class :character Class :character Class :character
## Median :2641 Mode :character Mode :character Mode :character
## Mean :2641
## 3rd Qu.:3962
## Max. :5282
##
## description release_year age_certification runtime
## Length:5283 Min. :1953 Length:5283 Min. : 0.0
## Class :character 1st Qu.:2015 Class :character 1st Qu.: 45.0
## Mode :character Median :2018 Mode :character Median : 87.0
## Mean :2016 Mean : 79.2
## 3rd Qu.:2020 3rd Qu.:106.0
## Max. :2022 Max. :235.0
##
## imdb_id imdb_score imdb_votes
## Length:5283 Min. :1.500 Min. : 5
## Class :character 1st Qu.:5.800 1st Qu.: 521
## Mode :character Median :6.600 Median : 2279
## Mean :6.533 Mean : 23407
## 3rd Qu.:7.400 3rd Qu.: 10144
## Max. :9.600 Max. :2268288
## NA's :16
# Recode `type` as a two-level factor: "MOVIE" is labelled "0" and "SHOW" is
# labelled "1". Any value outside these two levels would become NA.
data[["type"]] <- factor(
  data[["type"]],
  levels = c("MOVIE", "SHOW"),
  labels = c(0, 1)
)
# Show the first rows to check the recoded column.
head(x = data)
## index id title type
## 1 0 tm84618 Taxi Driver 0
## 2 1 tm127384 Monty Python and the Holy Grail 0
## 3 2 tm70993 Life of Brian 0
## 4 3 tm190788 The Exorcist 0
## 5 4 ts22164 Monty Python's Flying Circus 1
## 6 5 tm14873 Dirty Harry 0
## description
## 1 A mentally unstable Vietnam War veteran works as a night-time taxi driver in New York City where the perceived decadence and sleaze feed his urge for violent action, attempting to save a preadolescent prostitute in the process.
## 2 King Arthur, accompanied by his squire, recruits his Knights of the Round Table, including Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Robin the Not-Quite-So-Brave-As-Sir-Lancelot and Sir Galahad the Pure. On the way, Arthur battles the Black Knight who, despite having had all his limbs chopped off, insists he can still fight. They reach Camelot, but Arthur decides not to enter, as "it is a silly place".
## 3 Brian Cohen is an average young Jewish man, but through a series of ridiculous events, he gains a reputation as the Messiah. When he's not dodging his followers or being scolded by his shrill mother, the hapless Brian has to contend with the pompous Pontius Pilate and acronym-obsessed members of a separatist movement. Rife with Monty Python's signature absurdity, the tale finds Brian's life paralleling Biblical lore, albeit with many more laughs.
## 4 12-year-old Regan MacNeil begins to adapt an explicit new personality as strange events befall the local area of Georgetown. Her mother becomes torn between science and superstition in a desperate bid to save her daughter, and ultimately turns to her last hope: Father Damien Karras, a troubled priest who is struggling with his own faith.
## 5 A British sketch comedy series with the shows being composed of surreality, risqué or innuendo-laden humour, sight gags and observational sketches without punchlines.
## 6 When a madman dubbed 'Scorpio' terrorizes San Francisco, hard-nosed cop, Harry Callahan – famous for his take-no-prisoners approach to law enforcement – is tasked with hunting down the psychopath. Harry eventually collars Scorpio in the process of rescuing a kidnap victim, only to see him walk on technicalities. Now, the maverick detective is determined to nail the maniac himself.
## release_year age_certification runtime imdb_id imdb_score imdb_votes
## 1 1976 R 113 tt0075314 8.3 795222
## 2 1975 PG 91 tt0071853 8.2 530877
## 3 1979 R 94 tt0079470 8.0 392419
## 4 1973 R 133 tt0070047 8.1 391942
## 5 1969 TV-14 30 tt0063929 8.8 72895
## 6 1971 R 102 tt0066999 7.7 153463
# Two-sample t-test of IMDb score between the two `type` groups.
# The arguments below are t.test()'s defaults, written out for clarity:
# a two-sided alternative and unequal variances (Welch's test).
t_test_result <- t.test(
  imdb_score ~ type,
  data = data,
  alternative = "two.sided",
  var.equal = FALSE
)
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: imdb_score by type
## t = -23.875, df = 3976.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.8120173 -0.6887780
## sample estimates:
## mean in group 0 mean in group 1
## 6.266980 7.017377
# Side-by-side box plots of IMDb score, one box per `type` level; the fill
# colours are applied to the boxes in level order.
boxplot(
  imdb_score ~ type,
  data = data,
  col = c("red", "green"),
  ylab = "IMDb ocena",
  xlab = "Vrsta (0: MOVIE, 1: SHOW)",
  main = "Graf kvantilov IMDb ocen - Film proti TV serija"
)
### Opravil bom še neparametričen Wilcoxonov preizkus za primerjavo dveh neodvisnih skupin
# Non-parametric comparison of IMDb score between the two `type` groups
# (Wilcoxon rank-sum test). The arguments below are wilcox.test()'s defaults,
# written out for clarity: two-sided alternative, continuity correction on.
wilcox_test_result <- wilcox.test(
  imdb_score ~ type,
  data = data,
  alternative = "two.sided",
  correct = TRUE
)
print(wilcox_test_result)
##
## Wilcoxon rank sum test with continuity correction
##
## data: imdb_score by type
## W = 1923314, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0