#Primero cargamos los paquetes que vamos a utilizar para el análisis del problema.
library(pacman)
library(ggplot2)
library(readr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lubridate)
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(caret)
Loading required package: lattice
library(dplyr)
#Siguiendo, cargamos la base de datos "NetflixOriginals.csv" a R.
netflix_data <- read.csv("/Users/mariadelosangelesgutierrezschweitzer/Desktop/Tarea3/NetflixOriginals.csv")
#Primero, verificamos el formato de la columna "Premiere" para dejar solo los últimos 4 dígitos y convertirlo a año como se solicita.
netflix_data$Premiere_final <- substring(netflix_data$Premiere, nchar(netflix_data$Premiere) - 3)
#Luego, eliminamos las filas que contengan datos nulos/olvidados en las columnas de las variables que nos interesan para evitar errores futuros.
netflix_data_clean <- na.omit(netflix_data[c("IMDB.Score", "Runtime", "Genre", "Language", "Premiere_final")])
#Prosiguiendo, ajustamos el modelo de regresión lineal múltiple en el cual nuestra variable dependiente es "IMDB.Score" y las variables independientes son "Runtime", "Genre, "Language" y "Premiere_final" (que es la variable Premiere con solo el año de la fecha de publicación de las series y las peliculas).
modelo <- lm(IMDB.Score ~ Runtime + Genre + Language + Premiere_final, data = netflix_data)
#Finalmente, revisamos el modelo de regresión lineal múltiple solicitado.
summary(modelo)
Call:
lm(formula = IMDB.Score ~ Runtime + Genre + Language + Premiere_final,
data = netflix_data)
Residuals:
Min 1Q Median 3Q Max
-2.7598 -0.3069 0.0000 0.3631 1.9993
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.461004 1.119001 3.987 7.89e-05
Runtime 0.010017 0.001737 5.768 1.54e-08
GenreAction comedy 0.169062 0.484225 0.349 0.727157
GenreAction thriller 1.011262 0.816944 1.238 0.216451
GenreAction-adventure 1.000632 1.119450 0.894 0.371903
GenreAction-thriller 0.434503 0.535885 0.811 0.417926
GenreAction/Comedy -0.306977 0.813483 -0.377 0.706094
GenreAction/Science fiction -0.201541 0.814905 -0.247 0.804781
GenreAdventure 0.704866 0.615005 1.146 0.252392
GenreAdventure-romance 0.769920 0.808807 0.952 0.341678
GenreAdventure/Comedy -0.026674 0.814529 -0.033 0.973891
GenreAftershow / Interview 1.997962 0.449387 4.446 1.12e-05
GenreAnimated musical comedy 1.318828 0.815427 1.617 0.106545
GenreAnimation 1.430602 0.458828 3.118 0.001945
GenreAnimation / Comedy 2.070592 0.814980 2.541 0.011419
GenreAnimation / Musicial 1.510660 0.815920 1.851 0.064794
GenreAnimation / Science Fiction 2.310155 0.810296 2.851 0.004570
GenreAnimation / Short 2.495593 0.511385 4.880 1.50e-06
GenreAnimation/Christmas/Comedy/Adventure 2.749718 0.808110 3.403 0.000731
GenreAnimation/Comedy/Adventure 0.976761 0.811186 1.204 0.229216
GenreAnimation/Musical/Adventure 0.926677 0.810855 1.143 0.253749
GenreAnimation/Superhero -0.513222 0.811264 -0.633 0.527321
GenreAnime / Short 0.138206 0.831183 0.166 0.868018
GenreAnime/Fantasy 1.224587 0.976762 1.254 0.210633
GenreAnime/Science fiction 1.242006 0.819591 1.515 0.130415
GenreAnthology/Dark comedy 1.758897 0.820270 2.144 0.032576
GenreBiographical/Comedy 1.093023 0.813483 1.344 0.179784
GenreBiopic 0.830106 0.386515 2.148 0.032305
GenreBlack comedy -0.136859 0.813748 -0.168 0.866519
GenreChristian musical 0.268744 0.815151 0.330 0.741799
GenreChristmas comedy 0.326340 0.810459 0.403 0.687400
GenreChristmas musical -0.303374 0.810701 -0.374 0.708433
GenreChristmas/Fantasy/Adventure/Comedy 1.262973 0.813426 1.553 0.121249
GenreComedy 0.067232 0.312721 0.215 0.829878
GenreComedy / Musical 1.388165 0.636016 2.183 0.029613
GenreComedy horror 0.191061 0.816022 0.234 0.814992
GenreComedy mystery 0.549718 0.808110 0.680 0.496716
GenreComedy-drama 0.711892 0.359339 1.981 0.048222
GenreComedy/Fantasy/Family -0.103374 0.810701 -0.128 0.898596
GenreComedy/Horror 0.256559 0.810547 0.317 0.751759
GenreComing-of-age comedy-drama 1.831499 0.821036 2.231 0.026221
GenreConcert Film 2.080081 0.427596 4.865 1.62e-06
GenreCrime drama 1.436450 0.375199 3.829 0.000148
GenreCrime thriller 0.871195 0.816578 1.067 0.286629
GenreDance comedy 0.646710 0.810976 0.797 0.425637
GenreDark comedy 0.261094 0.618581 0.422 0.673176
GenreDocumentary 1.647610 0.302112 5.454 8.39e-08
GenreDrama 0.868354 0.305689 2.841 0.004719
GenreDrama / Short 1.807988 0.826462 2.188 0.029241
GenreDrama-Comedy 1.829853 0.808515 2.263 0.024125
GenreDrama/Horror 0.562973 0.813426 0.692 0.489252
GenreFamily 0.333341 0.614591 0.542 0.587843
GenreFamily film 0.106514 0.609205 0.175 0.861288
GenreFamily/Christmas musical 0.786273 0.810559 0.970 0.332579
GenreFamily/Comedy-drama 0.706475 0.810439 0.872 0.383854
GenreFantasy 0.889825 0.985624 0.903 0.367141
GenreHeist -0.738990 0.815875 -0.906 0.365574
GenreHeist film/Thriller -2.314231 0.813197 -2.846 0.004644
GenreHidden-camera prank comedy 1.278929 0.815879 1.568 0.117731
GenreHistorical drama 1.318995 0.810025 1.628 0.104194
GenreHistorical-epic 0.992687 0.813729 1.220 0.223170
GenreHorror -0.047552 0.404513 -0.118 0.906477
GenreHorror anthology -1.491019 0.819782 -1.819 0.069646
GenreHorror comedy 0.216828 0.811518 0.267 0.789454
GenreHorror thriller -0.901663 0.531007 -1.698 0.090235
GenreHorror-thriller 0.516194 0.608535 0.848 0.396773
GenreHorror/Crime drama 0.710926 0.815703 0.872 0.383946
GenreMaking-of 2.575004 0.620513 4.150 4.02e-05
GenreMentalism special 1.913897 0.819760 2.335 0.020024
GenreMockumentary 0.693735 0.616165 1.126 0.260847
GenreMusical -0.070453 0.610481 -0.115 0.908178
GenreMusical / Short 3.071097 0.823433 3.730 0.000218
GenreMusical comedy 0.823743 0.610971 1.348 0.178295
GenreMusical/Western/Fantasy -1.526506 0.815627 -1.872 0.061952
GenreMystery -0.750281 0.608859 -1.232 0.218528
GenreOne-man show 1.492369 0.529870 2.816 0.005081
GenrePolitical thriller -1.373660 0.810459 -1.695 0.090824
GenrePsychological horror 0.263141 0.813748 0.323 0.746575
GenrePsychological thriller 0.708326 0.479983 1.476 0.140757
GenrePsychological thriller drama -0.293950 0.983615 -0.299 0.765203
GenreRomance 1.045082 0.430834 2.426 0.015694
GenreRomance drama -0.251415 0.818434 -0.307 0.758849
GenreRomantic comedy 0.357218 0.316315 1.129 0.259404
GenreRomantic comedy-drama 0.052956 0.813414 0.065 0.948122
GenreRomantic comedy/Holiday 0.536525 0.810493 0.662 0.508347
GenreRomantic drama 0.591964 0.468090 1.265 0.206694
GenreRomantic teen drama 0.795980 0.848546 0.938 0.348750
GenreRomantic teenage drama 0.413845 0.843046 0.491 0.623756
GenreRomantic thriller 0.246206 0.810718 0.304 0.761514
GenreSatire 0.428695 0.646753 0.663 0.507791
GenreScience fiction 0.166496 0.483009 0.345 0.730486
GenreScience fiction adventure -0.260299 0.808077 -0.322 0.747519
GenreScience fiction thriller 1.487309 0.821605 1.810 0.070964
GenreScience fiction/Action 0.656440 0.822642 0.798 0.425336
GenreScience fiction/Drama -0.714011 0.531098 -1.344 0.179534
GenreScience fiction/Mystery -0.457397 0.814022 -0.562 0.574482
GenreScience fiction/Thriller 0.443530 0.482941 0.918 0.358934
GenreSports film 0.843727 0.843092 1.001 0.317516
GenreSports-drama 0.581983 0.527317 1.104 0.270362
GenreSpy thriller 0.790984 0.609070 1.299 0.194760
GenreStop Motion 1.257568 0.819056 1.535 0.125433
GenreSuperhero -0.238517 0.611050 -0.390 0.696482
GenreSuperhero-Comedy -1.111390 0.814902 -1.364 0.173342
GenreSuperhero/Action 0.936189 0.810767 1.155 0.248864
GenreSupernatural drama 0.240850 0.904601 0.266 0.790176
GenreTeen comedy horror 0.881212 0.816664 1.079 0.281182
GenreTeen comedy-drama -0.566910 0.813612 -0.697 0.486321
GenreThriller 0.128494 0.321244 0.400 0.689364
GenreUrban fantasy 0.560674 0.815742 0.687 0.492257
GenreVariety show 0.854563 0.482918 1.770 0.077514
GenreVariety Show 1.591733 0.821150 1.938 0.053234
GenreWar 1.078590 0.615383 1.753 0.080372
GenreWar drama 0.425668 0.814124 0.523 0.601348
GenreWar-Comedy 0.210590 0.815939 0.258 0.796459
GenreWestern 0.308905 0.543521 0.568 0.570104
GenreZombie/Heist -0.042113 0.817617 -0.052 0.958946
LanguageDutch -1.093140 0.946302 -1.155 0.248669
LanguageEnglish -0.519975 0.762265 -0.682 0.495519
LanguageEnglish/Akan 1.108497 1.351811 0.820 0.412670
LanguageEnglish/Arabic -0.531227 1.072301 -0.495 0.620568
LanguageEnglish/Hindi 0.219904 0.926759 0.237 0.812552
LanguageEnglish/Japanese -3.140466 0.936683 -3.353 0.000872
LanguageEnglish/Korean NA NA NA NA
LanguageEnglish/Mandarin -0.228774 0.929658 -0.246 0.805736
LanguageEnglish/Russian -0.330842 1.082055 -0.306 0.759942
LanguageEnglish/Spanish -0.818895 0.831417 -0.985 0.325215
LanguageEnglish/Swedish -0.546908 1.069939 -0.511 0.609506
LanguageEnglish/Taiwanese/Mandarin -0.321585 1.072252 -0.300 0.764388
LanguageEnglish/Ukranian/Russian 1.037312 1.114066 0.931 0.352328
LanguageFilipino -1.415261 0.933019 -1.517 0.130045
LanguageFrench -0.978607 0.780788 -1.253 0.210765
LanguageGeorgian -0.293250 1.071145 -0.274 0.784393
LanguageGerman -0.804098 0.861633 -0.933 0.351233
LanguageHindi -0.693103 0.777121 -0.892 0.372959
LanguageIndonesian -0.888728 0.809795 -1.097 0.273056
LanguageItalian -1.067244 0.796811 -1.339 0.181158
LanguageJapanese -0.608037 0.932244 -0.652 0.514606
LanguageKhmer/English/French -0.117975 1.087593 -0.108 0.913672
LanguageKorean -0.878688 0.827181 -1.062 0.288717
LanguageMalay -2.022462 1.136082 -1.780 0.075756
LanguageMarathi -0.960529 0.887910 -1.082 0.279961
LanguageNorwegian -0.755595 1.104670 -0.684 0.494347
LanguagePolish -0.968012 0.886533 -1.092 0.275492
LanguagePortuguese -0.298785 0.793101 -0.377 0.706562
LanguageSpanish -0.621790 0.772938 -0.804 0.421587
LanguageSpanish/Basque -0.241971 1.317313 -0.184 0.854348
LanguageSpanish/Catalan -1.624813 1.079266 -1.505 0.132943
LanguageSpanish/English -0.350925 1.069056 -0.328 0.742879
LanguageSwedish -0.469540 1.085572 -0.433 0.665577
LanguageTamil 0.104694 1.079617 0.097 0.922793
LanguageThai -1.372377 0.939816 -1.460 0.144957
LanguageThia/English -0.790656 1.066933 -0.741 0.459069
LanguageTurkish -0.947403 0.838883 -1.129 0.259382
Premiere_final2015 0.342545 0.811896 0.422 0.673306
Premiere_final2016 0.620716 0.769968 0.806 0.420602
Premiere_final2017 0.626330 0.762085 0.822 0.411615
Premiere_final2018 0.754250 0.760156 0.992 0.321650
Premiere_final2019 0.537622 0.758991 0.708 0.479124
Premiere_final2020 0.580698 0.758662 0.765 0.444444
Premiere_final2021 0.518596 0.763484 0.679 0.497349
(Intercept) ***
Runtime ***
GenreAction comedy
GenreAction thriller
GenreAction-adventure
GenreAction-thriller
GenreAction/Comedy
GenreAction/Science fiction
GenreAdventure
GenreAdventure-romance
GenreAdventure/Comedy
GenreAftershow / Interview ***
GenreAnimated musical comedy
GenreAnimation **
GenreAnimation / Comedy *
GenreAnimation / Musicial .
GenreAnimation / Science Fiction **
GenreAnimation / Short ***
GenreAnimation/Christmas/Comedy/Adventure ***
GenreAnimation/Comedy/Adventure
GenreAnimation/Musical/Adventure
GenreAnimation/Superhero
GenreAnime / Short
GenreAnime/Fantasy
GenreAnime/Science fiction
GenreAnthology/Dark comedy *
GenreBiographical/Comedy
GenreBiopic *
GenreBlack comedy
GenreChristian musical
GenreChristmas comedy
GenreChristmas musical
GenreChristmas/Fantasy/Adventure/Comedy
GenreComedy
GenreComedy / Musical *
GenreComedy horror
GenreComedy mystery
GenreComedy-drama *
GenreComedy/Fantasy/Family
GenreComedy/Horror
GenreComing-of-age comedy-drama *
GenreConcert Film ***
GenreCrime drama ***
GenreCrime thriller
GenreDance comedy
GenreDark comedy
GenreDocumentary ***
GenreDrama **
GenreDrama / Short *
GenreDrama-Comedy *
GenreDrama/Horror
GenreFamily
GenreFamily film
GenreFamily/Christmas musical
GenreFamily/Comedy-drama
GenreFantasy
GenreHeist
GenreHeist film/Thriller **
GenreHidden-camera prank comedy
GenreHistorical drama
GenreHistorical-epic
GenreHorror
GenreHorror anthology .
GenreHorror comedy
GenreHorror thriller .
GenreHorror-thriller
GenreHorror/Crime drama
GenreMaking-of ***
GenreMentalism special *
GenreMockumentary
GenreMusical
GenreMusical / Short ***
GenreMusical comedy
GenreMusical/Western/Fantasy .
GenreMystery
GenreOne-man show **
GenrePolitical thriller .
GenrePsychological horror
GenrePsychological thriller
GenrePsychological thriller drama
GenreRomance *
GenreRomance drama
GenreRomantic comedy
GenreRomantic comedy-drama
GenreRomantic comedy/Holiday
GenreRomantic drama
GenreRomantic teen drama
GenreRomantic teenage drama
GenreRomantic thriller
GenreSatire
GenreScience fiction
GenreScience fiction adventure
GenreScience fiction thriller .
GenreScience fiction/Action
GenreScience fiction/Drama
GenreScience fiction/Mystery
GenreScience fiction/Thriller
GenreSports film
GenreSports-drama
GenreSpy thriller
GenreStop Motion
GenreSuperhero
GenreSuperhero-Comedy
GenreSuperhero/Action
GenreSupernatural drama
GenreTeen comedy horror
GenreTeen comedy-drama
GenreThriller
GenreUrban fantasy
GenreVariety show .
GenreVariety Show .
GenreWar .
GenreWar drama
GenreWar-Comedy
GenreWestern
GenreZombie/Heist
LanguageDutch
LanguageEnglish
LanguageEnglish/Akan
LanguageEnglish/Arabic
LanguageEnglish/Hindi
LanguageEnglish/Japanese ***
LanguageEnglish/Korean
LanguageEnglish/Mandarin
LanguageEnglish/Russian
LanguageEnglish/Spanish
LanguageEnglish/Swedish
LanguageEnglish/Taiwanese/Mandarin
LanguageEnglish/Ukranian/Russian
LanguageFilipino
LanguageFrench
LanguageGeorgian
LanguageGerman
LanguageHindi
LanguageIndonesian
LanguageItalian
LanguageJapanese
LanguageKhmer/English/French
LanguageKorean
LanguageMalay .
LanguageMarathi
LanguageNorwegian
LanguagePolish
LanguagePortuguese
LanguageSpanish
LanguageSpanish/Basque
LanguageSpanish/Catalan
LanguageSpanish/English
LanguageSwedish
LanguageTamil
LanguageThai
LanguageThia/English
LanguageTurkish
Premiere_final2015
Premiere_final2016
Premiere_final2017
Premiere_final2018
Premiere_final2019
Premiere_final2020
Premiere_final2021
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.7529 on 425 degrees of freedom
Multiple R-squared: 0.5691, Adjusted R-squared: 0.4088
F-statistic: 3.552 on 158 and 425 DF, p-value: < 2.2e-16
#Para comenzar, para poder realizar la matriz de confusión, debemos transformar nuestra variable dependiente "IMDB.Score" a una variable binaria. En este caso, tomaremos como "1" si es buena y como "0" si no es buena. Considerando como buena el puntaje "IMDB.Score" > 5 para una serie o película.
netflix_data$Bueno <- ifelse(netflix_data$`IMDB.Score` > 5, 1, 0)
#Luego, dividimos los datos en conjuntos de entrenamiento (75%) y prueba (25%) en función de la variable anteriormente creada "Bueno".
set.seed(12345)
training.samples <- netflix_data$Bueno %>%
createDataPartition(p = 0.75, list = FALSE)
train.data <- netflix_data[training.samples, ]
test.data <- netflix_data[-training.samples, ]
#Prosiguiendo, aseguramos que los niveles de las variables categóricas "Genre", "Language" y "Premiere" sean consistentes en los datos de prueba. No necesitamos ajustar los niveles de "IMDB.Score" y "Bueno" en test.data, ya que no son variables categóricas.
train.data$Genre <- test.data$Genre[1:nrow(train.data)]
train.data$Language <- test.data$Language [1:nrow(train.data)]
train.data$Premiere_final <- test.data$Premiere_final[1:nrow(train.data)]
#A continuación, creamos el modelo lineal generalizado con el conjunto de entrenamiento. En este caso, utilizamos como variable dependiente la binaria de "IMDB.Score" que es "Bueno" y como variables independientes "Runtime", "Genre", "Language" y "Premiere_final".
modelo_predictivo <- glm(Bueno ~ Runtime + Genre + Language + Premiere_final, data = train.data)
summary(modelo_predictivo)
Call:
glm(formula = Bueno ~ Runtime + Genre + Language + Premiere_final,
data = train.data)
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.183392 0.507931 2.330 0.02255
Runtime -0.002893 0.001577 -1.835 0.07060
GenreAdventure 0.089255 0.464344 0.192 0.84810
GenreAdventure-romance 0.099960 0.452667 0.221 0.82584
GenreAftershow / Interview -0.018193 0.394701 -0.046 0.96336
GenreAnimated musical comedy 0.217718 0.457889 0.475 0.63584
GenreAnimation -0.014466 0.440976 -0.033 0.97392
GenreAnimation / Comedy 0.053669 0.451719 0.119 0.90575
GenreAnimation / Short 0.017359 0.441007 0.039 0.96871
GenreAnimation/Christmas/Comedy/Adventure 0.152037 0.455406 0.334 0.73944
GenreAnimation/Comedy/Adventure 0.049184 0.441720 0.111 0.91164
GenreAnimation/Musical/Adventure 0.052078 0.441819 0.118 0.90649
GenreBiopic 0.131232 0.402111 0.326 0.74507
GenreChristmas musical -0.982641 0.441007 -2.228 0.02891
GenreComedy -0.567308 0.327842 -1.730 0.08772
GenreComedy-drama 0.148661 0.411939 0.361 0.71922
GenreComedy/Fantasy/Family -0.982641 0.441007 -2.228 0.02891
GenreConcert Film -0.092008 0.385852 -0.238 0.81219
GenreCrime drama -0.965282 0.441312 -2.187 0.03188
GenreCrime thriller 0.043033 0.464902 0.093 0.92650
GenreDocumentary 0.006832 0.324668 0.021 0.98327
GenreDrama -0.021809 0.329981 -0.066 0.94748
GenreDrama/Horror -0.082755 0.452978 -0.183 0.85554
GenreFamily 0.252436 0.459210 0.550 0.58417
GenreHeist -0.887530 0.470533 -1.886 0.06319
GenreHistorical drama 0.105746 0.452884 0.233 0.81602
GenreHorror -0.940545 0.451761 -2.082 0.04080
GenreHorror comedy -0.971068 0.441188 -2.201 0.03085
GenreHorror thriller -0.873064 0.472080 -1.849 0.06839
GenreMockumentary 0.007377 0.452180 0.016 0.98703
GenreMusical/Western/Fantasy -1.079861 0.453028 -2.384 0.01971
GenreMystery -1.226597 0.463334 -2.647 0.00991
GenrePsychological horror -0.013318 0.455689 -0.029 0.97676
GenrePsychological thriller 0.177213 0.457335 0.387 0.69951
GenrePsychological thriller drama -0.951936 0.571932 -1.664 0.10026
GenreRomantic comedy -0.498438 0.359015 -1.388 0.16920
GenreRomantic comedy-drama -0.071182 0.453212 -0.157 0.87562
GenreRomantic drama -0.210438 0.410750 -0.512 0.60995
GenreRomantic teen drama -0.648994 0.486082 -1.335 0.18592
GenreSatire 0.110097 0.420866 0.262 0.79436
GenreScience fiction -1.340250 0.470178 -2.851 0.00565
GenreScience fiction/Mystery -1.048036 0.453942 -2.309 0.02375
GenreSports-drama 0.026370 0.373477 0.071 0.94390
GenreSuperhero -1.167806 0.450294 -2.593 0.01145
GenreSuperhero/Action 0.057864 0.442033 0.131 0.89621
GenreSupernatural drama -0.964536 0.519046 -1.858 0.06711
GenreTeen comedy horror -0.011938 0.462597 -0.026 0.97948
GenreThriller -1.067905 0.349689 -3.054 0.00314
GenreUrban fantasy -0.029297 0.462270 -0.063 0.94964
GenreVariety show -0.977505 0.384976 -2.539 0.01321
GenreWar 0.132653 0.466092 0.285 0.77674
GenreWestern -0.785176 0.457814 -1.715 0.09052
LanguageEnglish -0.111789 0.326036 -0.343 0.73267
LanguageEnglish/Hindi -0.120366 0.452826 -0.266 0.79112
LanguageEnglish/Spanish 0.036309 0.451727 0.080 0.93615
LanguageEnglish/Swedish -0.041807 0.454207 -0.092 0.92691
LanguageFrench -0.044270 0.376153 -0.118 0.90663
LanguageGeorgian -0.189803 0.454978 -0.417 0.67776
LanguageGerman -0.254302 0.430280 -0.591 0.55631
LanguageHindi 0.103235 0.353189 0.292 0.77088
LanguageIndonesian -0.258371 0.406767 -0.635 0.52727
LanguageItalian -0.285582 0.359136 -0.795 0.42904
LanguageJapanese NA NA NA NA
LanguageKorean -0.292045 0.470798 -0.620 0.53695
LanguagePortuguese -0.437433 0.451384 -0.969 0.33566
LanguageSpanish -0.037170 0.353313 -0.105 0.91650
LanguageThai -1.063650 0.442269 -2.405 0.01867
LanguageTurkish 0.155223 0.414511 0.374 0.70912
Premiere_final2016 0.125570 0.157040 0.800 0.42650
Premiere_final2017 0.232549 0.154804 1.502 0.13730
Premiere_final2018 0.280220 0.122895 2.280 0.02548
Premiere_final2019 0.123544 0.123198 1.003 0.31922
Premiere_final2020 0.168533 0.121554 1.386 0.16976
Premiere_final2021 NA NA NA NA
(Intercept) *
Runtime .
GenreAdventure
GenreAdventure-romance
GenreAftershow / Interview
GenreAnimated musical comedy
GenreAnimation
GenreAnimation / Comedy
GenreAnimation / Short
GenreAnimation/Christmas/Comedy/Adventure
GenreAnimation/Comedy/Adventure
GenreAnimation/Musical/Adventure
GenreBiopic
GenreChristmas musical *
GenreComedy .
GenreComedy-drama
GenreComedy/Fantasy/Family *
GenreConcert Film
GenreCrime drama *
GenreCrime thriller
GenreDocumentary
GenreDrama
GenreDrama/Horror
GenreFamily
GenreHeist .
GenreHistorical drama
GenreHorror *
GenreHorror comedy *
GenreHorror thriller .
GenreMockumentary
GenreMusical/Western/Fantasy *
GenreMystery **
GenrePsychological horror
GenrePsychological thriller
GenrePsychological thriller drama
GenreRomantic comedy
GenreRomantic comedy-drama
GenreRomantic drama
GenreRomantic teen drama
GenreSatire
GenreScience fiction **
GenreScience fiction/Mystery *
GenreSports-drama
GenreSuperhero *
GenreSuperhero/Action
GenreSupernatural drama .
GenreTeen comedy horror
GenreThriller **
GenreUrban fantasy
GenreVariety show *
GenreWar
GenreWestern .
LanguageEnglish
LanguageEnglish/Hindi
LanguageEnglish/Spanish
LanguageEnglish/Swedish
LanguageFrench
LanguageGeorgian
LanguageGerman
LanguageHindi
LanguageIndonesian
LanguageItalian
LanguageJapanese
LanguageKorean
LanguagePortuguese
LanguageSpanish
LanguageThai *
LanguageTurkish
Premiere_final2016
Premiere_final2017
Premiere_final2018 *
Premiere_final2019
Premiere_final2020
Premiere_final2021
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.09719892)
Null deviance: 29.9178 on 145 degrees of freedom
Residual deviance: 7.1927 on 74 degrees of freedom
(292 observations deleted due to missingness)
AIC: 120.79
Number of Fisher Scoring iterations: 2
#Por otro lado, ahora vamos a predecir la probabilidad de que la pelicula tenga un "IMDB.Score" tenga una calificación de bueno.
probabilities <- modelo_predictivo %>%
predict(test.data, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "1", "0")
#Creamos el factor con los niveles adecuados en "predicted.factor".
predicted.factor <- factor(predicted.classes, levels = c("0", "1"))
#Hacemos lo mismo con los niveles para el conjunto "test.data$Bueno".
test.data$Bueno <- factor(test.data$Bueno, levels = c("0", "1"))
#Prosiguiendo, calculamos la precisión de los datos de entrenamiento y de prueba de la clasificación.
mean(predicted.classes == train.data$Bueno)
[1] 0.7853881
mean(predicted.classes == test.data$Bueno)
[1] 0.7876712
#Finalmente, creamos la matriz de confusión para analizar los indicadores de desempeño, sensibilidad, especificidad, precisión, entre otros, en función de las predicciones que realizmaos anteriormente y nuestros datos de referencia.
confusionMatrix(data = predicted.factor, reference = test.data$Bueno, positive = "1")
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 14 28
1 3 101
Accuracy : 0.7877
95% CI : (0.7124, 0.8509)
No Information Rate : 0.8836
P-Value [Acc > NIR] : 0.9997
Kappa : 0.3702
Mcnemar's Test P-Value : 1.629e-05
Sensitivity : 0.7829
Specificity : 0.8235
Pos Pred Value : 0.9712
Neg Pred Value : 0.3333
Prevalence : 0.8836
Detection Rate : 0.6918
Detection Prevalence : 0.7123
Balanced Accuracy : 0.8032
'Positive' Class : 1