Desarrollo taller 4

## Este conjunto de datos contiene información sobre las 100 películas principales, incluidos varios atributos como el título, el género, la calificación y demás.
##A continuación se muestra una descripción detallada de cada columna del conjunto de datos:
## ...1 (columna sin nombre en el CSV, «Unnamed: 0»): se trata de una columna de índice que representa la posición del registro en el conjunto de datos. Va de 0 a 99.
## rank: Esta columna representa la posición de la película dentro de la lista de las 100 mejores. Va de 1 a 100.
## title: El título de la película
##description: Una breve descripción o sinopsis de la película
## genre: Los géneros asociados con la película, enumerados en un formato de cadena con géneros encerrados entre corchetes.
## rating: La calificación de IMDb de la película, dada como un número de punto flotante.
##id: Un identificador único para la película dentro del conjunto de datos, con el prefijo "top".
## year: El año de estreno de la película.
##imdbid: El ID de IMDb de la película, que se puede utilizar para localizar la película en IMDb.
## imdb_link: Un enlace directo a la página de la película en IMDb.
## image: Un enlace URL a la imagen del póster de la película

packages <- c("readxl", "openxlsx", "readr", "corrplot", "DataExplorer", "data.table", "ggplot2", "knitr",
              "gamlss", "ggpubr", "dplyr")

# Install packages not yet installed

#if (!require(install.load)) {install.packages("install.load")}

install.load::install_load(packages)

## Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## Preview the data; head() returns the first 6 rows by default.
top_100_movies %>% head()

## # A tibble: 6 × 11
##    ...1  rank title  description genre rating id     year imdbid imdb_link image
##   <dbl> <dbl> <chr>  <chr>       <chr>  <dbl> <chr> <dbl> <chr>  <chr>     <chr>
## 1     0     1 The S… Two impris… ['Dr…    9.3 top1   1994 tt011… https://… http…
## 2     1     2 The G… The aging … ['Cr…    9.2 top2   1972 tt006… https://… http…
## 3     2     3 The D… When the m… ['Ac…    9   top3   2008 tt046… https://… http…
## 4     3     4 The G… The early … ['Cr…    9   top4   1974 tt007… https://… http…
## 5     4     5 12 An… The jury i… ['Cr…    9   top5   1957 tt005… https://… http…
## 6     5     6 Schin… In German-… ['Bi…    9   top6   1993 tt010… https://… http…

## Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## New names:
## Rows: 100 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): title, description, genre, id, imdbid, imdb_link, image dbl (4): ...1,
## rank, rating, year
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

## Show the first 10 rows of the data.
head(top_100_movies, n = 10)

## # A tibble: 10 × 11
##     ...1  rank title description genre rating id     year imdbid imdb_link image
##    <dbl> <dbl> <chr> <chr>       <chr>  <dbl> <chr> <dbl> <chr>  <chr>     <chr>
##  1     0     1 The … Two impris… ['Dr…    9.3 top1   1994 tt011… https://… http…
##  2     1     2 The … The aging … ['Cr…    9.2 top2   1972 tt006… https://… http…
##  3     2     3 The … When the m… ['Ac…    9   top3   2008 tt046… https://… http…
##  4     3     4 The … The early … ['Cr…    9   top4   1974 tt007… https://… http…
##  5     4     5 12 A… The jury i… ['Cr…    9   top5   1957 tt005… https://… http…
##  6     5     6 Schi… In German-… ['Bi…    9   top6   1993 tt010… https://… http…
##  7     6     7 The … Gandalf an… ['Ac…    9   top7   2003 tt016… https://… http…
##  8     7     8 Pulp… The lives … ['Cr…    8.9 top8   1994 tt011… https://… http…
##  9     8     9 The … A meek Hob… ['Ac…    8.8 top9   2001 tt012… https://… http…
## 10     9    10 The … A bounty h… ['Ad…    8.8 top10  1966 tt006… https://… http…

## Show the first 10 rows of the data (same preview, plain function call).
utils::head(x = top_100_movies, n = 10)

## # A tibble: 10 × 11
##     ...1  rank title description genre rating id     year imdbid imdb_link image
##    <dbl> <dbl> <chr> <chr>       <chr>  <dbl> <chr> <dbl> <chr>  <chr>     <chr>
##  1     0     1 The … Two impris… ['Dr…    9.3 top1   1994 tt011… https://… http…
##  2     1     2 The … The aging … ['Cr…    9.2 top2   1972 tt006… https://… http…
##  3     2     3 The … When the m… ['Ac…    9   top3   2008 tt046… https://… http…
##  4     3     4 The … The early … ['Cr…    9   top4   1974 tt007… https://… http…
##  5     4     5 12 A… The jury i… ['Cr…    9   top5   1957 tt005… https://… http…
##  6     5     6 Schi… In German-… ['Bi…    9   top6   1993 tt010… https://… http…
##  7     6     7 The … Gandalf an… ['Ac…    9   top7   2003 tt016… https://… http…
##  8     7     8 Pulp… The lives … ['Cr…    8.9 top8   1994 tt011… https://… http…
##  9     8     9 The … A meek Hob… ['Ac…    8.8 top9   2001 tt012… https://… http…
## 10     9    10 The … A bounty h… ['Ad…    8.8 top10  1966 tt006… https://… http…

# Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## New names:
## Rows: 100 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): title, description, genre, id, imdbid, imdb_link, image dbl (4): ...1,
## rank, rating, year
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Keep only the title, genre and rating columns.
top_100_movies_selected <- dplyr::select(top_100_movies, title, genre, rating)
# Preview the first rows of the selection as a sanity check.
top_100_movies_selected %>% head()

## # A tibble: 6 × 3
##   title                    genre                             rating
##   <chr>                    <chr>                              <dbl>
## 1 The Shawshank Redemption ['Drama']                            9.3
## 2 The Godfather            ['Crime', 'Drama']                   9.2
## 3 The Dark Knight          ['Action', 'Crime', 'Drama']         9  
## 4 The Godfather Part II    ['Crime', 'Drama']                   9  
## 5 12 Angry Men             ['Crime', 'Drama']                   9  
## 6 Schindler's List         ['Biography', 'Drama', 'History']    9

# Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## New names:
## Rows: 100 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): title, description, genre, id, imdbid, imdb_link, image dbl (4): ...1,
## rank, rating, year
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Flag the dramas released in 1994 and keep only the columns needed to check
# the flag.
# The genre column stores capitalised names (e.g. "['Crime', 'Drama']"), so the
# match ignores case: a case-sensitive 'drama' pattern never matches and
# leaves the flag FALSE on every row.
# `year` is numeric, so it is compared as a number instead of as a regex;
# `%in%` yields FALSE (not NA) for a missing year.
# NOTE(review): `is_collab` looks like a name left over from another exercise;
# it is kept so any later code that uses the column keeps working.
top_100_movies_selected <- top_100_movies %>%
  mutate(
    is_collab = grepl("drama", genre, ignore.case = TRUE) & year %in% 1994
  ) %>%
  dplyr::select(title, genre, year, is_collab)

# Preview the first rows of the selection as a sanity check.
head(top_100_movies_selected)

## # A tibble: 6 × 4
##   title                    genre                              year is_collab
##   <chr>                    <chr>                             <dbl> <lgl>    
## 1 The Shawshank Redemption ['Drama']                          1994 FALSE    
## 2 The Godfather            ['Crime', 'Drama']                 1972 FALSE    
## 3 The Dark Knight          ['Action', 'Crime', 'Drama']       2008 FALSE    
## 4 The Godfather Part II    ['Crime', 'Drama']                 1974 FALSE    
## 5 12 Angry Men             ['Crime', 'Drama']                 1957 FALSE    
## 6 Schindler's List         ['Biography', 'Drama', 'History']  1993 FALSE

 # Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## New names:
## Rows: 100 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): title, description, genre, id, imdbid, imdb_link, image dbl (4): ...1,
## rank, rating, year
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

#' Keep the drama films released in or after a given year.
#'
#' `genre` holds a bracketed list such as "['Crime', 'Drama']", so the genre
#' is looked up as a substring. An exact `genre == "Drama"` comparison can
#' never match such a value and always yields zero rows.
#'
#' NOTE(review): the function name says 1974 but the threshold in the code is
#' 1994. The existing value is kept as the default; pass `min_year = 1974` if
#' the name reflects the intended cut-off. Confirm with the author.
#'
#' @param dataframe A data frame with a numeric `year` column and a character
#'   `genre` column.
#' @param min_year Earliest release year to keep (inclusive).
#' @return The rows of `dataframe` whose `year` is at least `min_year` and
#'   whose `genre` contains "Drama".
filter_drama_since_1974 <- function(dataframe, min_year = 1994) {
  dplyr::filter(
    dataframe,
    year >= min_year,
    grepl("Drama", genre, fixed = TRUE)
  )
}

# Run the drama filter over the full data set.
drama_since_1974_movies <- top_100_movies %>% filter_drama_since_1974()

# Print the filtered rows.
print(x = drama_since_1974_movies)

## # A tibble: 0 × 11
## # ℹ 11 variables: ...1 <dbl>, rank <dbl>, title <chr>, description <chr>,
## #   genre <chr>, rating <dbl>, id <chr>, year <dbl>, imdbid <chr>,
## #   imdb_link <chr>, image <chr>

## La cantidad de películas por género

# Read the CSV file into the `top_100_movies` tibble.
top_100_movies <- readr::read_csv(file = "top_100_movies.csv")

## New names:
## Rows: 100 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): title, description, genre, id, imdbid, imdb_link, image dbl (4): ...1,
## rank, rating, year
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

#' Count how many rows share each value of the `genre` column.
#'
#' The `genre` column is grouped as-is, so every distinct string forms its own
#' group.
#'
#' @param dataframe A data frame with a `genre` column.
#' @return One row per distinct `genre` value with its frequency in `count`,
#'   sorted from the most to the least frequent.
count_movies_by_genre <- function(dataframe) {
  by_genre <- dplyr::group_by(dataframe, genre)
  genre_counts <- dplyr::summarise(by_genre, count = dplyr::n())
  dplyr::arrange(genre_counts, dplyr::desc(count))
}

# Count the rows of `top_100_movies` per `genre` value.
movies_by_genre <- top_100_movies %>% count_movies_by_genre()

# Print the resulting counts.
print(x = movies_by_genre)

## # A tibble: 55 × 2
##    genre                                count
##    <chr>                                <int>
##  1 ['Drama']                                8
##  2 ['Crime', 'Drama']                       7
##  3 ['Action', 'Adventure', 'Drama']         5
##  4 ['Crime', 'Drama', 'Mystery']            4
##  5 ['Action', 'Adventure', 'Fantasy']       3
##  6 ['Action', 'Adventure', 'Sci-Fi']        3
##  7 ['Animation', 'Action', 'Adventure']     3
##  8 ['Animation', 'Adventure', 'Comedy']     3
##  9 ['Biography', 'Drama', 'History']        3
## 10 ['Comedy', 'Drama', 'Romance']           3
## # ℹ 45 more rows

Desarrollo taller 4

Viviana Bravo

2024-07-26