# 1.1 ----

library(readxl)
# Load the movies workbook into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only runs
# where this exact file exists.
Movies_gross_rating <- read_excel("C:/Users/hp/Downloads/Movies_gross_rating.xlsx")

# View() opens a spreadsheet-style viewer window, which only makes sense in an
# interactive session; skip it when the script is run in batch or knitted.
if (interactive()) {
  View(Movies_gross_rating)
}

# Shorter alias used throughout the rest of the analysis.
movies <- Movies_gross_rating

# Object type of the data set.
class(movies)# a tibble that also inherits from data.frame, i.e. a two-dimensional table made of rows and columns
## [1] "tbl_df"     "tbl"        "data.frame"
# Number of rows.
nrow(movies)
## [1] 508
# Number of columns.
ncol(movies)
## [1] 10
# Number of observations and variables in a single call.
dim(movies)
## [1] 508  10
# Fuller overview: describe() prints, per variable, the count, missing values,
# distinct values and (for numeric columns) mean and quantiles.
library(Hmisc)
describe(movies)
## movies 
## 
##  10  Variables      508  Observations
## --------------------------------------------------------------------------------
## MovieID 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      508        0      508        1    254.6    169.8    26.35    51.70 
##      .25      .50      .75      .90      .95 
##   127.75   254.50   381.25   457.30   482.65 
## 
## lowest :   1   2   3   4   5, highest: 506 507 508 509 510
## --------------------------------------------------------------------------------
## Title 
##        n  missing distinct 
##      508        0      505 
## 
## lowest : 101 Dalmatians           2 Fast 2 Furious         2012                     22 Jump Street           300                     
## highest: X-Men Origins: Wolverine X-Men: The Last Stand    X2: X-Men United         xXx                      You've Got Mail         
## --------------------------------------------------------------------------------
## MPAA Rating 
##        n  missing distinct 
##      508        0        4 
##                                   
## Value          G    PG PG-13     R
## Frequency     26   126   227   129
## Proportion 0.051 0.248 0.447 0.254
## --------------------------------------------------------------------------------
## Budget 
##         n   missing  distinct      Info      Mean       Gmd       .05       .10 
##       508         0       128     0.999  83922275  65060302  15000000  20000000 
##       .25       .50       .75       .90       .95 
##  35075000  70000000 125000000 166500000 200000000 
## 
## lowest :     60000   3705538   5000000   6000000   7500000
## highest: 258000000 260000000 270000000 300000000 380000000
## --------------------------------------------------------------------------------
## Gross 
##         n   missing  distinct      Info      Mean       Gmd       .05       .10 
##       508         0       507         1 382186608 275653300  91469974 128027891 
##       .25       .50       .75       .90       .95 
## 196298617 309475095 479115867 749794939 922749978 
## 
## lowest :   53000000   56505065   57319029   61276872   61489265
## highest: 1274219009 1342000000 1519557910 1845034188 2787965087
## --------------------------------------------------------------------------------
## Release Date 
##          n    missing   distinct       Info       Mean        Gmd        .05 
##        508          0        489          1 2001-12-13  269523732 1990-06-03 
##        .10        .25        .50        .75        .90        .95 
## 1991-11-14 1995-06-27 2001-12-11 2008-05-17 2012-03-25 2013-06-20 
## 
## lowest : 1989-04-21 1989-05-24 1989-06-02 1989-06-15 1989-06-22
## highest: 2014-10-01 2014-10-24 2014-11-05 2014-11-18 2014-12-11
## --------------------------------------------------------------------------------
## Genre 
##        n  missing distinct 
##      508        0       16 
## 
## lowest : Action          Adventure       Animation       Comedy          Crime          
## highest: Romance         Science Fiction Thriller        War             Western        
## --------------------------------------------------------------------------------
## Runtime 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      508        0       94        1    117.7    25.28    87.35    91.00 
##      .25      .50      .75      .90      .95 
##   100.00   115.00   130.00   147.00   158.00 
## 
## lowest :  79  81  82  83  84, highest: 187 189 194 195 201
## --------------------------------------------------------------------------------
## Rating 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      508        0       48    0.999    6.917   0.9986      5.4      5.8 
##      .25      .50      .75      .90      .95 
##      6.4      6.9      7.6      8.0      8.3 
## 
## lowest : 4.1 4.2 4.3 4.4 4.5, highest: 8.6 8.7 8.8 8.9 9.0
## --------------------------------------------------------------------------------
## Rating Count 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      508        0      479        1   339252   309843    45576    69534 
##      .25      .50      .75      .90      .95 
##   127592   240348   425700   702166   978389 
## 
## lowest :   14918   17025   22759   24114   28949
## highest: 1550148 1657851 1690474 1888105 2127228
## --------------------------------------------------------------------------------
# Contents of the data: column names, followed by the compact structure
# (type and first values of every column).
names(movies)
##  [1] "MovieID"      "Title"        "MPAA Rating"  "Budget"       "Gross"       
##  [6] "Release Date" "Genre"        "Runtime"      "Rating"       "Rating Count"
str(movies)
## tibble [508 × 10] (S3: tbl_df/tbl/data.frame)
##  $ MovieID     : num [1:508] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Title       : chr [1:508] "Look Who's Talking" "Driving Miss Daisy" "Turner & Hooch" "Born on the Fourth of July" ...
##  $ MPAA Rating : chr [1:508] "PG-13" "PG" "PG" "R" ...
##  $ Budget      : num [1:508] 7500000 7500000 13000000 14000000 15000000 15000000 16000000 16400000 20000000 25000000 ...
##  $ Gross       : num [1:508] 2.96e+08 1.46e+08 7.11e+07 1.61e+08 8.44e+07 ...
##  $ Release Date: POSIXct[1:508], format: "1989-10-12" "1989-12-13" ...
##  $ Genre       : chr [1:508] "Romance" "Comedy" "Crime" "War" ...
##  $ Runtime     : num [1:508] 93 99 100 145 107 100 96 129 124 114 ...
##  $ Rating      : num [1:508] 5.9 7.4 7.2 7.2 7.5 7 7.6 8.1 7 7.2 ...
##  $ Rating Count: num [1:508] 73638 91075 91415 91415 101702 ...
# Distribution of the values of each variable (min, quartiles, mean and max for
# numeric/date columns; length and class for character columns).
summary(movies)
##     MovieID         Title           MPAA Rating            Budget         
##  Min.   :  1.0   Length:508         Length:508         Min.   :    60000  
##  1st Qu.:127.8   Class :character   Class :character   1st Qu.: 35075000  
##  Median :254.5   Mode  :character   Mode  :character   Median : 70000000  
##  Mean   :254.6                                         Mean   : 83922275  
##  3rd Qu.:381.2                                         3rd Qu.:125000000  
##  Max.   :510.0                                         Max.   :380000000  
##      Gross            Release Date                       Genre          
##  Min.   :5.300e+07   Min.   :1989-04-21 00:00:00.00   Length:508        
##  1st Qu.:1.963e+08   1st Qu.:1995-06-26 12:00:00.00   Class :character  
##  Median :3.095e+08   Median :2001-12-10 12:00:00.00   Mode  :character  
##  Mean   :3.822e+08   Mean   :2001-12-13 06:11:20.30                     
##  3rd Qu.:4.791e+08   3rd Qu.:2008-05-16 12:00:00.00                     
##  Max.   :2.788e+09   Max.   :2014-12-11 00:00:00.00                     
##     Runtime          Rating       Rating Count    
##  Min.   : 79.0   Min.   :4.100   Min.   :  14918  
##  1st Qu.:100.0   1st Qu.:6.400   1st Qu.: 127592  
##  Median :115.0   Median :6.900   Median : 240348  
##  Mean   :117.7   Mean   :6.917   Mean   : 339252  
##  3rd Qu.:130.0   3rd Qu.:7.600   3rd Qu.: 425700  
##  Max.   :201.0   Max.   :9.000   Max.   :2127228

# 1.2 ----

# Grouped frequency tables for the numeric columns of `movies`.
library(fdth)
# Sturges' rule must be driven by the number of observations.
# nclass.Sturges() works on length(x), and the length of a data frame is its
# number of columns (10 here, giving only 5 classes), so pass a vector with
# one element per row instead of the data frame itself.
tabla1 <- fdt(movies, k = nclass.Sturges(seq_len(nrow(movies))))
tabla1
# NOTE: any captured output right after this call that shows 5 classes per
# variable was produced with the old column-based k; re-run to refresh it.
## MovieID 
##     Class limits   f   rf rf(%)  cf  cf(%)
##    [0.99,103.81) 103 0.20 20.28 103  20.28
##  [103.81,206.63) 103 0.20 20.28 206  40.55
##  [206.63,309.46) 103 0.20 20.28 309  60.83
##  [309.46,412.28) 103 0.20 20.28 412  81.10
##   [412.28,515.1)  96 0.19 18.90 508 100.00
## 
## Budget 
##           Class limits   f   rf rf(%)  cf  cf(%)
##       [59400,76807520) 281 0.55 55.31 281  55.31
##   [76807520,153555640) 165 0.32 32.48 446  87.80
##  [153555640,230303760)  51 0.10 10.04 497  97.83
##  [230303760,307051880)  10 0.02  1.97 507  99.80
##  [307051880,383800000)   1 0.00  0.20 508 100.00
## 
## Gross 
##                     Class limits   f   rf rf(%)  cf  cf(%)
##         [52470000,605144947.574) 423 0.83 83.27 423  83.27
##   [605144947.574,1157819895.148)  79 0.16 15.55 502  98.82
##  [1157819895.148,1710494842.722)   4 0.01  0.79 506  99.61
##  [1710494842.722,2263169790.296)   1 0.00  0.20 507  99.80
##   [2263169790.296,2815844737.87)   1 0.00  0.20 508 100.00
## 
## Runtime 
##     Class limits   f   rf rf(%)  cf  cf(%)
##   [78.21,103.17) 153 0.30 30.12 153  30.12
##  [103.17,128.13) 209 0.41 41.14 362  71.26
##  [128.13,153.09) 109 0.21 21.46 471  92.72
##  [153.09,178.05)  28 0.06  5.51 499  98.23
##  [178.05,203.01)   9 0.02  1.77 508 100.00
## 
## Rating 
##   Class limits   f   rf rf(%)  cf  cf(%)
##  [4.059,5.065)  15 0.03  2.95  15   2.95
##  [5.065,6.071)  62 0.12 12.20  77  15.16
##  [6.071,7.078) 207 0.41 40.75 284  55.91
##  [7.078,8.084) 179 0.35 35.24 463  91.14
##   [8.084,9.09)  45 0.09  8.86 508 100.00
## 
## Rating Count 
##               Class limits   f   rf rf(%)  cf  cf(%)
##      [14768.82,441515.112) 387 0.76 76.18 387  76.18
##    [441515.112,868261.404)  86 0.17 16.93 473  93.11
##   [868261.404,1295007.696)  22 0.04  4.33 495  97.44
##  [1295007.696,1721753.988)  10 0.02  1.97 505  99.41
##   [1721753.988,2148500.28)   3 0.01  0.59 508 100.00
# Frequency table for ungrouped (categorical) data: the film genre.
# Reads from `movies`, the working alias used by the rest of the script, rather
# than from the raw import object, so every section analyses the same object.
library(summarytools)
genero <- movies$Genre
tabla <- freq(genero)
tabla
## Frequencies  
## genero  
## Type: Character  
## 
##                         Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## --------------------- ------ --------- -------------- --------- --------------
##                Action     76     14.96          14.96     14.96          14.96
##             Adventure     26      5.12          20.08      5.12          20.08
##             Animation     63     12.40          32.48     12.40          32.48
##                Comedy     94     18.50          50.98     18.50          50.98
##                 Crime     16      3.15          54.13      3.15          54.13
##                 Drama     56     11.02          65.16     11.02          65.16
##                Family     28      5.51          70.67      5.51          70.67
##               Fantasy     22      4.33          75.00      4.33          75.00
##               History      4      0.79          75.79      0.79          75.79
##                Horror      9      1.77          77.56      1.77          77.56
##               Mystery     10      1.97          79.53      1.97          79.53
##               Romance     26      5.12          84.65      5.12          84.65
##       Science Fiction     31      6.10          90.75      6.10          90.75
##              Thriller     34      6.69          97.44      6.69          97.44
##                   War      8      1.57          99.02      1.57          99.02
##               Western      5      0.98         100.00      0.98         100.00
##                  <NA>      0                               0.00         100.00
##                 Total    508    100.00         100.00    100.00         100.00
# Bar chart of the number of films per genre, drawn with ggplot2.

library(ggplot2)
ggplot(data = movies, mapping = aes(x = Genre)) +
  geom_bar(
    alpha = 1,
    colour = "pink",
    fill = "skyblue"
  ) +
  labs(title = "generos de peliculas")

# Bar chart of the number of films per genre, drawn with base graphics.
library(BSDA)
library(RColorBrewer)
# Four-colour palette; barplot() recycles it across the genre bars.
coul <- brewer.pal(4, "Set2")
# Absolute frequency of each genre.
z <- table(movies$Genre)
# `names.arg` is spelled out in full: the previous `names =` only worked through
# partial argument matching.
barplot(
  height = z,
  names.arg = names(z),
  col = coul,
  main = "peliculas desde el año 1989 hasta 2014",
  ylab = "Frequency",
  xlab = "genero"
)

# 1.3 ----

# Measures of position for Runtime.

# Quartiles: summary() reports min, Q1, median, mean, Q3 and max;
# quantile() reports the 0/25/50/75/100 % quantiles.
summary(movies$Runtime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    79.0   100.0   115.0   117.7   130.0   201.0
quantile(movies$Runtime)
##   0%  25%  50%  75% 100% 
##   79  100  115  130  201
# Horizontal box-and-whisker plot of Runtime using base graphics.
library(carData)
## 
## Attaching package: 'carData'
## The following objects are masked from 'package:BSDA':
## 
##     Vocab, Wool
boxplot(
  movies[["Runtime"]],
  horizontal = TRUE,
  xlab = "runtime",
  col = "red"
)

# Box-and-whisker plot of Runtime with ggplot2.
# Uses `movies`, the working alias used by the rest of the script, instead of
# the raw import object.
library(ggplot2)
ggplot(movies, aes(x = Runtime)) +
  geom_boxplot(fill = "skyblue", colour = "black", alpha = 1)

# Measures for the Runtime variable:
# descriptive statistics.

# Measures of central tendency.

# Mean:
mean(movies$Runtime)
## [1] 117.7106
# Median:
median(movies$Runtime)
## [1] 115
# Mode: most frequent Runtime value(s).
library(modeest)
## 
## Attaching package: 'modeest'
## The following object is masked from 'package:fdth':
## 
##     mfv
# The package was loaded but the mode was never computed. mfv() returns the
# most frequent value(s); it is namespaced because fdth also provides an mfv().
modeest::mfv(movies$Runtime)
# Measures of dispersion.
# Variance:
var(movies$Runtime)
## [1] 515.7563
# Measures of shape.
# Skewness (positive here, i.e. a longer right tail):
library(moments)
## 
## Attaching package: 'moments'
## The following object is masked from 'package:modeest':
## 
##     skewness
skewness(movies$Runtime)
## [1] 0.7611015
# Kurtosis:
kurtosis(movies$Runtime)
## [1] 3.512343
# Basic inspection of the variable.
class(movies$Runtime)# (variable type)
## [1] "numeric"
# Minimum:
min(movies$Runtime)
## [1] 79
# Maximum:
max(movies$Runtime)
## [1] 201
# Interquartile range (Q3 - Q1):
IQR(movies$Runtime)
## [1] 30

# 1.4 ----

# Histogram of film budgets (7 bins) with ggplot2.
library(ggplot2)
ggplot(data = movies, mapping = aes(x = Budget)) +
  geom_histogram(
    bins = 7,
    fill = "orange",
    colour = "black"
  ) +
  labs(y = "frecuencia", title = "presupuesto de peliculas")

# Grouped frequency table for Budget; the number of classes follows Sturges'
# rule applied to the Budget vector itself.
library(fdth)
data2 <- movies[["Budget"]]
tabla2 <- fdt(data2, k = nclass.Sturges(data2))
tabla2
##           Class limits   f   rf rf(%)  cf  cf(%)
##       [59400,38433460) 140 0.28 27.56 140  27.56
##    [38433460,76807520) 141 0.28 27.76 281  55.31
##   [76807520,115181580)  91 0.18 17.91 372  73.23
##  [115181580,153555640)  74 0.15 14.57 446  87.80
##  [153555640,191929700)  29 0.06  5.71 475  93.50
##  [191929700,230303760)  22 0.04  4.33 497  97.83
##  [230303760,268677820)   8 0.02  1.57 505  99.41
##  [268677820,307051880)   2 0.00  0.39 507  99.80
##  [307051880,345425940)   0 0.00  0.00 507  99.80
##  [345425940,383800000)   1 0.00  0.20 508 100.00
# Measures of central tendency for Budget.

# Mean:
mean(movies$Budget)
## [1] 83922275
# Median:
median(movies$Budget)
## [1] 7e+07
# Mode: most frequent Budget value(s).
library(modeest)
# The package was loaded but the mode was never computed; mfv() returns the
# most frequent value(s). Namespaced so the modeest version is used explicitly.
modeest::mfv(movies$Budget)


# Measures of dispersion.
# Variance:
var(movies$Budget)
## [1] 3.570687e+15
# Measures of shape.
# Skewness (positive here, i.e. a longer right tail):
library(moments)
skewness(movies$Budget)
## [1] 1.083681
# Kurtosis:
kurtosis(movies$Budget)
## [1] 4.190951
# Basic inspection of the variable.
class(movies$Budget)# (variable type)
## [1] "numeric"
# Minimum:
min(movies$Budget)
## [1] 60000
# Maximum:
max(movies$Budget)
## [1] 3.8e+08
# Interquartile range (Q3 - Q1):
IQR(movies$Budget)
## [1] 89925000