# Set the knitr chunk option echo = TRUE as the default for the document's
# chunks, so each chunk's source code is shown alongside its results.
knitr::opts_chunk$set(echo = TRUE)

Preparar un documento R Markdown con el archivo “MusicSales.csv”, ubicado en la sección de documentos del TEC Digital, en la carpeta “Materiales”.

Exploración de la estructura del archivo

- Revisar la estructura del archivo (`str`, `summary`).

# Read the sales export into a data frame; the path is relative, so the CSV
# is looked up in the current working directory.
music_sales <- read.csv(file = "MusicSales.csv")

# Print summary statistics for every column.
summary(object = music_sales)
##     Album              Artist              City             Company         
##  Length:2143        Length:2143        Length:2143        Length:2143       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Composer           Country            CustomerID       Email          
##  Length:2143        Length:2143        Min.   : 1.00   Length:2143       
##  Class :character   Class :character   1st Qu.:15.00   Class :character  
##  Mode  :character   Mode  :character   Median :30.00   Mode  :character  
##                                        Mean   :29.93                     
##                                        3rd Qu.:45.00                     
##                                        Max.   :59.00                     
##   FirstName            Genre           InvoiceDate          InvoiceID    
##  Length:2143        Length:2143        Length:2143        Min.   :  1.0  
##  Class :character   Class :character   Class :character   1st Qu.:100.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :201.0  
##                                                           Mean   :205.2  
##                                                           3rd Qu.:313.0  
##                                                           Max.   :412.0  
##    LastName          MediaType            Phone            PostalCode       
##  Length:2143        Length:2143        Length:2143        Length:2143       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Region             State              Track              Bytes          
##  Length:2143        Length:2143        Length:2143        Length:2143       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   Milliseconds     Number.of.Records    Quantity         Sales      
##  Min.   :   6373   Min.   :1         Min.   :1.000   Min.   :0.990  
##  1st Qu.: 205871   1st Qu.:1         1st Qu.:2.000   1st Qu.:1.980  
##  Median : 255869   Median :1         Median :3.000   Median :2.970  
##  Mean   : 374213   Mean   :1         Mean   :2.979   Mean   :3.096  
##  3rd Qu.: 320404   3rd Qu.:1         3rd Qu.:4.000   3rd Qu.:3.960  
##  Max.   :5286953   Max.   :1         Max.   :5.000   Max.   :9.950  
##    UnitPrice    
##  Min.   :0.990  
##  1st Qu.:0.990  
##  Median :0.990  
##  Mean   :1.039  
##  3rd Qu.:0.990  
##  Max.   :1.990
# Compact structural overview: dimensions, column types and leading values.
str(object = music_sales)
## 'data.frame':    2143 obs. of  25 variables:
##  $ Album            : chr  "Balls to the Wall" "Restless and Wild" "For Those About To Rock We Salute You" "For Those About To Rock We Salute You" ...
##  $ Artist           : chr  "Accept" "Accept" "AC/DC" "AC/DC" ...
##  $ City             : chr  "Stuttgart" "Stuttgart" "Oslo" "Oslo" ...
##  $ Company          : chr  "" "" "" "" ...
##  $ Composer         : chr  "" "F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman" "Angus Young, Malcolm Young, Brian Johnson" "Angus Young, Malcolm Young, Brian Johnson" ...
##  $ Country          : chr  "Germany" "Germany" "Norway" "Norway" ...
##  $ CustomerID       : int  2 2 4 4 4 4 8 8 8 8 ...
##  $ Email            : chr  "leonekohler@surfeu.de" "leonekohler@surfeu.de" "bjorn.hansen@yahoo.no" "bjorn.hansen@yahoo.no" ...
##  $ FirstName        : chr  "Leonie" "Leonie" "Bjarn" "Bjarn" ...
##  $ Genre            : chr  "Rock" "Rock" "Rock" "Rock" ...
##  $ InvoiceDate      : chr  "1/1/08 00:00" "1/1/08 00:00" "1/2/08 00:00" "1/2/08 00:00" ...
##  $ InvoiceID        : int  1 1 2 2 2 2 3 3 3 3 ...
##  $ LastName         : chr  "Kahler" "Kahler" "Hansen" "Hansen" ...
##  $ MediaType        : chr  "Protected AAC audio file" "Protected AAC audio file" "MPEG audio file" "MPEG audio file" ...
##  $ Phone            : chr  "+49 0711 2842222" "+49 0711 2842222" "+47 22 44 22 22" "+47 22 44 22 22" ...
##  $ PostalCode       : chr  "70174" "70174" "171" "171" ...
##  $ Region           : chr  "Europe" "Europe" "Europe" "Europe" ...
##  $ State            : chr  "" "" "" "" ...
##  $ Track            : chr  "Balls to the Wall" "Restless and Wild" "Breaking The Rules" "Inject The Venom" ...
##  $ Bytes            : chr  "5.51M" "4.33M" "8.60M" "6.85M" ...
##  $ Milliseconds     : int  342562 252051 263288 210834 263497 205662 215196 369319 307617 321828 ...
##  $ Number.of.Records: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Quantity         : int  4 4 1 2 2 5 1 2 2 4 ...
##  $ Sales            : num  3.96 3.96 0.99 1.98 1.98 4.95 0.99 1.98 1.98 3.96 ...
##  $ UnitPrice        : num  0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 ...

- Retornar los nombres de las columnas.
- Convertir la columna `InvoiceDate` a tipo fecha.
- Retornar las primeras 5 filas y las últimas 8 filas.

# List the column names of the data frame.
names(music_sales)
##  [1] "Album"             "Artist"            "City"             
##  [4] "Company"           "Composer"          "Country"          
##  [7] "CustomerID"        "Email"             "FirstName"        
## [10] "Genre"             "InvoiceDate"       "InvoiceID"        
## [13] "LastName"          "MediaType"         "Phone"            
## [16] "PostalCode"        "Region"            "State"            
## [19] "Track"             "Bytes"             "Milliseconds"     
## [22] "Number.of.Records" "Quantity"          "Sales"            
## [25] "UnitPrice"
# Parse InvoiceDate (text such as "1/1/08 00:00") into Date values.
# An explicit month/day/two-digit-year format is required: with no format,
# as.Date() guesses a year-first layout, so "1/1/08" is read as year 1 and
# rows that do not fit that layout become NA. The trailing time of day is
# not part of the format and is ignored while parsing. The column is
# referenced by its full name instead of relying on `$` partial matching.
music_sales$InvoiceDate <- as.Date(
  music_sales$InvoiceDate,
  format = "%m/%d/%y"
)

# Show the first five rows.
head(music_sales, n = 5L)
##                                   Album Artist      City Company
## 1                     Balls to the Wall Accept Stuttgart        
## 2                     Restless and Wild Accept Stuttgart        
## 3 For Those About To Rock We Salute You  AC/DC      Oslo        
## 4 For Those About To Rock We Salute You  AC/DC      Oslo        
## 5 For Those About To Rock We Salute You  AC/DC      Oslo        
##                                                                 Composer
## 1                                                                       
## 2 F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman
## 3                              Angus Young, Malcolm Young, Brian Johnson
## 4                              Angus Young, Malcolm Young, Brian Johnson
## 5                              Angus Young, Malcolm Young, Brian Johnson
##   Country CustomerID                 Email FirstName Genre InvoiceDate
## 1 Germany          2 leonekohler@surfeu.de    Leonie  Rock  0001-01-08
## 2 Germany          2 leonekohler@surfeu.de    Leonie  Rock  0001-01-08
## 3  Norway          4 bjorn.hansen@yahoo.no     Bjarn  Rock  0001-02-08
## 4  Norway          4 bjorn.hansen@yahoo.no     Bjarn  Rock  0001-02-08
## 5  Norway          4 bjorn.hansen@yahoo.no     Bjarn  Rock  0001-02-08
##   InvoiceID LastName                MediaType            Phone PostalCode
## 1         1   Kahler Protected AAC audio file +49 0711 2842222      70174
## 2         1   Kahler Protected AAC audio file +49 0711 2842222      70174
## 3         2   Hansen          MPEG audio file  +47 22 44 22 22        171
## 4         2   Hansen          MPEG audio file  +47 22 44 22 22        171
## 5         2   Hansen          MPEG audio file  +47 22 44 22 22        171
##   Region State              Track Bytes Milliseconds Number.of.Records Quantity
## 1 Europe        Balls to the Wall 5.51M       342562                 1        4
## 2 Europe        Restless and Wild 4.33M       252051                 1        4
## 3 Europe       Breaking The Rules 8.60M       263288                 1        1
## 4 Europe         Inject The Venom 6.85M       210834                 1        2
## 5 Europe               Evil Walks 8.61M       263497                 1        2
##   Sales UnitPrice
## 1  3.96      0.99
## 2  3.96      0.99
## 3  0.99      0.99
## 4  1.98      0.99
## 5  1.98      0.99
# Show the last eight rows.
tail(x = music_sales, n = 8L)
##                              Album              Artist     City Company
## 2136              Ao Vivo [IMPORT]      Zeca Pagodinho Helsinki        
## 2137 The Best Of Van Halen, Vol. I           Van Halen Helsinki        
## 2138                     Van Halen           Van Halen Helsinki        
## 2139                    Contraband     Velvet Revolver Helsinki        
## 2140            Vinicius De Moraes Vinícius De Moraes Helsinki        
## 2141              Ao Vivo [IMPORT]      Zeca Pagodinho Helsinki        
## 2142                     Van Halen           Van Halen Helsinki        
## 2143          The Office, Season 1          The Office    Delhi        
##                                                                                                               Composer
## 2136                                         Arlindo Cruz/Franco/Marquinhos PQD/Negro, Jovelina Pérolo/Zeca Pagodinho
## 2137 Edward Van Halen, Alex Van Halen, Michael Anthony,/Edward Van Halen, Alex Van Halen, Michael Anthony, Sammy Hagar
## 2138                                              Edward Van Halen, Alex Van Halen, Michael Anthony and David Lee Roth
## 2139                                                             Dave Kushner, Duff, Matt Sorum, Scott Weiland & Slash
## 2140                                                                                                                  
## 2141                                                                                          Grazielle/Roque Ferreira
## 2142                                              Edward Van Halen, Alex Van Halen, Michael Anthony and David Lee Roth
## 2143                                                                                                                  
##      Country CustomerID                     Email FirstName    Genre
## 2136 Finland         44 terhi.hamalainen@apple.fi     Terhi    Latin
## 2137 Finland         44 terhi.hamalainen@apple.fi     Terhi     Rock
## 2138 Finland         44 terhi.hamalainen@apple.fi     Terhi     Rock
## 2139 Finland         44 terhi.hamalainen@apple.fi     Terhi     Rock
## 2140 Finland         44 terhi.hamalainen@apple.fi     Terhi    Latin
## 2141 Finland         44 terhi.hamalainen@apple.fi     Terhi    Latin
## 2142 Finland         44 terhi.hamalainen@apple.fi     Terhi     Rock
## 2143   India         58   manoj.pareek@rediff.com     Manoj TV Shows
##      InvoiceDate InvoiceID    LastName                   MediaType
## 2136        <NA>       411 Hamamalinen             MPEG audio file
## 2137        <NA>       411 Hamamalinen             MPEG audio file
## 2138        <NA>       411 Hamamalinen             MPEG audio file
## 2139        <NA>       411 Hamamalinen             MPEG audio file
## 2140        <NA>       411 Hamamalinen             MPEG audio file
## 2141        <NA>       411 Hamamalinen             MPEG audio file
## 2142        <NA>       411 Hamamalinen             MPEG audio file
## 2143        <NA>       412      Pareek Protected MPEG-4 video file
##                  Phone PostalCode Region State
## 2136  +358 09 870 2000        530 Europe      
## 2137  +358 09 870 2000        530 Europe      
## 2138  +358 09 870 2000        530 Europe      
## 2139  +358 09 870 2000        530 Europe      
## 2140  +358 09 870 2000        530 Europe      
## 2141  +358 09 870 2000        530 Europe      
## 2142  +358 09 870 2000        530 Europe      
## 2143 +91 0124 39883988     110017   Asia      
##                                                     Track   Bytes Milliseconds
## 2136 Feirinha da Pavuna/Luz do Repente/Bagaço da Laranja   3.59M       107206
## 2137                                               Dreams   9.50M       291813
## 2138                                              On Fire   5.88M       180636
## 2139                                       Fall To Pieces   8.82M       270889
## 2140                                    A Bencao E Outros  14.23M       421093
## 2141                                    Samba pras moças   5.12M       152816
## 2142                                             Eruption   3.29M       102556
## 2143                                             Hot Girl 267.84M      1325458
##      Number.of.Records Quantity Sales UnitPrice
## 2136                 1        4  3.96      0.99
## 2137                 1        4  3.96      0.99
## 2138                 1        4  3.96      0.99
## 2139                 1        4  3.96      0.99
## 2140                 1        5  4.95      0.99
## 2141                 1        5  4.95      0.99
## 2142                 1        5  4.95      0.99
## 2143                 1        2  3.98      1.99

Exploración de los datos

- Retornar el total de filas.
- Retornar el total de valores NA por columna.

# Total number of rows in the data frame.
nrow(x = music_sales)
## [1] 2143
# Count the missing values in each column: is.na() flags every cell and
# colSums() adds the flags up column by column.
colSums(x = is.na(music_sales))
##             Album            Artist              City           Company 
##                 0                 0                 0                 0 
##          Composer           Country        CustomerID             Email 
##                 0                 0                 0                 0 
##         FirstName             Genre       InvoiceDate         InvoiceID 
##                 0                 0              1234                 0 
##          LastName         MediaType             Phone        PostalCode 
##                 0                 0                 0                 0 
##            Region             State             Track             Bytes 
##                 0                 0                 0                 0 
##      Milliseconds Number.of.Records          Quantity             Sales 
##                 0                 0                 0                 0 
##         UnitPrice 
##                 0

- Preparar dos gráficos: un histograma y un gráfico de barras.

# Histogram of the Sales column; title and axis labels are set explicitly.
hist(
  music_sales[["Sales"]],
  col = "skyblue",
  border = "white",
  main = "Distribución de las ventas",
  xlab = "Ventas",
  ylab = "Frecuencia"
)

# Bar chart of how many rows fall under each Genre value.
barplot(
  table(music_sales[["Genre"]]),
  col = "pink",
  main = "Cantidad por género",
  xlab = "Género",
  ylab = "Cantidad"
)