# Set echo = TRUE as the default knitr chunk option for the document's chunks.
knitr::opts_chunk$set(echo = TRUE)
Preparar un documento R Markdown con el archivo “MusicSales.csv”, ubicado en la sección de documentos del TEC Digital, en la carpeta “Materiales”.
Exploración de la estructura del archivo
• Revisar la estructura del archivo (str, summary)
# Read the sales data from "MusicSales.csv" (path relative to the working
# directory) into a data frame, then print per-column summary statistics.
music_sales <- read.csv(file = "MusicSales.csv")
summary(object = music_sales)
## Album Artist City Company
## Length:2143 Length:2143 Length:2143 Length:2143
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Composer Country CustomerID Email
## Length:2143 Length:2143 Min. : 1.00 Length:2143
## Class :character Class :character 1st Qu.:15.00 Class :character
## Mode :character Mode :character Median :30.00 Mode :character
## Mean :29.93
## 3rd Qu.:45.00
## Max. :59.00
## FirstName Genre InvoiceDate InvoiceID
## Length:2143 Length:2143 Length:2143 Min. : 1.0
## Class :character Class :character Class :character 1st Qu.:100.0
## Mode :character Mode :character Mode :character Median :201.0
## Mean :205.2
## 3rd Qu.:313.0
## Max. :412.0
## LastName MediaType Phone PostalCode
## Length:2143 Length:2143 Length:2143 Length:2143
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Region State Track Bytes
## Length:2143 Length:2143 Length:2143 Length:2143
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Milliseconds Number.of.Records Quantity Sales
## Min. : 6373 Min. :1 Min. :1.000 Min. :0.990
## 1st Qu.: 205871 1st Qu.:1 1st Qu.:2.000 1st Qu.:1.980
## Median : 255869 Median :1 Median :3.000 Median :2.970
## Mean : 374213 Mean :1 Mean :2.979 Mean :3.096
## 3rd Qu.: 320404 3rd Qu.:1 3rd Qu.:4.000 3rd Qu.:3.960
## Max. :5286953 Max. :1 Max. :5.000 Max. :9.950
## UnitPrice
## Min. :0.990
## 1st Qu.:0.990
## Median :0.990
## Mean :1.039
## 3rd Qu.:0.990
## Max. :1.990
# Compactly display the data frame's structure: column names, types and
# the first few values of each column.
str(object = music_sales)
## 'data.frame': 2143 obs. of 25 variables:
## $ Album : chr "Balls to the Wall" "Restless and Wild" "For Those About To Rock We Salute You" "For Those About To Rock We Salute You" ...
## $ Artist : chr "Accept" "Accept" "AC/DC" "AC/DC" ...
## $ City : chr "Stuttgart" "Stuttgart" "Oslo" "Oslo" ...
## $ Company : chr "" "" "" "" ...
## $ Composer : chr "" "F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman" "Angus Young, Malcolm Young, Brian Johnson" "Angus Young, Malcolm Young, Brian Johnson" ...
## $ Country : chr "Germany" "Germany" "Norway" "Norway" ...
## $ CustomerID : int 2 2 4 4 4 4 8 8 8 8 ...
## $ Email : chr "leonekohler@surfeu.de" "leonekohler@surfeu.de" "bjorn.hansen@yahoo.no" "bjorn.hansen@yahoo.no" ...
## $ FirstName : chr "Leonie" "Leonie" "Bjarn" "Bjarn" ...
## $ Genre : chr "Rock" "Rock" "Rock" "Rock" ...
## $ InvoiceDate : chr "1/1/08 00:00" "1/1/08 00:00" "1/2/08 00:00" "1/2/08 00:00" ...
## $ InvoiceID : int 1 1 2 2 2 2 3 3 3 3 ...
## $ LastName : chr "Kahler" "Kahler" "Hansen" "Hansen" ...
## $ MediaType : chr "Protected AAC audio file" "Protected AAC audio file" "MPEG audio file" "MPEG audio file" ...
## $ Phone : chr "+49 0711 2842222" "+49 0711 2842222" "+47 22 44 22 22" "+47 22 44 22 22" ...
## $ PostalCode : chr "70174" "70174" "171" "171" ...
## $ Region : chr "Europe" "Europe" "Europe" "Europe" ...
## $ State : chr "" "" "" "" ...
## $ Track : chr "Balls to the Wall" "Restless and Wild" "Breaking The Rules" "Inject The Venom" ...
## $ Bytes : chr "5.51M" "4.33M" "8.60M" "6.85M" ...
## $ Milliseconds : int 342562 252051 263288 210834 263497 205662 215196 369319 307617 321828 ...
## $ Number.of.Records: int 1 1 1 1 1 1 1 1 1 1 ...
## $ Quantity : int 4 4 1 2 2 5 1 2 2 4 ...
## $ Sales : num 3.96 3.96 0.99 1.98 1.98 4.95 0.99 1.98 1.98 3.96 ...
## $ UnitPrice : num 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 ...
• Retornar los nombres de las columnas
• Convertir la columna InvoiceDate a tipo fecha
• Retornar las primeras 5 filas y las últimas 8 filas
# List the column names of the data frame.
names(music_sales)
## [1] "Album" "Artist" "City"
## [4] "Company" "Composer" "Country"
## [7] "CustomerID" "Email" "FirstName"
## [10] "Genre" "InvoiceDate" "InvoiceID"
## [13] "LastName" "MediaType" "Phone"
## [16] "PostalCode" "Region" "State"
## [19] "Track" "Bytes" "Milliseconds"
## [22] "Number.of.Records" "Quantity" "Sales"
## [25] "UnitPrice"
# Convert InvoiceDate from text such as "1/1/08 00:00" to Date.
#
# An explicit format is required. Without one, as.Date() guesses "%Y/%m/%d"
# from the first value, which reads "1/1/08" as 0001-01-08 and returns NA for
# every row whose second field is greater than 12. The month/day/two-digit-year
# layout is inferred from the sample values and from that NA pattern (a
# day/month layout would never have produced NA).
#
# The trailing " 00:00" is not part of the format; strptime() ignores any
# characters left over after the format has been matched.
#
# The column is referenced by its full name rather than through `$` partial
# matching ("InvoiceDat"), which silently breaks if another column ever shares
# that prefix.
music_sales$InvoiceDate <- as.Date(music_sales$InvoiceDate, format = "%m/%d/%y")

# Show the first 5 rows.
head(music_sales, n = 5)
## Album Artist City Company
## 1 Balls to the Wall Accept Stuttgart
## 2 Restless and Wild Accept Stuttgart
## 3 For Those About To Rock We Salute You AC/DC Oslo
## 4 For Those About To Rock We Salute You AC/DC Oslo
## 5 For Those About To Rock We Salute You AC/DC Oslo
## Composer
## 1
## 2 F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman
## 3 Angus Young, Malcolm Young, Brian Johnson
## 4 Angus Young, Malcolm Young, Brian Johnson
## 5 Angus Young, Malcolm Young, Brian Johnson
## Country CustomerID Email FirstName Genre InvoiceDate
## 1 Germany 2 leonekohler@surfeu.de Leonie Rock 0001-01-08
## 2 Germany 2 leonekohler@surfeu.de Leonie Rock 0001-01-08
## 3 Norway 4 bjorn.hansen@yahoo.no Bjarn Rock 0001-02-08
## 4 Norway 4 bjorn.hansen@yahoo.no Bjarn Rock 0001-02-08
## 5 Norway 4 bjorn.hansen@yahoo.no Bjarn Rock 0001-02-08
## InvoiceID LastName MediaType Phone PostalCode
## 1 1 Kahler Protected AAC audio file +49 0711 2842222 70174
## 2 1 Kahler Protected AAC audio file +49 0711 2842222 70174
## 3 2 Hansen MPEG audio file +47 22 44 22 22 171
## 4 2 Hansen MPEG audio file +47 22 44 22 22 171
## 5 2 Hansen MPEG audio file +47 22 44 22 22 171
## Region State Track Bytes Milliseconds Number.of.Records Quantity
## 1 Europe Balls to the Wall 5.51M 342562 1 4
## 2 Europe Restless and Wild 4.33M 252051 1 4
## 3 Europe Breaking The Rules 8.60M 263288 1 1
## 4 Europe Inject The Venom 6.85M 210834 1 2
## 5 Europe Evil Walks 8.61M 263497 1 2
## Sales UnitPrice
## 1 3.96 0.99
## 2 3.96 0.99
## 3 0.99 0.99
## 4 1.98 0.99
## 5 1.98 0.99
# Show the last 8 rows of the data frame.
tail(music_sales, n = 8)
## Album Artist City Company
## 2136 Ao Vivo [IMPORT] Zeca Pagodinho Helsinki
## 2137 The Best Of Van Halen, Vol. I Van Halen Helsinki
## 2138 Van Halen Van Halen Helsinki
## 2139 Contraband Velvet Revolver Helsinki
## 2140 Vinicius De Moraes VinÃcius De Moraes Helsinki
## 2141 Ao Vivo [IMPORT] Zeca Pagodinho Helsinki
## 2142 Van Halen Van Halen Helsinki
## 2143 The Office, Season 1 The Office Delhi
## Composer
## 2136 Arlindo Cruz/Franco/Marquinhos PQD/Negro, Jovelina Pérolo/Zeca Pagodinho
## 2137 Edward Van Halen, Alex Van Halen, Michael Anthony,/Edward Van Halen, Alex Van Halen, Michael Anthony, Sammy Hagar
## 2138 Edward Van Halen, Alex Van Halen, Michael Anthony and David Lee Roth
## 2139 Dave Kushner, Duff, Matt Sorum, Scott Weiland & Slash
## 2140
## 2141 Grazielle/Roque Ferreira
## 2142 Edward Van Halen, Alex Van Halen, Michael Anthony and David Lee Roth
## 2143
## Country CustomerID Email FirstName Genre
## 2136 Finland 44 terhi.hamalainen@apple.fi Terhi Latin
## 2137 Finland 44 terhi.hamalainen@apple.fi Terhi Rock
## 2138 Finland 44 terhi.hamalainen@apple.fi Terhi Rock
## 2139 Finland 44 terhi.hamalainen@apple.fi Terhi Rock
## 2140 Finland 44 terhi.hamalainen@apple.fi Terhi Latin
## 2141 Finland 44 terhi.hamalainen@apple.fi Terhi Latin
## 2142 Finland 44 terhi.hamalainen@apple.fi Terhi Rock
## 2143 India 58 manoj.pareek@rediff.com Manoj TV Shows
## InvoiceDate InvoiceID LastName MediaType
## 2136 <NA> 411 Hamamalinen MPEG audio file
## 2137 <NA> 411 Hamamalinen MPEG audio file
## 2138 <NA> 411 Hamamalinen MPEG audio file
## 2139 <NA> 411 Hamamalinen MPEG audio file
## 2140 <NA> 411 Hamamalinen MPEG audio file
## 2141 <NA> 411 Hamamalinen MPEG audio file
## 2142 <NA> 411 Hamamalinen MPEG audio file
## 2143 <NA> 412 Pareek Protected MPEG-4 video file
## Phone PostalCode Region State
## 2136 +358 09 870 2000 530 Europe
## 2137 +358 09 870 2000 530 Europe
## 2138 +358 09 870 2000 530 Europe
## 2139 +358 09 870 2000 530 Europe
## 2140 +358 09 870 2000 530 Europe
## 2141 +358 09 870 2000 530 Europe
## 2142 +358 09 870 2000 530 Europe
## 2143 +91 0124 39883988 110017 Asia
## Track Bytes Milliseconds
## 2136 Feirinha da Pavuna/Luz do Repente/Bagaço da Laranja 3.59M 107206
## 2137 Dreams 9.50M 291813
## 2138 On Fire 5.88M 180636
## 2139 Fall To Pieces 8.82M 270889
## 2140 A Bencao E Outros 14.23M 421093
## 2141 Samba pras moças 5.12M 152816
## 2142 Eruption 3.29M 102556
## 2143 Hot Girl 267.84M 1325458
## Number.of.Records Quantity Sales UnitPrice
## 2136 1 4 3.96 0.99
## 2137 1 4 3.96 0.99
## 2138 1 4 3.96 0.99
## 2139 1 4 3.96 0.99
## 2140 1 5 4.95 0.99
## 2141 1 5 4.95 0.99
## 2142 1 5 4.95 0.99
## 2143 1 2 3.98 1.99
Exploración de los datos
• Retornar el total de filas
• Retornar el total de valores NA por columna
# Total number of rows: the first element of the data frame's dimensions.
dim(music_sales)[1L]
## [1] 2143
# Count the NA values in each column, returned as a named numeric vector.
vapply(
  music_sales,
  function(column) sum(is.na(column)),
  FUN.VALUE = numeric(1)
)
## Album Artist City Company
## 0 0 0 0
## Composer Country CustomerID Email
## 0 0 0 0
## FirstName Genre InvoiceDate InvoiceID
## 0 0 1234 0
## LastName MediaType Phone PostalCode
## 0 0 0 0
## Region State Track Bytes
## 0 0 0 0
## Milliseconds Number.of.Records Quantity Sales
## 0 0 0 0
## UnitPrice
## 0
• Preparar dos gráficos: un histograma y un gráfico de barras
# Histogram of the Sales column.
hist(
  x = music_sales[["Sales"]],
  col = "skyblue",
  border = "white",
  main = "Distribución de las ventas",
  xlab = "Ventas",
  ylab = "Frecuencia"
)
# Bar chart of the number of rows per Genre value.
barplot(
  height = table(music_sales[["Genre"]]),
  col = "pink",
  main = "Cantidad por género",
  xlab = "Género",
  ylab = "Cantidad"
)