A continuación se realiza un análisis descriptivo de la base de datos de Toyota Corolla. Esta base de datos se puede descargar desde el siguiente enlace: https://www.kaggle.com/datasets/tolgahancepel/toyota-corolla.
## Import the Toyota Corolla dataset and produce a summary of the data
library(readr)
## Read the CSV (path is relative to the working directory) into BD;
## read_csv() returns a tibble, not a base data.frame.
BD <- read_csv("ToyotaCorolla.csv")
## Rows: 1436 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): FuelType
## dbl (9): Price, Age, KM, HP, MetColor, Automatic, CC, Doors, Weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Print the dimensions of the dataset (rows, columns)
dim(BD)
## [1] 1436 10
# Column names of the dataset
names(BD)
## [1] "Price" "Age" "KM" "FuelType" "HP" "MetColor"
## [7] "Automatic" "CC" "Doors" "Weight"
# Number of columns in the dataset, kept in `columna` for later use
columna <- ncol(BD)
print(columna)
## [1] 10
# Structure of the data: the type of each column and a preview of its values
str(BD)
## spc_tbl_ [1,436 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Price : num [1:1436] 13500 13750 13950 14950 13750 ...
## $ Age : num [1:1436] 23 23 24 26 30 32 27 30 27 23 ...
## $ KM : num [1:1436] 46986 72937 41711 48000 38500 ...
## $ FuelType : chr [1:1436] "Diesel" "Diesel" "Diesel" "Diesel" ...
## $ HP : num [1:1436] 90 90 90 90 90 90 90 90 192 69 ...
## $ MetColor : num [1:1436] 1 1 1 0 0 0 1 1 0 0 ...
## $ Automatic: num [1:1436] 0 0 0 0 0 0 0 0 0 0 ...
## $ CC : num [1:1436] 2000 2000 2000 2000 2000 2000 2000 2000 1800 1900 ...
## $ Doors : num [1:1436] 3 3 3 3 3 3 3 3 3 3 ...
## $ Weight : num [1:1436] 1165 1165 1165 1165 1170 ...
## - attr(*, "spec")=
## .. cols(
## .. Price = col_double(),
## .. Age = col_double(),
## .. KM = col_double(),
## .. FuelType = col_character(),
## .. HP = col_double(),
## .. MetColor = col_double(),
## .. Automatic = col_double(),
## .. CC = col_double(),
## .. Doors = col_double(),
## .. Weight = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary: min, quartiles, median, mean and max for numeric
# columns; length/class/mode for character columns
summary(BD)
## Price Age KM FuelType
## Min. : 4350 Min. : 1.00 Min. : 1 Length:1436
## 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 43000 Class :character
## Median : 9900 Median :61.00 Median : 63390 Mode :character
## Mean :10731 Mean :55.95 Mean : 68533
## 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 87021
## Max. :32500 Max. :80.00 Max. :243000
## HP MetColor Automatic CC
## Min. : 69.0 Min. :0.0000 Min. :0.00000 Min. :1300
## 1st Qu.: 90.0 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:1400
## Median :110.0 Median :1.0000 Median :0.00000 Median :1600
## Mean :101.5 Mean :0.6748 Mean :0.05571 Mean :1567
## 3rd Qu.:110.0 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1600
## Max. :192.0 Max. :1.0000 Max. :1.00000 Max. :2000
## Doors Weight
## Min. :2.000 Min. :1000
## 1st Qu.:3.000 1st Qu.:1040
## Median :4.000 Median :1070
## Mean :4.033 Mean :1072
## 3rd Qu.:5.000 3rd Qu.:1085
## Max. :5.000 Max. :1615
# Plot every column of BD on a 2 x 5 grid: a histogram for numeric columns and
# a pie chart of value counts for the remaining ones. The column positions of
# each kind are stored in `indexn` (numeric) and `indexc` (non-numeric).
#
# BD is a tibble, so `BD[, i]` is still a one-column tibble: is.numeric() on it
# is always FALSE and hist() rejects it. `BD[[i]]` extracts the column as a
# plain vector, which is what both the type check and the plots require.
columnas <- seq_len(columna)
es_numerica <- vapply(
  columnas,
  function(i) is.numeric(BD[[i]]),
  logical(1)
)
indexn <- columnas[es_numerica]
indexc <- columnas[!es_numerica]

par(mfrow = c(2, 5))
for (i in columnas) {
  nombre <- colnames(BD)[i]
  titulo <- paste("Analisis de la variable: ", nombre)
  if (es_numerica[i]) {
    # `col = i` picks the i-th palette colour so each histogram looks distinct
    hist(BD[[i]], col = i, main = titulo, xlab = nombre)
  } else {
    pie(table(BD[[i]]), main = titulo)
  }
}