Análisis descriptivo de Toyota Corolla

A continuación se realiza un análisis descriptivo de la base de datos de Toyota Corolla. Esta base de datos se puede descargar desde el siguiente enlace: <https://www.kaggle.com/datasets/tolgahancepel/toyota-corolla>.

## Import the Toyota Corolla data set and summarise it.
# readr provides read_csv(), which returns the data as a tibble.
library(readr)
# The path is relative, so the CSV is looked up in the current working directory.
BD <- read_csv("ToyotaCorolla.csv")
## Rows: 1436 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): FuelType
## dbl (9): Price, Age, KM, HP, MetColor, Automatic, CC, Doors, Weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## print the dimensions (rows, columns) of the data set
dim(BD)
## [1] 1436   10
# column names of the data set
names(BD)
##  [1] "Price"     "Age"       "KM"        "FuelType"  "HP"        "MetColor" 
##  [7] "Automatic" "CC"        "Doors"     "Weight"
# number of columns: the second element of dim(BD); used below as the loop bound
columna <- dim(BD)[2]
columna
## [1] 10
# structure of the data: class, type and first values of every column
str(BD)
## spc_tbl_ [1,436 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Price    : num [1:1436] 13500 13750 13950 14950 13750 ...
##  $ Age      : num [1:1436] 23 23 24 26 30 32 27 30 27 23 ...
##  $ KM       : num [1:1436] 46986 72937 41711 48000 38500 ...
##  $ FuelType : chr [1:1436] "Diesel" "Diesel" "Diesel" "Diesel" ...
##  $ HP       : num [1:1436] 90 90 90 90 90 90 90 90 192 69 ...
##  $ MetColor : num [1:1436] 1 1 1 0 0 0 1 1 0 0 ...
##  $ Automatic: num [1:1436] 0 0 0 0 0 0 0 0 0 0 ...
##  $ CC       : num [1:1436] 2000 2000 2000 2000 2000 2000 2000 2000 1800 1900 ...
##  $ Doors    : num [1:1436] 3 3 3 3 3 3 3 3 3 3 ...
##  $ Weight   : num [1:1436] 1165 1165 1165 1165 1170 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Price = col_double(),
##   ..   Age = col_double(),
##   ..   KM = col_double(),
##   ..   FuelType = col_character(),
##   ..   HP = col_double(),
##   ..   MetColor = col_double(),
##   ..   Automatic = col_double(),
##   ..   CC = col_double(),
##   ..   Doors = col_double(),
##   ..   Weight = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# per-column summary: min, quartiles, median, mean and max for numeric columns;
# length, class and mode for the character column
summary(BD)
##      Price            Age              KM           FuelType        
##  Min.   : 4350   Min.   : 1.00   Min.   :     1   Length:1436       
##  1st Qu.: 8450   1st Qu.:44.00   1st Qu.: 43000   Class :character  
##  Median : 9900   Median :61.00   Median : 63390   Mode  :character  
##  Mean   :10731   Mean   :55.95   Mean   : 68533                     
##  3rd Qu.:11950   3rd Qu.:70.00   3rd Qu.: 87021                     
##  Max.   :32500   Max.   :80.00   Max.   :243000                     
##        HP           MetColor        Automatic             CC      
##  Min.   : 69.0   Min.   :0.0000   Min.   :0.00000   Min.   :1300  
##  1st Qu.: 90.0   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1400  
##  Median :110.0   Median :1.0000   Median :0.00000   Median :1600  
##  Mean   :101.5   Mean   :0.6748   Mean   :0.05571   Mean   :1567  
##  3rd Qu.:110.0   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1600  
##  Max.   :192.0   Max.   :1.0000   Max.   :1.00000   Max.   :2000  
##      Doors           Weight    
##  Min.   :2.000   Min.   :1000  
##  1st Qu.:3.000   1st Qu.:1040  
##  Median :4.000   Median :1070  
##  Mean   :4.033   Mean   :1072  
##  3rd Qu.:5.000   3rd Qu.:1085  
##  Max.   :5.000   Max.   :1615

Gráfica de la base de datos

# Draw one plot per column of BD: a histogram for numeric columns and a pie
# chart of value counts for every other column. The positions of the columns
# are collected in indexn (numeric) and indexc (non-numeric).
indexn <- NULL
indexc <- NULL

# 2 x 5 grid: one panel for each of the 10 columns.
par(mfrow = c(2, 5))

for (i in seq_len(columna)) {
  # BD is a tibble, so BD[ , i] would be a one-column tibble (never numeric).
  # [[ extracts the underlying vector, which is what is.numeric(), hist() and
  # table() need.
  valores <- BD[[i]]
  nombre <- names(BD)[i]
  titulo <- paste("Analisis de la variable: ", nombre)

  if (is.numeric(valores)) {
    hist(valores, col = i, main = titulo, xlab = nombre)
    indexn <- c(indexn, i)
  } else {
    pie(table(valores), main = titulo)
    indexc <- c(indexc, i)
  }
}