ggplot_almada

library(janitor)
library(tidyverse)
library(flextable)
library(moments)

# Load the raw car dataset from the project folder.
# NOTE(review): setwd() ties the script to one machine's folder layout; it is
# kept so that the working directory stays what the rest of the script expects.
setwd("~/Downloads/proy_cars_almada")
df <- read.csv("car_dataset.csv")

# Some text fields appear to carry stray leading/trailing spaces: in the
# frequency tables a single category (e.g. "SUV", "Volkswagen") shows up as
# several separate levels. Trim every character column so each category is
# counted once. The tables printed further down in this document were produced
# before this cleanup and need a re-run to reflect it.
# Assumes read.csv() returns text as character columns (the default since
# R 4.0); factor columns would pass through unchanged.
df[] <- lapply(df, function(columna) {
  if (is.character(columna)) trimws(columna) else columna
})

# Show the column names exactly as read.csv() produced them.
colnames(df)

##  [1] "Car.Make"                   "Car.Model"                 
##  [3] "Year"                       "Body.Type"                 
##  [5] "Color.Options"              "Fuel.Type"                 
##  [7] "Engine.Size..L."            "Horsepower"                
##  [9] "Torque..Nm."                "Transmission.Type"         
## [11] "Acceleration..0.60.mph."    "Top.Speed..mph."           
## [13] "Mileage..MPG."              "Safety.Features"           
## [15] "Entertainment.Features"     "Interior.Features"         
## [17] "Exterior.Features"          "Price...."                 
## [19] "Customer.Ratings"           "Sales.Figures..Units.Sold."

# Keep the first six rows of the data as a quick preview.
previo <- head(df, n = 6L)
library(ggplot2)

# Replace the column headers with Spanish labels. The assignment is
# positional: the i-th label below becomes the name of the i-th column of df.
names(df) <- c(
  "Marca", "Modelo", "Año", "Forma", "Colores Disponibles",
  "Tipo de Combustible", "Tamaño del Motor", "Caballos de Fuerza", "Torque",
  "Tipo de Transmisión", "Aceleración", "Velocidad Máxima",
  "Kilometraje", "Seguridad", "Entretenimiento", "Interior", "Exterior",
  "Precio", "Calificación del cliente", "Unidades Vendidas"
)

# Print the new headers to confirm the rename.
names(df)

##  [1] "Marca"                    "Modelo"                  
##  [3] "Año"                      "Forma"                   
##  [5] "Colores Disponibles"      "Tipo de Combustible"     
##  [7] "Tamaño del Motor"         "Caballos de Fuerza"      
##  [9] "Torque"                   "Tipo de Transmisión"     
## [11] "Aceleración"              "Velocidad Máxima"        
## [13] "Kilometraje"              "Seguridad"               
## [15] "Entretenimiento"          "Interior"                
## [17] "Exterior"                 "Precio"                  
## [19] "Calificación del cliente" "Unidades Vendidas"

# Count how many rows fall into each value of the "Forma" column.
table(df[["Forma"]])

## 
##  Convertible     Coupe         Hatchback     Minivan       Sedan      
##             7            11            18             3            27 
##       SUV         SUV          SUV           Truck         Wagon      
##             1             1            82            12             2

# Hand-entered number of cars for eleven brands, stored as a named numeric
# vector (name = brand, value = count).
marcas_autos_top11r <- setNames(
  c(1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5),
  c(
    "Infiniti", "Acura", "Tesla", "Mercedes", "Volkswagen",
    "Volvo", "Jeep", "BMW", "Mazda", "Lexus", "Audi"
  )
)

# Tabulate how many brands share each count.
table(marcas_autos_top11r)

## marcas_autos_top11r
## 1 2 3 4 5 
## 1 2 3 2 3

# Report the size of the data: number of rows, then number of columns.
c(nrow(df), ncol(df))

## [1] 164  20

# Frequency table (count and proportion) of the "Forma" column.
tabyl(df, Forma)

##          Forma  n     percent
##   Convertible   7 0.042682927
##     Coupe      11 0.067073171
##     Hatchback  18 0.109756098
##     Minivan     3 0.018292683
##        SUV      1 0.006097561
##      SUV        1 0.006097561
##     SUV        82 0.500000000
##     Sedan      27 0.164634146
##     Truck      12 0.073170732
##     Wagon       2 0.012195122

# Frequency table of "Forma" rendered as a flextable: proportions are
# formatted as percentages, the font size is set to 14 and column widths are
# fitted to the content.
tabyl(df, Forma) %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size = 14) %>%
  autofit()

Forma	n	percent
Convertible	7	4.3%
Coupe	11	6.7%
Hatchback	18	11.0%
Minivan	3	1.8%
SUV	1	0.6%
SUV	1	0.6%
SUV	82	50.0%
Sedan	27	16.5%
Truck	12	7.3%
Wagon	2	1.2%

Esta es una tabla mejor:

# Same "Forma" frequency table as a flextable, now finished with the boxed
# theme.
tabyl(df, Forma) %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size = 14) %>%
  autofit() %>%
  theme_box()

Forma	n	percent
Convertible	7	4.3%
Coupe	11	6.7%
Hatchback	18	11.0%
Minivan	3	1.8%
SUV	1	0.6%
SUV	1	0.6%
SUV	82	50.0%
Sedan	27	16.5%
Truck	12	7.3%
Wagon	2	1.2%

Esta le agrega el total al final:

# "Forma" frequency table with a totals row appended. The totals are added
# before the percentage formatting so the total row is formatted as well.
tabyl(df, Forma) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size = 14) %>%
  autofit() %>%
  theme_box()

Forma	n	percent
Convertible	7	4.3%
Coupe	11	6.7%
Hatchback	18	11.0%
Minivan	3	1.8%
SUV	1	0.6%
SUV	1	0.6%
SUV	82	50.0%
Sedan	27	16.5%
Truck	12	7.3%
Wagon	2	1.2%
Total	164	100.0%

# Frequency table of "Marca" with a totals row, percentages formatted, shown
# as a boxed flextable.
tabyl(df, Marca) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size = 14) %>%
  autofit() %>%
  theme_box()

Marca	n	percent
Acura	2	1.2%
Audi	5	3.0%
BMW	4	2.4%
Chevrolet	24	14.6%
Ford	22	13.4%
GMC	8	4.9%
Genesis	6	3.7%
Honda	11	6.7%
Hyundai	10	6.1%
Infiniti	1	0.6%
Jeep	4	2.4%
Kia	11	6.7%
Lexus	5	3.0%
Mazda	5	3.0%
Mercedes	3	1.8%
Nissan	7	4.3%
Subaru	9	5.5%
Tesla	2	1.2%
Toyota	19	11.6%
Volkswagen	1	0.6%
Volkswagen	2	1.2%
Volvo	3	1.8%
Total	164	100.0%

# Frequency table of "Modelo" with a totals row, percentages formatted, shown
# as a boxed flextable.
tabyl(df, Modelo) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size = 14) %>%
  autofit() %>%
  theme_box()

Modelo	n	percent
4Runner	1	0.6%
5 Series	2	1.2%
A4	1	0.6%
A5	1	0.6%
Acadia	6	3.7%
Ascent	1	0.6%
Atlas	1	0.6%
Bolt EUV	7	4.3%
Bronco	1	0.6%
C-Class	1	0.6%
CR-V	1	0.6%
CX-5	2	1.2%
Camaro	7	4.3%
Camry	7	4.3%
Carnival	1	0.6%
Civic	2	1.2%
Compass	1	0.6%
Corvette	7	4.3%
Crosstrek	1	0.6%
ES	1	0.6%
Edge	1	0.6%
Equinox	1	0.6%
Escape	2	1.2%
Expedition	1	0.6%
Explorer	7	4.3%
F-150	1	0.6%
Fit	6	3.7%
Forester	2	1.2%
Frontier	1	0.6%
GLC-Class	1	0.6%
GV80	6	3.7%
Grand Cherokee	1	0.6%
HR-V	1	0.6%
Highlander	1	0.6%
IS	1	0.6%
Jetta	1	0.6%
Legacy	2	1.2%
MDX	1	0.6%
MX-5 Miata	1	0.6%
Maverick	6	3.7%
Mazda6	2	1.2%
Model 3	2	1.2%
Mustang	2	1.2%
NX	1	0.6%
Odyssey	1	0.6%
Outback	3	1.8%
Pathfinder	2	1.2%
Prius	6	3.7%
Q5	2	1.2%
Q7	1	0.6%
QX50	1	0.6%
RAV4	1	0.6%
RDX	1	0.6%
RX	2	1.2%
Ranger	1	0.6%
Rogue	1	0.6%
Rogue Sport	1	0.6%
S-Class	1	0.6%
Santa Fe	1	0.6%
Sentra	1	0.6%
Sienna	1	0.6%
Sierra	1	0.6%
Sonata	2	1.2%
Sorento	1	0.6%
Soul	6	3.7%
Sportage	2	1.2%
Tacoma	1	0.6%
Tahoe	1	0.6%
Taos	1	0.6%
Telluride	1	0.6%
Trax	1	0.6%
Tucson	1	0.6%
Tundra	1	0.6%
Venue	6	3.7%
Versa	1	0.6%
Wrangler	2	1.2%
X5	2	1.2%
XC40	1	0.6%
XC60	2	1.2%
Yukon	1	0.6%
Total	164	100.0%

# Bar chart of the number of cars per model. There are about 80 distinct
# models, so the coordinates are flipped to let the model names run down the
# vertical axis instead of overlapping along the bottom. The fill legend is
# dropped because it would repeat every axis label and crowd out the bars.
tabyl(df, Modelo) %>%
  ggplot(aes(x = Modelo, y = n, fill = Modelo)) +
  geom_col(show.legend = FALSE) +
  coord_flip()

Esta es la mejor tabla:

# Plain frequency table (count and proportion) of "Forma".
tabyl(df, Forma)

##          Forma  n     percent
##   Convertible   7 0.042682927
##     Coupe      11 0.067073171
##     Hatchback  18 0.109756098
##     Minivan     3 0.018292683
##        SUV      1 0.006097561
##      SUV        1 0.006097561
##     SUV        82 0.500000000
##     Sedan      27 0.164634146
##     Truck      12 0.073170732
##     Wagon       2 0.012195122

# Bar chart of the "Forma" counts, one coloured bar per category.
tabyl(df, Forma) %>%
  ggplot(aes(x = Forma, y = n, fill = Forma)) +
  geom_col()

# Plain frequency table (count and proportion) of "Marca".
tabyl(df, Marca)

##         Marca  n     percent
##     Acura      2 0.012195122
##     Audi       5 0.030487805
##     BMW        4 0.024390244
##     Chevrolet 24 0.146341463
##     Ford      22 0.134146341
##     GMC        8 0.048780488
##     Genesis    6 0.036585366
##     Honda     11 0.067073171
##     Hyundai   10 0.060975610
##     Infiniti   1 0.006097561
##     Jeep       4 0.024390244
##     Kia       11 0.067073171
##     Lexus      5 0.030487805
##     Mazda      5 0.030487805
##     Mercedes   3 0.018292683
##     Nissan     7 0.042682927
##     Subaru     9 0.054878049
##     Tesla      2 0.012195122
##     Toyota    19 0.115853659
##    Volkswagen  1 0.006097561
##   Volkswagen   2 0.012195122
##     Volvo      3 0.018292683

# Bar chart of the "Marca" counts, one coloured bar per brand.
tabyl(df, Marca) %>%
  ggplot(aes(x = Marca, y = n, fill = Marca)) +
  geom_col()

Esta es la mejor opción hasta el momento:

# Bar chart of the "Forma" counts with a title and axis labels.
tabyl(df, Forma) %>%
  ggplot(aes(x = Forma, y = n, fill = Forma)) +
  geom_col() +
  labs(title = "Formas de los autos", x = "Forma", y = "Frecuencias")

Otra prueba pero ahora con las frecuencias (números) visibles:

# Labelled bar chart of the "Forma" counts; each count is also printed in bold
# white text, nudged below the top edge of its bar.
tabyl(df, Forma) %>%
  ggplot(aes(x = Forma, y = n, fill = Forma)) +
  geom_col() +
  geom_text(
    aes(label = n),
    vjust = 1.5,
    colour = "white",
    fontface = "bold"
  ) +
  labs(title = "Formas de los autos", x = "Forma", y = "Frecuencias")

En vez de las frecuencias, ahora se muestran los porcentajes:

# Labelled bar chart of the "Forma" counts; bar heights are still the counts,
# but the text on each bar shows the share as a percentage with two decimals.
tabyl(df, Forma) %>%
  ggplot(aes(x = Forma, y = n, fill = Forma)) +
  geom_col() +
  geom_text(
    aes(label = sprintf("%.2f%%", percent * 100)),
    vjust = 1.5,
    colour = "white",
    fontface = "bold"
  ) +
  labs(title = "Formas de los autos", x = "Forma", y = "Frecuencias")

Más ejemplos:

Números al azar de la campana de Gauss: los más probables son los cercanos a la media (20 en este ejemplo).

Se puede usar el histograma cuando es difícil hacer una tabla de frecuencias, es decir, cuando es poco probable que los números se repitan.

# Draw a large random sample from a normal distribution (mean 20, sd 1) and
# plot its histogram.
n <- 100000

numeros <- rnorm(n = n, mean = 20, sd = 1)

df1 <- data.frame(numeros)

# bins is set explicitly to ggplot2's default of 30: the picture is unchanged,
# but stat_bin() no longer emits its "Pick better value" message.
df1 %>%
  ggplot(aes(x = numeros)) +
  geom_histogram(bins = 30, color = "blue", fill = "lightblue") +
  labs(
    x = "Números",
    y = "Frecuencia",
    title = "Campana de Gauss Experimental"
  )

ggplot_almada

Danna Almada

2023-11-15