library(janitor)
library(tidyverse)
library(flextable)
library(moments)

setwd("~/Downloads/proy_cars_almada")
df=read.csv("car_dataset.csv")
colnames(df)
##  [1] "Car.Make"                   "Car.Model"                 
##  [3] "Year"                       "Body.Type"                 
##  [5] "Color.Options"              "Fuel.Type"                 
##  [7] "Engine.Size..L."            "Horsepower"                
##  [9] "Torque..Nm."                "Transmission.Type"         
## [11] "Acceleration..0.60.mph."    "Top.Speed..mph."           
## [13] "Mileage..MPG."              "Safety.Features"           
## [15] "Entertainment.Features"     "Interior.Features"         
## [17] "Exterior.Features"          "Price...."                 
## [19] "Customer.Ratings"           "Sales.Figures..Units.Sold."
previo=head(df)
library(ggplot2)

colnames(df)=c("Marca","Modelo","Año","Forma","Colores Disponibles",
"Tipo de Combustible","Tamaño del Motor","Caballos de Fuerza","Torque",
"Tipo de Transmisión","Aceleración","Velocidad Máxima",
"Kilometraje","Seguridad","Entretenimiento","Interior","Exterior",
"Precio","Calificación del cliente","Unidades Vendidas")

colnames(df)
##  [1] "Marca"                    "Modelo"                  
##  [3] "Año"                      "Forma"                   
##  [5] "Colores Disponibles"      "Tipo de Combustible"     
##  [7] "Tamaño del Motor"         "Caballos de Fuerza"      
##  [9] "Torque"                   "Tipo de Transmisión"     
## [11] "Aceleración"              "Velocidad Máxima"        
## [13] "Kilometraje"              "Seguridad"               
## [15] "Entretenimiento"          "Interior"                
## [17] "Exterior"                 "Precio"                  
## [19] "Calificación del cliente" "Unidades Vendidas"
table(df$Forma)
## 
##  Convertible     Coupe         Hatchback     Minivan       Sedan      
##             7            11            18             3            27 
##       SUV         SUV          SUV           Truck         Wagon      
##             1             1            82            12             2
marcas_autos_top11r <- c(Infiniti=1, Acura=2, Tesla=2, Mercedes=3, Volkswagen=3, 
                         Volvo=3, Jeep=4, BMW=4, Mazda=5, Lexus=5, Audi=5)

table(marcas_autos_top11r)
## marcas_autos_top11r
## 1 2 3 4 5 
## 1 2 3 2 3
dim(df)
## [1] 164  20
df %>% tabyl(Forma)
##          Forma  n     percent
##   Convertible   7 0.042682927
##     Coupe      11 0.067073171
##     Hatchback  18 0.109756098
##     Minivan     3 0.018292683
##        SUV      1 0.006097561
##      SUV        1 0.006097561
##     SUV        82 0.500000000
##     Sedan      27 0.164634146
##     Truck      12 0.073170732
##     Wagon       2 0.012195122
df %>% tabyl(Forma) %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size=14) %>%
  autofit()

Forma

n

percent

Convertible

7

4.3%

Coupe

11

6.7%

Hatchback

18

11.0%

Minivan

3

1.8%

SUV

1

0.6%

SUV

1

0.6%

SUV

82

50.0%

Sedan

27

16.5%

Truck

12

7.3%

Wagon

2

1.2%

Esta es una tabla mejor:

df %>% tabyl(Forma) %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size=14) %>%
  autofit() %>%
  theme_box()

Forma

n

percent

Convertible

7

4.3%

Coupe

11

6.7%

Hatchback

18

11.0%

Minivan

3

1.8%

SUV

1

0.6%

SUV

1

0.6%

SUV

82

50.0%

Sedan

27

16.5%

Truck

12

7.3%

Wagon

2

1.2%

Esta le agrega el total al final:

df %>% tabyl(Forma) %>%
  adorn_totals("row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size=14) %>%
  autofit() %>%
  theme_box()

Forma

n

percent

Convertible

7

4.3%

Coupe

11

6.7%

Hatchback

18

11.0%

Minivan

3

1.8%

SUV

1

0.6%

SUV

1

0.6%

SUV

82

50.0%

Sedan

27

16.5%

Truck

12

7.3%

Wagon

2

1.2%

Total

164

100.0%

df %>% tabyl(Marca) %>%
  adorn_totals("row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size=14) %>%
  autofit() %>%
  theme_box()

Marca

n

percent

Acura

2

1.2%

Audi

5

3.0%

BMW

4

2.4%

Chevrolet

24

14.6%

Ford

22

13.4%

GMC

8

4.9%

Genesis

6

3.7%

Honda

11

6.7%

Hyundai

10

6.1%

Infiniti

1

0.6%

Jeep

4

2.4%

Kia

11

6.7%

Lexus

5

3.0%

Mazda

5

3.0%

Mercedes

3

1.8%

Nissan

7

4.3%

Subaru

9

5.5%

Tesla

2

1.2%

Toyota

19

11.6%

Volkswagen

1

0.6%

Volkswagen

2

1.2%

Volvo

3

1.8%

Total

164

100.0%

df %>% tabyl(Modelo) %>%
  adorn_totals("row") %>%
  adorn_pct_formatting() %>%
  flextable() %>%
  fontsize(size=14) %>%
  autofit() %>%
  theme_box()

Modelo

n

percent

4Runner

1

0.6%

5 Series

2

1.2%

A4

1

0.6%

A5

1

0.6%

Acadia

6

3.7%

Ascent

1

0.6%

Atlas

1

0.6%

Bolt EUV

7

4.3%

Bronco

1

0.6%

C-Class

1

0.6%

CR-V

1

0.6%

CX-5

2

1.2%

Camaro

7

4.3%

Camry

7

4.3%

Carnival

1

0.6%

Civic

2

1.2%

Compass

1

0.6%

Corvette

7

4.3%

Crosstrek

1

0.6%

ES

1

0.6%

Edge

1

0.6%

Equinox

1

0.6%

Escape

2

1.2%

Expedition

1

0.6%

Explorer

7

4.3%

F-150

1

0.6%

Fit

6

3.7%

Forester

2

1.2%

Frontier

1

0.6%

GLC-Class

1

0.6%

GV80

6

3.7%

Grand Cherokee

1

0.6%

HR-V

1

0.6%

Highlander

1

0.6%

IS

1

0.6%

Jetta

1

0.6%

Legacy

2

1.2%

MDX

1

0.6%

MX-5 Miata

1

0.6%

Maverick

6

3.7%

Mazda6

2

1.2%

Model 3

2

1.2%

Mustang

2

1.2%

NX

1

0.6%

Odyssey

1

0.6%

Outback

3

1.8%

Pathfinder

2

1.2%

Prius

6

3.7%

Q5

2

1.2%

Q7

1

0.6%

QX50

1

0.6%

RAV4

1

0.6%

RDX

1

0.6%

RX

2

1.2%

Ranger

1

0.6%

Rogue

1

0.6%

Rogue Sport

1

0.6%

S-Class

1

0.6%

Santa Fe

1

0.6%

Sentra

1

0.6%

Sienna

1

0.6%

Sierra

1

0.6%

Sonata

2

1.2%

Sorento

1

0.6%

Soul

6

3.7%

Sportage

2

1.2%

Tacoma

1

0.6%

Tahoe

1

0.6%

Taos

1

0.6%

Telluride

1

0.6%

Trax

1

0.6%

Tucson

1

0.6%

Tundra

1

0.6%

Venue

6

3.7%

Versa

1

0.6%

Wrangler

2

1.2%

X5

2

1.2%

XC40

1

0.6%

XC60

2

1.2%

Yukon

1

0.6%

Total

164

100.0%

df %>% tabyl(Modelo) %>%
  ggplot(aes(x=Modelo,y=n,fill=Modelo)) +
  geom_col()

Esta es la mejor tabla:

df %>% tabyl(Forma)
##          Forma  n     percent
##   Convertible   7 0.042682927
##     Coupe      11 0.067073171
##     Hatchback  18 0.109756098
##     Minivan     3 0.018292683
##        SUV      1 0.006097561
##      SUV        1 0.006097561
##     SUV        82 0.500000000
##     Sedan      27 0.164634146
##     Truck      12 0.073170732
##     Wagon       2 0.012195122
df %>% tabyl(Forma) %>%
  ggplot(aes(x=Forma,y=n,fill=Forma)) +
  geom_col()

df %>% tabyl(Marca)
##         Marca  n     percent
##     Acura      2 0.012195122
##     Audi       5 0.030487805
##     BMW        4 0.024390244
##     Chevrolet 24 0.146341463
##     Ford      22 0.134146341
##     GMC        8 0.048780488
##     Genesis    6 0.036585366
##     Honda     11 0.067073171
##     Hyundai   10 0.060975610
##     Infiniti   1 0.006097561
##     Jeep       4 0.024390244
##     Kia       11 0.067073171
##     Lexus      5 0.030487805
##     Mazda      5 0.030487805
##     Mercedes   3 0.018292683
##     Nissan     7 0.042682927
##     Subaru     9 0.054878049
##     Tesla      2 0.012195122
##     Toyota    19 0.115853659
##    Volkswagen  1 0.006097561
##   Volkswagen   2 0.012195122
##     Volvo      3 0.018292683
df %>% tabyl(Marca) %>%
  ggplot(aes(x=Marca,y=n,fill=Marca)) +
  geom_col()

Esta es la mejor opción hasta el momento:

df %>% tabyl(Forma) %>%
  ggplot(aes(x=Forma,y=n,fill=Forma)) +
  geom_col() +
  labs(x="Forma",y="Frecuencias",title="Formas de los autos")

Otra prueba pero ahora con las frecuencias (números) visibles:

df %>% tabyl(Forma) %>%
  ggplot(aes(x=Forma,y=n,fill=Forma)) +
  geom_col() +
  labs(x="Forma",y="Frecuencias",title="Formas de los autos") +
  geom_text(aes(label=n),vjust=1.5,col="white",fontface="bold")

En vez de números que salgan porcentajes:

df %>% tabyl(Forma) %>%
  ggplot(aes(x=Forma,y=n,fill=Forma)) +
  geom_col() +
  labs(x="Forma",y="Frecuencias",title="Formas de los autos") +
  geom_text(aes(label= sprintf("%.2f%%",100*percent)),vjust=1.5,col="white",fontface="bold")

Más ejemplos:

Números al azar de la campana de Gauss, los más probables son los cercanos a 0.

Se puede usar el histograma cuando es dificil hacer una tabla de frecuencias, donde es dificil que los números se repitan.

n=100000

numeros=rnorm(n=n,mean=20,sd=1)

df1=data.frame(numeros)

df1 %>%
  ggplot(aes(x=numeros)) +
  geom_histogram(color="blue",fill="lightblue") +
  labs(x="Números",y="Frecuencia",title="Campana de Gauss Experimental")