Entregable 2.1

Fuente externa de base de datos
Importar base de datos de fuente externa

# Load the external CSV into `bd`. To locate the file interactively instead,
# run file.choose() and paste the returned path below.
bd <- read.csv(file = "/Users/dannaperez/Desktop/Tabla3.csv")
# Per-column overview of the imported table.
summary(object = bd)
##   Geography           Category          Data.Type              Unit  
##  Length:54          Length:54          Length:54          Min.   :0  
##  Class :character   Class :character   Class :character   1st Qu.:0  
##  Mode  :character   Mode  :character   Mode  :character   Median :0  
##                                                           Mean   :0  
##                                                           3rd Qu.:0  
##                                                           Max.   :0  
##                                                                      
##      X2017              X2018             X2019             X2020        
##  Min.   :    1.00   Min.   :    1.0   Min.   :    1.0   Min.   :    1.0  
##  1st Qu.:   85.75   1st Qu.:  120.2   1st Qu.:  109.8   1st Qu.:   78.0  
##  Median :  314.00   Median :  344.0   Median :  363.5   Median :  285.0  
##  Mean   : 1423.31   Mean   : 1442.6   Mean   : 1345.7   Mean   : 1109.9  
##  3rd Qu.: 1316.75   3rd Qu.: 1387.2   3rd Qu.: 1252.2   3rd Qu.:  852.8  
##  Max.   :24807.00   Max.   :23529.0   Max.   :21360.0   Max.   :19994.0  
##  NA's   :2          NA's   :4         NA's   :4         NA's   :4        
##      X2021        
##  Min.   :    1.0  
##  1st Qu.:  105.5  
##  Median :  271.5  
##  Mean   : 1143.6  
##  3rd Qu.:  868.0  
##  Max.   :21408.0  
##  NA's   :4

1.1 Número de variables y observaciones

Cuenta con 9 variables y 54 registros.

# Compact structure listing: dimensions plus each column's type and first values.
utils::str(object = bd)
## 'data.frame':    54 obs. of  9 variables:
##  $ Geography: chr  "China" "Japan" "India" "Germany" ...
##  $ Category : chr  "Passenger Car Production" "Passenger Car Production" "Passenger Car Production" "Passenger Car Production" ...
##  $ Data.Type: chr  "Unit Volume" "Unit Volume" "Unit Volume" "Unit Volume" ...
##  $ Unit     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X2017    : num  24807 8348 3961 5646 3735 ...
##  $ X2018    : num  23529 8359 4032 5120 3662 ...
##  $ X2019    : num  21360 8329 3623 4661 3613 ...
##  $ X2020    : num  19994 6960 2850 3510 3175 ...
##  $ X2021    : num  21408 6619 3628 3162 3129 ...

1.2 Clasificación de variables

# Hand-written classification table: one row per column of `bd`.
Variable <- c(
  "Geography", "Category", "Data.Type", "Unit",
  "X2017", "X2018", "X2019", "X2020", "X2021"
)
# The first three columns are qualitative, the remaining six quantitative.
Type <- rep(c("Cualitativa", "Cuantitativa discreta"), times = c(3, 6))
# Note: "NA" here is the literal string, not a missing value.
# NOTE(review): the X20xx columns hold production volumes (see the str()
# output), so a ratio scale may fit better than "años (intervalo)" - confirm.
escala <- rep(
  c("NA", "Miles (Razón)", "años (intervalo)"),
  times = c(3, 1, 5)
)
Clasificacion <- data.frame(Variable, Type, escala)
Clasificacion
##    Variable                  Type           escala
## 1 Geography           Cualitativa               NA
## 2  Category           Cualitativa               NA
## 3 Data.Type           Cualitativa               NA
## 4      Unit Cuantitativa discreta    Miles (Razón)
## 5     X2017 Cuantitativa discreta años (intervalo)
## 6     X2018 Cuantitativa discreta años (intervalo)
## 7     X2019 Cuantitativa discreta años (intervalo)
## 8     X2020 Cuantitativa discreta años (intervalo)
## 9     X2021 Cuantitativa discreta años (intervalo)

1.3 Limpieza de datos

Limpieza 1: Omitir los NA, ya que no había datos registrados

# Total number of missing cells in the raw table.
sum(is.na(bd))
## [1] 18
# Drop every row that has at least one missing value; `bd` stays untouched.
bd1 <- na.omit(bd)
# Check that no missing cells remain after the cleanup.
sum(is.na(bd1))
## [1] 0

Los NA son información no capturada, por lo que son irrelevantes y pueden ser eliminados.

Calcular el porcentaje de crecimiento entre los años 2017 y 2021

#install.packages("lubridate")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Relative change between the 2017 and 2021 columns for each row.
# The value is stored as a proportion; it is not multiplied by 100.
bd2 <- bd1
bd2[["crecimiento"]] <- with(bd2, (X2021 - X2017) / X2017)
bd2[["crecimiento"]]
##  [1] -0.137017777 -0.207115477 -0.084069679 -0.439957492 -0.162248996
##  [6] -0.259965338 -0.258838935 -0.479723046  0.002965159 -0.173813170
## [11] -0.347482014 -0.103869654 -0.156976744 -0.508551881 -0.485338121
## [16] -0.628736235 -0.433070866 -0.272616137 -0.337819650 -0.034632035
## [21]  0.043062201  0.156593407 -0.563249001  0.170124481 -0.467961165
## [26] -0.161993769 -0.143322476 -0.276276276  0.685714286  0.603174603
## [31] -0.039024390 -0.143478261 -0.106481481 -0.098039216 -0.263157895
## [36]  0.415730337  0.009174312  0.333333333  4.058823529 -0.283783784
## [41] -0.514285714  0.000000000  6.500000000 -0.620253165  1.500000000
## [46] -0.639344262  0.000000000 -0.222222222  2.000000000  0.000000000

Analizar los valores de crecimiento de mayor a menor

# Growth values alone, largest first. Only the vector is sorted here;
# the rows of `bd3` keep their current order.
bd3 <- bd2
orden_descendente <- sort(bd3[["crecimiento"]], decreasing = TRUE)
orden_descendente
##  [1]  6.500000000  4.058823529  2.000000000  1.500000000  0.685714286
##  [6]  0.603174603  0.415730337  0.333333333  0.170124481  0.156593407
## [11]  0.043062201  0.009174312  0.002965159  0.000000000  0.000000000
## [16]  0.000000000 -0.034632035 -0.039024390 -0.084069679 -0.098039216
## [21] -0.103869654 -0.106481481 -0.137017777 -0.143322476 -0.143478261
## [26] -0.156976744 -0.161993769 -0.162248996 -0.173813170 -0.207115477
## [31] -0.222222222 -0.258838935 -0.259965338 -0.263157895 -0.272616137
## [36] -0.276276276 -0.283783784 -0.337819650 -0.347482014 -0.433070866
## [41] -0.439957492 -0.467961165 -0.479723046 -0.485338121 -0.508551881
## [46] -0.514285714 -0.563249001 -0.620253165 -0.628736235 -0.639344262

Ordenar los valores de crecimiento de mayor a menor

# Reorder the rows by growth, largest first. order() sorts ascending, so the
# column is negated to obtain a descending arrangement.
bd4 <- bd3[order(-bd3[["crecimiento"]]), ]
# Preview the first rows of the reordered table.
head(bd4)
##     Geography                 Category   Data.Type Unit X2017 X2018 X2019 X2020
## 43    Belarus Passenger Car Production Unit Volume    0     4    11    20    21
## 39 Kazakhstan Passenger Car Production Unit Volume    0    17    30    44    65
## 49 Azerbaijan Passenger Car Production Unit Volume    0     1     1     2     2
## 45      Egypt Passenger Car Production Unit Volume    0    10    19    19    24
## 29 Uzbekistan Passenger Car Production Unit Volume    0   140   221   271   280
## 30   Portugal Passenger Car Production Unit Volume    0   126   234   282   188
##    X2021 crecimiento
## 43    30   6.5000000
## 39    86   4.0588235
## 49     3   2.0000000
## 45    25   1.5000000
## 29   236   0.6857143
## 30   202   0.6031746

Limpieza 2. Eliminar renglones y mantener los top 10 en el porcentaje de crecimiento

# Keep the ten rows with the highest growth (`bd4` is already sorted in
# descending order). head() is used instead of dropping rows 11:54 because that
# range hard-coded the row count of the raw table: `bd4` has fewer rows after
# the NA cleanup, and any row beyond 54 would have been kept by mistake.
# head() also copes with tables shorter than ten rows and keeps row names.
bd5 <- head(bd4, 10)
bd5
##      Geography                 Category   Data.Type Unit X2017 X2018 X2019
## 43     Belarus Passenger Car Production Unit Volume    0     4    11    20
## 39  Kazakhstan Passenger Car Production Unit Volume    0    17    30    44
## 49  Azerbaijan Passenger Car Production Unit Volume    0     1     1     2
## 45       Egypt Passenger Car Production Unit Volume    0    10    19    19
## 29  Uzbekistan Passenger Car Production Unit Volume    0   140   221   271
## 30    Portugal Passenger Car Production Unit Volume    0   126   234   282
## 36 Netherlands Passenger Car Production Unit Volume    0    89   185   179
## 38     Austria Passenger Car Production Unit Volume    0    78   145   158
## 24     Vietnam Passenger Car Production Unit Volume    0   241   267   287
## 22     Romania Passenger Car Production Unit Volume    0   364   477   490
##    X2020 X2021 crecimiento
## 43    21    30   6.5000000
## 39    65    86   4.0588235
## 49     2     3   2.0000000
## 45    24    25   1.5000000
## 29   280   236   0.6857143
## 30   188   202   0.6031746
## 36   127   126   0.4157303
## 38   105   104   0.3333333
## 24   272   282   0.1701245
## 22   438   421   0.1565934

Solo se mantienen los top 10 países con mayor porcentaje de crecimiento.

Breve Reflexión

Observar y analizar una base de datos antes de manipularla es importante, ya que de esta manera podemos conocer el contexto de la empresa y darnos una idea de cómo podemos hacer uso de estos datos.

Usar una base de datos de una fuente externa a la empresa es de gran importancia, ya que se da una visualización de cómo está la industria y así poder visualizar a la empresa dentro de un contexto internacional.

LS0tCnRpdGxlOiA8c3BhbiBzdHlsZT0iQ29sb3I6T3JhbmdlIj4gIkVudHJlZ2FibGUyIgphdXRob3I6ICJEYW5hIFBlcmV6IC0gQTAwMjI3MDQxIgpkYXRlOiAiOS8yMi8yMDIyIgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6IAogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgo8aW1nIHNyYz0iL1VzZXJzL2Rhbm5hcGVyZXovRGVza3RvcC9pbWFnZW5lcyBwYXJhIEhUTUwvQ2FwdHVyYSBkZSBQYW50YWxsYSAyMDIyLTA5LTIyIGEgbGEocykgMjIuMTMuMTEucG5nIj4KCiMgPHNwYW4gc3R5bGU9IkNvbG9yOkJsdWUiPiBFbnRyZWdhYmxlIDIuMQoqKkZ1ZW50ZSBleHRlcm5hIGRlIGJhc2UgZGUgZGF0b3MqKiAgIApJbXBvcnRhciBiYXNlIGRlIGRhdG9zIGRlIGZ1ZW50ZSBleHRlcm5hCmBgYHtyfQojZmlsZS5jaG9vc2UoKQpiZCA8LSByZWFkLmNzdigiL1VzZXJzL2Rhbm5hcGVyZXovRGVza3RvcC9UYWJsYTMuY3N2IikKc3VtbWFyeShiZCkKYGBgCgojIyAxLjEgTsO6bWVybyBkZSB2YXJpYWJsZXMgeSBvYnNlcnZhY2lvbmVzIApDdWVudGEgY29uIDkgdmFyaWFibGVzIHkgNTQgcmVnaXN0cm9zLiAKYGBge3J9CnN0cihiZCkKYGBgCgojIyAxLjIgQ2xhc2lmaWNhY2nDs24gZGUgdmFyaWFibGVzCmBgYHtyfQpWYXJpYWJsZSA8LSBjKCJHZW9ncmFwaHkiLCAiQ2F0ZWdvcnkiLCAiRGF0YS5UeXBlIiwgIlVuaXQiLCAiWDIwMTciLCAiWDIwMTgiLCAiWDIwMTkiLCAiWDIwMjAiLCAiWDIwMjEiKQpUeXBlIDwtIGMoIkN1YWxpdGF0aXZhIiwgIkN1YWxpdGF0aXZhIiwgIkN1YWxpdGF0aXZhIiwgIkN1YW50aXRhdGl2YSBkaXNjcmV0YSIsICJDdWFudGl0YXRpdmEgZGlzY3JldGEiLCAiQ3VhbnRpdGF0aXZhIGRpc2NyZXRhIiwgIkN1YW50aXRhdGl2YSBkaXNjcmV0YSIsICJDdWFudGl0YXRpdmEgZGlzY3JldGEiLCAiQ3VhbnRpdGF0aXZhIGRpc2NyZXRhIikKZXNjYWxhIDwtIGMoIk5BIiwgIk5BIiwgIk5BIiwgIk1pbGVzIChSYXrDs24pIiwgImHDsW9zIChpbnRlcnZhbG8pIiwgImHDsW9zIChpbnRlcnZhbG8pIiwgImHDsW9zIChpbnRlcnZhbG8pIiwgImHDsW9zIChpbnRlcnZhbG8pIiwgImHDsW9zIChpbnRlcnZhbG8pIikKQ2xhc2lmaWNhY2lvbiA8LSBkYXRhLmZyYW1lIChWYXJpYWJsZSwgVHlwZSwgZXNjYWxhKQpDbGFzaWZpY2FjaW9uCmBgYAoKIyMgMS4zIExpbXBpZXphIGRlIGRhdG9zCiMjIyBMaW1waWV6YSAxOiBPbWl0aXIgTkHCtHMgeWEgcXVlIG5vIGhhYsOtYSBkYXRvcyByZWdpc3RyYWRvcwpgYGB7cn0Kc3VtKGlzLm5hKGJkKSkKYmQxIDwtIGJkCmJkMSA8LSBuYS5vbWl0KGJkMSkKc3VtKGlzLm5hKGJkMSkpCmBgYAoKTG9zIE5BwrRzIGVzIGluZm9ybWFjacOzbiAgbm8gY2FwdHVyYWRhIHBvciBsbyBxdWUgZXMgaXJyZWxldmFudGUgeSBwdWVkZSBzZXIgZWxpbWluYWRhCgpTYWNhciBlbCBwb3JjZW50YWdlIGRlIGNyZWNpbWllbnRvIGVudHJlIGxvcyBh
w7FvcyAyMDE3LTIwMjEKYGBge3J9CiNpbnN0YWxsLnBhY2thZ2VzKCJsdWJyaWRhdGUiKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKYmQyIDwtIGJkMQpiZDIkY3JlY2ltaWVudG8gPC0gKChiZDIkWDIwMjEtYmQyJFgyMDE3KS9iZDIkWDIwMTcpCmJkMiRjcmVjaW1pZW50bwpgYGAKCkFuYWxpemFyIGxvcyB2YWxvcmVzIGRlIGNyZWNpbWllbnRvIGRlIG1heW9yIGEgbWVub3IKYGBge3J9CmJkMyA8LSBiZDIKb3JkZW5fZGVzY2VuZGVudGUgPC0gc29ydChiZDMkY3JlY2ltaWVudG8sZGVjcmVhc2luZz1UUlVFKQpvcmRlbl9kZXNjZW5kZW50ZQpgYGAKCk9yZGVuYXIgbG9zIHZhbG9yZXMgZGUgY3JlY2ltaWVudG8gZGUgbWF5b3IgYSBtZW5vciAKYGBge3J9CmJkNCA8LSBiZDMKYmQ0IDwtIGJkNFtvcmRlcigtYmQzJGNyZWNpbWllbnRvKSxdCmhlYWQoYmQ0KSAgCmBgYAoKIyMjIExpbXBpZXphIDIuIEVsaW1pbmFyIHJlbmdsb25lcyB5IG1hbnRlbmVyIGxvcyB0b3AgMTAgZW4gZWwgcG9yY2VudGFqZSBkZSBjcmVjaW1pZW50bwpgYGB7cn0KYmQ1IDwtIGJkNApiZDUgPC0gYmQ0Wy0oMTE6NTQpLF0KYmQ1CmBgYApOb23DoXMgc2UgbWFudGllbmVuIGxvcyB0b3AgMTAgcGHDrXNlcyBjb24gbWF5b3IgcG9yY2VudGFqZSBkZSBjcmVjaW1pZW50by4KCiMjIEJyZXZlIFJlZmxleGnDs24KT2JzZXJ2YXIgeSBhbmFsaXphciB1bmEgYmFzZSBkZSBkYXRvcyBhbnRlcyBkZSBtYW5pcHVsYXJsYSBlcyBpbXBvcnRhbnRlLCB5YSBxdWUgZGUgZXN0YSBtYW5lcmEgcG9kZW1vcyBjb25vY2VyIGVsIGNvbnRleHRvIGRlIGxhIGVtcHJlc2EgeSBkYXJub3MgdW5hIGlkZWEgZGUgY29tbyBwb2RlbW9zIGhhY2VyIHVzbyBkZSBlc3RvcyBkYXRvcy4gCgpVc2FyIHVuYSBiYXNlIGRlIGRhdG9zIGRlIHVuYSBmdWVudGUgZXh0ZXJuYSBhIGxhIGVtcHJlc2EgZXMgZGUgZ3JhbiBpbXBvcnRhbmNpYSB5YSBxdWUgc2UgZGEgdW5hIHZpc3VhbGl6YWNpw7NuIGRlIGNvbW8gZXN0w6EgbGEgaW5kdXN0cmlhIHkgYXPDrSBwb2RlciB2aXN1YWxpemFyIGEgbGEgZW1wcmVzYSBkZW50cm8gZGUgdW4gY29udGV4dG8gaW50ZXJuYWNpb25hbC4gCgoKCgoKCgoKCgoKCg==