Cargar las librerías (los mensajes de carga se omiten para limpiar la salida)

library(dplyr)
library(readr)
library(fdth)
library(knitr)

Cargar los datos de starwars desde la URL https, leyéndolos con codificación UTF-8

# Fix the random-number seed for reproducibility.
# NOTE(review): no statement visible in this document draws random numbers,
# so the seed may be unnecessary here -- confirm before removing.
set.seed(2020)

# Download the Star Wars characters CSV straight from GitHub into a data frame.
# The first row holds the column names; `encoding = "UTF-8"` makes accented
# names such as "Padmé Amidala" display correctly.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/Curso-Titulacion-Data-Science-/master/2020/datos/starwars.csv",
  header = TRUE,
  encoding = "UTF-8"
)

# Auto-print the full data set (87 rows, 11 columns).
datos 
##     X                  name height   mass    hair_color          skin_color
## 1   1        Luke Skywalker    172   77.0         blond                fair
## 2   2                 C-3PO    167   75.0          <NA>                gold
## 3   3                 R2-D2     96   32.0          <NA>         white, blue
## 4   4           Darth Vader    202  136.0          none               white
## 5   5           Leia Organa    150   49.0         brown               light
## 6   6             Owen Lars    178  120.0   brown, grey               light
## 7   7    Beru Whitesun lars    165   75.0         brown               light
## 8   8                 R5-D4     97   32.0          <NA>          white, red
## 9   9     Biggs Darklighter    183   84.0         black               light
## 10 10        Obi-Wan Kenobi    182   77.0 auburn, white                fair
## 11 11      Anakin Skywalker    188   84.0         blond                fair
## 12 12        Wilhuff Tarkin    180     NA  auburn, grey                fair
## 13 13             Chewbacca    228  112.0         brown             unknown
## 14 14              Han Solo    180   80.0         brown                fair
## 15 15                Greedo    173   74.0          <NA>               green
## 16 16 Jabba Desilijic Tiure    175 1358.0          <NA>    green-tan, brown
## 17 17        Wedge Antilles    170   77.0         brown                fair
## 18 18      Jek Tono Porkins    180  110.0         brown                fair
## 19 19                  Yoda     66   17.0         white               green
## 20 20             Palpatine    170   75.0          grey                pale
## 21 21             Boba Fett    183   78.2         black                fair
## 22 22                 IG-88    200  140.0          none               metal
## 23 23                 Bossk    190  113.0          none               green
## 24 24      Lando Calrissian    177   79.0         black                dark
## 25 25                 Lobot    175   79.0          none               light
## 26 26                Ackbar    180   83.0          none        brown mottle
## 27 27            Mon Mothma    150     NA        auburn                fair
## 28 28          Arvel Crynyd     NA     NA         brown                fair
## 29 29 Wicket Systri Warrick     88   20.0         brown               brown
## 30 30             Nien Nunb    160   68.0          none                grey
## 31 31          Qui-Gon Jinn    193   89.0         brown                fair
## 32 32           Nute Gunray    191   90.0          none       mottled green
## 33 33         Finis Valorum    170     NA         blond                fair
## 34 34         Jar Jar Binks    196   66.0          none              orange
## 35 35          Roos Tarpals    224   82.0          none                grey
## 36 36            Rugor Nass    206     NA          none               green
## 37 37              Ric Olié    183     NA         brown                fair
## 38 38                 Watto    137     NA         black          blue, grey
## 39 39               Sebulba    112   40.0          none           grey, red
## 40 40         Quarsh Panaka    183     NA         black                dark
## 41 41        Shmi Skywalker    163     NA         black                fair
## 42 42            Darth Maul    175   80.0          none                 red
## 43 43           Bib Fortuna    180     NA          none                pale
## 44 44           Ayla Secura    178   55.0          none                blue
## 45 45              Dud Bolt     94   45.0          none          blue, grey
## 46 46               Gasgano    122     NA          none         white, blue
## 47 47        Ben Quadinaros    163   65.0          none grey, green, yellow
## 48 48            Mace Windu    188   84.0          none                dark
## 49 49          Ki-Adi-Mundi    198   82.0         white                pale
## 50 50             Kit Fisto    196   87.0          none               green
## 51 51             Eeth Koth    171     NA         black               brown
## 52 52            Adi Gallia    184   50.0          none                dark
## 53 53           Saesee Tiin    188     NA          none                pale
## 54 54           Yarael Poof    264     NA          none               white
## 55 55              Plo Koon    188   80.0          none              orange
## 56 56            Mas Amedda    196     NA          none                blue
## 57 57          Gregar Typho    185   85.0         black                dark
## 58 58                 Cordé    157     NA         brown               light
## 59 59           Cliegg Lars    183     NA         brown                fair
## 60 60     Poggle the Lesser    183   80.0          none               green
## 61 61       Luminara Unduli    170   56.2         black              yellow
## 62 62         Barriss Offee    166   50.0         black              yellow
## 63 63                 Dormé    165     NA         brown               light
## 64 64                 Dooku    193   80.0         white                fair
## 65 65   Bail Prestor Organa    191     NA         black                 tan
## 66 66            Jango Fett    183   79.0         black                 tan
## 67 67            Zam Wesell    168   55.0        blonde fair, green, yellow
## 68 68       Dexter Jettster    198  102.0          none               brown
## 69 69               Lama Su    229   88.0          none                grey
## 70 70               Taun We    213     NA          none                grey
## 71 71            Jocasta Nu    167     NA         white                fair
## 72 72         Ratts Tyerell     79   15.0          none          grey, blue
## 73 73                R4-P17     96     NA          none         silver, red
## 74 74            Wat Tambor    193   48.0          none         green, grey
## 75 75              San Hill    191     NA          none                grey
## 76 76              Shaak Ti    178   57.0          none    red, blue, white
## 77 77              Grievous    216  159.0          none        brown, white
## 78 78               Tarfful    234  136.0         brown               brown
## 79 79       Raymus Antilles    188   79.0         brown               light
## 80 80             Sly Moore    178   48.0          none                pale
## 81 81            Tion Medon    206   80.0          none                grey
## 82 82                  Finn     NA     NA         black                dark
## 83 83                   Rey     NA     NA         brown               light
## 84 84           Poe Dameron     NA     NA         brown               light
## 85 85                   BB8     NA     NA          none                none
## 86 86        Captain Phasma     NA     NA       unknown             unknown
## 87 87         Padmé Amidala    165   45.0         brown               light
##        eye_color birth_year        gender      homeworld        species
## 1           blue       19.0          male       Tatooine          Human
## 2         yellow      112.0          <NA>       Tatooine          Droid
## 3            red       33.0          <NA>          Naboo          Droid
## 4         yellow       41.9          male       Tatooine          Human
## 5          brown       19.0        female       Alderaan          Human
## 6           blue       52.0          male       Tatooine          Human
## 7           blue       47.0        female       Tatooine          Human
## 8            red         NA          <NA>       Tatooine          Droid
## 9          brown       24.0          male       Tatooine          Human
## 10     blue-gray       57.0          male        Stewjon          Human
## 11          blue       41.9          male       Tatooine          Human
## 12          blue       64.0          male         Eriadu          Human
## 13          blue      200.0          male       Kashyyyk        Wookiee
## 14         brown       29.0          male       Corellia          Human
## 15         black       44.0          male          Rodia         Rodian
## 16        orange      600.0 hermaphrodite      Nal Hutta           Hutt
## 17         hazel       21.0          male       Corellia          Human
## 18          blue         NA          male     Bestine IV          Human
## 19         brown      896.0          male           <NA> Yoda's species
## 20        yellow       82.0          male          Naboo          Human
## 21         brown       31.5          male         Kamino          Human
## 22           red       15.0          none           <NA>          Droid
## 23           red       53.0          male      Trandosha     Trandoshan
## 24         brown       31.0          male        Socorro          Human
## 25          blue       37.0          male         Bespin          Human
## 26        orange       41.0          male       Mon Cala   Mon Calamari
## 27          blue       48.0        female      Chandrila          Human
## 28         brown         NA          male           <NA>          Human
## 29         brown        8.0          male          Endor           Ewok
## 30         black         NA          male        Sullust      Sullustan
## 31          blue       92.0          male           <NA>          Human
## 32           red         NA          male Cato Neimoidia      Neimodian
## 33          blue       91.0          male      Coruscant          Human
## 34        orange       52.0          male          Naboo         Gungan
## 35        orange         NA          male          Naboo         Gungan
## 36        orange         NA          male          Naboo         Gungan
## 37          blue         NA          male          Naboo           <NA>
## 38        yellow         NA          male       Toydaria      Toydarian
## 39        orange         NA          male      Malastare            Dug
## 40         brown       62.0          male          Naboo           <NA>
## 41         brown       72.0        female       Tatooine          Human
## 42        yellow       54.0          male       Dathomir         Zabrak
## 43          pink         NA          male         Ryloth        Twi'lek
## 44         hazel       48.0        female         Ryloth        Twi'lek
## 45        yellow         NA          male        Vulpter     Vulptereen
## 46         black         NA          male        Troiken          Xexto
## 47        orange         NA          male           Tund          Toong
## 48         brown       72.0          male     Haruun Kal          Human
## 49        yellow       92.0          male          Cerea         Cerean
## 50         black         NA          male    Glee Anselm       Nautolan
## 51         brown         NA          male       Iridonia         Zabrak
## 52          blue         NA        female      Coruscant     Tholothian
## 53        orange         NA          male        Iktotch       Iktotchi
## 54        yellow         NA          male        Quermia       Quermian
## 55         black       22.0          male          Dorin        Kel Dor
## 56          blue         NA          male       Champala       Chagrian
## 57         brown         NA          male          Naboo          Human
## 58         brown         NA        female          Naboo          Human
## 59          blue       82.0          male       Tatooine          Human
## 60        yellow         NA          male       Geonosis      Geonosian
## 61          blue       58.0        female         Mirial       Mirialan
## 62          blue       40.0        female         Mirial       Mirialan
## 63         brown         NA        female          Naboo          Human
## 64         brown      102.0          male        Serenno          Human
## 65         brown       67.0          male       Alderaan          Human
## 66         brown       66.0          male   Concord Dawn          Human
## 67        yellow         NA        female          Zolan       Clawdite
## 68        yellow         NA          male           Ojom       Besalisk
## 69         black         NA          male         Kamino       Kaminoan
## 70         black         NA        female         Kamino       Kaminoan
## 71          blue         NA        female      Coruscant          Human
## 72       unknown         NA          male    Aleen Minor         Aleena
## 73     red, blue         NA        female           <NA>           <NA>
## 74       unknown         NA          male          Skako        Skakoan
## 75          gold         NA          male     Muunilinst           Muun
## 76         black         NA        female          Shili        Togruta
## 77 green, yellow         NA          male          Kalee        Kaleesh
## 78          blue         NA          male       Kashyyyk        Wookiee
## 79         brown         NA          male       Alderaan          Human
## 80         white         NA        female         Umbara           <NA>
## 81         black         NA          male         Utapau         Pau'an
## 82          dark         NA          male           <NA>          Human
## 83         hazel         NA        female           <NA>          Human
## 84         brown         NA          male           <NA>          Human
## 85         black         NA          none           <NA>          Droid
## 86       unknown         NA        female           <NA>           <NA>
## 87         brown       46.0        female          Naboo          Human
# Show the structure of the data frame: the type of each of the 11 columns
# and their first few values.
str(object = datos)
## 'data.frame':    87 obs. of  11 variables:
##  $ X         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ name      : chr  "Luke Skywalker" "C-3PO" "R2-D2" "Darth Vader" ...
##  $ height    : int  172 167 96 202 150 178 165 97 183 182 ...
##  $ mass      : num  77 75 32 136 49 120 75 32 84 77 ...
##  $ hair_color: chr  "blond" NA NA "none" ...
##  $ skin_color: chr  "fair" "gold" "white, blue" "white" ...
##  $ eye_color : chr  "blue" "yellow" "red" "yellow" ...
##  $ birth_year: num  19 112 33 41.9 19 52 47 NA 24 57 ...
##  $ gender    : chr  "male" NA NA "male" ...
##  $ homeworld : chr  "Tatooine" "Tatooine" "Naboo" "Tatooine" ...
##  $ species   : chr  "Human" "Droid" "Droid" "Human" ...

Resumen estadístico (summary) de los datos

# Per-column summary: quartiles, mean and NA counts for the numeric columns;
# length, class and mode for the character columns.
summary(object = datos)
##        X            name               height           mass        
##  Min.   : 1.0   Length:87          Min.   : 66.0   Min.   :  15.00  
##  1st Qu.:22.5   Class :character   1st Qu.:167.0   1st Qu.:  55.60  
##  Median :44.0   Mode  :character   Median :180.0   Median :  79.00  
##  Mean   :44.0                      Mean   :174.4   Mean   :  97.31  
##  3rd Qu.:65.5                      3rd Qu.:191.0   3rd Qu.:  84.50  
##  Max.   :87.0                      Max.   :264.0   Max.   :1358.00  
##                                    NA's   :6       NA's   :28       
##   hair_color         skin_color         eye_color           birth_year    
##  Length:87          Length:87          Length:87          Min.   :  8.00  
##  Class :character   Class :character   Class :character   1st Qu.: 35.00  
##  Mode  :character   Mode  :character   Mode  :character   Median : 52.00  
##                                                           Mean   : 87.57  
##                                                           3rd Qu.: 72.00  
##                                                           Max.   :896.00  
##                                                           NA's   :44      
##     gender           homeworld           species         
##  Length:87          Length:87          Length:87         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 

Generar la tabla de distribución de frecuencias de la variable categórica gender

# Frequency distribution of the categorical variable `gender`, converted to a
# plain data frame with columns Category, f, rf, rf(%), cf and cf(%).
# Missing genders are not counted: the table totals 84 of the 87 rows.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$gender))

# Auto-print the table.
tabla.frecuencias
##        Category  f         rf     rf(%) cf     cf(%)
## 1          male 62 0.73809524 73.809524 62  73.80952
## 2        female 19 0.22619048 22.619048 81  96.42857
## 3          none  2 0.02380952  2.380952 83  98.80952
## 4 hermaphrodite  1 0.01190476  1.190476 84 100.00000
# Bar chart of the absolute frequencies, one bar per gender category.
barplot(
  height = tabla.frecuencias[["f"]],
  names.arg = tabla.frecuencias[["Category"]]
)

Generar la tabla de distribución de frecuencias de la variable categórica homeworld

# Frequency distribution of the categorical variable `homeworld` as a plain
# data frame (Category, f, rf, rf(%), cf, cf(%)). Rows with a missing
# homeworld are not counted: the table totals 77 of the 87 rows.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$homeworld))

# Auto-print the table (48 distinct homeworlds).
tabla.frecuencias
##          Category  f         rf     rf(%) cf     cf(%)
## 1           Naboo 11 0.14285714 14.285714 11  14.28571
## 2        Tatooine 10 0.12987013 12.987013 21  27.27273
## 3        Alderaan  3 0.03896104  3.896104 24  31.16883
## 4       Coruscant  3 0.03896104  3.896104 27  35.06494
## 5          Kamino  3 0.03896104  3.896104 30  38.96104
## 6        Corellia  2 0.02597403  2.597403 32  41.55844
## 7        Kashyyyk  2 0.02597403  2.597403 34  44.15584
## 8          Mirial  2 0.02597403  2.597403 36  46.75325
## 9          Ryloth  2 0.02597403  2.597403 38  49.35065
## 10    Aleen Minor  1 0.01298701  1.298701 39  50.64935
## 11         Bespin  1 0.01298701  1.298701 40  51.94805
## 12     Bestine IV  1 0.01298701  1.298701 41  53.24675
## 13 Cato Neimoidia  1 0.01298701  1.298701 42  54.54545
## 14          Cerea  1 0.01298701  1.298701 43  55.84416
## 15       Champala  1 0.01298701  1.298701 44  57.14286
## 16      Chandrila  1 0.01298701  1.298701 45  58.44156
## 17   Concord Dawn  1 0.01298701  1.298701 46  59.74026
## 18       Dathomir  1 0.01298701  1.298701 47  61.03896
## 19          Dorin  1 0.01298701  1.298701 48  62.33766
## 20          Endor  1 0.01298701  1.298701 49  63.63636
## 21         Eriadu  1 0.01298701  1.298701 50  64.93506
## 22       Geonosis  1 0.01298701  1.298701 51  66.23377
## 23    Glee Anselm  1 0.01298701  1.298701 52  67.53247
## 24     Haruun Kal  1 0.01298701  1.298701 53  68.83117
## 25        Iktotch  1 0.01298701  1.298701 54  70.12987
## 26       Iridonia  1 0.01298701  1.298701 55  71.42857
## 27          Kalee  1 0.01298701  1.298701 56  72.72727
## 28      Malastare  1 0.01298701  1.298701 57  74.02597
## 29       Mon Cala  1 0.01298701  1.298701 58  75.32468
## 30     Muunilinst  1 0.01298701  1.298701 59  76.62338
## 31      Nal Hutta  1 0.01298701  1.298701 60  77.92208
## 32           Ojom  1 0.01298701  1.298701 61  79.22078
## 33        Quermia  1 0.01298701  1.298701 62  80.51948
## 34          Rodia  1 0.01298701  1.298701 63  81.81818
## 35        Serenno  1 0.01298701  1.298701 64  83.11688
## 36          Shili  1 0.01298701  1.298701 65  84.41558
## 37          Skako  1 0.01298701  1.298701 66  85.71429
## 38        Socorro  1 0.01298701  1.298701 67  87.01299
## 39        Stewjon  1 0.01298701  1.298701 68  88.31169
## 40        Sullust  1 0.01298701  1.298701 69  89.61039
## 41       Toydaria  1 0.01298701  1.298701 70  90.90909
## 42      Trandosha  1 0.01298701  1.298701 71  92.20779
## 43        Troiken  1 0.01298701  1.298701 72  93.50649
## 44           Tund  1 0.01298701  1.298701 73  94.80519
## 45         Umbara  1 0.01298701  1.298701 74  96.10390
## 46         Utapau  1 0.01298701  1.298701 75  97.40260
## 47        Vulpter  1 0.01298701  1.298701 76  98.70130
## 48          Zolan  1 0.01298701  1.298701 77 100.00000
# Bar chart of the absolute frequencies, one bar per homeworld.
# There are 48 categories; with the default horizontal labels the axis omits
# labels that would overlap, leaving most bars unlabeled. Draw the labels
# perpendicular to the axis (las = 2), shrink them, and enlarge the bottom
# margin so the longest names fit. The previous graphical parameters are
# restored afterwards so later plots are unaffected.
parametros.previos <- par(mar = c(8, 4, 2, 1) + 0.1)
barplot(
  height = tabla.frecuencias$f,
  names.arg = tabla.frecuencias$Category,
  las = 2,
  cex.names = 0.6
)
par(parametros.previos)

Generar la tabla de distribución de frecuencias de la variable categórica species

# Frequency distribution of the categorical variable `species` as a plain
# data frame (Category, f, rf, rf(%), cf, cf(%)). Rows with a missing species
# are not counted: the table totals 82 of the 87 rows.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$species))

# Auto-print the table (37 distinct species).
tabla.frecuencias
##          Category  f         rf     rf(%) cf     cf(%)
## 1           Human 35 0.42682927 42.682927 35  42.68293
## 2           Droid  5 0.06097561  6.097561 40  48.78049
## 3          Gungan  3 0.03658537  3.658537 43  52.43902
## 4        Kaminoan  2 0.02439024  2.439024 45  54.87805
## 5        Mirialan  2 0.02439024  2.439024 47  57.31707
## 6         Twi'lek  2 0.02439024  2.439024 49  59.75610
## 7         Wookiee  2 0.02439024  2.439024 51  62.19512
## 8          Zabrak  2 0.02439024  2.439024 53  64.63415
## 9          Aleena  1 0.01219512  1.219512 54  65.85366
## 10       Besalisk  1 0.01219512  1.219512 55  67.07317
## 11         Cerean  1 0.01219512  1.219512 56  68.29268
## 12       Chagrian  1 0.01219512  1.219512 57  69.51220
## 13       Clawdite  1 0.01219512  1.219512 58  70.73171
## 14            Dug  1 0.01219512  1.219512 59  71.95122
## 15           Ewok  1 0.01219512  1.219512 60  73.17073
## 16      Geonosian  1 0.01219512  1.219512 61  74.39024
## 17           Hutt  1 0.01219512  1.219512 62  75.60976
## 18       Iktotchi  1 0.01219512  1.219512 63  76.82927
## 19        Kaleesh  1 0.01219512  1.219512 64  78.04878
## 20        Kel Dor  1 0.01219512  1.219512 65  79.26829
## 21   Mon Calamari  1 0.01219512  1.219512 66  80.48780
## 22           Muun  1 0.01219512  1.219512 67  81.70732
## 23       Nautolan  1 0.01219512  1.219512 68  82.92683
## 24      Neimodian  1 0.01219512  1.219512 69  84.14634
## 25         Pau'an  1 0.01219512  1.219512 70  85.36585
## 26       Quermian  1 0.01219512  1.219512 71  86.58537
## 27         Rodian  1 0.01219512  1.219512 72  87.80488
## 28        Skakoan  1 0.01219512  1.219512 73  89.02439
## 29      Sullustan  1 0.01219512  1.219512 74  90.24390
## 30     Tholothian  1 0.01219512  1.219512 75  91.46341
## 31        Togruta  1 0.01219512  1.219512 76  92.68293
## 32          Toong  1 0.01219512  1.219512 77  93.90244
## 33      Toydarian  1 0.01219512  1.219512 78  95.12195
## 34     Trandoshan  1 0.01219512  1.219512 79  96.34146
## 35     Vulptereen  1 0.01219512  1.219512 80  97.56098
## 36          Xexto  1 0.01219512  1.219512 81  98.78049
## 37 Yoda's species  1 0.01219512  1.219512 82 100.00000
# Bar chart of the absolute frequencies, one bar per species.
# There are 37 categories; with the default horizontal labels the axis omits
# labels that would overlap, leaving most bars unlabeled. Draw the labels
# perpendicular to the axis (las = 2), shrink them, and enlarge the bottom
# margin so the longest names fit. The previous graphical parameters are
# restored afterwards so later plots are unaffected.
parametros.previos <- par(mar = c(8, 4, 2, 1) + 0.1)
barplot(
  height = tabla.frecuencias$f,
  names.arg = tabla.frecuencias$Category,
  las = 2,
  cex.names = 0.6
)
par(parametros.previos)

Generar la tabla de distribución de frecuencias de la variable numérica height

# Grouped frequency distribution of the numeric variable `height`.
# The result is not a plain data frame: the class table lives in `$table`.
tabla.frecuencias <- fdt(x = datos$height)

# Render the object with kable. Because the whole object is passed, the output
# contains two tables: the class table and a second one listing start, end,
# h and right.
kable(x = tabla.frecuencias)
|Class limits      |  f|        rf|     rf(%)| cf|      cf(%)|
|:-----------------|--:|---------:|---------:|--:|----------:|
|[65.34,90.5025)   |  3| 0.0370370|  3.703704|  3|   3.703704|
|[90.5025,115.665) |  5| 0.0617284|  6.172840|  8|   9.876543|
|[115.665,140.827) |  2| 0.0246914|  2.469136| 10|  12.345679|
|[140.827,165.99)  |  9| 0.1111111| 11.111111| 19|  23.456790|
|[165.99,191.152)  | 43| 0.5308642| 53.086420| 62|  76.543210|
|[191.152,216.315) | 14| 0.1728395| 17.283951| 76|  93.827161|
|[216.315,241.477) |  4| 0.0493827|  4.938272| 80|  98.765432|
|[241.477,266.64)  |  1| 0.0123457|  1.234568| 81| 100.000000|

|      |        x|
|:-----|--------:|
|start |  65.3400|
|end   | 266.6400|
|h     |  25.1625|
|right |   0.0000|
# Bar chart of the absolute frequency of each height class, labeled with the
# class limits taken from the `table` component of the fdt result.
with(
  tabla.frecuencias$table,
  barplot(height = f, names.arg = `Class limits`)
)

Generar la tabla de distribución de frecuencias de la variable numérica mass

# Grouped frequency distribution of the numeric variable `mass`.
# Only the 59 non-missing masses are counted. The single extreme value (1358)
# stretches the range so far that 58 of them fall into the first class.
tabla.frecuencias <- fdt(x = datos$mass)

# Render the object with kable. Because the whole object is passed, the output
# contains two tables: the class table and a second one listing start, end,
# h and right.
kable(x = tabla.frecuencias)
|Class limits        |  f|        rf|     rf(%)| cf|     cf(%)|
|:-------------------|--:|---------:|---------:|--:|---------:|
|[14.85,208.6686)    | 58| 0.9830508| 98.305085| 58|  98.30508|
|[208.6686,402.4871) |  0| 0.0000000|  0.000000| 58|  98.30508|
|[402.4871,596.3057) |  0| 0.0000000|  0.000000| 58|  98.30508|
|[596.3057,790.1243) |  0| 0.0000000|  0.000000| 58|  98.30508|
|[790.1243,983.9429) |  0| 0.0000000|  0.000000| 58|  98.30508|
|[983.9429,1177.761) |  0| 0.0000000|  0.000000| 58|  98.30508|
|[1177.761,1371.58)  |  1| 0.0169492|  1.694915| 59| 100.00000|

|      |         x|
|:-----|---------:|
|start |   14.8500|
|end   | 1371.5800|
|h     |  193.8186|
|right |    0.0000|
# Bar chart of the absolute frequency of each mass class, labeled with the
# class limits taken from the `table` component of the fdt result.
with(
  tabla.frecuencias$table,
  barplot(height = f, names.arg = `Class limits`)
)