Cargar las librerías, suprimiendo los mensajes de carga para limpiar la salida
library(dplyr)
library(readr)
library(fdth)
library(knitr)
Cargar los datos de Star Wars desde la URL https, leyéndolos con codificación UTF-8
# Fix the random seed so any later random step is reproducible.
set.seed(2020)

# Read the Star Wars characters CSV directly from GitHub. The first row
# holds the column names and strings are declared as UTF-8.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/Curso-Titulacion-Data-Science-/master/2020/datos/starwars.csv",
  header = TRUE,
  encoding = "UTF-8"
)

# Print the whole data frame.
datos
## X name height mass hair_color skin_color
## 1 1 Luke Skywalker 172 77.0 blond fair
## 2 2 C-3PO 167 75.0 <NA> gold
## 3 3 R2-D2 96 32.0 <NA> white, blue
## 4 4 Darth Vader 202 136.0 none white
## 5 5 Leia Organa 150 49.0 brown light
## 6 6 Owen Lars 178 120.0 brown, grey light
## 7 7 Beru Whitesun lars 165 75.0 brown light
## 8 8 R5-D4 97 32.0 <NA> white, red
## 9 9 Biggs Darklighter 183 84.0 black light
## 10 10 Obi-Wan Kenobi 182 77.0 auburn, white fair
## 11 11 Anakin Skywalker 188 84.0 blond fair
## 12 12 Wilhuff Tarkin 180 NA auburn, grey fair
## 13 13 Chewbacca 228 112.0 brown unknown
## 14 14 Han Solo 180 80.0 brown fair
## 15 15 Greedo 173 74.0 <NA> green
## 16 16 Jabba Desilijic Tiure 175 1358.0 <NA> green-tan, brown
## 17 17 Wedge Antilles 170 77.0 brown fair
## 18 18 Jek Tono Porkins 180 110.0 brown fair
## 19 19 Yoda 66 17.0 white green
## 20 20 Palpatine 170 75.0 grey pale
## 21 21 Boba Fett 183 78.2 black fair
## 22 22 IG-88 200 140.0 none metal
## 23 23 Bossk 190 113.0 none green
## 24 24 Lando Calrissian 177 79.0 black dark
## 25 25 Lobot 175 79.0 none light
## 26 26 Ackbar 180 83.0 none brown mottle
## 27 27 Mon Mothma 150 NA auburn fair
## 28 28 Arvel Crynyd NA NA brown fair
## 29 29 Wicket Systri Warrick 88 20.0 brown brown
## 30 30 Nien Nunb 160 68.0 none grey
## 31 31 Qui-Gon Jinn 193 89.0 brown fair
## 32 32 Nute Gunray 191 90.0 none mottled green
## 33 33 Finis Valorum 170 NA blond fair
## 34 34 Jar Jar Binks 196 66.0 none orange
## 35 35 Roos Tarpals 224 82.0 none grey
## 36 36 Rugor Nass 206 NA none green
## 37 37 Ric Olié 183 NA brown fair
## 38 38 Watto 137 NA black blue, grey
## 39 39 Sebulba 112 40.0 none grey, red
## 40 40 Quarsh Panaka 183 NA black dark
## 41 41 Shmi Skywalker 163 NA black fair
## 42 42 Darth Maul 175 80.0 none red
## 43 43 Bib Fortuna 180 NA none pale
## 44 44 Ayla Secura 178 55.0 none blue
## 45 45 Dud Bolt 94 45.0 none blue, grey
## 46 46 Gasgano 122 NA none white, blue
## 47 47 Ben Quadinaros 163 65.0 none grey, green, yellow
## 48 48 Mace Windu 188 84.0 none dark
## 49 49 Ki-Adi-Mundi 198 82.0 white pale
## 50 50 Kit Fisto 196 87.0 none green
## 51 51 Eeth Koth 171 NA black brown
## 52 52 Adi Gallia 184 50.0 none dark
## 53 53 Saesee Tiin 188 NA none pale
## 54 54 Yarael Poof 264 NA none white
## 55 55 Plo Koon 188 80.0 none orange
## 56 56 Mas Amedda 196 NA none blue
## 57 57 Gregar Typho 185 85.0 black dark
## 58 58 Cordé 157 NA brown light
## 59 59 Cliegg Lars 183 NA brown fair
## 60 60 Poggle the Lesser 183 80.0 none green
## 61 61 Luminara Unduli 170 56.2 black yellow
## 62 62 Barriss Offee 166 50.0 black yellow
## 63 63 Dormé 165 NA brown light
## 64 64 Dooku 193 80.0 white fair
## 65 65 Bail Prestor Organa 191 NA black tan
## 66 66 Jango Fett 183 79.0 black tan
## 67 67 Zam Wesell 168 55.0 blonde fair, green, yellow
## 68 68 Dexter Jettster 198 102.0 none brown
## 69 69 Lama Su 229 88.0 none grey
## 70 70 Taun We 213 NA none grey
## 71 71 Jocasta Nu 167 NA white fair
## 72 72 Ratts Tyerell 79 15.0 none grey, blue
## 73 73 R4-P17 96 NA none silver, red
## 74 74 Wat Tambor 193 48.0 none green, grey
## 75 75 San Hill 191 NA none grey
## 76 76 Shaak Ti 178 57.0 none red, blue, white
## 77 77 Grievous 216 159.0 none brown, white
## 78 78 Tarfful 234 136.0 brown brown
## 79 79 Raymus Antilles 188 79.0 brown light
## 80 80 Sly Moore 178 48.0 none pale
## 81 81 Tion Medon 206 80.0 none grey
## 82 82 Finn NA NA black dark
## 83 83 Rey NA NA brown light
## 84 84 Poe Dameron NA NA brown light
## 85 85 BB8 NA NA none none
## 86 86 Captain Phasma NA NA unknown unknown
## 87 87 Padmé Amidala 165 45.0 brown light
## eye_color birth_year gender homeworld species
## 1 blue 19.0 male Tatooine Human
## 2 yellow 112.0 <NA> Tatooine Droid
## 3 red 33.0 <NA> Naboo Droid
## 4 yellow 41.9 male Tatooine Human
## 5 brown 19.0 female Alderaan Human
## 6 blue 52.0 male Tatooine Human
## 7 blue 47.0 female Tatooine Human
## 8 red NA <NA> Tatooine Droid
## 9 brown 24.0 male Tatooine Human
## 10 blue-gray 57.0 male Stewjon Human
## 11 blue 41.9 male Tatooine Human
## 12 blue 64.0 male Eriadu Human
## 13 blue 200.0 male Kashyyyk Wookiee
## 14 brown 29.0 male Corellia Human
## 15 black 44.0 male Rodia Rodian
## 16 orange 600.0 hermaphrodite Nal Hutta Hutt
## 17 hazel 21.0 male Corellia Human
## 18 blue NA male Bestine IV Human
## 19 brown 896.0 male <NA> Yoda's species
## 20 yellow 82.0 male Naboo Human
## 21 brown 31.5 male Kamino Human
## 22 red 15.0 none <NA> Droid
## 23 red 53.0 male Trandosha Trandoshan
## 24 brown 31.0 male Socorro Human
## 25 blue 37.0 male Bespin Human
## 26 orange 41.0 male Mon Cala Mon Calamari
## 27 blue 48.0 female Chandrila Human
## 28 brown NA male <NA> Human
## 29 brown 8.0 male Endor Ewok
## 30 black NA male Sullust Sullustan
## 31 blue 92.0 male <NA> Human
## 32 red NA male Cato Neimoidia Neimodian
## 33 blue 91.0 male Coruscant Human
## 34 orange 52.0 male Naboo Gungan
## 35 orange NA male Naboo Gungan
## 36 orange NA male Naboo Gungan
## 37 blue NA male Naboo <NA>
## 38 yellow NA male Toydaria Toydarian
## 39 orange NA male Malastare Dug
## 40 brown 62.0 male Naboo <NA>
## 41 brown 72.0 female Tatooine Human
## 42 yellow 54.0 male Dathomir Zabrak
## 43 pink NA male Ryloth Twi'lek
## 44 hazel 48.0 female Ryloth Twi'lek
## 45 yellow NA male Vulpter Vulptereen
## 46 black NA male Troiken Xexto
## 47 orange NA male Tund Toong
## 48 brown 72.0 male Haruun Kal Human
## 49 yellow 92.0 male Cerea Cerean
## 50 black NA male Glee Anselm Nautolan
## 51 brown NA male Iridonia Zabrak
## 52 blue NA female Coruscant Tholothian
## 53 orange NA male Iktotch Iktotchi
## 54 yellow NA male Quermia Quermian
## 55 black 22.0 male Dorin Kel Dor
## 56 blue NA male Champala Chagrian
## 57 brown NA male Naboo Human
## 58 brown NA female Naboo Human
## 59 blue 82.0 male Tatooine Human
## 60 yellow NA male Geonosis Geonosian
## 61 blue 58.0 female Mirial Mirialan
## 62 blue 40.0 female Mirial Mirialan
## 63 brown NA female Naboo Human
## 64 brown 102.0 male Serenno Human
## 65 brown 67.0 male Alderaan Human
## 66 brown 66.0 male Concord Dawn Human
## 67 yellow NA female Zolan Clawdite
## 68 yellow NA male Ojom Besalisk
## 69 black NA male Kamino Kaminoan
## 70 black NA female Kamino Kaminoan
## 71 blue NA female Coruscant Human
## 72 unknown NA male Aleen Minor Aleena
## 73 red, blue NA female <NA> <NA>
## 74 unknown NA male Skako Skakoan
## 75 gold NA male Muunilinst Muun
## 76 black NA female Shili Togruta
## 77 green, yellow NA male Kalee Kaleesh
## 78 blue NA male Kashyyyk Wookiee
## 79 brown NA male Alderaan Human
## 80 white NA female Umbara <NA>
## 81 black NA male Utapau Pau'an
## 82 dark NA male <NA> Human
## 83 hazel NA female <NA> Human
## 84 brown NA male <NA> Human
## 85 black NA none <NA> Droid
## 86 unknown NA female <NA> <NA>
## 87 brown 46.0 female Naboo Human
# Compact overview of the data frame: dimensions, column types, first values.
str(object = datos)
## 'data.frame': 87 obs. of 11 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ name : chr "Luke Skywalker" "C-3PO" "R2-D2" "Darth Vader" ...
## $ height : int 172 167 96 202 150 178 165 97 183 182 ...
## $ mass : num 77 75 32 136 49 120 75 32 84 77 ...
## $ hair_color: chr "blond" NA NA "none" ...
## $ skin_color: chr "fair" "gold" "white, blue" "white" ...
## $ eye_color : chr "blue" "yellow" "red" "yellow" ...
## $ birth_year: num 19 112 33 41.9 19 52 47 NA 24 57 ...
## $ gender : chr "male" NA NA "male" ...
## $ homeworld : chr "Tatooine" "Tatooine" "Naboo" "Tatooine" ...
## $ species : chr "Human" "Droid" "Droid" "Human" ...
Resumen estadístico (summary) de los datos
# Per-column summary: quantiles, mean and NA counts for numeric columns,
# length/class/mode for character columns.
summary(object = datos)
## X name height mass
## Min. : 1.0 Length:87 Min. : 66.0 Min. : 15.00
## 1st Qu.:22.5 Class :character 1st Qu.:167.0 1st Qu.: 55.60
## Median :44.0 Mode :character Median :180.0 Median : 79.00
## Mean :44.0 Mean :174.4 Mean : 97.31
## 3rd Qu.:65.5 3rd Qu.:191.0 3rd Qu.: 84.50
## Max. :87.0 Max. :264.0 Max. :1358.00
## NA's :6 NA's :28
## hair_color skin_color eye_color birth_year
## Length:87 Length:87 Length:87 Min. : 8.00
## Class :character Class :character Class :character 1st Qu.: 35.00
## Mode :character Mode :character Mode :character Median : 52.00
## Mean : 87.57
## 3rd Qu.: 72.00
## Max. :896.00
## NA's :44
## gender homeworld species
## Length:87 Length:87 Length:87
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
Generar la tabla de distribución de frecuencias de la variable categórica gender
# Frequency distribution of the categorical variable gender, built with
# fdt_cat() and converted to a plain data frame in a single step.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$gender))

# Display the resulting table.
tabla.frecuencias
## Category f rf rf(%) cf cf(%)
## 1 male 62 0.73809524 73.809524 62 73.80952
## 2 female 19 0.22619048 22.619048 81 96.42857
## 3 none 2 0.02380952 2.380952 83 98.80952
## 4 hermaphrodite 1 0.01190476 1.190476 84 100.00000
# Bar chart of the absolute frequency (f) of each category.
with(tabla.frecuencias, barplot(height = f, names.arg = Category))

Generar la tabla de distribución de frecuencias de la variable categórica homeworld
# Frequency distribution of the categorical variable homeworld, built with
# fdt_cat() and converted to a plain data frame in a single step.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$homeworld))

# Display the resulting table.
tabla.frecuencias
## Category f rf rf(%) cf cf(%)
## 1 Naboo 11 0.14285714 14.285714 11 14.28571
## 2 Tatooine 10 0.12987013 12.987013 21 27.27273
## 3 Alderaan 3 0.03896104 3.896104 24 31.16883
## 4 Coruscant 3 0.03896104 3.896104 27 35.06494
## 5 Kamino 3 0.03896104 3.896104 30 38.96104
## 6 Corellia 2 0.02597403 2.597403 32 41.55844
## 7 Kashyyyk 2 0.02597403 2.597403 34 44.15584
## 8 Mirial 2 0.02597403 2.597403 36 46.75325
## 9 Ryloth 2 0.02597403 2.597403 38 49.35065
## 10 Aleen Minor 1 0.01298701 1.298701 39 50.64935
## 11 Bespin 1 0.01298701 1.298701 40 51.94805
## 12 Bestine IV 1 0.01298701 1.298701 41 53.24675
## 13 Cato Neimoidia 1 0.01298701 1.298701 42 54.54545
## 14 Cerea 1 0.01298701 1.298701 43 55.84416
## 15 Champala 1 0.01298701 1.298701 44 57.14286
## 16 Chandrila 1 0.01298701 1.298701 45 58.44156
## 17 Concord Dawn 1 0.01298701 1.298701 46 59.74026
## 18 Dathomir 1 0.01298701 1.298701 47 61.03896
## 19 Dorin 1 0.01298701 1.298701 48 62.33766
## 20 Endor 1 0.01298701 1.298701 49 63.63636
## 21 Eriadu 1 0.01298701 1.298701 50 64.93506
## 22 Geonosis 1 0.01298701 1.298701 51 66.23377
## 23 Glee Anselm 1 0.01298701 1.298701 52 67.53247
## 24 Haruun Kal 1 0.01298701 1.298701 53 68.83117
## 25 Iktotch 1 0.01298701 1.298701 54 70.12987
## 26 Iridonia 1 0.01298701 1.298701 55 71.42857
## 27 Kalee 1 0.01298701 1.298701 56 72.72727
## 28 Malastare 1 0.01298701 1.298701 57 74.02597
## 29 Mon Cala 1 0.01298701 1.298701 58 75.32468
## 30 Muunilinst 1 0.01298701 1.298701 59 76.62338
## 31 Nal Hutta 1 0.01298701 1.298701 60 77.92208
## 32 Ojom 1 0.01298701 1.298701 61 79.22078
## 33 Quermia 1 0.01298701 1.298701 62 80.51948
## 34 Rodia 1 0.01298701 1.298701 63 81.81818
## 35 Serenno 1 0.01298701 1.298701 64 83.11688
## 36 Shili 1 0.01298701 1.298701 65 84.41558
## 37 Skako 1 0.01298701 1.298701 66 85.71429
## 38 Socorro 1 0.01298701 1.298701 67 87.01299
## 39 Stewjon 1 0.01298701 1.298701 68 88.31169
## 40 Sullust 1 0.01298701 1.298701 69 89.61039
## 41 Toydaria 1 0.01298701 1.298701 70 90.90909
## 42 Trandosha 1 0.01298701 1.298701 71 92.20779
## 43 Troiken 1 0.01298701 1.298701 72 93.50649
## 44 Tund 1 0.01298701 1.298701 73 94.80519
## 45 Umbara 1 0.01298701 1.298701 74 96.10390
## 46 Utapau 1 0.01298701 1.298701 75 97.40260
## 47 Vulpter 1 0.01298701 1.298701 76 98.70130
## 48 Zolan 1 0.01298701 1.298701 77 100.00000
# Bar chart of the absolute frequency (f) of each homeworld.
# The table has 48 categories; with the default horizontal labels most of
# them overlap and are not drawn. The labels are therefore written
# perpendicular to the axis (las = 2) in a smaller font (cex.names), and the
# bottom margin is enlarged so the longest names fit. The previous graphical
# parameters are restored afterwards.
old_par <- par(mar = c(8, 4, 2, 1))
barplot(
  height = tabla.frecuencias$f,
  names.arg = tabla.frecuencias$Category,
  las = 2,
  cex.names = 0.6
)
par(old_par)

Generar la tabla de distribución de frecuencias de la variable categórica species
# Frequency distribution of the categorical variable species, built with
# fdt_cat() and converted to a plain data frame in a single step.
tabla.frecuencias <- as.data.frame(fdt_cat(x = datos$species))

# Display the resulting table.
tabla.frecuencias
## Category f rf rf(%) cf cf(%)
## 1 Human 35 0.42682927 42.682927 35 42.68293
## 2 Droid 5 0.06097561 6.097561 40 48.78049
## 3 Gungan 3 0.03658537 3.658537 43 52.43902
## 4 Kaminoan 2 0.02439024 2.439024 45 54.87805
## 5 Mirialan 2 0.02439024 2.439024 47 57.31707
## 6 Twi'lek 2 0.02439024 2.439024 49 59.75610
## 7 Wookiee 2 0.02439024 2.439024 51 62.19512
## 8 Zabrak 2 0.02439024 2.439024 53 64.63415
## 9 Aleena 1 0.01219512 1.219512 54 65.85366
## 10 Besalisk 1 0.01219512 1.219512 55 67.07317
## 11 Cerean 1 0.01219512 1.219512 56 68.29268
## 12 Chagrian 1 0.01219512 1.219512 57 69.51220
## 13 Clawdite 1 0.01219512 1.219512 58 70.73171
## 14 Dug 1 0.01219512 1.219512 59 71.95122
## 15 Ewok 1 0.01219512 1.219512 60 73.17073
## 16 Geonosian 1 0.01219512 1.219512 61 74.39024
## 17 Hutt 1 0.01219512 1.219512 62 75.60976
## 18 Iktotchi 1 0.01219512 1.219512 63 76.82927
## 19 Kaleesh 1 0.01219512 1.219512 64 78.04878
## 20 Kel Dor 1 0.01219512 1.219512 65 79.26829
## 21 Mon Calamari 1 0.01219512 1.219512 66 80.48780
## 22 Muun 1 0.01219512 1.219512 67 81.70732
## 23 Nautolan 1 0.01219512 1.219512 68 82.92683
## 24 Neimodian 1 0.01219512 1.219512 69 84.14634
## 25 Pau'an 1 0.01219512 1.219512 70 85.36585
## 26 Quermian 1 0.01219512 1.219512 71 86.58537
## 27 Rodian 1 0.01219512 1.219512 72 87.80488
## 28 Skakoan 1 0.01219512 1.219512 73 89.02439
## 29 Sullustan 1 0.01219512 1.219512 74 90.24390
## 30 Tholothian 1 0.01219512 1.219512 75 91.46341
## 31 Togruta 1 0.01219512 1.219512 76 92.68293
## 32 Toong 1 0.01219512 1.219512 77 93.90244
## 33 Toydarian 1 0.01219512 1.219512 78 95.12195
## 34 Trandoshan 1 0.01219512 1.219512 79 96.34146
## 35 Vulptereen 1 0.01219512 1.219512 80 97.56098
## 36 Xexto 1 0.01219512 1.219512 81 98.78049
## 37 Yoda's species 1 0.01219512 1.219512 82 100.00000
# Bar chart of the absolute frequency (f) of each species.
# The table has 37 categories; with the default horizontal labels most of
# them overlap and are not drawn. The labels are therefore written
# perpendicular to the axis (las = 2) in a smaller font (cex.names), and the
# bottom margin is enlarged so the longest names fit. The previous graphical
# parameters are restored afterwards.
old_par <- par(mar = c(8, 4, 2, 1))
barplot(
  height = tabla.frecuencias$f,
  names.arg = tabla.frecuencias$Category,
  las = 2,
  cex.names = 0.6
)
par(old_par)

Generar la tabla de distribución de frecuencias de la variable numérica height
# Frequency distribution of the numeric variable height; no class limits are
# passed, so fdt() uses its defaults.
tabla.frecuencias <- fdt(x = datos[["height"]])

# Render the result as a formatted table.
knitr::kable(tabla.frecuencias)
| Class limits      |  f |        rf |     rf(%) | cf |      cf(%) |
|:------------------|---:|----------:|----------:|---:|-----------:|
| [65.34,90.5025)   |  3 | 0.0370370 |  3.703704 |  3 |   3.703704 |
| [90.5025,115.665) |  5 | 0.0617284 |  6.172840 |  8 |   9.876543 |
| [115.665,140.827) |  2 | 0.0246914 |  2.469136 | 10 |  12.345679 |
| [140.827,165.99)  |  9 | 0.1111111 | 11.111111 | 19 |  23.456790 |
| [165.99,191.152)  | 43 | 0.5308642 | 53.086420 | 62 |  76.543210 |
| [191.152,216.315) | 14 | 0.1728395 | 17.283951 | 76 |  93.827161 |
| [216.315,241.477) |  4 | 0.0493827 |  4.938272 | 80 |  98.765432 |
| [241.477,266.64)  |  1 | 0.0123457 |  1.234568 | 81 | 100.000000 |

|       |        x |
|:------|---------:|
| start |  65.3400 |
| end   | 266.6400 |
| h     |  25.1625 |
| right |   0.0000 |
# Bar chart of the absolute frequency (f) of each class interval.
with(
  tabla.frecuencias$table,
  barplot(height = f, names.arg = `Class limits`)
)

Generar la tabla de distribución de frecuencias de la variable numérica mass
# Frequency distribution of the numeric variable mass; no class limits are
# passed, so fdt() uses its defaults.
tabla.frecuencias <- fdt(x = datos[["mass"]])

# Render the result as a formatted table.
knitr::kable(tabla.frecuencias)
| Class limits        |  f |        rf |     rf(%) | cf |     cf(%) |
|:--------------------|---:|----------:|----------:|---:|----------:|
| [14.85,208.6686)    | 58 | 0.9830508 | 98.305085 | 58 |  98.30508 |
| [208.6686,402.4871) |  0 | 0.0000000 |  0.000000 | 58 |  98.30508 |
| [402.4871,596.3057) |  0 | 0.0000000 |  0.000000 | 58 |  98.30508 |
| [596.3057,790.1243) |  0 | 0.0000000 |  0.000000 | 58 |  98.30508 |
| [790.1243,983.9429) |  0 | 0.0000000 |  0.000000 | 58 |  98.30508 |
| [983.9429,1177.761) |  0 | 0.0000000 |  0.000000 | 58 |  98.30508 |
| [1177.761,1371.58)  |  1 | 0.0169492 |  1.694915 | 59 | 100.00000 |

|       |         x |
|:------|----------:|
| start |   14.8500 |
| end   | 1371.5800 |
| h     |  193.8186 |
| right |    0.0000 |
# Bar chart of the absolute frequency (f) of each class interval.
with(
  tabla.frecuencias$table,
  barplot(height = f, names.arg = `Class limits`)
)
