\[1.1~Filter~Commands\] \[1.1.1~Single-Condition~filter\]
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the built-in mtcars dataset and print every row.
data(mtcars)
print(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Keep only the cars with 6 cylinders.
# Note the double `==`: it compares, whereas a single `=` would assign.
six.cyl.only <- mtcars %>%
  filter(cyl == 6)
print(six.cyl.only)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
\[1.1.2~Multiple-Condition~Filter\]
# Filter mtcars for cars that have both six cylinders and 110 horsepower.
# Conditions separated by commas must all hold (logical AND).
six.cyl.and.110.horse.power <- mtcars %>%
  filter(
    cyl == 6,
    hp == 110
  )
print(six.cyl.and.110.horse.power)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
\[1.1.3~OR~Logic~for~Filtering\]
# Filter mtcars for cars with 4 gears OR more than 6 cylinders.
# The "or" condition is written with the `|` operator.
gear.eq.4.or.more.than.8 <- mtcars %>%
  filter(gear == 4 | cyl > 6)
print(gear.eq.4.or.more.than.8)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
\[1.1.4~Filter~by~Minimums,~Maximums,~and~Other\]
# Find the cars at the extremes of engine displacement.
# Despite the variable name, this keeps the rows with the smallest AND the
# largest `disp`, not only the smallest.
Smallest.engine.displacement <- mtcars %>%
  filter(disp %in% c(max(disp), min(disp)))
print(Smallest.engine.displacement)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
# Filter with several conditions separated by commas (all must hold):
# measurements taken before time 3 with a weight above 53.
data("ChickWeight")
chick.subset <- ChickWeight %>%
  filter(Time < 3, weight > 53)
print(chick.subset)
## weight Time Chick Diet
## 1 55 2 22 2
## 2 55 2 40 3
## 3 55 2 43 4
## 4 54 2 50 4
\[1.1.5Filter~Out~Missing~Values~(NAs)~for~a~Specific~Column\]
# Look for missing values: preview the first 10 rows before filtering.
data(airquality)
head(airquality, n = 10)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
# Drop the rows whose Ozone value is missing, then preview the result.
no.missing.ozone <- airquality %>%
  filter(!is.na(Ozone))
head(no.missing.ozone, n = 8)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 28 NA 14.9 66 5 6
## 6 23 299 8.6 65 5 7
## 7 19 99 13.8 59 5 8
## 8 8 19 20.1 61 5 9
# Same exercise for the Solar.R column: reload the data and preview 10 rows.
data(airquality)
head(airquality, n = 10)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
# Drop the rows whose Solar.R value is missing; other columns may still be NA.
no.missing.Solar.R <- airquality %>%
  filter(!is.na(Solar.R))
head(no.missing.Solar.R, n = 8)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 23 299 8.6 65 5 7
## 6 19 99 13.8 59 5 8
## 7 8 19 20.1 61 5 9
## 8 NA 194 8.6 69 5 10
\[1.1.6Filter~Rows~with~NAs~Anywhere~in~the~Dataset\]
# Remove every row that has a missing value in any column
# (restricted to the first 10 rows of airquality).
airqua.no.NA.anywhere <- airquality[1:10, ] %>%
  filter(complete.cases(.))
print(airqua.no.NA.anywhere)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 23 299 8.6 65 5 7
## 6 19 99 13.8 59 5 8
## 7 8 19 20.1 61 5 9
\[1.1.7Filter~by~\%in\%\]
# Include or exclude specific values with %in%.
data(iris)
# Check how many rows the dataset has.
nrow(iris)
## [1] 150
print(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Count how many rows each species has in the dataset.
table(iris[["Species"]])
##
## setosa versicolor virginica
## 50 50 50
# Keep only the rows whose species is one of the two listed values.
Solo.2.especies <- iris %>%
  filter(Species %in% c("setosa", "virginica"))
print(Solo.2.especies)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 6.3 3.3 6.0 2.5 virginica
## 52 5.8 2.7 5.1 1.9 virginica
## 53 7.1 3.0 5.9 2.1 virginica
## 54 6.3 2.9 5.6 1.8 virginica
## 55 6.5 3.0 5.8 2.2 virginica
## 56 7.6 3.0 6.6 2.1 virginica
## 57 4.9 2.5 4.5 1.7 virginica
## 58 7.3 2.9 6.3 1.8 virginica
## 59 6.7 2.5 5.8 1.8 virginica
## 60 7.2 3.6 6.1 2.5 virginica
## 61 6.5 3.2 5.1 2.0 virginica
## 62 6.4 2.7 5.3 1.9 virginica
## 63 6.8 3.0 5.5 2.1 virginica
## 64 5.7 2.5 5.0 2.0 virginica
## 65 5.8 2.8 5.1 2.4 virginica
## 66 6.4 3.2 5.3 2.3 virginica
## 67 6.5 3.0 5.5 1.8 virginica
## 68 7.7 3.8 6.7 2.2 virginica
## 69 7.7 2.6 6.9 2.3 virginica
## 70 6.0 2.2 5.0 1.5 virginica
## 71 6.9 3.2 5.7 2.3 virginica
## 72 5.6 2.8 4.9 2.0 virginica
## 73 7.7 2.8 6.7 2.0 virginica
## 74 6.3 2.7 4.9 1.8 virginica
## 75 6.7 3.3 5.7 2.1 virginica
## 76 7.2 3.2 6.0 1.8 virginica
## 77 6.2 2.8 4.8 1.8 virginica
## 78 6.1 3.0 4.9 1.8 virginica
## 79 6.4 2.8 5.6 2.1 virginica
## 80 7.2 3.0 5.8 1.6 virginica
## 81 7.4 2.8 6.1 1.9 virginica
## 82 7.9 3.8 6.4 2.0 virginica
## 83 6.4 2.8 5.6 2.2 virginica
## 84 6.3 2.8 5.1 1.5 virginica
## 85 6.1 2.6 5.6 1.4 virginica
## 86 7.7 3.0 6.1 2.3 virginica
## 87 6.3 3.4 5.6 2.4 virginica
## 88 6.4 3.1 5.5 1.8 virginica
## 89 6.0 3.0 4.8 1.8 virginica
## 90 6.9 3.1 5.4 2.1 virginica
## 91 6.7 3.1 5.6 2.4 virginica
## 92 6.9 3.1 5.1 2.3 virginica
## 93 5.8 2.7 5.1 1.9 virginica
## 94 6.8 3.2 5.9 2.3 virginica
## 95 6.7 3.3 5.7 2.5 virginica
## 96 6.7 3.0 5.2 2.3 virginica
## 97 6.3 2.5 5.0 1.9 virginica
## 98 6.5 3.0 5.2 2.0 virginica
## 99 6.2 3.4 5.4 2.3 virginica
## 100 5.9 3.0 5.1 1.8 virginica
# Count the rows again after filtering
# (nrow(), not to be confused with rnorm(n, mean, sd)).
nrow(Solo.2.especies)
## [1] 100
# The unused factor level (versicolor) is still listed, with a count of 0.
table(Solo.2.especies[["Species"]])
##
## setosa versicolor virginica
## 50 0 50
\[1.1.8Filter~for~Ozone>29~and~Include~Only~Three~Columns\]
# Keep the rows with Ozone above 29 and only the first three columns.
# The column subsetting is applied to the result of filter(), not inside it.
data(airquality)
filtrado <- airquality %>%
  filter(Ozone > 29) %>%
  .[, 1:3]
head(filtrado, n = 6)
## Ozone Solar.R Wind
## 1 41 190 7.4
## 2 36 118 8.0
## 3 34 307 12.0
## 4 30 322 11.5
## 5 32 92 12.0
## 6 45 252 14.9
\[1.1.9~Filter~by~Total~Frequency~of~a~Value~Across~All~Rows\]
# Preview the first rows of the table.
head(mtcars, n = 6)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Keep only the cars whose gear count occurs in more than 10 cars.
# n() is evaluated per `gear` group; the grouping is dropped afterwards so
# later verbs applied to this table do not silently operate per gear.
# (The variable name says "años"/years, but the grouping column is `gear`.)
años.mas.frecuentes <- mtcars %>%
  group_by(gear) %>%
  filter(n() > 10) %>%
  ungroup()
table(años.mas.frecuentes$gear)
##
## 3 4
## 15 12
# Adding extra criteria: frequent gear counts (more than 10 cars) combined
# with low horsepower and high miles per gallon. The result stays grouped
# by `gear`.
años.frecuentes.bajoshp.bajoconsumo <- mtcars %>%
  group_by(gear) %>%
  filter(
    n() > 10,
    hp < 100,
    mpg > 30
  )
head(años.frecuentes.bajoshp.bajoconsumo, n = 6)
## # A tibble: 3 x 11
## # Groups: gear [1]
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1
## 2 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2
## 3 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1
\[1.1.10~Filter~by~Column~Name~Using~"starts~With"\]
# Inspect the column names.
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
# Keep the columns whose name starts with "S". select() is namespace-qualified
# to avoid clashes with same-named functions from other packages.
iris.display <- dplyr::select(iris, starts_with("S"))
head(iris.display, n = 6)
## Sepal.Length Sepal.Width Species
## 1 5.1 3.5 setosa
## 2 4.9 3.0 setosa
## 3 4.7 3.2 setosa
## 4 4.6 3.1 setosa
## 5 5.0 3.6 setosa
## 6 5.4 3.9 setosa
\[1.1.11~Filter~Rows:~Columns~Meet~Criteria(filter~at)\]
# Keep the rows where every listed column meets the criterion: here, both
# `cyl` and `hp` must equal their column maximum.
# if_all() replaces the superseded filter_at()/all_vars() pair.
new.mtcars <- mtcars %>%
  filter(if_all(c(cyl, hp), ~ .x == max(.x)))
head(new.mtcars) # only the Maserati Bora has the maximum of both cyl and hp
## mpg cyl disp hp drat wt qsec vs am gear carb
## Maserati Bora 15 8 301 335 3.54 3.57 14.6 0 1 5 8
# Example using the mammal sleep study data shipped with ggplot2.
msleep <- ggplot2::msleep
print(msleep)
## # A tibble: 83 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheet~ Acin~ carni Carn~ lc 12.1 NA NA 11.9
## 2 Owl m~ Aotus omni Prim~ <NA> 17 1.8 NA 7
## 3 Mount~ Aplo~ herbi Rode~ nt 14.4 2.4 NA 9.6
## 4 Great~ Blar~ omni Sori~ lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667 20
## 6 Three~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767 9.6
## 7 North~ Call~ carni Carn~ vu 8.7 1.4 0.383 15.3
## 8 Vespe~ Calo~ <NA> Rode~ <NA> 7 NA NA 17
## 9 Dog Canis carni Carn~ domesticated 10.1 2.9 0.333 13.9
## 10 Roe d~ Capr~ herbi Arti~ lc 3 NA NA 21
## # ... with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# Select the name, sleep and weight columns, then keep only the animals for
# which every column containing "sleep" (sleep_total and sleep_rem) is above
# 5 hours. Rows where either value is NA are dropped.
# if_all() replaces the superseded filter_at()/all_vars() pair.
msleep.over5 <- msleep %>%
  select(name, sleep_total:sleep_rem, brainwt:bodywt) %>%
  filter(if_all(contains("sleep"), ~ .x > 5))
msleep.over5
## # A tibble: 2 x 5
## name sleep_total sleep_rem brainwt bodywt
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Thick-tailed opposum 19.4 6.6 NA 0.37
## 2 Giant armadillo 18.1 6.1 0.081 60
\[1.2~Arrange~(Sort)\] \[1.2.1~Ascending\]
# arrange() sorts the rows of a table by the columns you choose.
msleep <- ggplot2::msleep
# Show columns 1 to 4 only.
msleep[, seq_len(4)]
## # A tibble: 83 x 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Cheetah Acinonyx carni Carnivora
## 2 Owl monkey Aotus omni Primates
## 3 Mountain beaver Aplodontia herbi Rodentia
## 4 Greater short-tailed shrew Blarina omni Soricomorpha
## 5 Cow Bos herbi Artiodactyla
## 6 Three-toed sloth Bradypus herbi Pilosa
## 7 Northern fur seal Callorhinus carni Carnivora
## 8 Vesper mouse Calomys <NA> Rodentia
## 9 Dog Canis carni Carnivora
## 10 Roe deer Capreolus herbi Artiodactyla
## # ... with 73 more rows
# Sort ascending by `vore`, breaking ties by `order`.
animal.name.sequence.asc <- msleep %>%
  arrange(vore, order)
animal.name.sequence.asc[, seq_len(4)]
## # A tibble: 83 x 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Cheetah Acinonyx carni Carnivora
## 2 Northern fur seal Callorhinus carni Carnivora
## 3 Dog Canis carni Carnivora
## 4 Domestic cat Felis carni Carnivora
## 5 Gray seal Haliochoerus carni Carnivora
## 6 Tiger Panthera carni Carnivora
## 7 Jaguar Panthera carni Carnivora
## 8 Lion Panthera carni Carnivora
## 9 Caspian seal Phoca carni Carnivora
## 10 Genet Genetta carni Carnivora
## # ... with 73 more rows
\[1.2.2~Descending\]
# Sort ascending by `vore`, then descending by `order` within each `vore`.
animal.name.sequence.desc <- msleep %>%
  arrange(vore, desc(order))
head(animal.name.sequence.desc, n = 6)[, seq_len(4)]
## # A tibble: 6 x 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Northern grasshopper mouse Onychomys carni Rodentia
## 2 Slow loris Nyctibeus carni Primates
## 3 Thick-tailed opposum Lutreolina carni Didelphimorphia
## 4 Long-nosed armadillo Dasypus carni Cingulata
## 5 Pilot whale Globicephalus carni Cetacea
## 6 Common porpoise Phocoena carni Cetacea
\[1.3~Rename\\[10pt]Rename~one~or~more~columns~in~a~dataset~and~changes~no~data\]
# Column names before renaming.
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
# rename() uses new_name = old_name; only the names change, not the data.
renames.iris <- iris %>%
  rename(
    ancho.del.petalo = Petal.Width,
    diversidad.de.especies = Species
  )
colnames(renames.iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length"
## [4] "ancho.del.petalo" "diversidad.de.especies"
\[1.4~Mutate\\[10pt]*adds~new~variables~to~a~dataframe\]
data("ChickWeight")
# In [i, j] the index before the comma selects rows and the one after it
# selects columns; here rows 1 to 2 and all columns.
ChickWeight[seq_len(2), ]
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
# Add a new column holding the base-10 logarithm of the weight.
ChickWeight.with.log <- ChickWeight %>%
  mutate(log.of.weight = log10(weight))
ChickWeight.with.log[seq_len(2), ]
## weight Time Chick Diet log.of.weight
## 1 42 0 1 1 1.623249
## 2 51 2 1 1 1.707570
\[1.4.1~mutate_-all~to~Add~New~fields~All~at~once\]
# With this function new fields can be added from the data already in the
# table.
msleep <- ggplot2::msleep
colnames(msleep)
## [1] "name" "genus" "vore" "order" "conservation"
## [6] "sleep_total" "sleep_rem" "sleep_cycle" "awake" "brainwt"
## [11] "bodywt"
print(msleep)
## # A tibble: 83 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheet~ Acin~ carni Carn~ lc 12.1 NA NA 11.9
## 2 Owl m~ Aotus omni Prim~ <NA> 17 1.8 NA 7
## 3 Mount~ Aplo~ herbi Rode~ nt 14.4 2.4 NA 9.6
## 4 Great~ Blar~ omni Sori~ lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667 20
## 6 Three~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767 9.6
## 7 North~ Call~ carni Carn~ vu 8.7 1.4 0.383 15.3
## 8 Vespe~ Calo~ <NA> Rode~ <NA> 7 NA NA 17
## 9 Dog Canis carni Carn~ domesticated 10.1 2.9 0.333 13.9
## 10 Roe d~ Capr~ herbi Arti~ lc 3 NA NA 21
## # ... with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# Add the square root of each of columns 6 to 11 as a new field named
# "<column>_square root", keeping the original columns.
# across() with `.names` replaces mutate_all(funs(...)): funs() has been
# deprecated since dplyr 0.8.0 and emitted a lifecycle warning here.
msleep.con.raiz.cuadrada <- msleep[, 6:11] %>%
  mutate(across(everything(), sqrt, .names = "{.col}_square root"))
# List the original columns followed by their square-root counterparts.
colnames(msleep.con.raiz.cuadrada)
## [1] "sleep_total" "sleep_rem"
## [3] "sleep_cycle" "awake"
## [5] "brainwt" "bodywt"
## [7] "sleep_total_square root" "sleep_rem_square root"
## [9] "sleep_cycle_square root" "awake_square root"
## [11] "brainwt_square root" "bodywt_square root"
# Show the widened table.
print(msleep.con.raiz.cuadrada)
## # A tibble: 83 x 12
## sleep_total sleep_rem sleep_cycle awake brainwt bodywt `sleep_total_square~
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12.1 NA NA 11.9 NA 50 3.48
## 2 17 1.8 NA 7 0.0155 0.48 4.12
## 3 14.4 2.4 NA 9.6 NA 1.35 3.79
## 4 14.9 2.3 0.133 9.1 0.00029 0.019 3.86
## 5 4 0.7 0.667 20 0.423 600 2
## 6 14.4 2.2 0.767 9.6 NA 3.85 3.79
## 7 8.7 1.4 0.383 15.3 NA 20.5 2.95
## 8 7 NA NA 17 NA 0.045 2.65
## 9 10.1 2.9 0.333 13.9 0.07 14 3.18
## 10 3 NA NA 21 0.0982 14.8 1.73
## # ... with 73 more rows, and 5 more variables: sleep_rem_square root <dbl>,
## # sleep_cycle_square root <dbl>, awake_square root <dbl>,
## # brainwt_square root <dbl>, bodywt_square root <dbl>
\[1.4.2~mutate_-at~to~Add~Fields\]
# The Titanic data ships as a contingency table, so it is first converted
# to a data frame (one row per Class/Sex/Age/Survived combination).
# Note: this replaces the `Titanic` object in the workspace.
data(Titanic)
Titanic <- as.data.frame(Titanic)
head(Titanic, n = 6)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
# Then add a descending min-rank for Class, Age and Survived as new columns
# named Class_Rank, Age_Rank and Survived_Rank.
# across() with `.names` replaces mutate_at(funs(...)); funs() has been
# deprecated since dplyr 0.8.0.
Titanic.con.rangos <- Titanic %>%
  mutate(
    across(
      c(Class, Age, Survived),
      ~ min_rank(desc(.x)),
      .names = "{.col}_Rank"
    )
  )
head(Titanic.con.rangos)
## Class Sex Age Survived Freq Class_Rank Age_Rank Survived_Rank
## 1 1st Male Child No 0 25 17 17
## 2 2nd Male Child No 0 17 17 17
## 3 3rd Male Child No 35 9 17 17
## 4 Crew Male Child No 0 1 17 17
## 5 1st Female Child No 0 25 17 17
## 6 2nd Female Child No 0 17 17 17
\[1.4.3~mutate_-if\]
# mutate_if creates a new variable or alters an existing one.
# Helper: divide a number (or numeric vector) by 10.
divide.by.10 <- function(a.number) {
  a.number / 10
}
head(CO2, n = 6)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
# Apply the helper defined above to every numeric column of CO2.
# across(where(is.numeric), ...) replaces the superseded mutate_if() and
# matches the across() style already used later in this file.
new.df <- CO2 %>%
  mutate(across(where(is.numeric), divide.by.10))
head(new.df)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 9.5 1.60
## 2 Qn1 Quebec nonchilled 17.5 3.04
## 3 Qn1 Quebec nonchilled 25.0 3.48
## 4 Qn1 Quebec nonchilled 35.0 3.72
## 5 Qn1 Quebec nonchilled 50.0 3.53
## 6 Qn1 Quebec nonchilled 67.5 3.92
# Alternative use: replacing missing numbers with zeros (0).
# Build a small data frame that has NA in two of its columns.
df <- data.frame(
  alfa = c(22, 1, NA),
  almendra = c(0, 5, 10),
  uva = c(0, 2, 2),
  manzana = c(NA, 5, 10)
)
print(df)
## alfa almendra uva manzana
## 1 22 0 0 NA
## 2 1 5 2 5
## 3 NA 10 2 10
# Replace missing values with 0 in every numeric column.
# The earlier form passed the replacement through an argument literally named
# `...`, which relies on how mutate_if() forwards arguments to coalesce();
# an explicit lambda states the intent directly.
df.fix.alpha <- df %>%
  mutate(across(where(is.numeric), ~ coalesce(.x, 0)))
df.fix.alpha
## alfa almendra uva manzana
## 1 22 0 0 0
## 2 1 5 2 5
## 3 0 10 2 10
\[1.4.4~String~Detect~and~True/False~Duplicate~Indicator\]
# Exclude categories of a variable according to the letters they contain.
msleep <- ggplot2::msleep
# Counts per diet type before filtering (NA values are not tabulated).
table(msleep[["vore"]])
##
## carni herbi insecti omni
## 19 32 5 20
# Drop every row whose `vore` contains a "c" or an "a". The pattern is not
# anchored, so the letters match anywhere in the value (which is why
# "insecti" is removed too), not only as the initial letter.
msleep.sin.c.o.a <- msleep %>%
  filter(!str_detect(vore, paste(c("c", "a"), collapse = "|")))
table(msleep.sin.c.o.a[["vore"]])
##
## herbi omni
## 32 20
# Add a field that is TRUE when the row's `conservation` value has already
# appeared in an earlier row.
msleep.con.dup.indicador <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))
msleep.con.dup.indicador[seq_len(6), ]
## # A tibble: 6 x 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheetah Acin~ carni Carn~ lc 12.1 NA NA 11.9
## 2 Owl mo~ Aotus omni Prim~ <NA> 17 1.8 NA 7
## 3 Mounta~ Aplo~ herbi Rode~ nt 14.4 2.4 NA 9.6
## 4 Greate~ Blar~ omni Sori~ lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667 20
## 6 Three-~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767 9.6
## # ... with 3 more variables: brainwt <dbl>, bodywt <dbl>,
## # duplicate.indicator <lgl>
# How the "duplicate.indicator" field is created: same computation as before,
# this time displaying only the columns needed to read the flag.
msleep.con.dup.indicador <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))
msleep.con.dup.indicador[seq_len(6), c(1:3, 12)]
## # A tibble: 6 x 4
## name genus vore duplicate.indicator
## <chr> <chr> <chr> <lgl>
## 1 Cheetah Acinonyx carni FALSE
## 2 Owl monkey Aotus omni FALSE
## 3 Mountain beaver Aplodontia herbi FALSE
## 4 Greater short-tailed shrew Blarina omni TRUE
## 5 Cow Bos herbi FALSE
## 6 Three-toed sloth Bradypus herbi TRUE
# Flag rows whose (conservation, genus) pair has already appeared, then sort
# with `conservation` as the major key and `genus` as the minor key.
# duplicated(conservation, genus) would pass `genus` as the `incomparables`
# argument instead of as a second key, so the two columns are wrapped in a
# data frame to test the pair.
msleep.with.dup.indicator2 <- msleep %>%
  mutate(duplicate.indicator = duplicated(data.frame(conservation, genus))) %>%
  arrange(conservation, genus)
msleep.with.dup.indicator2
## # A tibble: 83 x 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Giraf~ Gira~ herbi Arti~ cd 1.9 0.4 NA 22.1
## 2 Pilot~ Glob~ carni Ceta~ cd 2.7 0.1 NA 21.4
## 3 Cow Bos herbi Arti~ domesticated 4 0.7 0.667 20
## 4 Dog Canis carni Carn~ domesticated 10.1 2.9 0.333 13.9
## 5 Guine~ Cavis herbi Rode~ domesticated 9.4 0.8 0.217 14.6
## 6 Chinc~ Chin~ herbi Rode~ domesticated 12.5 1.5 0.117 11.5
## 7 Horse Equus herbi Peri~ domesticated 2.9 0.6 1 21.1
## 8 Donkey Equus herbi Peri~ domesticated 3.1 0.4 NA 20.9
## 9 Domes~ Felis carni Carn~ domesticated 12.5 3.2 0.417 11.5
## 10 Rabbit Oryc~ herbi Lago~ domesticated 8.4 0.9 0.417 15.6
## # ... with 73 more rows, and 3 more variables: brainwt <dbl>, bodywt <dbl>,
## # duplicate.indicator <lgl>
# Small example where the duplicate indicator reports TRUE for repeats.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 55, 15, 9)
w <- c(2, 2, 2, 4, 5, 6)
# Column names are taken from the vector names.
df <- data.frame(fruit, x, y, z, w)
print(df)
## fruit x y z w
## 1 apple 1 22 3 2
## 2 pear 2 3 1 2
## 3 orange 4 4 4 2
## 4 grape 9 55 55 4
## 5 orange 4 15 15 5
## 6 orange 6 9 9 6
# Mark the second and later occurrences of each fruit as duplicates.
# No rows are removed; only the indicator column is added.
df.sin.duplicados <- df %>%
  mutate(duplicate.indicator = duplicated(fruit))
print(df.sin.duplicados)
## fruit x y z w duplicate.indicator
## 1 apple 1 22 3 2 FALSE
## 2 pear 2 3 1 2 FALSE
## 3 orange 4 4 4 2 FALSE
## 4 grape 9 55 55 4 FALSE
## 5 orange 4 15 15 5 TRUE
## 6 orange 6 9 9 6 TRUE
\[1.4.5~Drop~Variables~Using~NULL\]
# How to drop a variable: assign NULL to it inside mutate().
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit, x, y, z)
# Remove column z.
df <- df %>%
  mutate(z = NULL)
print(df)
## fruit x y
## 1 apple 1 22
## 2 pear 2 3
## 3 orange 4 4
## 4 grape 9 55
## 5 orange 4 15
## 6 orange 6 9
\[1.4.6~Preferred~coding~sequence\]
# Install nycflights13 only when it is missing, then attach it.
# require() merely returns FALSE when the package is absent, so after a fresh
# install the package was never attached and `flights` would not be found by
# the code below. requireNamespace() checks availability without attaching,
# and library() attaches it (or stops with an error).
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# Simple approach (presented here as not recommended): compute every derived
# field in one mutate() call, later fields reusing earlier ones.
flights %>%
  mutate(
    gain = arr_delay - dep_delay,
    hours = air_time / 60,
    gain_per_hour = gain / hours,
    gain_per_minute = 60 * gain_per_hour
  )
## # A tibble: 336,776 x 23
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ... with 336,766 more rows, and 15 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## # gain <dbl>, hours <dbl>, gain_per_hour <dbl>, gain_per_minute <dbl>
# Preferred approach: apply mutate() in stages, one stage per level of
# dependency between the computed fields.
newfield.flights <- flights %>%
  mutate(
    gain = arr_delay - dep_delay,
    hours = air_time / 60
  ) %>%
  mutate(gain_per_hour = gain / hours) %>%
  mutate(gain_per_minute = 60 * gain_per_hour)
# Show year, month and the four new fields for the first six rows.
newfield.flights[seq_len(6), c(1:2, 20:23)]
## # A tibble: 6 x 6
## year month gain hours gain_per_hour gain_per_minute
## <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2013 1 9 3.78 2.38 143.
## 2 2013 1 16 3.78 4.23 254.
## 3 2013 1 31 2.67 11.6 698.
## 4 2013 1 -17 3.05 -5.57 -334.
## 5 2013 1 -19 1.93 -9.83 -590.
## 6 2013 1 16 2.5 6.4 384
\[1.4.7~Transmute:~Keep~Only~Variables~Created\]
# transmute() builds new variables from existing ones and keeps only those.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit, x, y, z)
# Replace df by a single column holding the row-wise total x + y + z.
df <- df %>% transmute(sumatoriafruta = x + y + z)
df
## sumatoriafruta
## 1 26
## 2 6
## 3 12
## 4 74
## 5 31
## 6 23
\[1.4.8~Use~Across~to~Apply~a~Function~over~Multiple~Columns\]
# Helper used by the across() example: returns its argument multiplied by two.
double.it <- function(x) {
  x * 2
}
head(iris) # the original values, for comparison
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Apply double.it to every numeric column; the Species column is not numeric
# and is therefore left unchanged.
iris %>%
  mutate(across(where(is.numeric), double.it)) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 10.2 7.0 2.8 0.4 setosa
## 2 9.8 6.0 2.8 0.4 setosa
## 3 9.4 6.4 2.6 0.4 setosa
## 4 9.2 6.2 3.0 0.4 setosa
## 5 10.0 7.2 2.8 0.4 setosa
## 6 10.8 7.8 3.4 0.8 setosa
\[1.4.9~Conditional~Mutating~Using~case_-when\]
# mutate() combined with case_when() rewrites a column according to a set of
# conditions. Build the example data frame first: fila1 holds the column names
# and fila2..fila5 are the four data rows.
fila1 <- c("a", "b", "c", "d", "e", "f", "column.to.be.changed")
fila2 <- c(1, 1, 1, 6, 6, 1, 2)
fila3 <- c(3, 4, 4, 6, 4, 4, 4)
fila4 <- c(4, 6, 25, 5, 5, 2, 9)
fila5 <- c(5, 3, 6, 3, 3, 6, 2)
df <- setNames(as.data.frame(rbind(fila2, fila3, fila4, fila5)), fila1)
df # the data frame that is modified in the next step
## a b c d e f column.to.be.changed
## fila2 1 1 1 6 6 1 2
## fila3 3 4 4 6 4 4 4
## fila4 4 6 25 5 5 2 9
## fila5 5 3 6 3 3 6 2
# The first condition that is TRUE decides the new value of
# column.to.be.changed: 2, then 3, otherwise NA.
new.df <- df %>%
  mutate(
    column.to.be.changed = case_when(
      a == 2 | a == 5 | a == 7 | (a == 1 & b == 4) ~ 2,
      a == 0 | a == 1 | a == 4 | a == 3 | c == 4 ~ 3,
      TRUE ~ NA_real_
    )
  )
new.df
## a b c d e f column.to.be.changed
## fila2 1 1 1 6 6 1 3
## fila3 3 4 4 6 4 4 3
## fila4 4 6 25 5 5 2 3
## fila5 5 3 6 3 3 6 2
\[1.5~Select~to~Choose~Variables/Columns\\[18pt]1.5.1~Delete~a~column\]
library(tidyverse)
# Example data frame used to demonstrate dropping a column.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit, x, y, z)
df
## fruit x y z
## 1 apple 1 22 3
## 2 pear 2 3 1
## 3 orange 4 4 4
## 4 grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6 9 8
# A minus sign inside select() removes the named column; here fruit is dropped.
nuevo.sin.fruta <- df %>% dplyr::select(-fruit)
nuevo.sin.fruta
## x y z
## 1 1 22 3
## 2 2 3 1
## 3 4 4 4
## 4 9 55 10
## 5 4 15 12
## 6 6 9 8
\[1.5.2~Delete~Columns~by~Name~Using~start_-with~or~ends_-with\]
data("mtcars")
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Drop every column whose name begins with "d" (disp and drat).
mtcars.sin.d <- mtcars %>% select(-starts_with("d"))
mtcars.sin.d
## mpg cyl hp wt qsec vs am gear carb
## Mazda RX4 21.0 6 110 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 110 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 93 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 110 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 175 3.440 17.02 0 0 3 2
## Valiant 18.1 6 105 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 245 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 62 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 95 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 123 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 123 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 180 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 180 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 180 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 205 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 215 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 230 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 66 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 52 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 65 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 97 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 150 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 150 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 245 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 175 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 66 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 91 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 113 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 264 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 175 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 335 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 109 2.780 18.60 1 1 4 2
# Drop every column whose name ends with "t" (drat and wt). The selector
# previously used ends_with("d"), which matches no mtcars column, so nothing
# was removed despite the stated intent and the variable name.
mtcars.sin.t.final <- select(mtcars, -ends_with("t"))
mtcars.sin.t.final
## mpg cyl disp hp qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 18.60 1 1 4 2
\[1.5.3~Rearrange~Column~Order\]
# Build the example data frame for the column-order section.
fruit <- c("apple","pear","orange","grape","orange","orange")
x <- c(1,2,4,9,4,6)
y <- c(22,3,4,55,15,9)
z <- c(3,1,4,10,12,8)
df <- data.frame(fruit,x,y,z)# create the data frame
# NOTE(review): this section is titled "Rearrange Column Order", but no
# reordering call (e.g. select()) appears here; df is printed with its columns
# in construction order. Confirm whether a step is missing.
df# columns appear in the order in which they were passed to data.frame()
## fruit x y z
## 1 apple 1 22 3
## 2 pear 2 3 1
## 3 orange 4 4 4
## 4 grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6 9 8
\[1.5.4~select_-all~to~Apply~a~Function~to~All~Columns\]
# Three-state example table.
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- c(61372, 61372, 61372)
life.expectancy <- c(78.8, 78.3, 80.3)
top.3.estados <- data.frame(state, income, median.us, life.expectancy)
top.3.estados
## state income median.us life.expectancy
## 1 Maryland 76067 61372 78.8
## 2 Alaska 74444 61372 78.3
## 3 New Jersey 73702 61372 80.3
# select_all() keeps every column and passes each column name through
# toupper(), so all the headers become upper case.
new..top.3.estados <- top.3.estados %>% select_all(toupper)
new..top.3.estados
## STATE INCOME MEDIAN.US LIFE.EXPECTANCY
## 1 Maryland 76067 61372 78.8
## 2 Alaska 74444 61372 78.3
## 3 New Jersey 73702 61372 80.3
\[1.5.5~Select~Columns~using~the~pull~function\]
# Rebuild the three-state table from the vectors defined earlier.
top.3.estados <- data.frame(state, income, median.us, life.expectancy)
top.3.estados
## state income median.us life.expectancy
## 1 Maryland 76067 61372 78.8
## 2 Alaska 74444 61372 78.3
## 3 New Jersey 73702 61372 80.3
# pull() extracts a single column as a plain vector; position 1 is `state`.
# A negative position counts from the right, so -1 would pull the LAST column
# (life.expectancy); it does not hide column 1.
pull.first.column <- pull(top.3.estados,1)
pull.first.column
## [1] "Maryland" "Alaska" "New Jersey"
\[1.5.6~Select~Rows:~Any~Variable~Meets~Some~Condition\]
nrow(mtcars)
## [1] 32
# Keep the rows in which at least one variable is greater than 200, then count
# how many rows qualify.
# NOTE(review): filter_all()/any_vars() are superseded in current dplyr;
# filter(mtcars, if_any(everything(), ~ .x > 200)) is the documented successor.
mtcars.mayor.200<-filter_all(mtcars, any_vars(.>200))
nrow(mtcars.mayor.200)
## [1] 16
\[1.5.7~Select~Columns:~omit~if~Column~Name~Contains~Specific~Characters\]
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Drop every column whose name contains the letter "p" (mpg, disp and hp).
cars.sin.p <- dplyr::select(mtcars, -contains("p"))
names(cars.sin.p)
## [1] "cyl" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
\[1.6~Joins:~Manipulations~of~Data~from~two~sources\\[18pt]1.6.1~Left~Join~(most~common)\]
# Pair each state abbreviation with its area. data.frame() keeps state.area
# numeric; going through cbind() first builds a character matrix and silently
# turns the areas into text.
us.areas <- data.frame(state.abb, state.area)
us.areas[1:3, ]
## state.abb state.area
## 1 AL 51609
## 2 AK 589757
## 3 AZ 113909
# Lookup table mapping each state abbreviation to the full state name.
us.area.abre <- as.data.frame(cbind(state.abb, state.name))
us.area.abre[1:3, ]
## state.abb state.name
## 1 AL Alabama
## 2 AK Alaska
## 3 AZ Arizona
# Left join: keep every row of us.areas and attach the matching state name.
state.info.abb.area.name <- left_join(us.areas, us.area.abre, by = "state.abb")
head(state.info.abb.area.name)
## state.abb state.area state.name
## 1 AL 51609 Alabama
## 2 AK 589757 Alaska
## 3 AZ 113909 Arizona
## 4 AR 53104 Arkansas
## 5 CA 158693 California
## 6 CO 104247 Colorado
\[1.6.2~Inner~join\]
# Inner join: combine two data frames, keeping only the keys found in both.
# First data frame: team information.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(names, team.scores, team.league)
# Second data frame: school grades.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names, school.grades)
school.and.team <- team.info %>% inner_join(school.info, by = "names")
school.and.team
## names team.scores team.league school.grades
## 1 Sally 3 alpha A
## 2 Tom 5 beta B
## 3 Alfonzo 7 omicron B
\[1.6.3~Anti-join\]
# First data frame: team information.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(names, team.scores, team.league)
team.info
## names team.scores team.league
## 1 Sally 3 alpha
## 2 Tom 5 beta
## 3 Frieda 2 gamma
## 4 Alfonzo 7 omicron
# Second data frame: school grades.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names, school.grades)
school.info
## names school.grades
## 1 Sally A
## 2 Tom B
## 3 Bill C
## 4 Alfonzo B
# Anti join: the team.info rows with no match in school.info, i.e. the team
# members that have no grade.
team.info.but.no.grades <- team.info %>% anti_join(school.info, by = "names")
team.info.but.no.grades
## names team.scores team.league
## 1 Frieda 2 gamma
\[1.6.4~Full~Join\]
# First data frame: team information.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(names, team.scores, team.league)
# Second data frame: school grades.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names, school.grades)
# Full join: keep every row of both tables; cells without a match become NA.
team.info.and.or.grades <- team.info %>% full_join(school.info, by = "names")
team.info.and.or.grades
## names team.scores team.league school.grades
## 1 Sally 3 alpha A
## 2 Tom 5 beta B
## 3 Frieda 2 gamma <NA>
## 4 Alfonzo 7 omicron B
## 5 Bill NA <NA> C
\[1.6.5~semi-join\]
# Semi join: keep the team.info rows that have a match in school.info, without
# adding any column from school.info. No `by` is given, so dplyr joins on the
# shared column and reports its choice in a message.
team.info.with.grades <- team.info %>% semi_join(school.info)
## Joining, by = "names"
team.info.with.grades
## names team.scores team.league
## 1 Sally 3 alpha
## 2 Tom 5 beta
## 3 Alfonzo 7 omicron
\[1.6.6~Right~Join\]
# Abbreviation and area of every state. data.frame() keeps state.area numeric,
# whereas cbind() would first coerce both vectors to a character matrix.
us.state.areas <- data.frame(state.abb, state.area)
us.state.areas[1:3, ]
## state.abb state.area
## 1 AL 51609
## 2 AK 589757
## 3 AZ 113909
# Abbreviation and full name of every state.
us.state.abbreviation.and.name <- as.data.frame(
  cbind(state.abb, state.name)
)
us.state.abbreviation.and.name[1:3, ]
## state.abb state.name
## 1 AL Alabama
## 2 AK Alaska
## 3 AZ Arizona
# Overwrite the first abbreviation so that it matches no row of
# us.state.areas, then right-join: every row of the name table is kept.
us.state.abbreviation.and.name[1, 1] <- "Intentional Mismatch"
us.state.with.abbreviation.and.name.and.area <- right_join(
  us.state.areas,
  us.state.abbreviation.and.name,
  by = "state.abb"
)
us.state.with.abbreviation.and.name.and.area[1:3, ]
## state.abb state.area state.name
## 1 AK 589757 Alaska
## 2 AZ 113909 Arizona
## 3 AR 53104 Arkansas
\[1.7~Slice\]
msleep <- ggplot2::msleep
nrow(msleep)
## [1] 83
# Negative positions drop rows: removing rows 6 to n() leaves the first five.
msleep.only.first.5 <- msleep %>% slice(-6:-n())
nrow(msleep.only.first.5)
## [1] 5
# Positive positions keep rows: rows 20 to 39 inclusive.
msleep.20.rows <- slice(msleep, 20:39)
nrow(msleep.20.rows)
## [1] 20
nrow(msleep) - nrow(msleep.20.rows)
## [1] 63
\[1.8~Summarise\]
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
data(gehan)
gehan2 <- gehan
library(tidyverse)
# Total number of rows (patients) in the data set.
summarise(gehan2, kount = n())
## kount
## 1 42
# Number of patients in each treatment group.
gehan2 %>%
  group_by(treat) %>%
  summarise(kount = n())
## # A tibble: 2 x 2
## treat kount
## <fct> <int>
## 1 6-MP 21
## 2 control 21
# Smallest and largest remission time within each treatment group.
gehan2 %>%
  group_by(treat) %>%
  summarise(
    minimum.remission = min(time),
    max.remission = max(time)
  )
## # A tibble: 2 x 3
## treat minimum.remission max.remission
## <fct> <int> <int>
## 1 6-MP 6 35
## 2 control 1 23
\[1.8.1~Summarise~Across\]
library(MASS)
# Take the first 10 rows (observations) of survey; every column is kept.
subset.survey <- survey[1:10,]
library(dplyr)
head(subset.survey)
## Sex Wr.Hnd NW.Hnd W.Hnd Fold Pulse Clap Exer Smoke Height M.I
## 1 Female 18.5 18.0 Right R on L 92 Left Some Never 173.00 Metric
## 2 Male 19.5 20.5 Left R on L 104 Left None Regul 177.80 Imperial
## 3 Male 18.0 13.3 Right L on R 87 Neither None Occas NA <NA>
## 4 Male 18.8 18.9 Right R on L NA Neither None Never 160.00 Metric
## 5 Male 20.0 20.0 Right Neither 35 Right Some Never 165.00 Metric
## 6 Female 18.0 17.7 Right L on R 64 Right Some Never 172.72 Imperial
## Age
## 1 18.250
## 2 17.583
## 3 16.917
## 4 20.333
## 5 23.667
## 6 21.000
# Drop the rows that contain any NA, then average every numeric column within
# each Sex group; result columns are named mean_<column>.
subset.survey %>%
  na.omit() %>%
  group_by(Sex) %>%
  summarise(across(where(is.numeric), mean, .names = "mean_{col}")) %>%
  head()
## # A tibble: 2 x 6
## Sex mean_Wr.Hnd mean_NW.Hnd mean_Pulse mean_Height mean_Age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Female 17.8 17.7 76.7 168. 25.0
## 2 Male 19.1 19.2 76.8 174. 20.3
\[1.9~Gathering:~Convert~Multiple~columns~into~one\]
# gather(): collapse several columns into one key column and one value column.
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- c(61372, 61372, 61372)
life.expectancy <- c(78.8, 78.3, 80.3)
teen.birth.rate.2015 <- c(17, 29.3, 12.1)
teen.birth.rate.2007 <- c(34.3, 42.9, 24.9)
teen.birth.rate.1991 <- c(54.1, 66, 41.3)
top.3.states <- data.frame(
  state, income, median.us, life.expectancy,
  teen.birth.rate.2015, teen.birth.rate.2007, teen.birth.rate.1991
)
# Rename the three rate columns to the bare year they refer to.
names(top.3.states) <- c(
  "state", "income", "median.us", "life.expectancy", "2015", "2007", "1991"
)
top.3.states
## state income median.us life.expectancy 2015 2007 1991
## 1 Maryland 76067 61372 78.8 17.0 34.3 54.1
## 2 Alaska 74444 61372 78.3 29.3 42.9 66.0
## 3 New Jersey 73702 61372 80.3 12.1 24.9 41.3
# Stack the three year columns: the column name goes into `year` and the
# value into `cases`.
new.top.states <- top.3.states %>%
  gather("2015", "2007", "1991", key = "year", value = "cases")
new.top.states
## state income median.us life.expectancy year cases
## 1 Maryland 76067 61372 78.8 2015 17.0
## 2 Alaska 74444 61372 78.3 2015 29.3
## 3 New Jersey 73702 61372 80.3 2015 12.1
## 4 Maryland 76067 61372 78.8 2007 34.3
## 5 Alaska 74444 61372 78.3 2007 42.9
## 6 New Jersey 73702 61372 80.3 2007 24.9
## 7 Maryland 76067 61372 78.8 1991 54.1
## 8 Alaska 74444 61372 78.3 1991 66.0
## 9 New Jersey 73702 61372 80.3 1991 41.3
\[1.10~Spreading:~Consolidation~of~Multiple~Rows~into~one\]
# Example data: one row per Type/Answer pair together with a value n.
df_1 <- data.frame(
  Type = c("TypeA", "TypeA", "TypeB", "TypeB"),
  Answer = c("yes", "No", NA, "No"),
  n = 1:4
)
df_1
## Type Answer n
## 1 TypeA yes 1
## 2 TypeA No 2
## 3 TypeB <NA> 3
## 4 TypeB No 4
# Drop the row whose Answer is missing, then spread the Answer values into
# columns so that each Type ends up on a single row.
df_2 <- df_1 %>%
  filter(!is.na(Answer)) %>%
  spread(key = Answer, value = n)
df_2
## Type No yes
## 1 TypeA 2 1
## 2 TypeB 4 NA
\[1.11~Separate:~Divide~a~Single~Column~into~Multiple~Columns\]
# separate() splits one column into several.
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- c(61372, 61372, 61372)
life.expectancy <- c(78.8, 78.3, 80.3)
# "//" delimits the three values packed into each entry.
teen.birth <- c("17//34.3//54.1", "29.0//42.9//66.0", "12.1//24.9//41.3")
top.3.states <- data.frame(
  state, income, median.us, life.expectancy, teen.birth
)
top.3.states # teen.birth still holds three values per row
## state income median.us life.expectancy teen.birth
## 1 Maryland 76067 61372 78.8 17//34.3//54.1
## 2 Alaska 74444 61372 78.3 29.0//42.9//66.0
## 3 New Jersey 73702 61372 80.3 12.1//24.9//41.3
# Split teen.birth at each "//" into one column per year.
top.3.states.separated.years <- separate(
  top.3.states,
  teen.birth,
  into = c("2015", "2007", "1991"),
  sep = "//"
)
top.3.states.separated.years
## state income median.us life.expectancy 2015 2007 1991
## 1 Maryland 76067 61372 78.8 17 34.3 54.1
## 2 Alaska 74444 61372 78.3 29.0 42.9 66.0
## 3 New Jersey 73702 61372 80.3 12.1 24.9 41.3
\[1.12~Recap~of~handy~DPLYR~Functions\\[20pt]1.12.1~Number~of~Observations~(n)~Used~Across~Multiple~DPLYR~functions\]
#Un simple recuento de grupos es una herramienta de uso diario. La función n() se puede usar dentro de mutate, summarise y filter.
\[1.12.2~Basic~Counts\]
data("msleep")
# n() inside mutate() on ungrouped data writes the total row count on each row.
m <- msleep %>% mutate(kount = n())
m[1:5, c(1:4, 10:12)] # limit the number of columns shown
## # A tibble: 5 x 7
## name genus vore order brainwt bodywt kount
## <chr> <chr> <chr> <chr> <dbl> <dbl> <int>
## 1 Cheetah Acinonyx carni Carnivora NA 50 83
## 2 Owl monkey Aotus omni Primates 0.0155 0.48 83
## 3 Mountain beaver Aplodontia herbi Rodentia NA 1.35 83
## 4 Greater short-tailed shrew Blarina omni Soricomorp~ 0.00029 0.019 83
## 5 Cow Bos herbi Artiodacty~ 0.423 600 83
# Keep only the animals whose diet group (vore) has more than 14 members.
# n() has to be evaluated per group: on ungrouped data it is the total row
# count, so the condition is TRUE for every row and nothing is filtered out.
f <- msleep %>%
  group_by(vore) %>%
  filter(n() > 14) %>%
  ungroup() # drop the grouping so f behaves like a plain tibble afterwards
f[1:5, c(1:4, 10:11)]
## # A tibble: 5 x 6
## name genus vore order brainwt bodywt
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Cheetah Acinonyx carni Carnivora NA 50
## 2 Owl monkey Aotus omni Primates 0.0155 0.48
## 3 Mountain beaver Aplodontia herbi Rodentia NA 1.35
## 4 Greater short-tailed shrew Blarina omni Soricomorpha 0.00029 0.019
## 5 Cow Bos herbi Artiodactyla 0.423 600
\[1.12.3~Nth~Functions\]
# Example vector of salary descriptions.
salary.description <- c(
  "Golden parachute type",
  "Well to do",
  "Average",
  "Below average",
  "bring date seeds instead of flowers"
)
# First element.
salary.description %>% first()
## [1] "Golden parachute type"
# Last element.
salary.description %>% last()
## [1] "bring date seeds instead of flowers"
# Third element counting from the end.
salary.description %>% nth(-3)
## [1] "Average"
# Second element of the vector.
salary.description %>% nth(2)
## [1] "Well to do"
\[1.12.4~Count~Distinct~Values\]
# A vector with 9 elements, some of them repeated.
a.vector <- c(22, 23, 44, 1, 2, 3, 3, 3, 4)
original.length <- length(a.vector)
original.length
## [1] 9
# n_distinct() counts the different values, ignoring repetitions.
distinct.a.vector <- n_distinct(a.vector)
distinct.a.vector
## [1] 7
# A length larger than the distinct count means there are duplicates.
test1 <- if_else(
  original.length == distinct.a.vector,
  "valores unicos",
  "algunos valores duplicados"
)
test1
## [1] "algunos valores duplicados"
# Now a vector in which every value is unique.
b.vector <- c(1, 2, 3, 4, 5, 6)
length(b.vector)
## [1] 6
# Count its distinct values.
distinct.b.vector <- n_distinct(b.vector)
distinct.b.vector
## [1] 6
# Equal counts: every value is unique.
test2 <- if_else(
  length(b.vector) == distinct.b.vector,
  "todos los valores unicos",
  "duplicados"
)
test2
## [1] "todos los valores unicos"
\[1.12.5~na_-if\]
test <- c(100, 0, 999)
# Plain division: the zero entry yields Inf.
x <- 5000 / test
# na_if() turns the zeros into NA first, so that entry becomes NA instead.
x <- 5000 / na_if(test, 0)
x
## [1] 50.000000 NA 5.005005
# class() reports the type of the result.
class(x)
## [1] "numeric"
\[1.12.6~Coalesce~to~Replace~Missing~Values\]
x <- c(33, 4, 11, NA, 9)
x
## [1] 33 4 11 NA 9
# coalesce() replaces each missing value with the fallback given, here 0.
x <- x %>% coalesce(0)
x
## [1] 33 4 11 0 9
\[1.13~Ranking~Functions\\[20pt]1.13.1~Ranking~via~Index\]
y <- c(100, 4, 12, 6, 8, 3)
# row_number() gives the rank of each element of y (1 = smallest).
rank1 <- row_number(y)
rank1
## [1] 6 2 5 3 4 1
# Smallest value: the element whose rank is 1. Indexing y by rank1[1] only
# gave the right answer by coincidence, because positions 1 and 6 happen to
# hold each other's ranks in this particular vector.
y[rank1 == 1]
## [1] 3
# Largest value: the element whose rank equals the length of the vector.
y[rank1 == length(y)]
## [1] 100
\[1.13.2~Minimum~Rank\]
# min_rank() ranks the values of y; y has no repeated values, and the result
# printed below is the same as the one from row_number(y).
rank2<- min_rank(y)
rank2
## [1] 6 2 5 3 4 1
\[1.13.3~Dense~Rank\]
# dense_rank() of y; for this vector the result matches the other rankings.
rank3 <- y %>% dense_rank()
rank3
## [1] 6 2 5 3 4 1
\[1.13.4~Percent~Rank\]
# percent_rank() expresses each rank on a 0 to 1 scale: the smallest value
# gets 0 and the largest gets 1 (for this y the steps are 0.2 apart).
rank4<-percent_rank(y)
rank4
## [1] 1.0 0.2 0.8 0.4 0.6 0.0
\[1.13.5~Cumulative~Distribution~Function\]
# cume_dist(): proportion of values less than or equal to each element.
y <- c(100, 4, 12, 6, 8, 3)
rank5 <- y %>% cume_dist()
rank5
## [1] 1.0000000 0.3333333 0.8333333 0.5000000 0.6666667 0.1666667
# ntile(): split the vector into 3 buckets.
rank6 <- y %>% ntile(3)
rank6
## [1] 3 1 3 2 2 1
# Deciles (0%, 10%, ..., 100%) of the sample, using quantile algorithm type 5.
# Argument names are written in full (`probs`, `length.out`) instead of
# relying on partial argument matching.
test.vector <- c(2, 22, 33, 44, 77, 89, 99)
quantile(test.vector, probs = seq(0, 1, length.out = 11), type = 5)
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 2.0 6.0 20.0 28.6 36.3 44.0 67.1 81.8 90.0 97.0 99.0
\[1.14~Sampling\]
# Draw a random sample of 5 rows from ChickWeight.
# NOTE(review): no seed has been set at this point (set.seed() is only called
# further down), so the rows drawn here will differ from run to run.
data("ChickWeight")
my.sample<-sample_n(ChickWeight, 5)
my.sample
## weight Time Chick Diet
## 1 157 21 19 1
## 2 108 14 4 1
## 3 67 10 13 1
## 4 305 21 32 3
## 5 250 20 39 3
set.seed(833)
# replace = TRUE samples with replacement, so the same row may be drawn more
# than once; use FALSE when a row must not be picked twice.
my.sample <- ChickWeight %>% sample_n(10, replace = TRUE)
my.sample
## weight Time Chick Diet
## 1 98 8 45 4
## 2 42 0 17 1
## 3 98 8 36 3
## 4 51 2 11 1
## 5 198 20 3 1
## 6 237 21 49 4
## 7 205 16 50 4
## 8 170 16 39 3
## 9 332 18 35 3
## 10 144 14 33 3
# Weighted sample of 12 cars, using cyl as the sampling weight so that cars
# with more cylinders are more likely to be picked.
my.sample <- mtcars %>% sample_n(12, weight = cyl)
my.sample[, 1:5]
## mpg cyl disp hp drat
## AMC Javelin 15.2 8 304.0 150 3.15
## Porsche 914-2 26.0 4 120.3 91 4.43
## Merc 280 19.2 6 167.6 123 3.92
## Cadillac Fleetwood 10.4 8 472.0 205 2.93
## Merc 240D 24.4 4 146.7 62 3.69
## Datsun 710 22.8 4 108.0 93 3.85
## Merc 280C 17.8 6 167.6 123 3.92
## Mazda RX4 Wag 21.0 6 160.0 110 3.90
## Merc 450SLC 15.2 8 275.8 180 3.07
## Chrysler Imperial 14.7 8 440.0 230 3.23
## Maserati Bora 15.0 8 301.0 335 3.54
## Valiant 18.1 6 225.0 105 2.76
# sample_frac() draws a fraction of the rows instead of a fixed count: here 2%.
test1 <- ChickWeight %>% sample_frac(0.02)
test1
## weight Time Chick Diet
## 1 48 2 13 1
## 2 62 6 12 1
## 3 197 20 45 4
## 4 234 18 42 4
## 5 58 4 28 2
## 6 163 16 3 1
## 7 103 8 41 4
## 8 103 8 42 4
## 9 120 18 19 1
## 10 48 2 36 3
## 11 80 6 48 4
## 12 137 12 33 3
# Sample a fraction within groups: group starwars by hair colour, then draw
# 7% of the grouped data with replacement.
by_hair_color <- group_by(starwars, hair_color)
my.sample <- by_hair_color %>% sample_frac(.07, replace = TRUE)
my.sample[, 1:5]
## # A tibble: 5 x 5
## # Groups: hair_color [3]
## name height mass hair_color skin_color
## <chr> <int> <dbl> <chr> <chr>
## 1 Eeth Koth 171 NA black brown
## 2 Dormé 165 NA brown light
## 3 Sebulba 112 40 none grey, red
## 4 Shaak Ti 178 57 none red, blue, white
## 5 Tion Medon 206 80 none grey
# tally() on ungrouped data returns the total number of rows.
row.kount.only <- tally(ChickWeight)
row.kount.only
## n
## 1 578
# count() gives the number of rows for each value of Diet.
diet.kount <- count(ChickWeight, Diet)
diet.kount
## Diet n
## 1 1 220
## 2 2 120
## 3 3 120
## 4 4 118
\[1.15~Miscellaneous~DPLYR~Functions\\[20pt]1.15.1~add_-count~for~Groupwise~filtering\]
# add_count() appends n, the number of rows sharing each species; keeping
# n == 1 leaves only the characters that are the sole member of their species.
single.species.kount <- starwars %>%
  add_count(species) %>%
  filter(n == 1)
single.species.kount[, 1:6]
## # A tibble: 29 x 6
## name height mass hair_color skin_color eye_color
## <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Greedo 173 74 <NA> green black
## 2 Jabba Desilijic Tiure 175 1358 <NA> green-tan, brown orange
## 3 Yoda 66 17 white green brown
## 4 Bossk 190 113 none green red
## 5 Ackbar 180 83 none brown mottle orange
## 6 Wicket Systri Warrick 88 20 brown brown brown
## 7 Nien Nunb 160 68 none grey black
## 8 Nute Gunray 191 90 none mottled green red
## 9 Watto 137 NA black blue, grey yellow
## 10 Sebulba 112 40 none grey, red orange
## # ... with 19 more rows
\[1.15.2~Rename\]
# Rename the mpg column to spam_mpg.
mtcars<-rename(mtcars, spam_mpg = mpg)
# Reloading the built-in dataset overwrites the renamed copy, which is why the
# original column names are printed below.
data(mtcars)
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Apply the rename again to the freshly loaded data; this time it is kept.
# NOTE(review): mtcars stays renamed from here on, so later code that refers
# to mpg would need data(mtcars) first.
mtcars<- rename(mtcars, spam_mpg=mpg)
names(mtcars)
## [1] "spam_mpg" "cyl" "disp" "hp" "drat" "wt"
## [7] "qsec" "vs" "am" "gear" "carb"
\[1.15.3~case_-when\]
# case_when() inside mutate() assigns a label from the first condition that
# holds for each row.
data(starwars)
new.starwars <- starwars %>%
  dplyr::select(name, mass, gender, species, height) %>%
  mutate(
    type = case_when(
      height > 200 | mass > 200 ~ "large",
      species == "Droid" ~ "robot",
      TRUE ~ "other"
    )
  )
new.starwars
## # A tibble: 87 x 6
## name mass gender species height type
## <chr> <dbl> <chr> <chr> <int> <chr>
## 1 Luke Skywalker 77 masculine Human 172 other
## 2 C-3PO 75 masculine Droid 167 robot
## 3 R2-D2 32 masculine Droid 96 robot
## 4 Darth Vader 136 masculine Human 202 large
## 5 Leia Organa 49 feminine Human 150 other
## 6 Owen Lars 120 masculine Human 178 other
## 7 Beru Whitesun lars 75 feminine Human 165 other
## 8 R5-D4 32 masculine Droid 97 robot
## 9 Biggs Darklighter 84 masculine Human 183 other
## 10 Obi-Wan Kenobi 77 masculine Human 182 other
## # ... with 77 more rows