\[1.1~Filter~Commands\] \[1.1.1~Single-Condition~filter\]

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the built-in mtcars data set and display all of its rows.
data(mtcars)
mtcars
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# Keep only the cars with 6 cylinders.
# Note the double `==`, which is the equality comparison.
six.cyl.only <- mtcars %>%
  filter(cyl == 6)
six.cyl.only
##                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Valiant        18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Merc 280       19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C      17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6

\[1.1.2~Multiple-Condition~Filter\]

# Filter mtcars for cars that have both six cylinders and 110 horsepower.
# Conditions are separated by commas, and a row must satisfy all of them.
six.cyl.and.110.horse.power <- mtcars %>%
  filter(
    cyl == 6,
    hp == 110
  )

six.cyl.and.110.horse.power
##                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4      21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1

\[1.1.3~OR~Logic~for~Filtering\]

# Filter mtcars for cars with 4 gears OR more than 6 cylinders. The "or"
# condition is written with the `|` operator.
# NOTE: the object name says "more.than.8", but the threshold tested is
# cyl > 6; the name is kept so existing references keep working.
gear.eq.4.or.more.than.8 <- filter(mtcars, gear == 4 | cyl > 6)
gear.eq.4.or.more.than.8
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

\[1.1.4~Filter~by~Minimums,~Maximums,~and~Other\]

# Find the cars at both extremes of engine displacement: the row(s) whose
# disp equals the column maximum OR the column minimum.
# NOTE: despite its name, the result holds the largest engine as well as
# the smallest; the name is kept so existing references keep working.
Smallest.engine.displacement <- filter(
  mtcars,
  disp == max(disp) | disp == min(disp)
)

Smallest.engine.displacement
##                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Toyota Corolla     33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
# Comma-separated conditions must all hold: keep the ChickWeight rows
# with Time below 3 and weight above 53.
data("ChickWeight")
chick.subset <- ChickWeight %>%
  filter(Time < 3, weight > 53)
chick.subset
##   weight Time Chick Diet
## 1     55    2    22    2
## 2     55    2    40    3
## 3     55    2    43    4
## 4     54    2    50    4

\[1.1.5~Filter~Out~Missing~Values~(NAs)~for~a~Specific~Column\]

# Locate missing values: load airquality and preview its first 10 rows
# before any filtering is applied.
data(airquality)
head(airquality, n = 10)
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 5     NA      NA 14.3   56     5   5
## 6     28      NA 14.9   66     5   6
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 10    NA     194  8.6   69     5  10
# Drop every row whose Ozone value is missing, then preview the result.
no.missing.ozone <- airquality %>%
  filter(!is.na(Ozone))
head(no.missing.ozone, n = 8)  # after filtering
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    28      NA 14.9   66     5   6
## 6    23     299  8.6   65     5   7
## 7    19      99 13.8   59     5   8
## 8     8      19 20.1   61     5   9
# Same procedure for the Solar.R values: reload airquality and preview it.

data(airquality)
head(airquality, n = 10)
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 5     NA      NA 14.3   56     5   5
## 6     28      NA 14.9   66     5   6
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 10    NA     194  8.6   69     5  10
# Drop every row whose Solar.R value is missing, then preview the result.
no.missing.Solar.R <- airquality %>%
  filter(!is.na(Solar.R))
head(no.missing.Solar.R, n = 8)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    23     299  8.6   65     5   7
## 6    19      99 13.8   59     5   8
## 7     8      19 20.1   61     5   9
## 8    NA     194  8.6   69     5  10

\[1.1.6~Filter~Rows~with~NAs~Anywhere~in~the~Dataset\]

# Remove every row that has a missing value in any column. Only the first
# 10 rows of airquality are used; `.` is the piped-in subset, so the
# subset expression is written once.
airqua.no.NA.anywhere <- airquality[1:10, ] %>%
  filter(complete.cases(.))

airqua.no.NA.anywhere
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    23     299  8.6   65     5   7
## 6    19      99 13.8   59     5   8
## 7     8      19 20.1   61     5   9

\[1.1.7~Filter~by~\%in\%\]

# Include or exclude specific values: start by loading iris.
data(iris)
NROW(iris)  # number of rows present in the data
## [1] 150
iris
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
table(iris[["Species"]])  # count the rows of each species in the data
## 
##     setosa versicolor  virginica 
##         50         50         50
# Keep only the rows whose Species is one of the two listed values.
Solo.2.especies <- iris %>%
  filter(Species %in% c("setosa", "virginica"))
Solo.2.especies
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 1            5.1         3.5          1.4         0.2    setosa
## 2            4.9         3.0          1.4         0.2    setosa
## 3            4.7         3.2          1.3         0.2    setosa
## 4            4.6         3.1          1.5         0.2    setosa
## 5            5.0         3.6          1.4         0.2    setosa
## 6            5.4         3.9          1.7         0.4    setosa
## 7            4.6         3.4          1.4         0.3    setosa
## 8            5.0         3.4          1.5         0.2    setosa
## 9            4.4         2.9          1.4         0.2    setosa
## 10           4.9         3.1          1.5         0.1    setosa
## 11           5.4         3.7          1.5         0.2    setosa
## 12           4.8         3.4          1.6         0.2    setosa
## 13           4.8         3.0          1.4         0.1    setosa
## 14           4.3         3.0          1.1         0.1    setosa
## 15           5.8         4.0          1.2         0.2    setosa
## 16           5.7         4.4          1.5         0.4    setosa
## 17           5.4         3.9          1.3         0.4    setosa
## 18           5.1         3.5          1.4         0.3    setosa
## 19           5.7         3.8          1.7         0.3    setosa
## 20           5.1         3.8          1.5         0.3    setosa
## 21           5.4         3.4          1.7         0.2    setosa
## 22           5.1         3.7          1.5         0.4    setosa
## 23           4.6         3.6          1.0         0.2    setosa
## 24           5.1         3.3          1.7         0.5    setosa
## 25           4.8         3.4          1.9         0.2    setosa
## 26           5.0         3.0          1.6         0.2    setosa
## 27           5.0         3.4          1.6         0.4    setosa
## 28           5.2         3.5          1.5         0.2    setosa
## 29           5.2         3.4          1.4         0.2    setosa
## 30           4.7         3.2          1.6         0.2    setosa
## 31           4.8         3.1          1.6         0.2    setosa
## 32           5.4         3.4          1.5         0.4    setosa
## 33           5.2         4.1          1.5         0.1    setosa
## 34           5.5         4.2          1.4         0.2    setosa
## 35           4.9         3.1          1.5         0.2    setosa
## 36           5.0         3.2          1.2         0.2    setosa
## 37           5.5         3.5          1.3         0.2    setosa
## 38           4.9         3.6          1.4         0.1    setosa
## 39           4.4         3.0          1.3         0.2    setosa
## 40           5.1         3.4          1.5         0.2    setosa
## 41           5.0         3.5          1.3         0.3    setosa
## 42           4.5         2.3          1.3         0.3    setosa
## 43           4.4         3.2          1.3         0.2    setosa
## 44           5.0         3.5          1.6         0.6    setosa
## 45           5.1         3.8          1.9         0.4    setosa
## 46           4.8         3.0          1.4         0.3    setosa
## 47           5.1         3.8          1.6         0.2    setosa
## 48           4.6         3.2          1.4         0.2    setosa
## 49           5.3         3.7          1.5         0.2    setosa
## 50           5.0         3.3          1.4         0.2    setosa
## 51           6.3         3.3          6.0         2.5 virginica
## 52           5.8         2.7          5.1         1.9 virginica
## 53           7.1         3.0          5.9         2.1 virginica
## 54           6.3         2.9          5.6         1.8 virginica
## 55           6.5         3.0          5.8         2.2 virginica
## 56           7.6         3.0          6.6         2.1 virginica
## 57           4.9         2.5          4.5         1.7 virginica
## 58           7.3         2.9          6.3         1.8 virginica
## 59           6.7         2.5          5.8         1.8 virginica
## 60           7.2         3.6          6.1         2.5 virginica
## 61           6.5         3.2          5.1         2.0 virginica
## 62           6.4         2.7          5.3         1.9 virginica
## 63           6.8         3.0          5.5         2.1 virginica
## 64           5.7         2.5          5.0         2.0 virginica
## 65           5.8         2.8          5.1         2.4 virginica
## 66           6.4         3.2          5.3         2.3 virginica
## 67           6.5         3.0          5.5         1.8 virginica
## 68           7.7         3.8          6.7         2.2 virginica
## 69           7.7         2.6          6.9         2.3 virginica
## 70           6.0         2.2          5.0         1.5 virginica
## 71           6.9         3.2          5.7         2.3 virginica
## 72           5.6         2.8          4.9         2.0 virginica
## 73           7.7         2.8          6.7         2.0 virginica
## 74           6.3         2.7          4.9         1.8 virginica
## 75           6.7         3.3          5.7         2.1 virginica
## 76           7.2         3.2          6.0         1.8 virginica
## 77           6.2         2.8          4.8         1.8 virginica
## 78           6.1         3.0          4.9         1.8 virginica
## 79           6.4         2.8          5.6         2.1 virginica
## 80           7.2         3.0          5.8         1.6 virginica
## 81           7.4         2.8          6.1         1.9 virginica
## 82           7.9         3.8          6.4         2.0 virginica
## 83           6.4         2.8          5.6         2.2 virginica
## 84           6.3         2.8          5.1         1.5 virginica
## 85           6.1         2.6          5.6         1.4 virginica
## 86           7.7         3.0          6.1         2.3 virginica
## 87           6.3         3.4          5.6         2.4 virginica
## 88           6.4         3.1          5.5         1.8 virginica
## 89           6.0         3.0          4.8         1.8 virginica
## 90           6.9         3.1          5.4         2.1 virginica
## 91           6.7         3.1          5.6         2.4 virginica
## 92           6.9         3.1          5.1         2.3 virginica
## 93           5.8         2.7          5.1         1.9 virginica
## 94           6.8         3.2          5.9         2.3 virginica
## 95           6.7         3.3          5.7         2.5 virginica
## 96           6.7         3.0          5.2         2.3 virginica
## 97           6.3         2.5          5.0         1.9 virginica
## 98           6.5         3.0          5.2         2.0 virginica
## 99           6.2         3.4          5.4         2.3 virginica
## 100          5.9         3.0          5.1         1.8 virginica
# Count the rows again after filtering. Do not confuse nrow() with
# rnorm(), which generates random normal values.
NROW(Solo.2.especies)
## [1] 100
table(Solo.2.especies[["Species"]])
## 
##     setosa versicolor  virginica 
##         50          0         50

\[1.1.8~Filter~for~Ozone>29~and~Include~Only~Three~Columns\]

# Keep the rows with Ozone above 29 and only the first three columns.
# The `[, 1:3]` column subset goes outside the filter() call: it is
# applied to the data frame that filter() returns.
data(airquality)
filtrado <- (airquality %>% filter(Ozone > 29))[, 1:3]
head(filtrado)
##   Ozone Solar.R Wind
## 1    41     190  7.4
## 2    36     118  8.0
## 3    34     307 12.0
## 4    30     322 11.5
## 5    32      92 12.0
## 6    45     252 14.9

\[1.1.9~Filter~by~Total~Frequency~of~a~Value~Across~All~Rows\]

head(mtcars, n = 6L)  # preview the table
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Keep only the cars whose gear value is shared by more than 10 cars.
# n() is evaluated per gear group, so each group is kept or dropped whole.
# NOTE: the object name refers to "años" (years), but the grouping
# variable is gear; the name is kept so existing references keep working.
# The grouping is removed at the end so that later verbs applied to this
# object are not silently evaluated per gear group.
años.mas.frecuentes <- mtcars %>%
  group_by(gear) %>%
  filter(n() > 10) %>%
  ungroup()

table(años.mas.frecuentes$gear)
## 
##  3  4 
## 15 12
# Adding extra criteria: within the gear groups that have more than 10
# cars, keep the cars with hp below 100 and mpg above 30.
# The result is still grouped by gear.
años.frecuentes.bajoshp.bajoconsumo <- mtcars %>%
  group_by(gear) %>%
  filter(
    n() > 10,
    hp < 100,
    mpg > 30
  )
head(años.frecuentes.bajoshp.bajoconsumo)
## # A tibble: 3 x 11
## # Groups:   gear [1]
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
## 2  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
## 3  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1

\[1.1.10~Filter~by~Column~Name~Using~"starts~With"\]

names(iris)  # inspect the column names
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
# Keep the columns whose names start with "S". select() is called with
# its package prefix to avoid errors from a same-named function elsewhere.
iris.display <- dplyr::select(iris, starts_with("S"))
head(iris.display)
##   Sepal.Length Sepal.Width Species
## 1          5.1         3.5  setosa
## 2          4.9         3.0  setosa
## 3          4.7         3.2  setosa
## 4          4.6         3.1  setosa
## 5          5.0         3.6  setosa
## 6          5.4         3.9  setosa

\[1.1.11~Filter~Rows:~Columns~Meet~Criteria(filter~at)\]

# Keep the rows in which every listed column meets a criterion: here both
# cyl and hp must equal their own column maximum. filter(if_all(...)) is
# the current replacement for the superseded
# filter_at(vars(...), all_vars(...)) form.

new.mtcars <- mtcars %>%
  filter(if_all(c(cyl, hp), ~ .x == max(.x)))
head(new.mtcars)  # only the Maserati Bora has both the maximum cyl and hp
##               mpg cyl disp  hp drat   wt qsec vs am gear carb
## Maserati Bora  15   8  301 335 3.54 3.57 14.6  0  1    5    8
# Example using data from a study on sleep (msleep, shipped with ggplot2).
msleep <- ggplot2::msleep
msleep
## # A tibble: 83 x 11
##    name   genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
##    <chr>  <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
##  1 Cheet~ Acin~ carni Carn~ lc                  12.1      NA        NA      11.9
##  2 Owl m~ Aotus omni  Prim~ <NA>                17         1.8      NA       7  
##  3 Mount~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA       9.6
##  4 Great~ Blar~ omni  Sori~ lc                  14.9       2.3       0.133   9.1
##  5 Cow    Bos   herbi Arti~ domesticated         4         0.7       0.667  20  
##  6 Three~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767   9.6
##  7 North~ Call~ carni Carn~ vu                   8.7       1.4       0.383  15.3
##  8 Vespe~ Calo~ <NA>  Rode~ <NA>                 7        NA        NA      17  
##  9 Dog    Canis carni Carn~ domesticated        10.1       2.9       0.333  13.9
## 10 Roe d~ Capr~ herbi Arti~ lc                   3        NA        NA      21  
## # ... with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# Select the name, sleep and weight columns, then keep only the rows in
# which every column whose name contains "sleep" is above 5 (hours).
# filter(if_all(...)) replaces the superseded filter_at()/all_vars() form,
# and select() carries its package prefix so that a same-named function
# from another package cannot be picked up by mistake.
msleep.over5 <- msleep %>%
  dplyr::select(name, sleep_total:sleep_rem, brainwt:bodywt) %>%
  filter(if_all(contains("sleep"), ~ .x > 5))

msleep.over5
## # A tibble: 2 x 5
##   name                 sleep_total sleep_rem brainwt bodywt
##   <chr>                      <dbl>     <dbl>   <dbl>  <dbl>
## 1 Thick-tailed opposum        19.4       6.6  NA       0.37
## 2 Giant armadillo             18.1       6.1   0.081  60

\[1.2~Arrange~(Sort)\] \[1.2.1~Ascending\]

# arrange() orders the rows by the columns you choose.
msleep <- ggplot2::msleep
msleep[, 1:4] # show columns 1 through 4
## # A tibble: 83 x 4
##    name                       genus       vore  order       
##    <chr>                      <chr>       <chr> <chr>       
##  1 Cheetah                    Acinonyx    carni Carnivora   
##  2 Owl monkey                 Aotus       omni  Primates    
##  3 Mountain beaver            Aplodontia  herbi Rodentia    
##  4 Greater short-tailed shrew Blarina     omni  Soricomorpha
##  5 Cow                        Bos         herbi Artiodactyla
##  6 Three-toed sloth           Bradypus    herbi Pilosa      
##  7 Northern fur seal          Callorhinus carni Carnivora   
##  8 Vesper mouse               Calomys     <NA>  Rodentia    
##  9 Dog                        Canis       carni Carnivora   
## 10 Roe deer                   Capreolus   herbi Artiodactyla
## # ... with 73 more rows
# Sort by vore, then by order within each vore (both ascending).
animal.name.sequence.asc <- msleep %>% arrange(vore, order)
animal.name.sequence.asc[, 1:4]
## # A tibble: 83 x 4
##    name              genus        vore  order    
##    <chr>             <chr>        <chr> <chr>    
##  1 Cheetah           Acinonyx     carni Carnivora
##  2 Northern fur seal Callorhinus  carni Carnivora
##  3 Dog               Canis        carni Carnivora
##  4 Domestic cat      Felis        carni Carnivora
##  5 Gray seal         Haliochoerus carni Carnivora
##  6 Tiger             Panthera     carni Carnivora
##  7 Jaguar            Panthera     carni Carnivora
##  8 Lion              Panthera     carni Carnivora
##  9 Caspian seal      Phoca        carni Carnivora
## 10 Genet             Genetta      carni Carnivora
## # ... with 73 more rows

\[1.2.2~Descending\]

# Sort by vore ascending, then by order descending within each vore.
animal.name.sequence.desc <- msleep %>% arrange(vore, desc(order))

head(animal.name.sequence.desc)[, 1:4]
## # A tibble: 6 x 4
##   name                       genus         vore  order          
##   <chr>                      <chr>         <chr> <chr>          
## 1 Northern grasshopper mouse Onychomys     carni Rodentia       
## 2 Slow loris                 Nyctibeus     carni Primates       
## 3 Thick-tailed opposum       Lutreolina    carni Didelphimorphia
## 4 Long-nosed armadillo       Dasypus       carni Cingulata      
## 5 Pilot whale                Globicephalus carni Cetacea        
## 6 Common porpoise            Phocoena      carni Cetacea

\[1.3~Rename\\[10pt]Rename~one~or~more~columns~in~a~dataset~and~changes~no~data\]

names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
# rename() takes new_name = old_name pairs; only the column names change.
renames.iris <- iris %>%
  rename(
    ancho.del.petalo = Petal.Width,
    diversidad.de.especies = Species
  )

names(renames.iris)
## [1] "Sepal.Length"           "Sepal.Width"            "Petal.Length"          
## [4] "ancho.del.petalo"       "diversidad.de.especies"

\[1.4~Mutate\\[10pt]*adds~new~variables~to~a~dataframe\]

data(ChickWeight)
ChickWeight[1:2, ] # [rows, columns]: rows 1 and 2, every column
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
# Add a new column holding the base-10 logarithm of weight.
ChickWeight.with.log <- ChickWeight %>%
  mutate(log.of.weight = log10(weight))

ChickWeight.with.log[1:2, ]
##   weight Time Chick Diet log.of.weight
## 1     42    0     1    1      1.623249
## 2     51    2     1    1      1.707570

\[1.4.1~mutate_-all~to~Add~New~fields~All~at~once\]

# This function adds new fields computed from the data already in the table.

msleep <- ggplot2::msleep
names(msleep)
##  [1] "name"         "genus"        "vore"         "order"        "conservation"
##  [6] "sleep_total"  "sleep_rem"    "sleep_cycle"  "awake"        "brainwt"
## [11] "bodywt"
msleep
## # A tibble: 83 x 11
##    name   genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
##    <chr>  <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
##  1 Cheet~ Acin~ carni Carn~ lc                  12.1      NA        NA      11.9
##  2 Owl m~ Aotus omni  Prim~ <NA>                17         1.8      NA       7  
##  3 Mount~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA       9.6
##  4 Great~ Blar~ omni  Sori~ lc                  14.9       2.3       0.133   9.1
##  5 Cow    Bos   herbi Arti~ domesticated         4         0.7       0.667  20  
##  6 Three~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767   9.6
##  7 North~ Call~ carni Carn~ vu                   8.7       1.4       0.383  15.3
##  8 Vespe~ Calo~ <NA>  Rode~ <NA>                 7        NA        NA      17  
##  9 Dog    Canis carni Carn~ domesticated        10.1       2.9       0.333  13.9
## 10 Roe d~ Capr~ herbi Arti~ lc                   3        NA        NA      21  
## # ... with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# Add the square root of every column in positions 6 to 11.
# A named list replaces funs(), which was deprecated in dplyr 0.8.0 and
# raised a warning on every run. The list name is still used as the suffix
# of each new column ("<column>_square root").

msleep.con.raiz.cuadrada <- mutate_all(
  msleep[, 6:11],
  list("square root" = sqrt)
)
names(msleep.con.raiz.cuadrada)
##  [1] "sleep_total"             "sleep_rem"              
##  [3] "sleep_cycle"             "awake"                  
##  [5] "brainwt"                 "bodywt"                 
##  [7] "sleep_total_square root" "sleep_rem_square root"  
##  [9] "sleep_cycle_square root" "awake_square root"      
## [11] "brainwt_square root"     "bodywt_square root"
msleep.con.raiz.cuadrada
## # A tibble: 83 x 12
##    sleep_total sleep_rem sleep_cycle awake  brainwt  bodywt `sleep_total_square~
##          <dbl>     <dbl>       <dbl> <dbl>    <dbl>   <dbl>                <dbl>
##  1        12.1      NA        NA      11.9 NA        50                     3.48
##  2        17         1.8      NA       7    0.0155    0.48                  4.12
##  3        14.4       2.4      NA       9.6 NA         1.35                  3.79
##  4        14.9       2.3       0.133   9.1  0.00029   0.019                 3.86
##  5         4         0.7       0.667  20    0.423   600                     2   
##  6        14.4       2.2       0.767   9.6 NA         3.85                  3.79
##  7         8.7       1.4       0.383  15.3 NA        20.5                   2.95
##  8         7        NA        NA      17   NA         0.045                 2.65
##  9        10.1       2.9       0.333  13.9  0.07     14                     3.18
## 10         3        NA        NA      21    0.0982   14.8                   1.73
## # ... with 73 more rows, and 5 more variables: sleep_rem_square root <dbl>,
## #   sleep_cycle_square root <dbl>, awake_square root <dbl>,
## #   brainwt_square root <dbl>, bodywt_square root <dbl>

\[1.4.2~mutate_-at~to~Add~Fields\]

# The data start out as a table, so convert them to a data frame first.

data("Titanic")
Titanic <- as.data.frame(Titanic)
head(Titanic)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0
# Next, add a descending rank column for Class, Age and Survived.
# A named list with a lambda replaces the deprecated funs(); the list name
# ("Rank") is appended to each source column name, e.g. Class_Rank.
Titanic.con.rangos <- mutate_at(
  Titanic,
  vars(Class, Age, Survived),
  list(Rank = ~ min_rank(desc(.)))
)
head(Titanic.con.rangos)
##   Class    Sex   Age Survived Freq Class_Rank Age_Rank Survived_Rank
## 1   1st   Male Child       No    0         25       17            17
## 2   2nd   Male Child       No    0         17       17            17
## 3   3rd   Male Child       No   35          9       17            17
## 4  Crew   Male Child       No    0          1       17            17
## 5   1st Female Child       No    0         25       17            17
## 6   2nd Female Child       No    0         17       17            17

\[1.4.3~mutate_-if\]

# Creates a new variable or alters an existing one.
# Helper: divide a number (or a numeric vector) by 10.

divide.by.10 <- function(a.number) {
  a.number / 10
}


head(CO2)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
## 4   Qn1 Quebec nonchilled  350   37.2
## 5   Qn1 Quebec nonchilled  500   35.3
## 6   Qn1 Quebec nonchilled  675   39.2
# Apply divide.by.10 to every numeric column of CO2.
new.df <- mutate_if(CO2, is.numeric, divide.by.10)

head(new.df)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled  9.5   1.60
## 2   Qn1 Quebec nonchilled 17.5   3.04
## 3   Qn1 Quebec nonchilled 25.0   3.48
## 4   Qn1 Quebec nonchilled 35.0   3.72
## 5   Qn1 Quebec nonchilled 50.0   3.53
## 6   Qn1 Quebec nonchilled 67.5   3.92
# Alternative use: replace the missing values (NA) with zero.

df <- data.frame(
  alfa = c(22, 1, NA),
  almendra = c(0, 5, 10),
  uva = c(0, 2, 2),
  manzana = c(NA, 5, 10)
)

df
##   alfa almendra uva manzana
## 1   22        0   0      NA
## 2    1        5   2       5
## 3   NA       10   2      10
# The lambda hands the replacement value to coalesce() explicitly instead of
# passing an argument literally named `...`, which only worked by accident
# of argument matching.
df.fix.alpha <- df %>% mutate_if(is.numeric, ~ coalesce(., 0))
df.fix.alpha
##   alfa almendra uva manzana
## 1   22        0   0       0
## 2    1        5   2       5
## 3    0       10   2      10

\[1.4.4~String~Detect~and~True/False~Duplicate~Indicator\]

# Exclude the rows whose vore contains the letter "c" or "a".
msleep <- ggplot2::msleep
table(msleep$vore)
##
##   carni   herbi insecti    omni
##      19      32       5      20
# The pattern "c|a" is not anchored, so it matches anywhere in the value:
# carni and insecti are both removed, not only values starting with c or a.
msleep.sin.c.o.a <- msleep %>%
  filter(!str_detect(vore, paste(c("c", "a"), collapse = "|")))
table(msleep.sin.c.o.a$vore)
## 
## herbi  omni 
##    32    20
# Add a field that flags conservation values already seen in an earlier row.
msleep.con.dup.indicador <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))

msleep.con.dup.indicador[1:6, ]
## # A tibble: 6 x 12
##   name    genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
##   <chr>   <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
## 1 Cheetah Acin~ carni Carn~ lc                  12.1      NA        NA      11.9
## 2 Owl mo~ Aotus omni  Prim~ <NA>                17         1.8      NA       7  
## 3 Mounta~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA       9.6
## 4 Greate~ Blar~ omni  Sori~ lc                  14.9       2.3       0.133   9.1
## 5 Cow     Bos   herbi Arti~ domesticated         4         0.7       0.667  20  
## 6 Three-~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767   9.6
## # ... with 3 more variables: brainwt <dbl>, bodywt <dbl>,
## #   duplicate.indicator <lgl>
# How the "duplicate.indicator" column is created, shown next to a few
# identifying columns (column 12 is the new indicator).
msleep.con.dup.indicador <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))

msleep.con.dup.indicador[1:6, c(1, 2, 3, 12)]
## # A tibble: 6 x 4
##   name                       genus      vore  duplicate.indicator
##   <chr>                      <chr>      <chr> <lgl>              
## 1 Cheetah                    Acinonyx   carni FALSE              
## 2 Owl monkey                 Aotus      omni  FALSE              
## 3 Mountain beaver            Aplodontia herbi FALSE              
## 4 Greater short-tailed shrew Blarina    omni  TRUE               
## 5 Cow                        Bos        herbi FALSE              
## 6 Three-toed sloth           Bradypus   herbi TRUE
# Flag repeated conservation/genus combinations, then sort with conservation
# as the primary key and genus as the secondary key.
# The second argument of duplicated() is `incomparables`, not another key
# column, so passing genus there never tested the pair. Wrapping both columns
# in a data frame makes duplicated() compare whole rows.

msleep.with.dup.indicator2 <- msleep %>%
  mutate(
    duplicate.indicator = duplicated(data.frame(conservation, genus))
  ) %>%
  arrange(conservation, genus)

msleep.with.dup.indicator2
## # A tibble: 83 x 12
##    name   genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
##    <chr>  <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
##  1 Giraf~ Gira~ herbi Arti~ cd                   1.9       0.4      NA      22.1
##  2 Pilot~ Glob~ carni Ceta~ cd                   2.7       0.1      NA      21.4
##  3 Cow    Bos   herbi Arti~ domesticated         4         0.7       0.667  20  
##  4 Dog    Canis carni Carn~ domesticated        10.1       2.9       0.333  13.9
##  5 Guine~ Cavis herbi Rode~ domesticated         9.4       0.8       0.217  14.6
##  6 Chinc~ Chin~ herbi Rode~ domesticated        12.5       1.5       0.117  11.5
##  7 Horse  Equus herbi Peri~ domesticated         2.9       0.6       1      21.1
##  8 Donkey Equus herbi Peri~ domesticated         3.1       0.4      NA      20.9
##  9 Domes~ Felis carni Carn~ domesticated        12.5       3.2       0.417  11.5
## 10 Rabbit Oryc~ herbi Lago~ domesticated         8.4       0.9       0.417  15.6
## # ... with 73 more rows, and 3 more variables: brainwt <dbl>, bodywt <dbl>,
## #   duplicate.indicator <lgl>
# Small example where the duplicate indicator comes out TRUE.

fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 55, 15, 9)
w <- c(2, 2, 2, 4, 5, 6)

df <- data.frame(fruit = fruit, x = x, y = y, z = z, w = w)
df
##    fruit x  y  z w
## 1  apple 1 22  3 2
## 2   pear 2  3  1 2
## 3 orange 4  4  4 2
## 4  grape 9 55 55 4
## 5 orange 4 15 15 5
## 6 orange 6  9  9 6
# duplicated() is TRUE for the second and later occurrences of a fruit.
df.sin.duplicados <- df %>%
  mutate(duplicate.indicator = duplicated(fruit))

df.sin.duplicados
##    fruit x  y  z w duplicate.indicator
## 1  apple 1 22  3 2               FALSE
## 2   pear 2  3  1 2               FALSE
## 3 orange 4  4  4 2               FALSE
## 4  grape 9 55 55 4               FALSE
## 5 orange 4 15 15 5                TRUE
## 6 orange 6  9  9 6                TRUE

\[1.4.5~Drop~Variables~Using~NULL\]

# How to drop a variable: assign NULL to it inside mutate().
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit, x, y, z) %>%
  mutate(z = NULL) # removes column z
df
##    fruit x  y
## 1  apple 1 22
## 2   pear 2  3
## 3 orange 4  4
## 4  grape 9 55
## 5 orange 4 15
## 6 orange 6  9

\[1.4.6~Preferred~coding~sequence\]

# Install nycflights13 only when it is missing, then attach it.
# require() merely returns FALSE on failure and leaves a freshly installed
# package unattached; library() attaches it and stops with an error if the
# package is still unavailable.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# Simple form (the author marks it as not recommended): build every field in
# a single mutate() call; later fields reuse the ones defined before them.
# NOTE(review): gain_per_minute multiplies the hourly rate by 60; a per-minute
# rate would normally divide by 60 - confirm the intended meaning.
mutate(
  flights,
  gain = arr_delay - dep_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours,
  gain_per_minute = 60 * gain_per_hour
)
## # A tibble: 336,776 x 23
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ... with 336,766 more rows, and 15 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## #   gain <dbl>, hours <dbl>, gain_per_hour <dbl>, gain_per_minute <dbl>
# Form preferred by the author: chain one mutate() per step, so each call
# only uses fields computed by the previous calls.
newfield.flights <- flights %>%
  mutate(gain = arr_delay - dep_delay, hours = air_time / 60) %>%
  mutate(gain_per_hour = gain / hours) %>%
  mutate(gain_per_minute = 60 * gain_per_hour)

newfield.flights[1:6, c(1:2, 20:23)]
## # A tibble: 6 x 6
##    year month  gain hours gain_per_hour gain_per_minute
##   <int> <int> <dbl> <dbl>         <dbl>           <dbl>
## 1  2013     1     9  3.78          2.38            143.
## 2  2013     1    16  3.78          4.23            254.
## 3  2013     1    31  2.67         11.6             698.
## 4  2013     1   -17  3.05         -5.57           -334.
## 5  2013     1   -19  1.93         -9.83           -590.
## 6  2013     1    16  2.5           6.4             384

\[1.4.7~Transmute:~Keep~Only~Variables~Created\]

# transmute() builds new variables from the existing columns of the data
# frame and keeps only the variables it creates.

fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit, x, y, z)

df <- df %>% transmute(sumatoriafruta = x + y + z)
df
##   sumatoriafruta
## 1             26
## 2              6
## 3             12
## 4             74
## 5             31
## 6             23

\[1.4.8~Use~Across~to~Apply~a~Function~over~Multiple~Columns\]

# Helper for the across() example: returns twice its argument.
double.it <- function(x) {
  x * 2
}
head(iris) # the original values
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Apply double.it to every numeric column and show the first rows.
iris %>%
  mutate(across(where(is.numeric), double.it)) %>%
  head()
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1         10.2         7.0          2.8         0.4  setosa
## 2          9.8         6.0          2.8         0.4  setosa
## 3          9.4         6.4          2.6         0.4  setosa
## 4          9.2         6.2          3.0         0.4  setosa
## 5         10.0         7.2          2.8         0.4  setosa
## 6         10.8         7.8          3.4         0.8  setosa

\[1.4.9~Conditional~Mutating~Using~case_-when\]

# With mutate() plus case_when() you can rewrite a field and set its values
# according to your own conditions.

fila1 <- c("a", "b", "c", "d", "e", "f", "column.to.be.changed")
fila2 <- c(1, 1, 1, 6, 6, 1, 2)
fila3 <- c(3, 4, 4, 6, 4, 4, 4)
fila4 <- c(4, 6, 25, 5, 5, 2, 9)
fila5 <- c(5, 3, 6, 3, 3, 6, 2)

# fila1 supplies the column names; fila2 to fila5 become the rows.
df <- setNames(
  as.data.frame(rbind(fila2, fila3, fila4, fila5)),
  fila1
)

df # the data frame that is modified afterwards
##       a b  c d e f column.to.be.changed
## fila2 1 1  1 6 6 1                    2
## fila3 3 4  4 6 4 4                    4
## fila4 4 6 25 5 5 2                    9
## fila5 5 3  6 3 3 6                    2
# The first condition that is TRUE decides the value: "column.to.be.changed"
# becomes 2 or 3, and NA when no condition matches.
new.df <- df %>%
  mutate(
    column.to.be.changed = case_when(
      a %in% c(2, 5, 7) | (a == 1 & b == 4) ~ 2,
      a %in% c(0, 1, 4, 3) | c == 4 ~ 3,
      TRUE ~ NA_real_
    )
  )

new.df
##       a b  c d e f column.to.be.changed
## fila2 1 1  1 6 6 1                    3
## fila3 3 4  4 6 4 4                    3
## fila4 4 6 25 5 5 2                    3
## fila5 5 3  6 3 3 6                    2

\[1.5~Select~to~Choose~Variables/Columns\\[18pt]1.5.1~Delete~a~column\]

library(tidyverse)
# Build the data frame before selecting from it.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit = fruit, x = x, y = y, z = z)
df
##    fruit x  y  z
## 1  apple 1 22  3
## 2   pear 2  3  1
## 3 orange 4  4  4
## 4  grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6  9  8
# Remove the fruit column.
nuevo.sin.fruta <- df %>% dplyr::select(-fruit)
nuevo.sin.fruta
##   x  y  z
## 1 1 22  3
## 2 2  3  1
## 3 4  4  4
## 4 9 55 10
## 5 4 15 12
## 6 6  9  8

\[1.5.2~Delete~Columns~by~Name~Using~start_-with~or~ends_-with\]

data("mtcars")
names(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# How to delete the columns whose name starts with "d".

mtcars.sin.d <- mtcars %>% select(-starts_with("d"))

mtcars.sin.d
##                      mpg cyl  hp    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 110 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 110 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4  93 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 110 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 175 3.440 17.02  0  0    3    2
## Valiant             18.1   6 105 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 245 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4  62 3.190 20.00  1  0    4    2
## Merc 230            22.8   4  95 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 123 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 123 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 180 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 180 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 180 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 205 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 215 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 230 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  66 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  52 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  65 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4  97 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 150 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 150 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 245 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 175 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  66 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4  91 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4 113 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 264 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 175 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 335 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 109 2.780 18.60  1  1    4    2
# Delete the columns whose name ends in "t" (drat and wt).
# The helper previously received "d", which matches no column name, so
# nothing was removed; it now matches the letter named here and in the
# variable name.
mtcars.sin.t.final <- select(mtcars, -ends_with("t"))

mtcars.sin.t.final
##                      mpg cyl  disp  hp  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 18.60  1  1    4    2

\[1.5.3~Rearrange~Column~Order\]

# Build the data frame; the columns appear in the order they are listed.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
df <- data.frame(fruit = fruit, x = x, y = y, z = z)
df
##    fruit x  y  z
## 1  apple 1 22  3
## 2   pear 2  3  1
## 3 orange 4  4  4
## 4  grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6  9  8

\[1.5.4~select_-all~to~Apply~a~Function~to~All~Columns\]

# Three states with their income, the US median income and life expectancy.
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- c(61372, 61372, 61372)
life.expectancy <- c(78.8, 78.3, 80.3)
top.3.estados <- data.frame(
  state = state,
  income = income,
  median.us = median.us,
  life.expectancy = life.expectancy
)
top.3.estados
##        state income median.us life.expectancy
## 1   Maryland  76067     61372            78.8
## 2     Alaska  74444     61372            78.3
## 3 New Jersey  73702     61372            80.3
# How to turn every column name into upper case.
new..top.3.estados <- top.3.estados %>% select_all(toupper)

new..top.3.estados
##        STATE INCOME MEDIAN.US LIFE.EXPECTANCY
## 1   Maryland  76067     61372            78.8
## 2     Alaska  74444     61372            78.3
## 3 New Jersey  73702     61372            80.3

\[1.5.5~Select~Columns~using~the~pull~function\]

top.3.estados <- data.frame(state, income, median.us, life.expectancy)
top.3.estados
##        state income median.us life.expectancy
## 1   Maryland  76067     61372            78.8
## 2     Alaska  74444     61372            78.3
## 3 New Jersey  73702     61372            80.3
# pull() extracts one column as a vector; position 1 is the first column.
# A negative position counts from the right, so -1 would return the last
# column rather than hide column 1.
pull.first.column <- top.3.estados %>% pull(1)
pull.first.column
## [1] "Maryland"   "Alaska"     "New Jersey"

\[1.5.6~Select~Rows:~Any~Variable~Meets~Some~Condition\]

nrow(mtcars)
## [1] 32
# Count the rows in which at least one variable is greater than 200.
mtcars.mayor.200 <- mtcars %>% filter_all(any_vars(. > 200))
nrow(mtcars.mayor.200)
## [1] 16

\[1.5.7~Select~Columns:~omit~if~Column~Name~Contains~Specific~Characters\]

names(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# Omit every column whose name contains the letter "p".
cars.sin.p <- dplyr::select(mtcars, -contains("p"))

names(cars.sin.p)
## [1] "cyl"  "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb"

\[1.6~Joins:~Manipulations~of~Data~from~two~sources\\[18pt]1.6.1~Left~Join~(most~common)\]

# Pair each state abbreviation with its area.
# data.frame() keeps state.area numeric; going through cbind() first built a
# character matrix, which silently turned the areas into strings.
us.areas <- data.frame(state.abb, state.area)
us.areas[1:3, ]
##   state.abb state.area
## 1        AL      51609
## 2        AK     589757
## 3        AZ     113909
# Lookup table from state abbreviation to full state name.
us.area.abre <- data.frame(state.abb, state.name)
us.area.abre[1:3, ]
##   state.abb state.name
## 1        AL    Alabama
## 2        AK     Alaska
## 3        AZ    Arizona
# Left join: keep every row of us.areas and add the matching state name.
state.info.abb.area.name <- left_join(
  us.areas,
  us.area.abre,
  by = "state.abb"
)
head(state.info.abb.area.name)
##   state.abb state.area state.name
## 1        AL      51609    Alabama
## 2        AK     589757     Alaska
## 3        AZ     113909    Arizona
## 4        AR      53104   Arkansas
## 5        CA     158693 California
## 6        CO     104247   Colorado

\[1.6.2~Inner~join\]

# Function for joining data frames.
# First data frame: team results.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(names, team.scores, team.league)

# Second data frame: school grades. `names` is overwritten so that both data
# frames share the same key column name.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names, school.grades)
# Inner join: keep only the people present in both data frames.
school.and.team <- team.info %>% inner_join(school.info, by = "names")
school.and.team
##     names team.scores team.league school.grades
## 1   Sally           3       alpha             A
## 2     Tom           5        beta             B
## 3 Alfonzo           7     omicron             B

\[1.6.3~Anti-join\]

# First data frame: team results.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(
  names = names,
  team.scores = team.scores,
  team.league = team.league
)
team.info
##     names team.scores team.league
## 1   Sally           3       alpha
## 2     Tom           5        beta
## 3  Frieda           2       gamma
## 4 Alfonzo           7     omicron
# Second data frame: school grades.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names = names, school.grades = school.grades)
school.info
##     names school.grades
## 1   Sally             A
## 2     Tom             B
## 3    Bill             C
## 4 Alfonzo             B
# Team information for the people who have no grades: anti_join() keeps the
# team.info rows without a match in school.info.

team.info.but.no.grades <- team.info %>%
  anti_join(school.info, by = "names")
team.info.but.no.grades
##    names team.scores team.league
## 1 Frieda           2       gamma

\[1.6.4~Full~Join\]

# First data frame: team results.
names <- c("Sally", "Tom", "Frieda", "Alfonzo")
team.scores <- c(3, 5, 2, 7)
team.league <- c("alpha", "beta", "gamma", "omicron")
team.info <- data.frame(names, team.scores, team.league)

# Second data frame: school grades.
names <- c("Sally", "Tom", "Bill", "Alfonzo")
school.grades <- c("A", "B", "C", "B")
school.info <- data.frame(names, school.grades)

# Full join: keep every row from both data frames, filling gaps with NA.
team.info.and.or.grades <- team.info %>%
  full_join(school.info, by = "names")
team.info.and.or.grades
##     names team.scores team.league school.grades
## 1   Sally           3       alpha             A
## 2     Tom           5        beta             B
## 3  Frieda           2       gamma          <NA>
## 4 Alfonzo           7     omicron             B
## 5    Bill          NA        <NA>             C

\[1.6.5~semi-join\]

# Semi join on the names column: keep the team.info rows that have a match
# in school.info, without adding any column from school.info.
# The key is named explicitly, like the other joins in this file, so the
# join no longer guesses it and prints a "Joining, by" message.
team.info.with.grades <- semi_join(team.info, school.info, by = "names")
team.info.with.grades
##     names team.scores team.league
## 1   Sally           3       alpha
## 2     Tom           5        beta
## 3 Alfonzo           7     omicron

\[1.6.6~Right~Join\]

# Table of state abbreviations and areas.
# data.frame() keeps state.area numeric; going through cbind() first built a
# character matrix, which silently turned the areas into strings.
us.state.areas <- data.frame(state.abb, state.area)
us.state.areas[1:3, ]
##   state.abb state.area
## 1        AL      51609
## 2        AK     589757
## 3        AZ     113909
# Table of state abbreviations and full names.
us.state.abbreviation.and.name <- data.frame(state.abb, state.name)

us.state.abbreviation.and.name[1:3, ]
##   state.abb state.name
## 1        AL    Alabama
## 2        AK     Alaska
## 3        AZ    Arizona
# Combine both tables. The key of the first row is broken on purpose so that
# it has no match in the area table.
us.state.abbreviation.and.name[1, 1] <- "Intentional Mismatch"
us.state.with.abbreviation.and.name.and.area <- us.state.areas %>%
  right_join(us.state.abbreviation.and.name, by = "state.abb")

us.state.with.abbreviation.and.name.and.area[1:3, ]
##   state.abb state.area state.name
## 1        AK     589757     Alaska
## 2        AZ     113909    Arizona
## 3        AR      53104   Arkansas

\[1.7~Slice\]

msleep <- ggplot2::msleep
nrow(msleep)
## [1] 83
# Keep the first five rows. slice_head() states that intent directly and
# stays correct for tables with fewer than six rows, where the negative
# range -6:-n() would drop rows that should be kept.
msleep.only.first.5 <- slice_head(msleep, n = 5)
nrow(msleep.only.first.5)
## [1] 5
# Keep rows 20 to 39 (20 rows).
msleep.20.rows <- msleep %>% slice(20:39)
nrow(msleep.20.rows)
## [1] 20
nrow(msleep) - nrow(msleep.20.rows)
## [1] 63
## [1] 63

\[1.8~Summarise\]

# Load the gehan data set from MASS without attaching the package.
# library(MASS) masks dplyr::select(), and attaching tidyverse again
# afterwards does not undo that because tidyverse is already attached.
data(gehan, package = "MASS")
gehan2 <- gehan
library(tidyverse)

# How many patients are in the study?

summarise(gehan2, kount = n())
##   kount
## 1    42
# How many patients in each treatment group?

gehan2 %>%
  group_by(treat) %>%
  summarise(kount = n())
## # A tibble: 2 x 2
##   treat   kount
##   <fct>   <int>
## 1 6-MP       21
## 2 control    21
# Minimum and maximum remission time within each group.
gehan2 %>%
  group_by(treat) %>%
  summarise(
    minimum.remission = min(time),
    max.remission = max(time)
  )
## # A tibble: 2 x 3
##   treat   minimum.remission max.remission
##   <fct>               <int>         <int>
## 1 6-MP                    6            35
## 2 control                 1            23

\[1.8.1~Summarise~Across\]

library(MASS)
# take the first 10 rows (observations) of survey; all columns are kept
subset.survey <- survey[1:10,]
library(dplyr)
head(subset.survey)
##      Sex Wr.Hnd NW.Hnd W.Hnd    Fold Pulse    Clap Exer Smoke Height      M.I
## 1 Female   18.5   18.0 Right  R on L    92    Left Some Never 173.00   Metric
## 2   Male   19.5   20.5  Left  R on L   104    Left None Regul 177.80 Imperial
## 3   Male   18.0   13.3 Right  L on R    87 Neither None Occas     NA     <NA>
## 4   Male   18.8   18.9 Right  R on L    NA Neither None Never 160.00   Metric
## 5   Male   20.0   20.0 Right Neither    35   Right Some Never 165.00   Metric
## 6 Female   18.0   17.7 Right  L on R    64   Right Some Never 172.72 Imperial
##      Age
## 1 18.250
## 2 17.583
## 3 16.917
## 4 20.333
## 5 23.667
## 6 21.000
# Drop every row that contains a missing value, then average each numeric
# column separately per Sex; result columns are named mean_<column>.
na.omit(subset.survey) %>%
  group_by(Sex) %>%
  summarise(across(where(is.numeric), mean, .names = "mean_{col}")) %>%
  head()
## # A tibble: 2 x 6
##   Sex    mean_Wr.Hnd mean_NW.Hnd mean_Pulse mean_Height mean_Age
##   <fct>        <dbl>       <dbl>      <dbl>       <dbl>    <dbl>
## 1 Female        17.8        17.7       76.7        168.     25.0
## 2 Male          19.1        19.2       76.8        174.     20.3

\[1.9~Gathering:~Convert~Multiple~columns~into~one\]

# Build a small wide table: one row per state, with the teen birth rate of
# each year stored in its own column.
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- rep(61372, 3)
life.expectancy <- c(78.8, 78.3, 80.3)
teen.birth.rate.2015 <- c(17, 29.3, 12.1)
teen.birth.rate.2007 <- c(34.3, 42.9, 24.9)
teen.birth.rate.1991 <- c(54.1, 66, 41.3)
top.3.states <- data.frame(
  state, income, median.us, life.expectancy,
  teen.birth.rate.2015, teen.birth.rate.2007, teen.birth.rate.1991
)
# Shorten the three rate columns to the bare year; the first four names stay.
names(top.3.states)[5:7] <- c("2015", "2007", "1991")
top.3.states
##        state income median.us life.expectancy 2015 2007 1991
## 1   Maryland  76067     61372            78.8 17.0 34.3 54.1
## 2     Alaska  74444     61372            78.3 29.3 42.9 66.0
## 3 New Jersey  73702     61372            80.3 12.1 24.9 41.3
# Stack the three year columns into key/value pairs: `year` holds the former
# column name and `cases` the value that was stored under it.
new.top.states <- gather(
  top.3.states,
  key = "year", value = "cases",
  "2015", "2007", "1991"
)

new.top.states
##        state income median.us life.expectancy year cases
## 1   Maryland  76067     61372            78.8 2015  17.0
## 2     Alaska  74444     61372            78.3 2015  29.3
## 3 New Jersey  73702     61372            80.3 2015  12.1
## 4   Maryland  76067     61372            78.8 2007  34.3
## 5     Alaska  74444     61372            78.3 2007  42.9
## 6 New Jersey  73702     61372            80.3 2007  24.9
## 7   Maryland  76067     61372            78.8 1991  54.1
## 8     Alaska  74444     61372            78.3 1991  66.0
## 9 New Jersey  73702     61372            80.3 1991  41.3

\[1.10~Spreading:~Consolidation~of~Multiple~Rows~into~One\]

# Toy data: two rows per Type, with one Answer deliberately missing.
df_1 <- data.frame(
  Type = rep(c("TypeA", "TypeB"), each = 2),
  Answer = c("yes", "No", NA, "No"),
  n = seq_len(4)
)

df_1
##    Type Answer n
## 1 TypeA    yes 1
## 2 TypeA     No 2
## 3 TypeB   <NA> 3
## 4 TypeB     No 4
# Drop the row with a missing Answer, then widen: each distinct Answer becomes
# a column holding the matching n, leaving one row per Type.

df_2 <- spread(filter(df_1, !is.na(Answer)), key = Answer, value = n)
df_2
##    Type No yes
## 1 TypeA  2   1
## 2 TypeB  4  NA

\[1.11~Separate:~Divide~a~Single~Column~into~Multiple~Columns\]

# separate() splits one column into several. First build a table whose
# teen.birth column packs three values per row, delimited by "//".
state <- c("Maryland", "Alaska", "New Jersey")
income <- c(76067, 74444, 73702)
median.us <- rep(61372, 3)
life.expectancy <- c(78.8, 78.3, 80.3)
teen.birth <- c("17//34.3//54.1", "29.0//42.9//66.0", "12.1//24.9//41.3")

top.3.states <- data.frame(
  state, income, median.us, life.expectancy, teen.birth
)
top.3.states # teen.birth still holds all three values in a single cell
##        state income median.us life.expectancy       teen.birth
## 1   Maryland  76067     61372            78.8   17//34.3//54.1
## 2     Alaska  74444     61372            78.3 29.0//42.9//66.0
## 3 New Jersey  73702     61372            80.3 12.1//24.9//41.3
# Split teen.birth at every "//" into one column per year.
top.3.states.separated.years <- separate(
  top.3.states,
  col = teen.birth,
  into = c("2015", "2007", "1991"),
  sep = "//"
)
top.3.states.separated.years
##        state income median.us life.expectancy 2015 2007 1991
## 1   Maryland  76067     61372            78.8   17 34.3 54.1
## 2     Alaska  74444     61372            78.3 29.0 42.9 66.0
## 3 New Jersey  73702     61372            80.3 12.1 24.9 41.3

\[1.12~Recap~of~handy~DPLYR~Functions\\[20pt]1.12.1~Number~of~Observations~(n)~Used~Across~Multiple~DPLYR~functions\]

# A simple per-group count is an everyday tool. The n() function can be used inside mutate(), summarise() and filter().

\[1.12.2~Basic~Counts\]

data("msleep")
# With no grouping in place, n() is the total row count, repeated on every row.
m <- msleep %>% mutate(kount = n())

m[1:5, c(1:4, 10:12)] # show only a handful of columns
## # A tibble: 5 x 7
##   name                       genus      vore  order        brainwt  bodywt kount
##   <chr>                      <chr>      <chr> <chr>          <dbl>   <dbl> <int>
## 1 Cheetah                    Acinonyx   carni Carnivora   NA        50        83
## 2 Owl monkey                 Aotus      omni  Primates     0.0155    0.48     83
## 3 Mountain beaver            Aplodontia herbi Rodentia    NA         1.35     83
## 4 Greater short-tailed shrew Blarina    omni  Soricomorp~  0.00029   0.019    83
## 5 Cow                        Bos        herbi Artiodacty~  0.423   600        83
# Keep only the animals whose diet class (vore) has more than 14 members.
# n() counts the rows of the *current group*, so the data must be grouped by
# vore first; on ungrouped data n() is the total row count and the condition
# would be TRUE for every row, filtering nothing. ungroup() hands back an
# ordinary tibble so later steps are not affected by the grouping.
f <- msleep %>%
  group_by(vore) %>%
  filter(n() > 14) %>%
  ungroup()
f[1:5, c(1:4, 10:11)]
## # A tibble: 5 x 6
##   name                       genus      vore  order         brainwt  bodywt
##   <chr>                      <chr>      <chr> <chr>           <dbl>   <dbl>
## 1 Cheetah                    Acinonyx   carni Carnivora    NA        50    
## 2 Owl monkey                 Aotus      omni  Primates      0.0155    0.48 
## 3 Mountain beaver            Aplodontia herbi Rodentia     NA         1.35 
## 4 Greater short-tailed shrew Blarina    omni  Soricomorpha  0.00029   0.019
## 5 Cow                        Bos        herbi Artiodactyla  0.423   600

\[1.12.3~Nth~Functions\]

# first element of the vector

salary.description <- c("Golden parachute type","Well to do",
"Average","Below average", "bring date seeds instead of flowers")
first(salary.description)
## [1] "Golden parachute type"
# last element of the vector
last(salary.description)
## [1] "bring date seeds instead of flowers"
# third element counting from the end (a negative position counts backwards)
nth(salary.description, -3)
## [1] "Average"
# second element of the vector
nth(salary.description, 2)
## [1] "Well to do"

\[1.12.4~Count~Distinct~Values\]

# A nine-element vector in which the value 3 occurs three times.
a.vector <- c(22, 23, 44, 1, 2, 3, 3, 3, 4)
original.length <- length(a.vector)
original.length
## [1] 9
# Number of different values, repeats counted once.
distinct.a.vector <- n_distinct(a.vector)

distinct.a.vector
## [1] 7
# Fewer distinct values than elements means something is duplicated.

test1 <- if_else(
  distinct.a.vector == original.length,
  "valores unicos",
  "algunos valores duplicados"
)
test1
## [1] "algunos valores duplicados"
# Now a vector in which every value is unique.
b.vector <- as.numeric(1:6)
length(b.vector)
## [1] 6
# Count its distinct values.
distinct.b.vector <- n_distinct(b.vector)
distinct.b.vector
## [1] 6
# Same number of distinct values as elements: no duplicates.
test2 <- if_else(
  distinct.b.vector == length(b.vector),
  "todos los valores unicos",
  "duplicados"
)
test2
## [1] "todos los valores unicos"

\[1.12.5~na_-if\]

test<-c(100,0,999)
# plain division: the zero element of test yields Inf
x<-5000/test


# na_if(test, 0) turns the zero into NA first, so that element becomes NA instead
x<-5000/na_if(test, 0)
x
## [1] 50.000000        NA  5.005005
# class() shows the type of the result
class(x)
## [1] "numeric"

\[1.12.6~Coalesce~to~Replace~Missing~Values\]

# A numeric vector with one missing entry.
x <- c(33, 4, 11, NA, 9)
x
## [1] 33  4 11 NA  9
# coalesce() fills each NA with the replacement value, here zero.

x <- x %>% coalesce(0)
x
## [1] 33  4 11  0  9

\[1.13~Ranking~Functions\\[20pt]1.13.1~Ranking~via~Index\]

y <- c(100, 4, 12, 6, 8, 3)
# row_number() returns the rank of each element (1 = smallest), aligned with
# y. It is a vector of ranks, not a sort permutation, so a rank must not be
# used directly as a position in y.
rank1 <- row_number(y)
rank1
## [1] 6 2 5 3 4 1
# Smallest value: the element that holds the lowest rank. (y[rank1[1]] would
# pick the element at the position equal to the first element's rank, which
# lands on the minimum only by coincidence for this particular vector.)
y[which.min(rank1)]
## [1] 3
# Largest value: the element that holds the highest rank.
y[which.max(rank1)]
## [1] 100

\[1.13.2~Minimum~Rank\]

# y has no ties, so min_rank() gives the same ranks as row_number() did.
rank2 <- y %>% min_rank()
rank2
## [1] 6 2 5 3 4 1

\[1.13.3~Dense~Rank\]

# Dense ranking of y; with no tied values the result matches the ranks above.
rank3 <- y %>% dense_rank()
rank3
## [1] 6 2 5 3 4 1

\[1.13.4~Percent~Rank\]

# percent_rank() rescales the ranks to the interval [0, 1]: the largest value
# (100) gets 1 and the smallest (3) gets 0.

rank4 <- y %>% percent_rank()
rank4
## [1] 1.0 0.2 0.8 0.4 0.6 0.0

\[1.13.5~Cumulative~Distribution~Function\]

# cume_dist(): for each element, the proportion of values that are less than
# or equal to it.
y <- c(100, 4, 12, 6, 8, 3)
rank5 <- y %>% cume_dist()
rank5
## [1] 1.0000000 0.3333333 0.8333333 0.5000000 0.6666667 0.1666667
# ntile(): split the values into n buckets by rank (here n = 3).
rank6 <- y %>% ntile(3)
rank6
## [1] 3 1 3 2 2 1
# Deciles of the data (0%, 10%, ..., 100%) computed with quantile type 5.
test.vector <- c(2, 22, 33, 44, 77, 89, 99)

# Argument names are spelled out in full (`probs`, `length.out`) rather than
# relying on partial argument matching (`prob`, `length`).
quantile(test.vector, probs = seq(0, 1, length.out = 11), type = 5)
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##  2.0  6.0 20.0 28.6 36.3 44.0 67.1 81.8 90.0 97.0 99.0

\[1.14~Sampling\]

# draw a random sample of 5 rows from the whole data set
# NOTE(review): this draw happens before set.seed() is called further down,
# so the rows printed here are expected to differ from run to run.
data("ChickWeight")
my.sample<-sample_n(ChickWeight, 5)
my.sample
##   weight Time Chick Diet
## 1    157   21    19    1
## 2    108   14     4    1
## 3     67   10    13    1
## 4    305   21    32    3
## 5    250   20    39    3
# Fix the random seed so the samples drawn from here on are reproducible.
set.seed(833)

# replace = TRUE samples with replacement, so the same row may be drawn more
# than once; with replace = FALSE each row can be drawn at most once.
my.sample <- ChickWeight %>% sample_n(size = 10, replace = TRUE)
my.sample
##    weight Time Chick Diet
## 1      98    8    45    4
## 2      42    0    17    1
## 3      98    8    36    3
## 4      51    2    11    1
## 5     198   20     3    1
## 6     237   21    49    4
## 7     205   16    50    4
## 8     170   16    39    3
## 9     332   18    35    3
## 10    144   14    33    3
# Weighted sampling: cyl is used as the sampling weight, so cars with more
# cylinders are more likely to be drawn (others can still appear).

my.sample <- mtcars %>% sample_n(12, weight = cyl)
my.sample[, 1:5]
##                     mpg cyl  disp  hp drat
## AMC Javelin        15.2   8 304.0 150 3.15
## Porsche 914-2      26.0   4 120.3  91 4.43
## Merc 280           19.2   6 167.6 123 3.92
## Cadillac Fleetwood 10.4   8 472.0 205 2.93
## Merc 240D          24.4   4 146.7  62 3.69
## Datsun 710         22.8   4 108.0  93 3.85
## Merc 280C          17.8   6 167.6 123 3.92
## Mazda RX4 Wag      21.0   6 160.0 110 3.90
## Merc 450SLC        15.2   8 275.8 180 3.07
## Chrysler Imperial  14.7   8 440.0 230 3.23
## Maserati Bora      15.0   8 301.0 335 3.54
## Valiant            18.1   6 225.0 105 2.76
# sample_frac() draws a fraction of the rows instead of a fixed count; here
# 2 % of ChickWeight.

test1 <- ChickWeight %>% sample_frac(size = 0.02)
test1
##    weight Time Chick Diet
## 1      48    2    13    1
## 2      62    6    12    1
## 3     197   20    45    4
## 4     234   18    42    4
## 5      58    4    28    2
## 6     163   16     3    1
## 7     103    8    41    4
## 8     103    8    42    4
## 9     120   18    19    1
## 10     48    2    36    3
## 11     80    6    48    4
## 12    137   12    33    3
# On grouped data the fraction is applied inside each group: sample 7 % of
# every hair_color group, with replacement.
by_hair_color <- group_by(starwars, hair_color)
my.sample <- by_hair_color %>% sample_frac(size = .07, replace = TRUE)
my.sample[, 1:5]
## # A tibble: 5 x 5
## # Groups:   hair_color [3]
##   name       height  mass hair_color skin_color      
##   <chr>       <int> <dbl> <chr>      <chr>           
## 1 Eeth Koth     171    NA black      brown           
## 2 Dormé         165    NA brown      light           
## 3 Sebulba       112    40 none       grey, red       
## 4 Shaak Ti      178    57 none       red, blue, white
## 5 Tion Medon    206    80 none       grey
# tally() counts rows; on ungrouped data that is the total number of rows.

row.kount.only <- tally(ChickWeight)
row.kount.only
##     n
## 1 578
# count() tallies the rows for each value of Diet.
diet.kount <- count(ChickWeight, Diet)
diet.kount
##   Diet   n
## 1    1 220
## 2    2 120
## 3    3 120
## 4    4 118

\[1.15~Miscellaneous~DPLYR~Functions\\[20pt]1.15.1~add_-count~for~Groupwise~filtering\]

# add_count() appends a column n holding the size of each species group;
# keeping n == 1 leaves only the characters that are the sole member of
# their species.
single.species.kount <- filter(add_count(starwars, species), n == 1)

single.species.kount[, 1:6]
## # A tibble: 29 x 6
##    name                  height  mass hair_color skin_color       eye_color
##    <chr>                  <int> <dbl> <chr>      <chr>            <chr>    
##  1 Greedo                   173    74 <NA>       green            black    
##  2 Jabba Desilijic Tiure    175  1358 <NA>       green-tan, brown orange   
##  3 Yoda                      66    17 white      green            brown    
##  4 Bossk                    190   113 none       green            red      
##  5 Ackbar                   180    83 none       brown mottle     orange   
##  6 Wicket Systri Warrick     88    20 brown      brown            brown    
##  7 Nien Nunb                160    68 none       grey             black    
##  8 Nute Gunray              191    90 none       mottled green    red      
##  9 Watto                    137    NA black      blue, grey       yellow   
## 10 Sebulba                  112    40 none       grey, red        orange   
## # ... with 19 more rows

\[1.15.2~Rename\]

# rename the mpg column to spam_mpg (syntax: new_name = old_name)
mtcars<-rename(mtcars, spam_mpg = mpg)
# data(mtcars) reloads the built-in data set, which restores the original column names
data(mtcars)
names(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# applying rename() to the freshly reloaded data turns mpg into spam_mpg again
mtcars<- rename(mtcars, spam_mpg=mpg)
names(mtcars)
##  [1] "spam_mpg" "cyl"      "disp"     "hp"       "drat"     "wt"      
##  [7] "qsec"     "vs"       "am"       "gear"     "carb"

\[1.15.3~case_-when\]

# case_when() inside mutate() builds a new column from a sequence of
# conditions: the first condition that is TRUE for a row decides its value,
# and the final TRUE ~ "other" catches every row left over.
# select is called as dplyr::select because MASS, attached earlier, masks it.
data(starwars)
new.starwars <- starwars %>%
  dplyr::select(name, mass, gender, species, height) %>%
  mutate(
    type = case_when(
      height > 200 | mass > 200 ~ "large",
      species == "Droid" ~ "robot",
      TRUE ~ "other"
    )
  )

new.starwars
## # A tibble: 87 x 6
##    name                mass gender    species height type 
##    <chr>              <dbl> <chr>     <chr>    <int> <chr>
##  1 Luke Skywalker        77 masculine Human      172 other
##  2 C-3PO                 75 masculine Droid      167 robot
##  3 R2-D2                 32 masculine Droid       96 robot
##  4 Darth Vader          136 masculine Human      202 large
##  5 Leia Organa           49 feminine  Human      150 other
##  6 Owen Lars            120 masculine Human      178 other
##  7 Beru Whitesun lars    75 feminine  Human      165 other
##  8 R5-D4                 32 masculine Droid       97 robot
##  9 Biggs Darklighter     84 masculine Human      183 other
## 10 Obi-Wan Kenobi        77 masculine Human      182 other
## # ... with 77 more rows