library(tidyverse)
# Load the built-in mtcars data set.
data("mtcars")
# Keep only the cars that have 6 cylinders, then display them.
seis_cil <- mtcars %>%
  filter(cyl == 6)
seis_cil
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
# Keep the cars that have six cylinders and exactly 110 horsepower.
seis_cil_y_110cab <- mtcars %>%
  filter(cyl == 6 & hp == 110)
seis_cil_y_110cab
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
# Keep the cars that have 4 gears or more than 6 cylinders.
# NOTE(review): the object name says "mas_de8" but the condition is cyl > 6.
eng_eq4_o_mas_de8 <- mtcars %>%
  filter(cyl > 6 | gear == 4)
eng_eq4_o_mas_de8
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Keep the row(s) whose displacement equals the smallest displacement.
des_min <- mtcars %>%
  filter(disp == min(disp))
des_min
## mpg cyl disp hp drat wt qsec vs am gear carb
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.9 1 1 4 1
data("ChickWeight")
# Keep the rows with Time below 3 and weight above 53.
pollos_subset <- ChickWeight %>%
  filter(Time < 3 & weight > 53)
pollos_subset
## weight Time Chick Diet
## 1 55 2 22 2
## 2 55 2 40 3
## 3 55 2 43 4
## 4 54 2 50 4
data("airquality")
# Look at the first 10 rows before filtering.
airquality %>% head(10)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
# Drop every row that has a missing value in the first column (Ozone).
no_NA_ozone <- airquality %>%
  filter(!is.na(Ozone))
no_NA_ozone %>% head(8)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 28 NA 14.9 66 5 6
## 6 23 299 8.6 65 5 7
## 7 19 99 13.8 59 5 8
## 8 8 19 20.1 61 5 9
# complete.cases() flags the rows without missing values in any column;
# apply it to the first 10 rows of airquality.
air_NA_any <- airquality[1:10, ] %>%
  filter(complete.cases(.))
air_NA_any
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 23 299 8.6 65 5 7
## 6 19 99 13.8 59 5 8
## 7 8 19 20.1 61 5 9
data("iris")
# Count the observations of each species in the data set.
table(iris[["Species"]])
##
## setosa versicolor virginica
## 50 50 50
# Keep only two of the three species.
iris_2_sp <- iris %>%
  filter(Species %in% c("setosa", "virginica"))
table(iris_2_sp[["Species"]])
##
## setosa versicolor virginica
## 50 0 50
# nrow() shows the number of rows before and after filtering.
iris %>% nrow()
## [1] 150
iris_2_sp %>% nrow()
## [1] 100
data("airquality")
# Keep the rows with Ozone above 29 and only the first three columns.
air_3_col <- airquality %>%
  filter(Ozone > 29) %>%
  dplyr::select(1:3)
air_3_col %>% head()
## Ozone Solar.R Wind
## 1 41 190 7.4
## 2 36 118 8.0
## 3 34 307 12.0
## 4 30 322 11.5
## 5 32 92 12.0
## 6 45 252 14.9
# Count the rows for each number of gears.
table(mtcars[["gear"]])
##
## 3 4 5
## 15 12 5
# Keep only the gear groups that contain more than 10 rows.
mas_freq_no_eng <- filter(group_by(mtcars, gear), n() > 10)
table(mas_freq_no_eng[["gear"]])
##
## 3 4
## 15 12
# A second condition can be added to the same filter.
mas_freq_eng_y_menos_cab <- filter(
  group_by(mtcars, gear),
  n() > 10,
  hp < 105
)
table(mas_freq_eng_y_menos_cab[["gear"]])
##
## 3 4
## 1 7
# Select the columns whose names start with "S".
names(iris) # list the column names first
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
iris.s <- dplyr::select(iris, starts_with("S"))
iris.s %>% head()
## Sepal.Length Sepal.Width Species
## 1 5.1 3.5 setosa
## 2 4.9 3.0 setosa
## 3 4.7 3.2 setosa
## 4 4.6 3.1 setosa
## 5 5.0 3.6 setosa
## 6 5.4 3.9 setosa
# Keep the rows where both cyl and hp are at their column maximum.
new.mtcars <- filter_at(
  mtcars,
  vars(cyl, hp),
  all_vars(. == max(.))
)
new.mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Maserati Bora 15 8 301 335 3.54 3.57 14.6 0 1 5 8
# Copy the msleep data set from ggplot2 into the workspace and display it.
msleep <- ggplot2::msleep
msleep
## # A tibble: 83 × 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheet… Acin… carni Carn… lc 12.1 NA NA 11.9
## 2 Owl m… Aotus omni Prim… <NA> 17 1.8 NA 7
## 3 Mount… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
## 4 Great… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
## 6 Three… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
## 7 North… Call… carni Carn… vu 8.7 1.4 0.383 15.3
## 8 Vespe… Calo… <NA> Rode… <NA> 7 NA NA 17
## 9 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
## 10 Roe d… Capr… herbi Arti… lc 3 NA NA 21
## # … with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# Select a handful of columns, then keep the rows in which every column
# whose name contains "sleep" is greater than 5.
msleep.over.5 <- filter_at(
  select(msleep, name, sleep_total:sleep_rem, brainwt:bodywt),
  vars(contains("sleep")),
  all_vars(. > 5)
)
msleep.over.5
## # A tibble: 2 × 5
## name sleep_total sleep_rem brainwt bodywt
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Thick-tailed opposum 19.4 6.6 NA 0.37
## 2 Giant armadillo 18.1 6.1 0.081 60
# Refresh msleep and show its first four columns.
msleep <- ggplot2::msleep
msleep[, seq_len(4)]
## # A tibble: 83 × 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Cheetah Acinonyx carni Carnivora
## 2 Owl monkey Aotus omni Primates
## 3 Mountain beaver Aplodontia herbi Rodentia
## 4 Greater short-tailed shrew Blarina omni Soricomorpha
## 5 Cow Bos herbi Artiodactyla
## 6 Three-toed sloth Bradypus herbi Pilosa
## 7 Northern fur seal Callorhinus carni Carnivora
## 8 Vesper mouse Calomys <NA> Rodentia
## 9 Dog Canis carni Carnivora
## 10 Roe deer Capreolus herbi Artiodactyla
## # … with 73 more rows
# Sort by vore, then by order (both ascending).
animal_name1 <- msleep %>%
  arrange(vore, order)
animal_name1[, seq_len(4)]
## # A tibble: 83 × 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Cheetah Acinonyx carni Carnivora
## 2 Northern fur seal Callorhinus carni Carnivora
## 3 Dog Canis carni Carnivora
## 4 Domestic cat Felis carni Carnivora
## 5 Gray seal Haliochoerus carni Carnivora
## 6 Tiger Panthera carni Carnivora
## 7 Jaguar Panthera carni Carnivora
## 8 Lion Panthera carni Carnivora
## 9 Caspian seal Phoca carni Carnivora
## 10 Genet Genetta carni Carnivora
## # … with 73 more rows
# Sort by vore ascending and by order descending.
animal_name2 <- msleep %>%
  arrange(vore, desc(order))
animal_name2[, seq_len(4)] %>% head()
## # A tibble: 6 × 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Northern grasshopper mouse Onychomys carni Rodentia
## 2 Slow loris Nyctibeus carni Primates
## 3 Thick-tailed opposum Lutreolina carni Didelphimorphia
## 4 Long-nosed armadillo Dasypus carni Cingulata
## 5 Pilot whale Globicephalus carni Cetacea
## 6 Common porpoise Phocoena carni Cetacea
# Rename one or more columns of a data set.
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
rename_iris <- iris %>%
  rename(
    ancho.petalos = Petal.Width,
    nombre.plantas = Species
  )
names(rename_iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "ancho.petalos"
## [5] "nombre.plantas"
data("ChickWeight")
# Show the first 2 rows.
ChickWeight[seq_len(2), ]
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
# Add a new column holding the base-10 logarithm of weight.
pollos_con_log <- ChickWeight %>%
  mutate(log.of.weight = log10(weight))
pollos_con_log[seq_len(2), ]
## weight Time Chick Diet log.of.weight
## 1 42 0 1 1 1.623249
## 2 51 2 1 1 1.707570
# Refresh msleep and list its column names.
msleep <- ggplot2::msleep
msleep %>% names()
## [1] "name" "genus" "vore" "order" "conservation"
## [6] "sleep_total" "sleep_rem" "sleep_cycle" "awake" "brainwt"
## [11] "bodywt"
# Add square-root versions of the numeric variables in columns 6-11.
# A named list of functions is used instead of funs(), which was deprecated
# in dplyr 0.8.0; the list name ("square root") is appended to each
# original column name to form the new column names.
msleep_raiz <- mutate_all(msleep[, 6:11], list("square root" = sqrt))
names(msleep_raiz)
## [1] "sleep_total" "sleep_rem"
## [3] "sleep_cycle" "awake"
## [5] "brainwt" "bodywt"
## [7] "sleep_total_square root" "sleep_rem_square root"
## [9] "sleep_cycle_square root" "awake_square root"
## [11] "brainwt_square root" "bodywt_square root"
msleep_raiz
## # A tibble: 83 × 12
## sleep_total sleep_rem sleep_cycle awake brainwt bodywt `sleep_total_square…
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12.1 NA NA 11.9 NA 50 3.48
## 2 17 1.8 NA 7 0.0155 0.48 4.12
## 3 14.4 2.4 NA 9.6 NA 1.35 3.79
## 4 14.9 2.3 0.133 9.1 0.00029 0.019 3.86
## 5 4 0.7 0.667 20 0.423 600 2
## 6 14.4 2.2 0.767 9.6 NA 3.85 3.79
## 7 8.7 1.4 0.383 15.3 NA 20.5 2.95
## 8 7 NA NA 17 NA 0.045 2.65
## 9 10.1 2.9 0.333 13.9 0.07 14 3.18
## 10 3 NA NA 21 0.0982 14.8 1.73
## # … with 73 more rows, and 5 more variables: sleep_rem_square root <dbl>,
## # sleep_cycle_square root <dbl>, awake_square root <dbl>,
## # brainwt_square root <dbl>, bodywt_square root <dbl>
# Load the built-in Titanic table (the data set name must be spelled
# "Titanic", otherwise data() warns that the data set was not found) and
# convert it to a data frame.
data("Titanic")
Titanic <- as.data.frame(Titanic)
head(Titanic)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
# Use mutate_at() to add a descending min_rank() of Class, Age and Survived.
# A named list with a lambda replaces the deprecated funs(); the name "Rank"
# is appended to each source column name (Class_Rank, Age_Rank, ...).
titanic_rangos <- mutate_at(
  Titanic,
  vars(Class, Age, Survived),
  list(Rank = ~ min_rank(desc(.)))
)
head(titanic_rangos)
## Class Sex Age Survived Freq Class_Rank Age_Rank Survived_Rank
## 1 1st Male Child No 0 25 17 17
## 2 2nd Male Child No 0 17 17 17
## 3 3rd Male Child No 35 9 17 17
## 4 Crew Male Child No 0 1 17 17
## 5 1st Female Child No 0 25 17 17
## 6 2nd Female Child No 0 17 17 17
# Helper that divides its argument by 10.
divide_10 <- function(a.number) {
  a.number / 10
}
# Load the CO2 data set; the helper will be applied to its columns.
data("CO2")
head(CO2)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
# Apply divide_10() to every numeric column of CO2.
new.df <- mutate_if(CO2, is.numeric, divide_10)
new.df %>% head()
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 9.5 1.60
## 2 Qn1 Quebec nonchilled 17.5 3.04
## 3 Qn1 Quebec nonchilled 25.0 3.48
## 4 Qn1 Quebec nonchilled 35.0 3.72
## 5 Qn1 Quebec nonchilled 50.0 3.53
## 6 Qn1 Quebec nonchilled 67.5 3.92
# Small example data frame with missing values in two of its columns.
df <- data.frame(
  alpha = c(22, 1, NA),
  almond = c(0, 5, 10),
  grape = c(0, 2, 2),
  apple = c(NA, 5, 10)
)
df
## alpha almond grape apple
## 1 22 0 0 NA
## 2 1 5 2 5
## 3 NA 10 2 10
# Replace the missing values of every numeric column with 0. An explicit
# lambda makes the replacement value visible instead of passing it through
# an argument literally named `...`.
df.fix.alpha <- df %>%
  mutate_if(is.numeric, ~ coalesce(., 0))
df.fix.alpha
## alpha almond grape apple
## 1 22 0 0 0
## 2 1 5 2 5
## 3 0 10 2 10
msleep <- ggplot2::msleep
table(msleep[["vore"]])
##
## carni herbi insecti omni
## 19 32 5 20
# Drop the rows whose vore value contains a "c" or an "a".
msleep.no.c.or.a <- msleep %>%
  filter(!str_detect(vore, paste(c("c", "a"), collapse = "|")))
table(msleep.no.c.or.a[["vore"]])
##
## herbi omni
## 32 20
# Flag the rows whose conservation value already appeared in an earlier row.
msleep_dup <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))
msleep_dup[seq_len(6), ]
## # A tibble: 6 × 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheetah Acin… carni Carn… lc 12.1 NA NA 11.9
## 2 Owl mo… Aotus omni Prim… <NA> 17 1.8 NA 7
## 3 Mounta… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
## 4 Greate… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
## 6 Three-… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
## # … with 3 more variables: brainwt <dbl>, bodywt <dbl>,
## # duplicate.indicator <lgl>
# The same flag again, this time showing only a few of the columns.
msleep_dup_otro <- msleep %>%
  mutate(duplicate.indicator = duplicated(conservation))
msleep_dup_otro[seq_len(6), c(1, 2, 3, 12)]
## # A tibble: 6 × 4
## name genus vore duplicate.indicator
## <chr> <chr> <chr> <lgl>
## 1 Cheetah Acinonyx carni FALSE
## 2 Owl monkey Aotus omni FALSE
## 3 Mountain beaver Aplodontia herbi FALSE
## 4 Greater short-tailed shrew Blarina omni TRUE
## 5 Cow Bos herbi FALSE
## 6 Three-toed sloth Bradypus herbi TRUE
# Another example: flag rows whose (conservation, genus) combination already
# appeared in an earlier row, then sort by those two columns.
# The two columns are wrapped in a data frame because the second positional
# argument of duplicated() is `incomparables`, not a second key column.
msleep_dup2 <- msleep %>%
  mutate(
    duplicate.indicator = duplicated(data.frame(conservation, genus))
  ) %>%
  arrange(conservation, genus)
msleep_dup2
## # A tibble: 83 × 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Giraf… Gira… herbi Arti… cd 1.9 0.4 NA 22.1
## 2 Pilot… Glob… carni Ceta… cd 2.7 0.1 NA 21.4
## 3 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
## 4 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
## 5 Guine… Cavis herbi Rode… domesticated 9.4 0.8 0.217 14.6
## 6 Chinc… Chin… herbi Rode… domesticated 12.5 1.5 0.117 11.5
## 7 Horse Equus herbi Peri… domesticated 2.9 0.6 1 21.1
## 8 Donkey Equus herbi Peri… domesticated 3.1 0.4 NA 20.9
## 9 Domes… Felis carni Carn… domesticated 12.5 3.2 0.417 11.5
## 10 Rabbit Oryc… herbi Lago… domesticated 8.4 0.9 0.417 15.6
## # … with 73 more rows, and 3 more variables: brainwt <dbl>, bodywt <dbl>,
## # duplicate.indicator <lgl>
# Build a small fruit data frame from five vectors.
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
w <- c(2, 2, 2, 4, 5, 6)
fruta <- data.frame(fruit, x, y, z, w)
# Combine mutate() and duplicated() to flag repeated fruit names.
fruta_dup <- fruta %>%
  mutate(duplicate.indicator = duplicated(fruit))
fruta_dup
## fruit x y z w duplicate.indicator
## 1 apple 1 22 3 2 FALSE
## 2 pear 2 3 1 2 FALSE
## 3 orange 4 4 4 2 FALSE
## 4 grape 9 55 10 4 FALSE
## 5 orange 4 15 12 5 TRUE
## 6 orange 6 9 8 6 TRUE
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
fruta1 <- data.frame(fruit, x, y, z)
# Assigning NULL to a column inside mutate() removes it. The call operates
# on fruta1 (the data frame built just above), which is the one that
# actually has a z column to drop.
(fruta1_sinz <- mutate(fruta1, z = NULL))
## fruit x y
## 1 apple 1 22
## 2 pear 2 3
## 3 orange 4 4
## 4 grape 9 55
## 5 orange 4 15
## 6 orange 6 9
# Install nycflights13 only when it is missing, then attach it with
# library(): require() merely returns FALSE on failure, and a package that
# has just been installed still needs to be attached before `flights` exists.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# Later arguments of a single mutate() call can use columns created by
# earlier arguments of that same call.
mutate(flights,
       gain = arr_delay - dep_delay,
       hours = air_time / 60,
       gain_per_hour = gain / hours,
       gain_per_minute = 60 * gain_per_hour)
## # A tibble: 336,776 × 23
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # … with 336,766 more rows, and 15 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## # gain <dbl>, hours <dbl>, gain_per_hour <dbl>, gain_per_minute <dbl>
# Install nycflights13 only when it is missing, then attach it with
# library() so that `flights` is available even right after installation.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# The same derived columns, built up over several chained mutate() calls.
newfield.flights <- flights %>%
  mutate(gain = arr_delay - dep_delay,
         hours = air_time / 60) %>%
  mutate(gain_per_hour = gain / hours) %>%
  mutate(gain_per_minute = 60 * gain_per_hour)
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
# Data frame as it looks before transmute().
fruta <- data.frame(fruit, x, y, z)
# transmute() keeps only the newly computed column.
fruta_tras <- fruta %>%
  transmute(nueva_var = x + y + z)
fruta_tras
## nueva_var
## 1 26
## 2 6
## 3 12
## 4 74
## 5 31
## 6 23
# Helper that multiplies its argument by 2.
double.it <- function(x) {
  x * 2
}
# Apply double.it() to every numeric column of iris and show the first rows.
head(mutate(iris, across(where(is.numeric), double.it)))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 10.2 7.0 2.8 0.4 setosa
## 2 9.8 6.0 2.8 0.4 setosa
## 3 9.4 6.4 2.6 0.4 setosa
## 4 9.2 6.2 3.0 0.4 setosa
## 5 10.0 7.2 2.8 0.4 setosa
## 6 10.8 7.8 3.4 0.8 setosa
# row1 holds the column names; row2-row5 hold the data rows.
row1 <- c("a", "b", "c", "d", "e", "f", "col_cambiar")
row2 <- c(1, 1, 1, 6, 6, 1, 2)
row3 <- c(3, 4, 4, 6, 4, 4, 4)
row4 <- c(4, 6, 25, 5, 5, 2, 9)
row5 <- c(5, 3, 6, 3, 3, 6, 2)
# Stack the data rows into a data frame, then name its columns.
filas <- as.data.frame(rbind(row2, row3, row4, row5))
names(filas) <- row1
row1
## [1] "a" "b" "c" "d" "e"
## [6] "f" "col_cambiar"
# Recode col_cambiar: the first matching rule wins, and rows that match
# no rule get NA.
new_filas <- mutate(
  filas,
  col_cambiar = case_when(
    a %in% c(2, 5, 7) | (a == 1 & b == 4) ~ 2,
    a %in% c(0, 1, 4, 3) | c == 4 ~ 3,
    TRUE ~ NA_real_
  )
)
new_filas
## a b c d e f col_cambiar
## row2 1 1 1 6 6 1 3
## row3 3 4 4 6 4 4 3
## row4 4 6 25 5 5 2 3
## row5 5 3 6 3 3 6 2
# Reuse the fruit data frame created earlier.
fruta
## fruit x y z
## 1 apple 1 22 3
## 2 pear 2 3 1
## 3 orange 4 4 4
## 4 grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6 9 8
# A minus sign in front of a column name removes that column; drop fruit.
new_fruta_ <- fruta %>%
  dplyr::select(-fruit)
new_fruta_
## x y z
## 1 1 22 3
## 2 2 3 1
## 3 4 4 4
## 4 9 55 10
## 5 4 15 12
## 6 6 9 8
data("mtcars")
# Column names of the data frame.
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Drop every column whose name starts with "d".
mtcars_d <- mtcars %>%
  select(-starts_with("d"))
names(mtcars_d)
## [1] "mpg" "cyl" "hp" "wt" "qsec" "vs" "am" "gear" "carb"
# Drop every column whose name ends in "t".
mtcars_t <- mtcars %>%
  select(-ends_with("t"))
names(mtcars_t)
## [1] "mpg" "cyl" "disp" "hp" "qsec" "vs" "am" "gear" "carb"
fruit <- c("apple", "pear", "orange", "grape", "orange", "orange")
x <- c(1, 2, 4, 9, 4, 6)
y <- c(22, 3, 4, 55, 15, 9)
z <- c(3, 1, 4, 10, 12, 8)
# Combine the vectors into a data frame (this reassigns df) and display it.
df <- data.frame(fruit, x, y, z)
df
## fruit x y z
## 1 apple 1 22 3
## 2 pear 2 3 1
## 3 orange 4 4 4
## 4 grape 9 55 10
## 5 orange 4 15 12
## 6 orange 6 9 8
### 1.5.4 select_all() applies a function to all of the column names
# Build a new data frame.
estado <- c("Maryland", "Alaska", "New Jersey")
ingresos <- c(76067, 74444, 73702)
media_dolar <- c(61372, 61372, 61372)
esp_vida <- c(78.8, 78.3, 80.3)
top_3_est <- data.frame(estado, ingresos, media_dolar, esp_vida)
# Upper-case the column names with toupper().
top_3_est_mayus <- top_3_est %>%
  select_all(toupper)
top_3_est_mayus
## ESTADO INGRESOS MEDIA_DOLAR ESP_VIDA
## 1 Maryland 76067 61372 78.8
## 2 Alaska 74444 61372 78.3
## 3 New Jersey 73702 61372 80.3
top_3_est <- data.frame(estado, ingresos, media_dolar, esp_vida)
top_3_est
## estado ingresos media_dolar esp_vida
## 1 Maryland 76067 61372 78.8
## 2 Alaska 74444 61372 78.3
## 3 New Jersey 73702 61372 80.3
# Extract just the first column (estado) as a vector.
pull_first_col <- top_3_est %>% pull(1)
# A negative index counts from the right, so -1 is the last column.
pull_last_col <- top_3_est %>% pull(-1)
pull_last_col
## [1] 78.8 78.3 80.3
# Number of rows in the original data frame.
mtcars %>% nrow()
## [1] 32
# Keep the rows in which any column holds a value above 200.
mtcars_mas_200 <- mtcars %>%
  filter_all(any_vars(. > 200))
mtcars_mas_200 %>% nrow()
## [1] 16
# Column names before removing the ones that contain a "p".
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Drop every column whose name contains "p".
cars_sin_p <- dplyr::select(mtcars, -contains("p"))
names(cars_sin_p)
## [1] "cyl" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
# matches() takes a regular expression, so it is more general than contains().
sub_mtcars <- mtcars %>%
  select(matches("pg|gea"))
names(sub_mtcars)
## [1] "mpg" "gear"
# Combine the state abbreviations and areas column-wise. data.frame() is
# used directly so state.area stays numeric; going through cbind() first
# builds a character matrix and turns the areas into strings.
us_state_areas <- data.frame(state.abb, state.area)
us_state_areas[1:3, ]
## state.abb state.area
## 1 AL 51609
## 2 AK 589757
## 3 AZ 113909
# Another example: abbreviations next to the full state names.
us_state_abr_nom <- as.data.frame(
  cbind(state.abb, state.name)
)
head(us_state_abr_nom, 3)
## state.abb state.name
## 1 AL Alabama
## 2 AK Alaska
## 3 AZ Arizona
# left_join() merges the two previous data frames on state.abb.
state_info <- left_join(
  us_state_areas,
  us_state_abr_nom,
  by = "state.abb"
)
state_info %>% head()
## state.abb state.area state.name
## 1 AL 51609 Alabama
## 2 AK 589757 Alaska
## 3 AZ 113909 Arizona
## 4 AR 53104 Arkansas
## 5 CA 158693 California
## 6 CO 104247 Colorado
# First data frame: team score and league per name.
nombres <- c("Sally", "Tom", "Frieda", "Alfonzo")
puntaje_eq <- c(3, 5, 2, 7)
equipo_liga <- c("alpha", "beta", "gamma", "omicron")
equipo_info <- data.frame(nombres, puntaje_eq, equipo_liga)
# Second data frame: school grade per name (nombres is reassigned).
nombres <- c("Sally", "Tom", "Bill", "Alfonzo")
escuela_notas <- c("A", "B", "C", "B")
escuela_info <- data.frame(nombres, escuela_notas)
# inner_join() keeps only the names present in both data frames.
escuela_equipo <- equipo_info %>%
  inner_join(escuela_info, by = "nombres")
escuela_equipo
## nombres puntaje_eq equipo_liga escuela_notas
## 1 Sally 3 alpha A
## 2 Tom 5 beta B
## 3 Alfonzo 7 omicron B
# Reuse the data frames created above.
equipo_info
## nombres puntaje_eq equipo_liga
## 1 Sally 3 alpha
## 2 Tom 5 beta
## 3 Frieda 2 gamma
## 4 Alfonzo 7 omicron
escuela_info
## nombres escuela_notas
## 1 Sally A
## 2 Tom B
## 3 Bill C
## 4 Alfonzo B
# anti_join() keeps the rows of equipo_info with no match in escuela_info,
# i.e. the team members without a grade.
equipo_info_sin_notas <- equipo_info %>%
  anti_join(escuela_info, by = "nombres")
equipo_info_sin_notas
## nombres puntaje_eq equipo_liga
## 1 Frieda 2 gamma
# Use the same two data frames once more.
equipo_info
## nombres puntaje_eq equipo_liga
## 1 Sally 3 alpha
## 2 Tom 5 beta
## 3 Frieda 2 gamma
## 4 Alfonzo 7 omicron
escuela_info
## nombres escuela_notas
## 1 Sally A
## 2 Tom B
## 3 Bill C
## 4 Alfonzo B
# full_join() keeps every name from both sides; Frieda has no grade, so her
# escuela_notas shows up as NA.
equipo_info_o_notas <- equipo_info %>%
  full_join(escuela_info, by = "nombres")
equipo_info_o_notas
## nombres puntaje_eq equipo_liga escuela_notas
## 1 Sally 3 alpha A
## 2 Tom 5 beta B
## 3 Frieda 2 gamma <NA>
## 4 Alfonzo 7 omicron B
## 5 Bill NA <NA> C
# semi_join() keeps the rows of equipo_info that have a match in
# escuela_info; Frieda has no grade, so she is not part of the result.
# The key is spelled out with `by`, like in the other joins, instead of
# relying on the implicit "Joining, by = ..." guess.
(equipo_info_con_notas <- semi_join(equipo_info, escuela_info,
                                    by = "nombres"))
## nombres puntaje_eq equipo_liga
## 1 Sally 3 alpha
## 2 Tom 5 beta
## 3 Alfonzo 7 omicron
# Reuse the two state data frames.
head(us_state_areas, 3) # data frame 1
## state.abb state.area
## 1 AL 51609
## 2 AK 589757
## 3 AZ 113909
head(us_state_abr_nom, 3) # data frame 2
## state.abb state.name
## 1 AL Alabama
## 2 AK Alaska
## 3 AZ Arizona
# Overwrite the abbreviation in the first row of the second data frame
# (Alabama's row) so that it no longer matches the first data frame.
us_state_abr_nom$state.abb[1] <- "intentional mismatch"
us_state_abb_name_area <- us_state_areas %>%
  right_join(us_state_abr_nom, by = "state.abb")
head(us_state_abb_name_area, 3)
## state.abb state.area state.name
## 1 AK 589757 Alaska
## 2 AZ 113909 Arizona
## 3 AR 53104 Arkansas
msleep
## # A tibble: 83 × 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheet… Acin… carni Carn… lc 12.1 NA NA 11.9
## 2 Owl m… Aotus omni Prim… <NA> 17 1.8 NA 7
## 3 Mount… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
## 4 Great… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
## 6 Three… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
## 7 North… Call… carni Carn… vu 8.7 1.4 0.383 15.3
## 8 Vespe… Calo… <NA> Rode… <NA> 7 NA NA 17
## 9 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
## 10 Roe d… Capr… herbi Arti… lc 3 NA NA 21
## # … with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
# msleep has 83 rows.
msleep %>% nrow()
## [1] 83
# Negative indices drop rows: rows 6 through n() (the total row count, so
# it does not need to be known in advance) are removed.
msleep_first_5 <- msleep %>%
  slice(-(6:n()))
msleep_first_5 %>% nrow()
## [1] 5
# Now pick 20 rows by position.
msleep_20 <- slice(msleep, 20:39)
msleep_20 %>% nrow()
## [1] 20
# Difference in row count between the original and the 20-row slice.
nrow(msleep) - nrow(msleep_20)
## [1] 63
# MASS exports its own select(), which masks dplyr::select() when MASS is
# attached after the tidyverse. Attach it without that one function so the
# MASS data sets stay reachable and select() keeps meaning dplyr::select().
library(MASS, exclude = "select")
data(gehan)
gehan2 <- gehan
# Kept from the original script; attaching an already attached package
# does not change the search path.
library(tidyverse)
# How many patients are in the data set?
summarise(gehan, kount = n())
## kount
## 1 42
# How many rows fall into each level of treat?
summarise(group_by(gehan2, treat), kount = n())
## # A tibble: 2 × 2
## treat kount
## <fct> <int>
## 1 6-MP 21
## 2 control 21
# Summary statistics of the remission time for each level of treat.
summarise(
  group_by(gehan2, treat),
  average.remiss.time = mean(time),
  median.remiss.time = median(time),
  std.dev.remiss.time = sd(time),
  median.abs.deviation = mad(time),
  IQR.remiss.time = IQR(time)
)
## # A tibble: 2 × 6
## treat average.remiss.time median.remiss.t… std.dev.remiss.… median.abs.devi…
## <fct> <dbl> <int> <dbl> <dbl>
## 1 6-MP 17.1 16 10.0 10.4
## 2 control 8.67 8 6.47 5.93
## # … with 1 more variable: IQR.remiss.time <dbl>
# Use summarise() to get the minimum and maximum within each group.
summarise(
  group_by(gehan2, treat),
  minimum.remission = min(time),
  max.remission = max(time)
)
## # A tibble: 2 × 3
## treat minimum.remission max.remission
## <fct> <int> <int>
## 1 6-MP 6 35
## 2 control 1 23
# Work with the first 10 rows of the survey data set.
data(survey)
sub_survey <- survey[seq_len(10), ]
sub_survey %>% head()
## Sex Wr.Hnd NW.Hnd W.Hnd Fold Pulse Clap Exer Smoke Height M.I
## 1 Female 18.5 18.0 Right R on L 92 Left Some Never 173.00 Metric
## 2 Male 19.5 20.5 Left R on L 104 Left None Regul 177.80 Imperial
## 3 Male 18.0 13.3 Right L on R 87 Neither None Occas NA <NA>
## 4 Male 18.8 18.9 Right R on L NA Neither None Never 160.00 Metric
## 5 Male 20.0 20.0 Right Neither 35 Right Some Never 165.00 Metric
## 6 Female 18.0 17.7 Right L on R 64 Right Some Never 172.72 Imperial
## Age
## 1 18.250
## 2 17.583
## 3 16.917
## 4 20.333
## 5 23.667
## 6 21.000
# Drop every row containing an NA, group by Sex, and average each numeric
# column; the result columns are named "mean_<column>".
head(
  summarise(
    group_by(na.omit(sub_survey), Sex),
    across(where(is.numeric), mean, .names = "mean_{col}")
  )
)
## # A tibble: 2 × 6
## Sex mean_Wr.Hnd mean_NW.Hnd mean_Pulse mean_Height mean_Age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Female 17.8 17.7 76.7 168. 25.0
## 2 Male 19.1 19.2 76.8 174. 20.3
# Group msleep by its vore and order columns.
nueva_msleep <- group_by(msleep, vore, order)
# Count the rows in each vore-order combination.
s <- nueva_msleep %>% summarise(n())
## `summarise()` has grouped output by 'vore'. You can override using the `.groups` argument.
# The same counts, written as one nested expression.
nueva_msleep_total <- summarise(group_by(msleep, vore, order), n())
## `summarise()` has grouped output by 'vore'. You can override using the `.groups` argument.
# Source vectors for a wide table with one column per year.
estado1 <- c("Maryland", "Alaska", "New Jersey")
ingresos1 <- c(76067, 74444, 73702)
media_dolar1 <- c(61372, 61372, 61372)
esp_vida1 <- c(78.8, 78.3, 80.3)
nac_2015 <- c(17, 29.3, 12.1)
nac_2007 <- c(34.3, 42.9, 24.9)
nac_1991 <- c(54.1, 66, 41.3)
# Build the data frame and give its columns their display names.
top_3_st <- setNames(
  data.frame(estado1, ingresos1, media_dolar1, esp_vida1,
             nac_2015, nac_2007, nac_1991),
  c("estado", "ingresos", "media dolar",
    "life.expectancy", "2015", "2007", "1991")
)
top_3_st
## estado ingresos media dolar life.expectancy 2015 2007 1991
## 1 Maryland 76067 61372 78.8 17.0 34.3 54.1
## 2 Alaska 74444 61372 78.3 29.3 42.9 66.0
## 3 New Jersey 73702 61372 80.3 12.1 24.9 41.3
# gather() stacks the three year columns into a year/cases pair of columns.
new_top_3_st <- gather(
  top_3_st,
  "2015", "2007", "1991",
  key = "year",
  value = "cases"
)
new_top_3_st
## estado ingresos media dolar life.expectancy year cases
## 1 Maryland 76067 61372 78.8 2015 17.0
## 2 Alaska 74444 61372 78.3 2015 29.3
## 3 New Jersey 73702 61372 80.3 2015 12.1
## 4 Maryland 76067 61372 78.8 2007 34.3
## 5 Alaska 74444 61372 78.3 2007 42.9
## 6 New Jersey 73702 61372 80.3 2007 24.9
## 7 Maryland 76067 61372 78.8 1991 54.1
## 8 Alaska 74444 61372 78.3 1991 66.0
## 9 New Jersey 73702 61372 80.3 1991 41.3
# Long-format example: "Yes" and "No" both live in the Answer column.
# tibble() is used because data_frame() was deprecated in tibble 1.1.0.
datos <- tibble(
  Type = c("TypeA", "TypeA", "TypeB", "TypeB"),
  Answer = c("Yes", "No", NA, "No"),
  n = 1:4
)
# spread() turns the Answer values into separate columns; rows with a
# missing Answer are removed first.
datos_2 <- spread(
  filter(datos, !is.na(Answer)),
  key = Answer,
  value = n
)
datos_2
## # A tibble: 2 × 3
## Type No Yes
## <chr> <int> <int>
## 1 TypeA 2 1
## 2 TypeB 4 NA
# Source vectors; nacimiento packs three values into one string per row,
# separated by "//".
estado <- c("Maryland", "Alaska", "New Jersey")
ingresos <- c(76067, 74444, 73702)
media_dolar <- c(61372, 61372, 61372)
esp_vida <- c(78.8, 78.3, 80.3)
nacimiento <- c("17//34.3//54.1", "29.0//42.9//66.0", "12.1//24.9//41.3")
# Build the corresponding data frame.
top3 <- data.frame(estado, ingresos, media_dolar, esp_vida, nacimiento)
top3
## estado ingresos media_dolar esp_vida nacimiento
## 1 Maryland 76067 61372 78.8 17//34.3//54.1
## 2 Alaska 74444 61372 78.3 29.0//42.9//66.0
## 3 New Jersey 73702 61372 80.3 12.1//24.9//41.3
# separate() splits nacimiento at "//" into one column per year.
top3_separado <- separate(
  top3,
  nacimiento,
  into = c("2015", "2007", "1991"),
  sep = "//"
)
top3_separado
## estado ingresos media_dolar esp_vida 2015 2007 1991
## 1 Maryland 76067 61372 78.8 17 34.3 54.1
## 2 Alaska 74444 61372 78.3 29.0 42.9 66.0
## 3 New Jersey 73702 61372 80.3 12.1 24.9 41.3
# n() inside mutate() adds the row count as a new rightmost column, kount.
ejemplo <- msleep %>%
  mutate(kount = n())
# Show only a limited number of rows and columns.
ejemplo[seq_len(5), c(1:4, 10:12)]
## # A tibble: 5 × 7
## name genus vore order brainwt bodywt kount
## <chr> <chr> <chr> <chr> <dbl> <dbl> <int>
## 1 Cheetah Acinonyx carni Carnivora NA 50 83
## 2 Owl monkey Aotus omni Primates 0.0155 0.48 83
## 3 Mountain beaver Aplodontia herbi Rodentia NA 1.35 83
## 4 Greater short-tailed shrew Blarina omni Soricomorp… 0.00029 0.019 83
## 5 Cow Bos herbi Artiodacty… 0.423 600 83
# Filter on n() > 14; msleep is not grouped here, so n() is the total
# number of rows.
f <- msleep %>%
  filter(n() > 14)
f[seq_len(5), c(1:4, 10:11)]
## # A tibble: 5 × 6
## name genus vore order brainwt bodywt
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Cheetah Acinonyx carni Carnivora NA 50
## 2 Owl monkey Aotus omni Primates 0.0155 0.48
## 3 Mountain beaver Aplodontia herbi Rodentia NA 1.35
## 4 Greater short-tailed shrew Blarina omni Soricomorpha 0.00029 0.019
## 5 Cow Bos herbi Artiodactyla 0.423 600
# Enter the data first, then pick elements by position.
des_salario <- c(
  "Golden parachute type",
  "Well to do",
  "Average",
  "Below average",
  "bring date seeds instead of flowers"
)
first(des_salario)
## [1] "Golden parachute type"
last(des_salario)
## [1] "bring date seeds instead of flowers"
nth(des_salario, n = -3) # third observation counting from the end
## [1] "Average"
nth(des_salario, n = 2) # second observation of the vector
## [1] "Well to do"
# Detect duplicates by comparing a vector's length with its number of
# distinct values.
vectorA <- c(22, 33, 44, 1, 2, 3, 3, 3, 4)
(orig_long <- length(vectorA))
## [1] 9
(vectorA_dist <- n_distinct(vectorA)) # repeated numbers are counted only once
## [1] 7
# Emit a message saying whether the vector holds duplicates. Each message is
# written on a single line so that no newline ends up inside the string.
(test1 <- if_else(
  orig_long == vectorA_dist,
  "all values unique",
  "some duplicate values in vector"
))
## [1] "some duplicate values in vector"
vectorB <- c(1, 2, 3, 4, 5, 6)
long_B <- length(vectorB)
(vectorB_dist <- n_distinct(vectorB)) # equals the length because every number is different
## [1] 6
# Same check for vectorB: emit a message when all values are unique.
(test2 <- if_else(long_B == vectorB_dist, "all values unique", "duplicates"))
## [1] "all values unique"
ej_na <- c(100, 0, 999)
e <- 5000 / ej_na # dividing by zero yields Inf
e
## [1] 50.000000 Inf 5.005005
e <- 5000 / na_if(ej_na, y = 0) # zeros in ej_na are turned into NA before dividing
class(e) # reports the type of the variable
## [1] "numeric"
h <- c(33, 4, 11, NA, 9)
h
## [1] 33 4 11 NA 9
# Replace NA with zero.
# NOTE(review): this reads `e`, not the `h` defined just above, and then
# overwrites `h` with the result -- confirm that is intended.
h <- coalesce(e, 0)
h
## [1] 50.000000 0.000000 5.005005
# Rank the values of p; the smallest value receives rank 1.
p <- c(100, 4, 12, 6, 8, 3)
(rank1 <- row_number(p))
## [1] 6 2 5 3 4 1
# rank1[i] is the rank of p[i], not a position inside p, so the extremes are
# found by locating where the lowest and the highest rank sit.
p[which.min(rank1)] # shows the smallest number
## [1] 3
p[which.max(rank1)] # shows the largest number
## [1] 100
# Other ranking helpers applied to the same vector p.
rank2 <- min_rank(p)
rank2
## [1] 6 2 5 3 4 1
rank3 <- dense_rank(p) # like min_rank() but without gaps between ranks
rank3
## [1] 6 2 5 3 4 1
rank4 <- percent_rank(p)
rank4
## [1] 1.0 0.2 0.8 0.4 0.6 0.0
rank5 <- cume_dist(p)
rank5
## [1] 1.0000000 0.3333333 0.8333333 0.5000000 0.6666667 0.1666667
# Split the input vector into n buckets; bucket 1 receives the smallest
# values and the last bucket the largest.
rank6 <- ntile(p, n = 3)
rank6
## [1] 3 1 3 2 2 1
# Pick 5 rows at random from the "ChickWeight" data set.
data("ChickWeight")
muestreo_pollos <- ChickWeight %>%
  sample_n(size = 5)
muestreo_pollos
## weight Time Chick Diet
## 1 71 10 19 1
## 2 134 10 34 3
## 3 117 21 20 1
## 4 124 21 10 1
## 5 164 12 14 1
# Draw 10 rows with replacement, so the same row may be picked more than once.
muestreo_pollos <- ChickWeight %>%
  sample_n(size = 10, replace = TRUE)
muestreo_pollos
## weight Time Chick Diet
## 1 39 0 27 2
## 2 125 8 21 2
## 3 170 14 48 4
## 4 250 18 7 1
## 5 125 10 48 4
## 6 72 8 12 1
## 7 307 18 21 2
## 8 42 0 44 4
## 9 279 20 29 2
## 10 157 18 37 3
# Another example using mtcars: draw 12 rows with `cyl` as the sampling
# weight, then show the first five columns.
muestreo_cars <- mtcars %>%
  sample_n(size = 12, weight = cyl)
muestreo_cars[, seq_len(5)]
## mpg cyl disp hp drat
## Hornet Sportabout 18.7 8 360.0 175 3.15
## Cadillac Fleetwood 10.4 8 472.0 205 2.93
## Datsun 710 22.8 4 108.0 93 3.85
## Valiant 18.1 6 225.0 105 2.76
## Merc 280 19.2 6 167.6 123 3.92
## Chrysler Imperial 14.7 8 440.0 230 3.23
## Maserati Bora 15.0 8 301.0 335 3.54
## AMC Javelin 15.2 8 304.0 150 3.15
## Camaro Z28 13.3 8 350.0 245 3.73
## Merc 450SE 16.4 8 275.8 180 3.07
## Pontiac Firebird 19.2 8 400.0 175 3.08
## Merc 240D 24.4 4 146.7 62 3.69
# sample_frac() draws a sample whose size is a given fraction of the data
# frame (here 2% of ChickWeight).
per_pollos_mu <- ChickWeight %>%
  sample_frac(size = 0.02)
per_pollos_mu
## weight Time Chick Diet
## 1 85 6 49 4
## 2 163 18 23 2
## 3 53 2 31 3
## 4 83 10 37 3
## 5 116 10 36 3
## 6 46 2 28 2
## 7 47 2 20 1
## 8 43 0 11 1
## 9 210 18 46 4
## 10 62 4 48 4
## 11 280 20 38 3
## 12 53 2 35 3
# Group the characters by hair colour; the grouping is kept on the result.
cabello_color <- group_by(starwars, hair_color)
# Sample a 7% fraction of the grouped table, with replacement.
muestreo_starwars_cabello <- cabello_color %>%
  sample_frac(size = 0.07, replace = TRUE)
# Show only the first five columns.
muestreo_starwars_cabello[, seq_len(5)]
## # A tibble: 5 × 5
## # Groups: hair_color [3]
## name height mass hair_color skin_color
## <chr> <int> <dbl> <chr> <chr>
## 1 Lando Calrissian 177 79 black dark
## 2 Beru Whitesun lars 165 75 brown light
## 3 Grievous 216 159 none brown, white
## 4 R4-P17 96 NA none silver, red
## 5 Sly Moore 178 48 none pale
# Total number of rows in ChickWeight.
fila_colo_conteo <- tally(ChickWeight)
fila_colo_conteo
## n
## 1 578
# Number of rows for each value of Diet.
conteo_dieta <- count(ChickWeight, Diet)
conteo_dieta
## Diet n
## 1 1 220
## 2 2 120
## 3 3 120
## 4 4 118
# Attach to every row the count `n` of rows sharing its species, then keep
# only the rows whose species appears exactly once.
unica_especie_conteo <- starwars %>%
  add_count(species) %>%
  filter(n == 1)
# Show only the first six columns.
unica_especie_conteo[, seq_len(6)]
## # A tibble: 29 × 6
## name height mass hair_color skin_color eye_color
## <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Greedo 173 74 <NA> green black
## 2 Jabba Desilijic Tiure 175 1358 <NA> green-tan, brown orange
## 3 Yoda 66 17 white green brown
## 4 Bossk 190 113 none green red
## 5 Ackbar 180 83 none brown mottle orange
## 6 Wicket Systri Warrick 88 20 brown brown brown
## 7 Nien Nunb 160 68 none grey black
## 8 Nute Gunray 191 90 none mottled green red
## 9 Watto 137 NA black blue, grey yellow
## 10 Sebulba 112 40 none grey, red orange
## # … with 19 more rows
# Rename column `mpg` to `spam_mpg`; this replaces the working copy of mtcars.
mtcars <- mtcars %>%
  rename(spam_mpg = mpg)
names(mtcars) # the column name has changed
## [1] "spam_mpg" "cyl" "disp" "hp" "drat" "wt"
## [7] "qsec" "vs" "am" "gear" "carb"
data(starwars)
# Keep five columns and add `type`: "large" when height or mass is above 200,
# otherwise "robot" for droids, and "other" for every remaining row. The
# conditions are listed in priority order.
starwars_nuevo <- starwars %>%
  dplyr::select(name, mass, gender, species, height) %>%
  mutate(
    type = case_when(
      height > 200 | mass > 200 ~ "large",
      species == "Droid" ~ "robot",
      TRUE ~ "other"
    )
  )
starwars_nuevo
## # A tibble: 87 × 6
## name mass gender species height type
## <chr> <dbl> <chr> <chr> <int> <chr>
## 1 Luke Skywalker 77 masculine Human 172 other
## 2 C-3PO 75 masculine Droid 167 robot
## 3 R2-D2 32 masculine Droid 96 robot
## 4 Darth Vader 136 masculine Human 202 large
## 5 Leia Organa 49 feminine Human 150 other
## 6 Owen Lars 120 masculine Human 178 other
## 7 Beru Whitesun lars 75 feminine Human 165 other
## 8 R5-D4 32 masculine Droid 97 robot
## 9 Biggs Darklighter 84 masculine Human 183 other
## 10 Obi-Wan Kenobi 77 masculine Human 182 other
## # … with 77 more rows