df <- iris
df
df$Sepal.Length
## [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
## [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
## [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
## [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
## [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
## [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
## [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
## [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
## [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
df[1]
df[,1]
## [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
## [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
## [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
## [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
## [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
## [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
## [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
## [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
## [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
df[1,]
df[1:10,]
df[1:10, 1:2]
df[1:10, -1]
df[1:10, -(1:3)]
df$id <- 1:nrow(df)
df
which(df$Species == "virginica")
## [1] 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
## [18] 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
## [35] 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
df[101:150,]
which() function we used beforedf[which(df$Species == "virginica"), ]
test <- df[which(df$Species == "virginica"), ]
test
test <- df[which(df$Sepal.Length < 6), 4:6]
test
library(tidyverse)
filter()
select()
mutate()
group_by()
summarize()
filter() subsets observations (rows) based on a conditionsetosas <- filter(df, Species == "setosa")
setosas
select() subsets variables (columns)setosas_sepal <- select(setosas, Sepal.Length, Species, id)
setosas_sepal
setosas_sepal <- select(filter(df, Species == "setosa"), Sepal.Length, Species, id)
setosas_sepal
# Example from Hadley Wickham
foo_foo <- little_bunny()
bop(scoop_up(hop_through(foo_foo, forest), field_mouse), head)
foo_foo %>%
hop_through(forest) %>%
scoop_up(field_mouse) %>%
bop(head)
setosas <- df %>%
filter(Species == "setosa")
setosas
setosas <- df %>%
filter(Species == "setosa") %>%
select(Sepal.Length, Species, id)
setosas
df2 <- iris %>%
mutate(id = row_number())
df2
gather()tidy <- df %>%
gather(key = variable, value = value, Sepal.Length:Petal.Width)
tidy
wide <- read_csv("http://dartgo.org/qbs_iris")
wide
gather() and separate() to clean the datatidy <- wide %>%
gather(key = bad_id, value = value, setosa_1:ncol(wide))
tidy
tidy <- wide %>%
gather(key = bad_id, value = value, setosa_1:ncol(wide)) %>%
separate(col = bad_id, into = c("species", "id"), sep = "_")
tidy
df_normal <- wide %>%
gather(key = bad_id, value = value, setosa_1:ncol(wide)) %>%
separate(col = bad_id, into = c("species", "id"), sep = "_") %>%
spread(key = variable, value = value)
df_normal
group_by() and summarize()species_means <- df %>%
group_by(Species) %>%
summarize(pl = mean(Petal.Length), pw = mean(Petal.Width), sl = mean(Sepal.Length), sw = mean(Sepal.Width))
species_means
starwars
sw_heights <- starwars %>%
group_by(homeworld, species) %>%
summarize(mean_height = mean(height))
sw_heights