library(tidyverse)
# Load the heart-disease data set straight from GitHub.
heart <- read_csv("https://raw.githubusercontent.com/javernw/DATA607GroupProjects/master/heart.csv")
# read_csv() already returns a tibble; the conversion is kept as an explicit
# guarantee. as_tibble() replaces the deprecated as.tibble().
heart <- as_tibble(heart)
# Preview the first six rows.
head(heart)
## # A tibble: 6 x 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 63 1 3 145 233 1 0 150 0 2.3
## 2 37 1 2 130 250 0 1 187 0 3.5
## 3 41 0 1 130 204 0 0 172 0 1.4
## 4 56 1 1 120 236 0 1 178 0 0.8
## 5 57 0 0 120 354 0 1 163 1 0.6
## 6 57 1 0 140 192 0 1 148 0 0.4
## # ... with 4 more variables: slope <dbl>, ca <dbl>, thal <dbl>,
## # target <dbl>
The package I have selected to demonstrate is dplyr.
slice
Capability tutorial: using slice, we can select rows by specifying their row numbers.
slice(.data, …)
To select rows 6 to 12
# Keep rows 6 through 12 of heart, selected by position.
heart %>% slice(6:12)
## Warning: package 'bindrcpp' was built under R version 3.5.2
## # A tibble: 7 x 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 57 1 0 140 192 0 1 148 0 0.4
## 2 56 0 1 140 294 0 0 153 0 1.3
## 3 44 1 1 120 263 0 1 173 0 0
## 4 52 1 2 172 199 1 1 162 0 0.5
## 5 57 1 2 150 168 0 1 174 0 1.6
## 6 54 1 0 140 239 0 1 160 0 1.2
## 7 48 0 2 130 275 0 1 139 0 0.2
## # ... with 4 more variables: slope <dbl>, ca <dbl>, thal <dbl>,
## # target <dbl>
OR
To select rows 10 to 15, 18 and 299 to 302
# The row positions (10-15, 18 and 299-302) are combined into one vector.
slice(heart, c(10:15, 18, 299:302))
## # A tibble: 11 x 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 57 1 2 150 168 0 1 174 0 1.6
## 2 54 1 0 140 239 0 1 160 0 1.2
## 3 48 0 2 130 275 0 1 139 0 0.2
## 4 49 1 1 130 266 0 1 171 0 0.6
## 5 64 1 3 110 211 0 0 144 1 1.8
## 6 58 0 3 150 283 1 0 162 0 1
## 7 66 0 3 150 226 0 1 114 0 2.6
## 8 57 0 0 140 241 0 1 123 1 0.2
## 9 45 1 3 110 264 0 1 132 0 1.2
## 10 68 1 0 144 193 1 1 141 0 3.4
## 11 57 1 0 130 131 0 1 115 1 1.2
## # ... with 4 more variables: slope <dbl>, ca <dbl>, thal <dbl>,
## # target <dbl>
mutate
Capability tutorial: creates new columns based on existing ones.
mutate(.data, …)
Let’s look at the ratio of resting blood pressure to cholesterol in a new column named “Ratio”.
# Narrow the table to the two input columns first so the new Ratio column is
# visible in the printed result, then add Ratio = trestbps / chol.
# Without the select() step: heart %>% mutate(Ratio = trestbps / chol)
heart %>%
  select(trestbps, chol) %>%
  mutate(Ratio = trestbps / chol)
## # A tibble: 303 x 3
## trestbps chol Ratio
## <dbl> <dbl> <dbl>
## 1 145 233 0.622
## 2 130 250 0.52
## 3 130 204 0.637
## 4 120 236 0.508
## 5 120 354 0.339
## 6 140 192 0.729
## 7 140 294 0.476
## 8 120 263 0.456
## 9 172 199 0.864
## 10 150 168 0.893
## # ... with 293 more rows
summarise
Capability tutorial: applies functions that return results of length 1. Multiple calculations can be performed in the same call.
summarise(.data, …)
Let’s see the mean and median cholesterol along with the mean and median maximum heart rate achieved.
# Collapse heart to a single row holding the mean and median of chol and of
# thalach; one summary per line.
heart %>%
  summarise(
    Avg_cholesteral = mean(chol),
    Median_cholesteral = median(chol),
    Avg_heartrate = mean(thalach),
    Median_heartrate = median(thalach)
  )
## # A tibble: 1 x 4
## Avg_cholesteral Median_cholesteral Avg_heartrate Median_heartrate
## <dbl> <dbl> <dbl> <dbl>
## 1 246. 240 150. 153
do
Capability tutorial (do anything): performs arbitrary computations on the data.
do(.data, …)
We can create a function that sorts the data by age and then returns the first 3 rows for each age group.
# Number of rows to keep from each group.
x <- 3
# Sort a data frame by descending age and return its first `x` rows.
#   t: a data frame with an `age` column.
#   x: the number of rows to keep.
top <- function(t, x) {
  t %>%
    arrange(desc(age)) %>%
    head(x)
}
# Run top() once per age group; inside do(), `.` is the current group's rows.
heart %>%
  group_by(age) %>%
  do(top(., x))
## # A tibble: 113 x 14
## # Groups: age [41]
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 29 1 1 130 204 0 0 202 0 0
## 2 34 1 3 118 182 0 0 174 0 0
## 3 34 0 1 118 210 0 1 192 0 0.7
## 4 35 0 0 138 183 0 1 182 0 1.4
## 5 35 1 1 122 192 0 1 174 0 0
## 6 35 1 0 120 198 0 1 130 1 1.6
## 7 37 1 2 130 250 0 1 187 0 3.5
## 8 37 0 2 120 215 0 1 170 0 0
## 9 38 1 2 138 175 0 1 173 0 0
## 10 38 1 2 138 175 0 1 173 0 0
## # ... with 103 more rows, and 4 more variables: slope <dbl>, ca <dbl>,
## # thal <dbl>, target <dbl>