library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.2
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.2
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the heart data set from heart.csv (path is relative to the working
# directory). read_csv() guesses column types; per the echoed column
# specification, all 14 columns are parsed as doubles.
disease <- read_csv("heart.csv")
## Parsed with column specification:
## cols(
## age = col_double(),
## sex = col_double(),
## cp = col_double(),
## trestbps = col_double(),
## chol = col_double(),
## fbs = col_double(),
## restecg = col_double(),
## thalach = col_double(),
## exang = col_double(),
## oldpeak = col_double(),
## slope = col_double(),
## ca = col_double(),
## thal = col_double(),
## target = col_double()
## )
# Preview the first rows of the imported data to check the columns.
head(disease)
## # A tibble: 6 x 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 63 1 3 145 233 1 0 150 0 2.3
## 2 37 1 2 130 250 0 1 187 0 3.5
## 3 41 0 1 130 204 0 0 172 0 1.4
## 4 56 1 1 120 236 0 1 178 0 0.8
## 5 57 0 0 120 354 0 1 163 1 0.6
## 6 57 1 0 140 192 0 1 148 0 0.4
## # ... with 4 more variables: slope <dbl>, ca <dbl>, thal <dbl>,
## # target <dbl>
The package I have selected to use is dplyr.
Using filter, we can select the rows of a data frame that match the given conditions.
filter(data, …)
### Example
To select the people older than 20 and younger than 65, we can pass the data frame disease and the condition age > 20 & age < 65 to the function. It will return the matching rows of the heart disease data.
# Keep only patients strictly older than 20 and strictly younger than 65.
# Comma-separated conditions in filter() are combined with a logical AND.
disease %>%
  filter(age > 20, age < 65)
## # A tibble: 262 x 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 63 1 3 145 233 1 0 150 0 2.3
## 2 37 1 2 130 250 0 1 187 0 3.5
## 3 41 0 1 130 204 0 0 172 0 1.4
## 4 56 1 1 120 236 0 1 178 0 0.8
## 5 57 0 0 120 354 0 1 163 1 0.6
## 6 57 1 0 140 192 0 1 148 0 0.4
## 7 56 0 1 140 294 0 0 153 0 1.3
## 8 44 1 1 120 263 0 1 173 0 0
## 9 52 1 2 172 199 1 1 162 0 0.5
## 10 57 1 2 150 168 0 1 174 0 1.6
## # ... with 252 more rows, and 4 more variables: slope <dbl>, ca <dbl>,
## # thal <dbl>, target <dbl>
Using select, we can keep only the selected variables.
select(data, …)
To keep only the age, sex, and cp variables, we can pass the data frame disease and the column names age, sex, and cp to the function.
# Reduce the data to the three columns of interest, then preview the result.
df <- disease %>%
  select(age, sex, cp)
head(df)
## # A tibble: 6 x 3
## age sex cp
## <dbl> <dbl> <dbl>
## 1 63 1 3
## 2 37 1 2
## 3 41 0 1
## 4 56 1 1
## 5 57 0 0
## 6 57 1 0
Using arrange, we can order the rows of a data frame by expressions involving its variables.
To arrange the rows by sex and then by age:
# Sort ascending by sex first; ties within each sex are ordered by age.
df <- arrange(df, sex, age)
head(df)
## # A tibble: 6 x 3
## age sex cp
## <dbl> <dbl> <dbl>
## 1 34 0 1
## 2 35 0 0
## 3 37 0 2
## 4 39 0 2
## 5 39 0 2
## 6 41 0 1
# Show the last rows of the sorted data to see the other end of the ordering.
tail(df)
## # A tibble: 6 x 3
## age sex cp
## <dbl> <dbl> <dbl>
## 1 69 1 2
## 2 70 1 1
## 3 70 1 0
## 4 70 1 0
## 5 70 1 2
## 6 77 1 0