Pipe operator and Slice function.

library(dslabs)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the murders data set into the workspace.
data(murders)

# Murders per 100,000 residents, one value per row of `murders`.
rate <- murders[["total"]] / murders[["population"]] * 100000

# Print `murders` with `rate` appended as a column. The result is not
# assigned, so `murders` itself is left without a `rate` column.
murders |> mutate(rate)
##                   state abb        region population total       rate
## 1               Alabama  AL         South    4779736   135  2.8244238
## 2                Alaska  AK          West     710231    19  2.6751860
## 3               Arizona  AZ          West    6392017   232  3.6295273
## 4              Arkansas  AR         South    2915918    93  3.1893901
## 5            California  CA          West   37253956  1257  3.3741383
## 6              Colorado  CO          West    5029196    65  1.2924531
## 7           Connecticut  CT     Northeast    3574097    97  2.7139722
## 8              Delaware  DE         South     897934    38  4.2319369
## 9  District of Columbia  DC         South     601723    99 16.4527532
## 10              Florida  FL         South   19687653   669  3.3980688
## 11              Georgia  GA         South    9920000   376  3.7903226
## 12               Hawaii  HI          West    1360301     7  0.5145920
## 13                Idaho  ID          West    1567582    12  0.7655102
## 14             Illinois  IL North Central   12830632   364  2.8369608
## 15              Indiana  IN North Central    6483802   142  2.1900730
## 16                 Iowa  IA North Central    3046355    21  0.6893484
## 17               Kansas  KS North Central    2853118    63  2.2081106
## 18             Kentucky  KY         South    4339367   116  2.6732010
## 19            Louisiana  LA         South    4533372   351  7.7425810
## 20                Maine  ME     Northeast    1328361    11  0.8280881
## 21             Maryland  MD         South    5773552   293  5.0748655
## 22        Massachusetts  MA     Northeast    6547629   118  1.8021791
## 23             Michigan  MI North Central    9883640   413  4.1786225
## 24            Minnesota  MN North Central    5303925    53  0.9992600
## 25          Mississippi  MS         South    2967297   120  4.0440846
## 26             Missouri  MO North Central    5988927   321  5.3598917
## 27              Montana  MT          West     989415    12  1.2128379
## 28             Nebraska  NE North Central    1826341    32  1.7521372
## 29               Nevada  NV          West    2700551    84  3.1104763
## 30        New Hampshire  NH     Northeast    1316470     5  0.3798036
## 31           New Jersey  NJ     Northeast    8791894   246  2.7980319
## 32           New Mexico  NM          West    2059179    67  3.2537239
## 33             New York  NY     Northeast   19378102   517  2.6679599
## 34       North Carolina  NC         South    9535483   286  2.9993237
## 35         North Dakota  ND North Central     672591     4  0.5947151
## 36                 Ohio  OH North Central   11536504   310  2.6871225
## 37             Oklahoma  OK         South    3751351   111  2.9589340
## 38               Oregon  OR          West    3831074    36  0.9396843
## 39         Pennsylvania  PA     Northeast   12702379   457  3.5977513
## 40         Rhode Island  RI     Northeast    1052567    16  1.5200933
## 41       South Carolina  SC         South    4625364   207  4.4753235
## 42         South Dakota  SD North Central     814180     8  0.9825837
## 43            Tennessee  TN         South    6346105   219  3.4509357
## 44                Texas  TX         South   25145561   805  3.2013603
## 45                 Utah  UT          West    2763885    22  0.7959810
## 46              Vermont  VT     Northeast     625741     2  0.3196211
## 47             Virginia  VA         South    8001024   250  3.1246001
## 48           Washington  WA          West    6724540    93  1.3829942
## 49        West Virginia  WV         South    1852994    27  1.4571013
## 50            Wisconsin  WI North Central    5686986    97  1.7056487
## 51              Wyoming  WY          West     563626     5  0.8871131
# First six rows after sorting by region, then by murder rate within region.
# NOTE: `murders` has no `rate` column at this point, so `rate` is presumably
# picked up from the global vector of the same name.
head(arrange(murders, region, rate))
##           state abb    region population total
## 1       Vermont  VT Northeast     625741     2
## 2 New Hampshire  NH Northeast    1316470     5
## 3         Maine  ME Northeast    1328361    11
## 4  Rhode Island  RI Northeast    1052567    16
## 5 Massachusetts  MA Northeast    6547629   118
## 6      New York  NY Northeast   19378102   517
# The earlier mutate() result was never stored, so this column does not exist.
murders[["rate"]]
## NULL
# Top n: the five rows with the largest `rate` values.

slice_max(murders, rate, n = 5)
##                  state abb        region population total
## 1 District of Columbia  DC         South     601723    99
## 2            Louisiana  LA         South    4533372   351
## 3             Missouri  MO North Central    5988927   321
## 4             Maryland  MD         South    5773552   293
## 5       South Carolina  SC         South    4625364   207

Tibbles

# group_by() returns a grouped tibble: "grouped_df" on top of the tibble classes.
class(group_by(murders, region))
## [1] "grouped_df" "tbl_df"     "tbl"        "data.frame"
# Tibbles vs data frames
# -> A tibble is a special kind of data frame used for data manipulation.
# -> dplyr verbs such as group_by() and summarise() return tibbles
#    (group_by() returns the grouped variant shown above).

# 1. Tibbles display better, and subsetting a tibble keeps it a tibble.
class(murders) # "data.frame"
## [1] "data.frame"
class(murders[, 4]) # "numeric"
## [1] "numeric"
class(as_tibble(murders)[, 4]) # "tbl_df"     "tbl"        "data.frame"
## [1] "tbl_df"     "tbl"        "data.frame"
# This is useful because many tidyverse functions expect a data frame as input.

# 2. Tibble columns can hold complex entries, such as a list of functions.
# NOTE: base R's mode() reports an object's storage mode; it is not the
# statistical mode.

tibble(id = c(1, 2, 3), func = list(mean, mode, median))
## # A tibble: 3 × 2
##      id func  
##   <dbl> <list>
## 1     1 <fn>  
## 2     2 <fn>  
## 3     3 <fn>
# 3. Tibbles can take several complex objects as inputs, e.g. lists or
#    functions. A plain tibble is built column by column:

grades <- tibble(
  name = c("A", "B", "C", "D"),
  exam_1 = c(11, 34, 21, 24),
  exam_2 = c(45, 12, 46, 67)
)

grades
## # A tibble: 4 × 3
##   name  exam_1 exam_2
##   <chr>  <dbl>  <dbl>
## 1 A         11     45
## 2 B         34     12
## 3 C         21     46
## 4 D         24     67
# The same grades table, built as a base R data frame.
grades_df <- data.frame(
  name = c("A", "B", "C", "D"),
  exam_1 = c(11, 34, 21, 24),
  exam_2 = c(45, 12, 46, 67)
)

grades_df
##   name exam_1 exam_2
## 1    A     11     45
## 2    B     34     12
## 3    C     21     46
## 4    D     24     67
# Convert the data frame to a tibble; the printout gains dimensions and types.
grades_df |> as_tibble()
## # A tibble: 4 × 3
##   name  exam_1 exam_2
##   <chr>  <dbl>  <dbl>
## 1 A         11     45
## 2 B         34     12
## 3 C         21     46
## 4 D         24     67
# Compare the class vectors of the tibble, the data frame and the conversion.
grades |> class()
## [1] "tbl_df"     "tbl"        "data.frame"
grades_df |> class()
## [1] "data.frame"
grades_df |> as_tibble() |> class()
## [1] "tbl_df"     "tbl"        "data.frame"

tidyverse Conditionals

# case_when(): a vectorised if / else-if chain; the first matching rule wins.

x <- c(-1, -2, 0, 1, 2)

case_when(
  x < 0 ~ "Negative",
  x > 0 ~ "Positive",
  TRUE ~ "Zero" # fallback for values matched by neither rule above
)
## [1] "Negative" "Negative" "Zero"     "Positive" "Positive"
# Suppose we want to compare the murder rate across the four regions.

# Store the precomputed rate vector as an actual column of `murders`.
murders[["rate"]] <- rate
murders[["rate"]]
##  [1]  2.8244238  2.6751860  3.6295273  3.1893901  3.3741383  1.2924531
##  [7]  2.7139722  4.2319369 16.4527532  3.3980688  3.7903226  0.5145920
## [13]  0.7655102  2.8369608  2.1900730  0.6893484  2.2081106  2.6732010
## [19]  7.7425810  0.8280881  5.0748655  1.8021791  4.1786225  0.9992600
## [25]  4.0440846  5.3598917  1.2128379  1.7521372  3.1104763  0.3798036
## [31]  2.7980319  3.2537239  2.6679599  2.9993237  0.5947151  2.6871225
## [37]  2.9589340  0.9396843  3.5977513  1.5200933  4.4753235  0.9825837
## [43]  3.4509357  3.2013603  0.7959810  0.3196211  3.1246001  1.3829942
## [49]  1.4571013  1.7056487  0.8871131
murders |> head()
##        state abb region population total     rate
## 1    Alabama  AL  South    4779736   135 2.824424
## 2     Alaska  AK   West     710231    19 2.675186
## 3    Arizona  AZ   West    6392017   232 3.629527
## 4   Arkansas  AR  South    2915918    93 3.189390
## 5 California  CA   West   37253956  1257 3.374138
## 6   Colorado  CO   West    5029196    65 1.292453
# Mean, median and standard deviation of the murder rate within each region.
summarise(
  group_by(murders, region),
  mean_rate = mean(rate),
  median_rate = median(rate),
  sd_rate = sd(rate)
)
## # A tibble: 4 × 4
##   region        mean_rate median_rate sd_rate
##   <fct>             <dbl>       <dbl>   <dbl>
## 1 Northeast          1.85        1.80    1.17
## 2 South              4.42        3.40    3.37
## 3 North Central      2.18        1.97    1.44
## 4 West               1.83        1.29    1.17
# between() in dplyr: between(x, left, right) is shorthand for
# x >= left & x <= right, compared element by element.

a <- c(1, 0, 3, -4, -9)
b <- c(1.4, 3.5, 5.67, 4.98, 9.0)

(x >= a) & (x <= b)
## [1] FALSE FALSE FALSE  TRUE  TRUE
between(x, a, b)
## [1] FALSE FALSE FALSE  TRUE  TRUE

Questions:

Load dplyr, dslabs, and the murders data set, then: 1. Create a new column with the population in millions. 2. Rank the states by population. 3. Find the top 5 states by murder rate using rank. 4. Create a new data frame, no_florida, that removes the state of Florida. 5. Create a new data frame, no_south, that removes the states in the South region. 6. Count the number of states in the Northeast or West.

# Question 1: population expressed in millions, stored as a new column.
pop_mill <- murders[["population"]] / 1e6

murders[["pop_mill"]] <- pop_mill

murders[["pop_mill"]]
##  [1]  4.779736  0.710231  6.392017  2.915918 37.253956  5.029196  3.574097
##  [8]  0.897934  0.601723 19.687653  9.920000  1.360301  1.567582 12.830632
## [15]  6.483802  3.046355  2.853118  4.339367  4.533372  1.328361  5.773552
## [22]  6.547629  9.883640  5.303925  2.967297  5.988927  0.989415  1.826341
## [29]  2.700551  1.316470  8.791894  2.059179 19.378102  9.535483  0.672591
## [36] 11.536504  3.751351  3.831074 12.702379  1.052567  4.625364  0.814180
## [43]  6.346105 25.145561  2.763885  0.625741  8.001024  6.724540  1.852994
## [50]  5.686986  0.563626
murders |> head()
##        state abb region population total     rate  pop_mill
## 1    Alabama  AL  South    4779736   135 2.824424  4.779736
## 2     Alaska  AK   West     710231    19 2.675186  0.710231
## 3    Arizona  AZ   West    6392017   232 3.629527  6.392017
## 4   Arkansas  AR  South    2915918    93 3.189390  2.915918
## 5 California  CA   West   37253956  1257 3.374138 37.253956
## 6   Colorado  CO   West    5029196    65 1.292453  5.029196
# Question 2: rank the states by population. rank() is ascending, so the
# population is negated to give rank 1 to the most populous state.
murders[["pop_rank"]] <- rank(-murders[["population"]])
murders |> head()
##        state abb region population total     rate  pop_mill pop_rank
## 1    Alabama  AL  South    4779736   135 2.824424  4.779736       23
## 2     Alaska  AK   West     710231    19 2.675186  0.710231       47
## 3    Arizona  AZ   West    6392017   232 3.629527  6.392017       16
## 4   Arkansas  AR  South    2915918    93 3.189390  2.915918       32
## 5 California  CA   West   37253956  1257 3.374138 37.253956        1
## 6   Colorado  CO   West    5029196    65 1.292453  5.029196       22
# Question 3: top 5 states by murder rate, using rank.
# rank() is ascending: murder_rank 1 is the LOWEST rate and the largest
# rank marks the highest rate.
murders$murder_rank <- rank(murders$rate)

# Sort by descending rank so the five highest murder rates come first;
# an ascending sort would return the five lowest rates instead.
top5 <- head(
  murders[order(murders$murder_rank, decreasing = TRUE),
          c("state", "rate", "murder_rank")],
  5
)

top5
##                   state      rate murder_rank
## 9  District of Columbia 16.452753          51
## 19            Louisiana  7.742581          50
## 26             Missouri  5.359892          49
## 21             Maryland  5.074866          48
## 41       South Carolina  4.475323          47
# Question 4: a copy of `murders` without Florida.
# Row count before filtering, for comparison.
murders |> nrow()
## [1] 51
no_florida <- murders |> filter(state != "Florida")

no_florida
##                   state abb        region population total       rate  pop_mill
## 1               Alabama  AL         South    4779736   135  2.8244238  4.779736
## 2                Alaska  AK          West     710231    19  2.6751860  0.710231
## 3               Arizona  AZ          West    6392017   232  3.6295273  6.392017
## 4              Arkansas  AR         South    2915918    93  3.1893901  2.915918
## 5            California  CA          West   37253956  1257  3.3741383 37.253956
## 6              Colorado  CO          West    5029196    65  1.2924531  5.029196
## 7           Connecticut  CT     Northeast    3574097    97  2.7139722  3.574097
## 8              Delaware  DE         South     897934    38  4.2319369  0.897934
## 9  District of Columbia  DC         South     601723    99 16.4527532  0.601723
## 10              Georgia  GA         South    9920000   376  3.7903226  9.920000
## 11               Hawaii  HI          West    1360301     7  0.5145920  1.360301
## 12                Idaho  ID          West    1567582    12  0.7655102  1.567582
## 13             Illinois  IL North Central   12830632   364  2.8369608 12.830632
## 14              Indiana  IN North Central    6483802   142  2.1900730  6.483802
## 15                 Iowa  IA North Central    3046355    21  0.6893484  3.046355
## 16               Kansas  KS North Central    2853118    63  2.2081106  2.853118
## 17             Kentucky  KY         South    4339367   116  2.6732010  4.339367
## 18            Louisiana  LA         South    4533372   351  7.7425810  4.533372
## 19                Maine  ME     Northeast    1328361    11  0.8280881  1.328361
## 20             Maryland  MD         South    5773552   293  5.0748655  5.773552
## 21        Massachusetts  MA     Northeast    6547629   118  1.8021791  6.547629
## 22             Michigan  MI North Central    9883640   413  4.1786225  9.883640
## 23            Minnesota  MN North Central    5303925    53  0.9992600  5.303925
## 24          Mississippi  MS         South    2967297   120  4.0440846  2.967297
## 25             Missouri  MO North Central    5988927   321  5.3598917  5.988927
## 26              Montana  MT          West     989415    12  1.2128379  0.989415
## 27             Nebraska  NE North Central    1826341    32  1.7521372  1.826341
## 28               Nevada  NV          West    2700551    84  3.1104763  2.700551
## 29        New Hampshire  NH     Northeast    1316470     5  0.3798036  1.316470
## 30           New Jersey  NJ     Northeast    8791894   246  2.7980319  8.791894
## 31           New Mexico  NM          West    2059179    67  3.2537239  2.059179
## 32             New York  NY     Northeast   19378102   517  2.6679599 19.378102
## 33       North Carolina  NC         South    9535483   286  2.9993237  9.535483
## 34         North Dakota  ND North Central     672591     4  0.5947151  0.672591
## 35                 Ohio  OH North Central   11536504   310  2.6871225 11.536504
## 36             Oklahoma  OK         South    3751351   111  2.9589340  3.751351
## 37               Oregon  OR          West    3831074    36  0.9396843  3.831074
## 38         Pennsylvania  PA     Northeast   12702379   457  3.5977513 12.702379
## 39         Rhode Island  RI     Northeast    1052567    16  1.5200933  1.052567
## 40       South Carolina  SC         South    4625364   207  4.4753235  4.625364
## 41         South Dakota  SD North Central     814180     8  0.9825837  0.814180
## 42            Tennessee  TN         South    6346105   219  3.4509357  6.346105
## 43                Texas  TX         South   25145561   805  3.2013603 25.145561
## 44                 Utah  UT          West    2763885    22  0.7959810  2.763885
## 45              Vermont  VT     Northeast     625741     2  0.3196211  0.625741
## 46             Virginia  VA         South    8001024   250  3.1246001  8.001024
## 47           Washington  WA          West    6724540    93  1.3829942  6.724540
## 48        West Virginia  WV         South    1852994    27  1.4571013  1.852994
## 49            Wisconsin  WI North Central    5686986    97  1.7056487  5.686986
## 50              Wyoming  WY          West     563626     5  0.8871131  0.563626
##    pop_rank murder_rank
## 1        23          29
## 2        47          25
## 3        16          42
## 4        32          35
## 5         1          38
## 6        22          14
## 7        29          27
## 8        45          46
## 9        50          51
## 10        8          43
## 11       40           3
## 12       39           6
## 13        5          30
## 14       15          21
## 15       30           5
## 16       33          22
## 17       26          24
## 18       25          50
## 19       41           8
## 20       19          48
## 21       14          20
## 22        9          45
## 23       21          12
## 24       31          44
## 25       18          49
## 26       44          13
## 27       38          19
## 28       35          33
## 29       42           2
## 30       11          28
## 31       36          37
## 32        4          23
## 33       10          32
## 34       48           4
## 35        7          26
## 36       28          31
## 37       27          10
## 38        6          41
## 39       43          17
## 40       24          47
## 41       46          11
## 42       17          40
## 43        2          36
## 44       34           7
## 45       49           1
## 46       12          34
## 47       13          15
## 48       37          16
## 49       20          18
## 50       51           9
# Question 5: keep every state whose region is not "South" (base R indexing,
# so the original row names are preserved).
no_south <- murders[!(murders$region == "South"), ]

no_south
##            state abb        region population total      rate  pop_mill
## 2         Alaska  AK          West     710231    19 2.6751860  0.710231
## 3        Arizona  AZ          West    6392017   232 3.6295273  6.392017
## 5     California  CA          West   37253956  1257 3.3741383 37.253956
## 6       Colorado  CO          West    5029196    65 1.2924531  5.029196
## 7    Connecticut  CT     Northeast    3574097    97 2.7139722  3.574097
## 12        Hawaii  HI          West    1360301     7 0.5145920  1.360301
## 13         Idaho  ID          West    1567582    12 0.7655102  1.567582
## 14      Illinois  IL North Central   12830632   364 2.8369608 12.830632
## 15       Indiana  IN North Central    6483802   142 2.1900730  6.483802
## 16          Iowa  IA North Central    3046355    21 0.6893484  3.046355
## 17        Kansas  KS North Central    2853118    63 2.2081106  2.853118
## 20         Maine  ME     Northeast    1328361    11 0.8280881  1.328361
## 22 Massachusetts  MA     Northeast    6547629   118 1.8021791  6.547629
## 23      Michigan  MI North Central    9883640   413 4.1786225  9.883640
## 24     Minnesota  MN North Central    5303925    53 0.9992600  5.303925
## 26      Missouri  MO North Central    5988927   321 5.3598917  5.988927
## 27       Montana  MT          West     989415    12 1.2128379  0.989415
## 28      Nebraska  NE North Central    1826341    32 1.7521372  1.826341
## 29        Nevada  NV          West    2700551    84 3.1104763  2.700551
## 30 New Hampshire  NH     Northeast    1316470     5 0.3798036  1.316470
## 31    New Jersey  NJ     Northeast    8791894   246 2.7980319  8.791894
## 32    New Mexico  NM          West    2059179    67 3.2537239  2.059179
## 33      New York  NY     Northeast   19378102   517 2.6679599 19.378102
## 35  North Dakota  ND North Central     672591     4 0.5947151  0.672591
## 36          Ohio  OH North Central   11536504   310 2.6871225 11.536504
## 38        Oregon  OR          West    3831074    36 0.9396843  3.831074
## 39  Pennsylvania  PA     Northeast   12702379   457 3.5977513 12.702379
## 40  Rhode Island  RI     Northeast    1052567    16 1.5200933  1.052567
## 42  South Dakota  SD North Central     814180     8 0.9825837  0.814180
## 45          Utah  UT          West    2763885    22 0.7959810  2.763885
## 46       Vermont  VT     Northeast     625741     2 0.3196211  0.625741
## 48    Washington  WA          West    6724540    93 1.3829942  6.724540
## 50     Wisconsin  WI North Central    5686986    97 1.7056487  5.686986
## 51       Wyoming  WY          West     563626     5 0.8871131  0.563626
##    pop_rank murder_rank
## 2        47          25
## 3        16          42
## 5         1          38
## 6        22          14
## 7        29          27
## 12       40           3
## 13       39           6
## 14        5          30
## 15       15          21
## 16       30           5
## 17       33          22
## 20       41           8
## 22       14          20
## 23        9          45
## 24       21          12
## 26       18          49
## 27       44          13
## 28       38          19
## 29       35          33
## 30       42           2
## 31       11          28
## 32       36          37
## 33        4          23
## 35       48           4
## 36        7          26
## 38       27          10
## 39        6          41
## 40       43          17
## 42       46          11
## 45       34           7
## 46       49           1
## 48       13          15
## 50       20          18
## 51       51           9
# Question 6: number of states in the Northeast or West.
# The question asks for a count, so report nrow() and then show the rows.
northeast_or_west <- filter(murders, region %in% c("Northeast", "West"))
nrow(northeast_or_west)
## [1] 22
northeast_or_west
##            state abb    region population total      rate  pop_mill pop_rank
## 1         Alaska  AK      West     710231    19 2.6751860  0.710231       47
## 2        Arizona  AZ      West    6392017   232 3.6295273  6.392017       16
## 3     California  CA      West   37253956  1257 3.3741383 37.253956        1
## 4       Colorado  CO      West    5029196    65 1.2924531  5.029196       22
## 5    Connecticut  CT Northeast    3574097    97 2.7139722  3.574097       29
## 6         Hawaii  HI      West    1360301     7 0.5145920  1.360301       40
## 7          Idaho  ID      West    1567582    12 0.7655102  1.567582       39
## 8          Maine  ME Northeast    1328361    11 0.8280881  1.328361       41
## 9  Massachusetts  MA Northeast    6547629   118 1.8021791  6.547629       14
## 10       Montana  MT      West     989415    12 1.2128379  0.989415       44
## 11        Nevada  NV      West    2700551    84 3.1104763  2.700551       35
## 12 New Hampshire  NH Northeast    1316470     5 0.3798036  1.316470       42
## 13    New Jersey  NJ Northeast    8791894   246 2.7980319  8.791894       11
## 14    New Mexico  NM      West    2059179    67 3.2537239  2.059179       36
## 15      New York  NY Northeast   19378102   517 2.6679599 19.378102        4
## 16        Oregon  OR      West    3831074    36 0.9396843  3.831074       27
## 17  Pennsylvania  PA Northeast   12702379   457 3.5977513 12.702379        6
## 18  Rhode Island  RI Northeast    1052567    16 1.5200933  1.052567       43
## 19          Utah  UT      West    2763885    22 0.7959810  2.763885       34
## 20       Vermont  VT Northeast     625741     2 0.3196211  0.625741       49
## 21    Washington  WA      West    6724540    93 1.3829942  6.724540       13
## 22       Wyoming  WY      West     563626     5 0.8871131  0.563626       51
##    murder_rank
## 1           25
## 2           42
## 3           38
## 4           14
## 5           27
## 6            3
## 7            6
## 8            8
## 9           20
## 10          13
## 11          33
## 12           2
## 13          28
## 14          37
## 15          23
## 16          10
## 17          41
## 18          17
## 19           7
## 20           1
## 21          15
## 22           9

NHANES Library

library(NHANES)
# Load the NHANES data set and preview its first rows.
data(NHANES)
NHANES |> head()
## # A tibble: 6 × 76
##      ID SurveyYr Gender   Age AgeDecade AgeMonths Race1 Race3 Education   
##   <int> <fct>    <fct>  <int> <fct>         <int> <fct> <fct> <fct>       
## 1 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 2 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 3 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 4 51625 2009_10  male       4 " 0-9"           49 Other <NA>  <NA>        
## 5 51630 2009_10  female    49 " 40-49"        596 White <NA>  Some College
## 6 51638 2009_10  male       9 " 0-9"          115 White <NA>  <NA>        
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## #   Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## #   Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## #   BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## #   BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## #   BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>,
## #   TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, UrineVol2 <int>, …
# To ignore missing values, pass na.rm = TRUE to the summary functions.
# Keep only the rows with Gender == "female" and AgeDecade == " 20-29"
# (the AgeDecade levels carry a leading space).

NHANES |>
  filter(Gender == "female", AgeDecade == " 20-29")
## # A tibble: 681 × 76
##       ID SurveyYr Gender   Age AgeDecade AgeMonths Race1    Race3 Education     
##    <int> <fct>    <fct>  <int> <fct>         <int> <fct>    <fct> <fct>         
##  1 51710 2009_10  female    26 " 20-29"        319 White    <NA>  College Grad  
##  2 51731 2009_10  female    28 " 20-29"        346 Black    <NA>  High School   
##  3 51741 2009_10  female    21 " 20-29"        253 Black    <NA>  Some College  
##  4 51741 2009_10  female    21 " 20-29"        253 Black    <NA>  Some College  
##  5 51760 2009_10  female    27 " 20-29"        334 Hispanic <NA>  9 - 11th Grade
##  6 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  7 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  8 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  9 51774 2009_10  female    26 " 20-29"        312 White    <NA>  8th Grade     
## 10 51774 2009_10  female    26 " 20-29"        312 White    <NA>  8th Grade     
## # ℹ 671 more rows
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## #   Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## #   Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## #   BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## #   BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## #   BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>, …
# Women aged 20-29, stored for the summaries that follow.
female_20s <- filter(NHANES, Gender == "female", AgeDecade == " 20-29")

female_20s
## # A tibble: 681 × 76
##       ID SurveyYr Gender   Age AgeDecade AgeMonths Race1    Race3 Education     
##    <int> <fct>    <fct>  <int> <fct>         <int> <fct>    <fct> <fct>         
##  1 51710 2009_10  female    26 " 20-29"        319 White    <NA>  College Grad  
##  2 51731 2009_10  female    28 " 20-29"        346 Black    <NA>  High School   
##  3 51741 2009_10  female    21 " 20-29"        253 Black    <NA>  Some College  
##  4 51741 2009_10  female    21 " 20-29"        253 Black    <NA>  Some College  
##  5 51760 2009_10  female    27 " 20-29"        334 Hispanic <NA>  9 - 11th Grade
##  6 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  7 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  8 51764 2009_10  female    29 " 20-29"        357 White    <NA>  College Grad  
##  9 51774 2009_10  female    26 " 20-29"        312 White    <NA>  8th Grade     
## 10 51774 2009_10  female    26 " 20-29"        312 White    <NA>  8th Grade     
## # ℹ 671 more rows
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## #   Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## #   Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## #   BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## #   BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## #   BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>, …
# Mean and standard deviation of BPSysAve, ignoring missing values.
summarise(
  female_20s,
  avg = mean(BPSysAve, na.rm = TRUE),
  sd = sd(BPSysAve, na.rm = TRUE)
)
## # A tibble: 1 × 2
##     avg    sd
##   <dbl> <dbl>
## 1  108.  10.1
# Smallest and largest BPSysAve in female_20s, ignoring missing values.

summarise(
  female_20s,
  min = min(BPSysAve, na.rm = TRUE),
  max = max(BPSysAve, na.rm = TRUE)
)
## # A tibble: 1 × 2
##     min   max
##   <int> <int>
## 1    84   179
# Women only: mean and standard deviation of BPSysAve for each age decade.
# Missing readings are dropped inside each summary via na.rm = TRUE.

filter(NHANES, Gender == "female") |>
  group_by(AgeDecade) |>
  summarise(
    mean_BPSysAve = mean(BPSysAve, na.rm = TRUE),
    sd_BPSysAve = sd(BPSysAve, na.rm = TRUE)
  )
## # A tibble: 9 × 3
##   AgeDecade mean_BPSysAve sd_BPSysAve
##   <fct>             <dbl>       <dbl>
## 1 " 0-9"            100.0        9.07
## 2 " 10-19"          104.         9.46
## 3 " 20-29"          108.        10.1 
## 4 " 30-39"          111.        12.3 
## 5 " 40-49"          115.        14.5 
## 6 " 50-59"          122.        16.2 
## 7 " 60-69"          127.        17.1 
## 8 " 70+"            134.        19.8 
## 9  <NA>             142.        22.9