library(dslabs)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the US murders dataset shipped with dslabs.
data(murders)
# Murders per 100,000 residents, one value per row of `murders`.
rate <- with(murders, total / population * 1e5)
# mutate() returns a new data frame; the result is only printed here, so
# `murders` itself does not gain a `rate` column at this point.
murders |>
  mutate(rate = rate)
## state abb region population total rate
## 1 Alabama AL South 4779736 135 2.8244238
## 2 Alaska AK West 710231 19 2.6751860
## 3 Arizona AZ West 6392017 232 3.6295273
## 4 Arkansas AR South 2915918 93 3.1893901
## 5 California CA West 37253956 1257 3.3741383
## 6 Colorado CO West 5029196 65 1.2924531
## 7 Connecticut CT Northeast 3574097 97 2.7139722
## 8 Delaware DE South 897934 38 4.2319369
## 9 District of Columbia DC South 601723 99 16.4527532
## 10 Florida FL South 19687653 669 3.3980688
## 11 Georgia GA South 9920000 376 3.7903226
## 12 Hawaii HI West 1360301 7 0.5145920
## 13 Idaho ID West 1567582 12 0.7655102
## 14 Illinois IL North Central 12830632 364 2.8369608
## 15 Indiana IN North Central 6483802 142 2.1900730
## 16 Iowa IA North Central 3046355 21 0.6893484
## 17 Kansas KS North Central 2853118 63 2.2081106
## 18 Kentucky KY South 4339367 116 2.6732010
## 19 Louisiana LA South 4533372 351 7.7425810
## 20 Maine ME Northeast 1328361 11 0.8280881
## 21 Maryland MD South 5773552 293 5.0748655
## 22 Massachusetts MA Northeast 6547629 118 1.8021791
## 23 Michigan MI North Central 9883640 413 4.1786225
## 24 Minnesota MN North Central 5303925 53 0.9992600
## 25 Mississippi MS South 2967297 120 4.0440846
## 26 Missouri MO North Central 5988927 321 5.3598917
## 27 Montana MT West 989415 12 1.2128379
## 28 Nebraska NE North Central 1826341 32 1.7521372
## 29 Nevada NV West 2700551 84 3.1104763
## 30 New Hampshire NH Northeast 1316470 5 0.3798036
## 31 New Jersey NJ Northeast 8791894 246 2.7980319
## 32 New Mexico NM West 2059179 67 3.2537239
## 33 New York NY Northeast 19378102 517 2.6679599
## 34 North Carolina NC South 9535483 286 2.9993237
## 35 North Dakota ND North Central 672591 4 0.5947151
## 36 Ohio OH North Central 11536504 310 2.6871225
## 37 Oklahoma OK South 3751351 111 2.9589340
## 38 Oregon OR West 3831074 36 0.9396843
## 39 Pennsylvania PA Northeast 12702379 457 3.5977513
## 40 Rhode Island RI Northeast 1052567 16 1.5200933
## 41 South Carolina SC South 4625364 207 4.4753235
## 42 South Dakota SD North Central 814180 8 0.9825837
## 43 Tennessee TN South 6346105 219 3.4509357
## 44 Texas TX South 25145561 805 3.2013603
## 45 Utah UT West 2763885 22 0.7959810
## 46 Vermont VT Northeast 625741 2 0.3196211
## 47 Virginia VA South 8001024 250 3.1246001
## 48 Washington WA West 6724540 93 1.3829942
## 49 West Virginia WV South 1852994 27 1.4571013
## 50 Wisconsin WI North Central 5686986 97 1.7056487
## 51 Wyoming WY West 563626 5 0.8871131
# Sort by region, then by murder rate within each region, and show the first
# six rows. `murders` has no `rate` column yet, so `rate` resolves to the
# standalone vector computed earlier in the script.
head(arrange(murders, region, rate))
## state abb region population total
## 1 Vermont VT Northeast 625741 2
## 2 New Hampshire NH Northeast 1316470 5
## 3 Maine ME Northeast 1328361 11
## 4 Rhode Island RI Northeast 1052567 16
## 5 Massachusetts MA Northeast 6547629 118
## 6 New York NY Northeast 19378102 517
# Look up the `rate` column by name. The result is NULL because the earlier
# mutate() call returned a modified copy that was never assigned to `murders`.
murders[["rate"]]
## NULL
# Top n: the five rows with the largest murder rate.
slice_max(murders, rate, n = 5)
## state abb region population total
## 1 District of Columbia DC South 601723 99
## 2 Louisiana LA South 4533372 351
## 3 Missouri MO North Central 5988927 321
## 4 Maryland MD South 5773552 293
## 5 South Carolina SC South 4625364 207
# group_by() returns a grouped tibble: class "grouped_df" layered on top of
# "tbl_df", "tbl" and "data.frame".
class(group_by(murders, region))
## [1] "grouped_df" "tbl_df" "tbl" "data.frame"
# Tibbles vs data frames
# -> A tibble is a special kind of data frame designed for data manipulation.
# -> The class vectors printed in this script show that group_by() and
#    summarise() hand back tibbles ("grouped_df" / "tbl_df").
# 1. Tibbles display better.
murders |> class() # a plain "data.frame"
## [1] "data.frame"
murders[, 4] |> class() # selecting one column drops to a "numeric" vector
## [1] "numeric"
as_tibble(murders)[, 4] |> class() # the same selection on a tibble stays a tibble
## [1] "tbl_df" "tbl" "data.frame"
# Keeping the data-frame class is useful because many tidyverse functions
# expect a data frame as input.
# 2. Tibble columns can hold complex entries, e.g. a list of functions.
tibble(
  id = c(1, 2, 3),
  func = list(mean, mode, median) # list-column holding three functions
)
## # A tibble: 3 × 2
## id func
## <dbl> <list>
## 1 1 <fn>
## 2 2 <fn>
## 3 3 <fn>
# 3. Tibble columns may hold several kinds of complex objects, e.g. lists or
#    functions.
# A small tibble of exam scores, used below for comparison with a data frame.
grades <- tibble(
  name = LETTERS[1:4],
  exam_1 = c(11, 34, 21, 24),
  exam_2 = c(45, 12, 46, 67)
)
grades
## # A tibble: 4 × 3
## name exam_1 exam_2
## <chr> <dbl> <dbl>
## 1 A 11 45
## 2 B 34 12
## 3 C 21 46
## 4 D 24 67
# The same exam scores built as a base R data frame.
grades_df <- data.frame(
  name = LETTERS[1:4],
  exam_1 = c(11, 34, 21, 24),
  exam_2 = c(45, 12, 46, 67)
)
grades_df
## name exam_1 exam_2
## 1 A 11 45
## 2 B 34 12
## 3 C 21 46
## 4 D 24 67
# Convert the base data frame to a tibble; it prints like `grades`.
grades_df |> as_tibble()
## # A tibble: 4 × 3
## name exam_1 exam_2
## <chr> <dbl> <dbl>
## 1 A 11 45
## 2 B 34 12
## 3 C 21 46
## 4 D 24 67
# Compare the class vectors of the tibble, the data frame and the conversion.
grades |> class()
## [1] "tbl_df" "tbl" "data.frame"
grades_df |> class()
## [1] "data.frame"
grades_df |> as_tibble() |> class()
## [1] "tbl_df" "tbl" "data.frame"
# case_when: label each element of x by its sign.
x <- c(-1, -2, 0, 1, 2)
case_when(
  x > 0 ~ "Positive",
  x < 0 ~ "Negative",
  .default = "Zero" # anything that is neither positive nor negative
)
## [1] "Negative" "Negative" "Zero" "Positive" "Positive"
# Goal: compare the murder rate across the four regions, so first store the
# rate vector as an actual column of `murders`.
murders[["rate"]] <- rate
murders[["rate"]]
## [1] 2.8244238 2.6751860 3.6295273 3.1893901 3.3741383 1.2924531
## [7] 2.7139722 4.2319369 16.4527532 3.3980688 3.7903226 0.5145920
## [13] 0.7655102 2.8369608 2.1900730 0.6893484 2.2081106 2.6732010
## [19] 7.7425810 0.8280881 5.0748655 1.8021791 4.1786225 0.9992600
## [25] 4.0440846 5.3598917 1.2128379 1.7521372 3.1104763 0.3798036
## [31] 2.7980319 3.2537239 2.6679599 2.9993237 0.5947151 2.6871225
## [37] 2.9589340 0.9396843 3.5977513 1.5200933 4.4753235 0.9825837
## [43] 3.4509357 3.2013603 0.7959810 0.3196211 3.1246001 1.3829942
## [49] 1.4571013 1.7056487 0.8871131
# First six rows, now including the new `rate` column.
murders |> head()
## state abb region population total rate
## 1 Alabama AL South 4779736 135 2.824424
## 2 Alaska AK West 710231 19 2.675186
## 3 Arizona AZ West 6392017 232 3.629527
## 4 Arkansas AR South 2915918 93 3.189390
## 5 California CA West 37253956 1257 3.374138
## 6 Colorado CO West 5029196 65 1.292453
# Mean, median and standard deviation of the murder rate within each region.
summarise(
  group_by(murders, region),
  across(
    rate,
    list(mean = mean, median = median, sd = sd),
    .names = "{.fn}_{.col}" # yields mean_rate, median_rate, sd_rate
  )
)
## # A tibble: 4 × 4
## region mean_rate median_rate sd_rate
## <fct> <dbl> <dbl> <dbl>
## 1 Northeast 1.85 1.80 1.17
## 2 South 4.42 3.40 3.37
## 3 North Central 2.18 1.97 1.44
## 4 West 1.83 1.29 1.17
# between() from dplyr: element-wise test of a[i] <= x[i] <= b[i].
a <- c(1, 0, 3, -4, -9)
b <- c(1.4, 3.5, 5.67, 4.98, 9.0)
# Manual version using two comparisons.
(a <= x) & (x <= b)
## [1] FALSE FALSE FALSE TRUE TRUE
# Same test with between(), passing vector-valued bounds.
between(x, left = a, right = b)
## [1] FALSE FALSE FALSE TRUE TRUE
# Exercise: import dplyr and dslabs and load the murders data, then:
# 1. Create a new column with the population in millions.
# 2. Rank the states by population.
# 3. Find the top 5 states by murder rate using rank.
# 4. Create a new data frame `no_florida` that removes the state of Florida.
# 5. Create a new data frame `no_south` that removes the states in the South region.
# 6. Count the number of states in the Northeast or West.
# Population in millions, kept both as a standalone vector and as a column.
pop_mill <- murders[["population"]] / 1e6
murders[["pop_mill"]] <- pop_mill
murders[["pop_mill"]]
## [1] 4.779736 0.710231 6.392017 2.915918 37.253956 5.029196 3.574097
## [8] 0.897934 0.601723 19.687653 9.920000 1.360301 1.567582 12.830632
## [15] 6.483802 3.046355 2.853118 4.339367 4.533372 1.328361 5.773552
## [22] 6.547629 9.883640 5.303925 2.967297 5.988927 0.989415 1.826341
## [29] 2.700551 1.316470 8.791894 2.059179 19.378102 9.535483 0.672591
## [36] 11.536504 3.751351 3.831074 12.702379 1.052567 4.625364 0.814180
## [43] 6.346105 25.145561 2.763885 0.625741 8.001024 6.724540 1.852994
## [50] 5.686986 0.563626
# First six rows, now including `pop_mill`.
murders |> head()
## state abb region population total rate pop_mill
## 1 Alabama AL South 4779736 135 2.824424 4.779736
## 2 Alaska AK West 710231 19 2.675186 0.710231
## 3 Arizona AZ West 6392017 232 3.629527 6.392017
## 4 Arkansas AR South 2915918 93 3.189390 2.915918
## 5 California CA West 37253956 1257 3.374138 37.253956
## 6 Colorado CO West 5029196 65 1.292453 5.029196
# Rank states by population; negating makes rank 1 the most populous state.
murders[["pop_rank"]] <- rank(-murders[["population"]])
murders |> head()
## state abb region population total rate pop_mill pop_rank
## 1 Alabama AL South 4779736 135 2.824424 4.779736 23
## 2 Alaska AK West 710231 19 2.675186 0.710231 47
## 3 Arizona AZ West 6392017 232 3.629527 6.392017 16
## 4 Arkansas AR South 2915918 93 3.189390 2.915918 32
## 5 California CA West 37253956 1257 3.374138 37.253956 1
## 6 Colorado CO West 5029196 65 1.292453 5.029196 22
# Rank states by murder rate. rank() is ascending, so rank 1 is the LOWEST
# rate and the largest rank is the highest rate.
murders$murder_rank <- rank(murders$rate)
# "Top 5" means the five highest murder rates, so sort by rank in descending
# order. head() also copes with fewer than five rows without adding NA rows.
top5 <- head(
  murders[order(murders$murder_rank, decreasing = TRUE),
          c("state", "rate", "murder_rank")],
  5
)
top5
## state rate murder_rank
## 9 District of Columbia 16.452753 51
## 19 Louisiana 7.742581 50
## 26 Missouri 5.359892 49
## 21 Maryland 5.074866 48
## 41 South Carolina 4.475323 47
# Row count before dropping anything.
murders |> nrow()
## [1] 51
# Every row except the one for Florida.
no_florida <- murders |>
  filter(state != "Florida")
no_florida
## state abb region population total rate pop_mill
## 1 Alabama AL South 4779736 135 2.8244238 4.779736
## 2 Alaska AK West 710231 19 2.6751860 0.710231
## 3 Arizona AZ West 6392017 232 3.6295273 6.392017
## 4 Arkansas AR South 2915918 93 3.1893901 2.915918
## 5 California CA West 37253956 1257 3.3741383 37.253956
## 6 Colorado CO West 5029196 65 1.2924531 5.029196
## 7 Connecticut CT Northeast 3574097 97 2.7139722 3.574097
## 8 Delaware DE South 897934 38 4.2319369 0.897934
## 9 District of Columbia DC South 601723 99 16.4527532 0.601723
## 10 Georgia GA South 9920000 376 3.7903226 9.920000
## 11 Hawaii HI West 1360301 7 0.5145920 1.360301
## 12 Idaho ID West 1567582 12 0.7655102 1.567582
## 13 Illinois IL North Central 12830632 364 2.8369608 12.830632
## 14 Indiana IN North Central 6483802 142 2.1900730 6.483802
## 15 Iowa IA North Central 3046355 21 0.6893484 3.046355
## 16 Kansas KS North Central 2853118 63 2.2081106 2.853118
## 17 Kentucky KY South 4339367 116 2.6732010 4.339367
## 18 Louisiana LA South 4533372 351 7.7425810 4.533372
## 19 Maine ME Northeast 1328361 11 0.8280881 1.328361
## 20 Maryland MD South 5773552 293 5.0748655 5.773552
## 21 Massachusetts MA Northeast 6547629 118 1.8021791 6.547629
## 22 Michigan MI North Central 9883640 413 4.1786225 9.883640
## 23 Minnesota MN North Central 5303925 53 0.9992600 5.303925
## 24 Mississippi MS South 2967297 120 4.0440846 2.967297
## 25 Missouri MO North Central 5988927 321 5.3598917 5.988927
## 26 Montana MT West 989415 12 1.2128379 0.989415
## 27 Nebraska NE North Central 1826341 32 1.7521372 1.826341
## 28 Nevada NV West 2700551 84 3.1104763 2.700551
## 29 New Hampshire NH Northeast 1316470 5 0.3798036 1.316470
## 30 New Jersey NJ Northeast 8791894 246 2.7980319 8.791894
## 31 New Mexico NM West 2059179 67 3.2537239 2.059179
## 32 New York NY Northeast 19378102 517 2.6679599 19.378102
## 33 North Carolina NC South 9535483 286 2.9993237 9.535483
## 34 North Dakota ND North Central 672591 4 0.5947151 0.672591
## 35 Ohio OH North Central 11536504 310 2.6871225 11.536504
## 36 Oklahoma OK South 3751351 111 2.9589340 3.751351
## 37 Oregon OR West 3831074 36 0.9396843 3.831074
## 38 Pennsylvania PA Northeast 12702379 457 3.5977513 12.702379
## 39 Rhode Island RI Northeast 1052567 16 1.5200933 1.052567
## 40 South Carolina SC South 4625364 207 4.4753235 4.625364
## 41 South Dakota SD North Central 814180 8 0.9825837 0.814180
## 42 Tennessee TN South 6346105 219 3.4509357 6.346105
## 43 Texas TX South 25145561 805 3.2013603 25.145561
## 44 Utah UT West 2763885 22 0.7959810 2.763885
## 45 Vermont VT Northeast 625741 2 0.3196211 0.625741
## 46 Virginia VA South 8001024 250 3.1246001 8.001024
## 47 Washington WA West 6724540 93 1.3829942 6.724540
## 48 West Virginia WV South 1852994 27 1.4571013 1.852994
## 49 Wisconsin WI North Central 5686986 97 1.7056487 5.686986
## 50 Wyoming WY West 563626 5 0.8871131 0.563626
## pop_rank murder_rank
## 1 23 29
## 2 47 25
## 3 16 42
## 4 32 35
## 5 1 38
## 6 22 14
## 7 29 27
## 8 45 46
## 9 50 51
## 10 8 43
## 11 40 3
## 12 39 6
## 13 5 30
## 14 15 21
## 15 30 5
## 16 33 22
## 17 26 24
## 18 25 50
## 19 41 8
## 20 19 48
## 21 14 20
## 22 9 45
## 23 21 12
## 24 31 44
## 25 18 49
## 26 44 13
## 27 38 19
## 28 35 33
## 29 42 2
## 30 11 28
## 31 36 37
## 32 4 23
## 33 10 32
## 34 48 4
## 35 7 26
## 36 28 31
## 37 27 10
## 38 6 41
## 39 43 17
## 40 24 47
## 41 46 11
## 42 17 40
## 43 2 36
## 44 34 7
## 45 49 1
## 46 12 34
## 47 13 15
## 48 37 16
## 49 20 18
## 50 51 9
# Drop every state in the South region using base R logical indexing, which
# keeps the original row names.
no_south <- murders[!(murders$region == "South"), ]
no_south
## state abb region population total rate pop_mill
## 2 Alaska AK West 710231 19 2.6751860 0.710231
## 3 Arizona AZ West 6392017 232 3.6295273 6.392017
## 5 California CA West 37253956 1257 3.3741383 37.253956
## 6 Colorado CO West 5029196 65 1.2924531 5.029196
## 7 Connecticut CT Northeast 3574097 97 2.7139722 3.574097
## 12 Hawaii HI West 1360301 7 0.5145920 1.360301
## 13 Idaho ID West 1567582 12 0.7655102 1.567582
## 14 Illinois IL North Central 12830632 364 2.8369608 12.830632
## 15 Indiana IN North Central 6483802 142 2.1900730 6.483802
## 16 Iowa IA North Central 3046355 21 0.6893484 3.046355
## 17 Kansas KS North Central 2853118 63 2.2081106 2.853118
## 20 Maine ME Northeast 1328361 11 0.8280881 1.328361
## 22 Massachusetts MA Northeast 6547629 118 1.8021791 6.547629
## 23 Michigan MI North Central 9883640 413 4.1786225 9.883640
## 24 Minnesota MN North Central 5303925 53 0.9992600 5.303925
## 26 Missouri MO North Central 5988927 321 5.3598917 5.988927
## 27 Montana MT West 989415 12 1.2128379 0.989415
## 28 Nebraska NE North Central 1826341 32 1.7521372 1.826341
## 29 Nevada NV West 2700551 84 3.1104763 2.700551
## 30 New Hampshire NH Northeast 1316470 5 0.3798036 1.316470
## 31 New Jersey NJ Northeast 8791894 246 2.7980319 8.791894
## 32 New Mexico NM West 2059179 67 3.2537239 2.059179
## 33 New York NY Northeast 19378102 517 2.6679599 19.378102
## 35 North Dakota ND North Central 672591 4 0.5947151 0.672591
## 36 Ohio OH North Central 11536504 310 2.6871225 11.536504
## 38 Oregon OR West 3831074 36 0.9396843 3.831074
## 39 Pennsylvania PA Northeast 12702379 457 3.5977513 12.702379
## 40 Rhode Island RI Northeast 1052567 16 1.5200933 1.052567
## 42 South Dakota SD North Central 814180 8 0.9825837 0.814180
## 45 Utah UT West 2763885 22 0.7959810 2.763885
## 46 Vermont VT Northeast 625741 2 0.3196211 0.625741
## 48 Washington WA West 6724540 93 1.3829942 6.724540
## 50 Wisconsin WI North Central 5686986 97 1.7056487 5.686986
## 51 Wyoming WY West 563626 5 0.8871131 0.563626
## pop_rank murder_rank
## 2 47 25
## 3 16 42
## 5 1 38
## 6 22 14
## 7 29 27
## 12 40 3
## 13 39 6
## 14 5 30
## 15 15 21
## 16 30 5
## 17 33 22
## 20 41 8
## 22 14 20
## 23 9 45
## 24 21 12
## 26 18 49
## 27 44 13
## 28 38 19
## 29 35 33
## 30 42 2
## 31 11 28
## 32 36 37
## 33 4 23
## 35 48 4
## 36 7 26
## 38 27 10
## 39 6 41
## 40 43 17
## 42 46 11
## 45 34 7
## 46 49 1
## 48 13 15
## 50 20 18
## 51 51 9
# States in the Northeast or West. The exercise asks for the NUMBER of such
# states, so report the row count before printing the matching rows.
ne_or_west <- filter(murders, region %in% c("Northeast", "West"))
nrow(ne_or_west)
## [1] 22
ne_or_west
## state abb region population total rate pop_mill pop_rank
## 1 Alaska AK West 710231 19 2.6751860 0.710231 47
## 2 Arizona AZ West 6392017 232 3.6295273 6.392017 16
## 3 California CA West 37253956 1257 3.3741383 37.253956 1
## 4 Colorado CO West 5029196 65 1.2924531 5.029196 22
## 5 Connecticut CT Northeast 3574097 97 2.7139722 3.574097 29
## 6 Hawaii HI West 1360301 7 0.5145920 1.360301 40
## 7 Idaho ID West 1567582 12 0.7655102 1.567582 39
## 8 Maine ME Northeast 1328361 11 0.8280881 1.328361 41
## 9 Massachusetts MA Northeast 6547629 118 1.8021791 6.547629 14
## 10 Montana MT West 989415 12 1.2128379 0.989415 44
## 11 Nevada NV West 2700551 84 3.1104763 2.700551 35
## 12 New Hampshire NH Northeast 1316470 5 0.3798036 1.316470 42
## 13 New Jersey NJ Northeast 8791894 246 2.7980319 8.791894 11
## 14 New Mexico NM West 2059179 67 3.2537239 2.059179 36
## 15 New York NY Northeast 19378102 517 2.6679599 19.378102 4
## 16 Oregon OR West 3831074 36 0.9396843 3.831074 27
## 17 Pennsylvania PA Northeast 12702379 457 3.5977513 12.702379 6
## 18 Rhode Island RI Northeast 1052567 16 1.5200933 1.052567 43
## 19 Utah UT West 2763885 22 0.7959810 2.763885 34
## 20 Vermont VT Northeast 625741 2 0.3196211 0.625741 49
## 21 Washington WA West 6724540 93 1.3829942 6.724540 13
## 22 Wyoming WY West 563626 5 0.8871131 0.563626 51
## murder_rank
## 1 25
## 2 42
## 3 38
## 4 14
## 5 27
## 6 3
## 7 6
## 8 8
## 9 20
## 10 13
## 11 33
## 12 2
## 13 28
## 14 37
## 15 23
## 16 10
## 17 41
## 18 17
## 19 7
## 20 1
## 21 15
## 22 9
library(NHANES)
# Load the NHANES survey data and preview the first six rows.
data(NHANES)
NHANES |> head()
## # A tibble: 6 × 76
## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education
## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct>
## 1 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 2 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 3 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 4 51625 2009_10 male 4 " 0-9" 49 Other <NA> <NA>
## 5 51630 2009_10 female 49 " 40-49" 596 White <NA> Some College
## 6 51638 2009_10 male 9 " 0-9" 115 White <NA> <NA>
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## # Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## # Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## # BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## # BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## # BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>,
## # TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, UrineVol2 <int>, …
# Summary functions such as mean() need na.rm = TRUE to ignore missing values.
# Keep only the NHANES rows for females in the 20-29 age decade. The
# AgeDecade level is written with a leading space, as the printed data shows.
NHANES |>
  filter(Gender == "female" & AgeDecade == " 20-29")
## # A tibble: 681 × 76
## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education
## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct>
## 1 51710 2009_10 female 26 " 20-29" 319 White <NA> College Grad
## 2 51731 2009_10 female 28 " 20-29" 346 Black <NA> High School
## 3 51741 2009_10 female 21 " 20-29" 253 Black <NA> Some College
## 4 51741 2009_10 female 21 " 20-29" 253 Black <NA> Some College
## 5 51760 2009_10 female 27 " 20-29" 334 Hispanic <NA> 9 - 11th Grade
## 6 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 7 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 8 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 9 51774 2009_10 female 26 " 20-29" 312 White <NA> 8th Grade
## 10 51774 2009_10 female 26 " 20-29" 312 White <NA> 8th Grade
## # ℹ 671 more rows
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## # Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## # Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## # BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## # BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## # BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>, …
# Store the females aged 20-29 for the summaries that follow, then print them.
female_20s <- filter(NHANES, Gender == "female", AgeDecade == " 20-29")
female_20s
## # A tibble: 681 × 76
## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education
## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct>
## 1 51710 2009_10 female 26 " 20-29" 319 White <NA> College Grad
## 2 51731 2009_10 female 28 " 20-29" 346 Black <NA> High School
## 3 51741 2009_10 female 21 " 20-29" 253 Black <NA> Some College
## 4 51741 2009_10 female 21 " 20-29" 253 Black <NA> Some College
## 5 51760 2009_10 female 27 " 20-29" 334 Hispanic <NA> 9 - 11th Grade
## 6 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 7 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 8 51764 2009_10 female 29 " 20-29" 357 White <NA> College Grad
## 9 51774 2009_10 female 26 " 20-29" 312 White <NA> 8th Grade
## 10 51774 2009_10 female 26 " 20-29" 312 White <NA> 8th Grade
## # ℹ 671 more rows
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## # Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## # Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## # BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## # BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## # BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>, …
# Mean and standard deviation of BPSysAve for females aged 20-29, ignoring
# missing readings.
summarise(
  female_20s,
  avg = mean(BPSysAve, na.rm = TRUE),
  sd = sd(BPSysAve, na.rm = TRUE)
)
## # A tibble: 1 × 2
## avg sd
## <dbl> <dbl>
## 1 108. 10.1
# Smallest and largest BPSysAve among females aged 20-29, ignoring missing
# readings.
summarise(
  female_20s,
  min = min(BPSysAve, na.rm = TRUE),
  max = max(BPSysAve, na.rm = TRUE)
)
## # A tibble: 1 × 2
## min max
## <int> <int>
## 1 84 179
# For females, group by age decade and compute the mean and standard deviation
# of BPSysAve in each group, ignoring missing readings.
NHANES |>
  filter(Gender == "female") |>
  group_by(AgeDecade) |>
  summarise(
    across(
      BPSysAve,
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        sd = \(v) sd(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}" # yields mean_BPSysAve, sd_BPSysAve
    )
  )
## # A tibble: 9 × 3
## AgeDecade mean_BPSysAve sd_BPSysAve
## <fct> <dbl> <dbl>
## 1 " 0-9" 100.0 9.07
## 2 " 10-19" 104. 9.46
## 3 " 20-29" 108. 10.1
## 4 " 30-39" 111. 12.3
## 5 " 40-49" 115. 14.5
## 6 " 50-59" 122. 16.2
## 7 " 60-69" 127. 17.1
## 8 " 70+" 134. 19.8
## 9 <NA> 142. 22.9