library(haven)
Warning: package 'haven' was built under R version 4.4.2
# Read the bdhs2014 Stata file into `bdhs2014` and open it in the data viewer.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact Downloads folder exists.
bdhs2014 <- read_dta("C:/Users/mkabi/Downloads/Compressed/bdhs2014.dta/bdhs2014.dta")
View(bdhs2014)
library(haven)
Warning: package 'haven' was built under R version 4.4.2
# Read the bdhs2014 Stata file into `bdhs2014` and open it in the data viewer.
# NOTE(review): this repeats the load already performed at the top of the
# transcript; one of the two copies can be dropped.
bdhs2014 <- read_dta("C:/Users/mkabi/Downloads/Compressed/bdhs2014.dta/bdhs2014.dta")
View(bdhs2014)
library(dplyr)
Warning: package 'dplyr' was built under R version 4.4.2
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'ggplot2' was built under R version 4.4.2
Warning: package 'tibble' was built under R version 4.4.2
Warning: package 'tidyr' was built under R version 4.4.2
Warning: package 'readr' was built under R version 4.4.2
Warning: package 'purrr' was built under R version 4.4.2
Warning: package 'forcats' was built under R version 4.4.2
Warning: package 'lubridate' was built under R version 4.4.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(labelled)
Warning: package 'labelled' was built under R version 4.4.2
# Report the dimensions of bdhs2014 in a single sentence.
print(
  sprintf(
    "the bdhs2014 data have %s observations and variables",
    dim_desc(bdhs2014)
  )
)
[1] "the bdhs2014 data have [7,886 x 76] observations and variables"
# Preview the data with v130 renamed to `religion`. The result is only
# printed, not assigned, so bdhs2014 itself keeps the column name v130.
bdhs2014 |>
  rename(religion = v130)
# A tibble: 7,886 × 76
caseid hidx v000 v001 v002 v003 v004 v008 v011 v012 v013 v015
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+l> <dbl+l>
1 " … 1 BD6 306 29 2 306 1377 1066 25 3 [25-… 1 [com…
2 " … 1 BD6 568 87 7 568 1377 1161 18 1 [15-… 1 [com…
3 " … 1 BD6 298 37 4 298 1376 1076 25 3 [25-… 1 [com…
4 " … 1 BD6 289 95 2 289 1378 1100 23 2 [20-… 1 [com…
5 " … 1 BD6 337 25 2 337 1379 968 34 4 [30-… 1 [com…
6 " … 1 BD6 500 95 4 500 1376 1126 20 2 [20-… 1 [com…
7 " … 1 BD6 25 121 2 25 1375 1122 21 2 [20-… 1 [com…
8 " … 1 BD6 327 18 2 327 1375 1006 30 4 [30-… 1 [com…
9 " … 1 BD6 473 90 2 473 1376 1110 22 2 [20-… 1 [com…
10 " … 1 BD6 473 69 2 473 1376 1068 25 3 [25-… 1 [com…
# ℹ 7,876 more rows
# ℹ 64 more variables: v020 <dbl+lbl>, v024 <dbl+lbl>, v025 <dbl+lbl>,
# v102 <dbl+lbl>, v106 <dbl+lbl>, v107 <dbl+lbl>, v113 <dbl+lbl>,
# v116 <dbl+lbl>, v119 <dbl+lbl>, v120 <dbl+lbl>, v121 <dbl+lbl>,
# v122 <dbl+lbl>, v123 <dbl+lbl>, v124 <dbl+lbl>, v125 <dbl+lbl>,
# v127 <dbl+lbl>, v128 <dbl+lbl>, v129 <dbl+lbl>, religion <dbl+lbl>,
# v133 <dbl+lbl>, v135 <dbl+lbl>, v136 <dbl>, v137 <dbl>, v138 <dbl>, …
# Keep the rows whose v012 value is above 25, then print the subset.
# NOTE(review): the object name says "20_plus" but the filter is `v012 > 25`
# (every printed row is 26 or older) — confirm which threshold is intended
# and align the name or the condition.
bdhs_age_20_plus <- bdhs2014 |>
  filter(v012 > 25)
bdhs_age_20_plus
# A tibble: 3,547 × 76
caseid hidx v000 v001 v002 v003 v004 v008 v011 v012 v013 v015
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+l> <dbl+l>
1 " … 1 BD6 337 25 2 337 1379 968 34 4 [30-… 1 [com…
2 " … 1 BD6 327 18 2 327 1375 1006 30 4 [30-… 1 [com…
3 " … 1 BD6 80 35 2 80 1377 982 32 4 [30-… 1 [com…
4 " … 1 BD6 213 106 2 213 1375 940 36 5 [35-… 1 [com…
5 " … 1 BD6 52 50 2 52 1375 1010 30 4 [30-… 1 [com…
6 " … 1 BD6 455 115 2 455 1377 1030 28 3 [25-… 1 [com…
7 " … 1 BD6 549 77 2 549 1375 1054 26 3 [25-… 1 [com…
8 " … 1 BD6 85 85 2 85 1376 952 35 5 [35-… 1 [com…
9 " … 1 BD6 210 19 2 210 1378 881 41 6 [40-… 1 [com…
10 " … 1 BD6 512 40 7 512 1375 1051 27 3 [25-… 1 [com…
# ℹ 3,537 more rows
# ℹ 64 more variables: v020 <dbl+lbl>, v024 <dbl+lbl>, v025 <dbl+lbl>,
# v102 <dbl+lbl>, v106 <dbl+lbl>, v107 <dbl+lbl>, v113 <dbl+lbl>,
# v116 <dbl+lbl>, v119 <dbl+lbl>, v120 <dbl+lbl>, v121 <dbl+lbl>,
# v122 <dbl+lbl>, v123 <dbl+lbl>, v124 <dbl+lbl>, v125 <dbl+lbl>,
# v127 <dbl+lbl>, v128 <dbl+lbl>, v129 <dbl+lbl>, v130 <dbl+lbl>,
# v133 <dbl+lbl>, v135 <dbl+lbl>, v136 <dbl>, v137 <dbl>, v138 <dbl>, …
# Compute age_gap as v730 minus v012 for every case and show it next to
# the two columns it is derived from.
bdhs2014 |>
  mutate(age_gap = v730 - v012) |>
  select(caseid, v730, v012, age_gap)
# A tibble: 7,886 × 4
caseid v730 v012 age_gap
<chr> <dbl+lbl> <dbl> <dbl>
1 " 306 29 2" 32 25 7
2 " 568 87 7" 25 18 7
3 " 298 37 4" 35 25 10
4 " 289 95 2" 40 23 17
5 " 337 25 2" 48 34 14
6 " 500 95 4" 26 20 6
7 " 25121 2" 27 21 6
8 " 327 18 2" 34 30 4
9 " 473 90 2" 26 22 4
10 " 473 69 2" 35 25 10
# ℹ 7,876 more rows
# Flag each case with ecb = 1 when v212 is below 18 and ecb = 0 otherwise,
# then show the flag beside v212.
bdhs2014 |>
  mutate(
    ecb = if_else(v212 < 18, 1, 0)
  ) |>
  select(caseid, v212, ecb)
# A tibble: 7,886 × 3
caseid v212 ecb
<chr> <dbl> <dbl>
1 " 306 29 2" 23 0
2 " 568 87 7" 16 1
3 " 298 37 4" 19 0
4 " 289 95 2" 19 0
5 " 337 25 2" 19 0
6 " 500 95 4" 19 0
7 " 25121 2" 15 1
8 " 327 18 2" 21 0
9 " 473 90 2" 16 1
10 " 473 69 2" 17 1
# ℹ 7,876 more rows
# Bin v012 into labelled age categories and show the result beside v012.
# NOTE(review): the conditions leave gaps — values 17, 19, 29 and 39 match no
# branch, so case_when() returns NA for them. The labels are half-open
# intervals that also do not line up with the next lower bound; confirm the
# intended cut points (e.g. `v012 < 18`, `< 20`, `< 30`, `< 40`) and relabel.
bdhs2014 |>
  mutate(
    age_category = case_when(
      v012 >= 15 & v012 < 17 ~ "[15-17)",
      v012 >= 18 & v012 < 19 ~ "[18-19)",
      v012 >= 20 & v012 < 29 ~ "[20-29)",
      v012 >= 30 & v012 < 39 ~ "[30-39)",
      v012 >= 40 & v012 < 50 ~ "[40-50)"
    )
  ) |>
  select(caseid, v012, age_category)
# A tibble: 7,886 × 3
caseid v012 age_category
<chr> <dbl> <chr>
1 " 306 29 2" 25 [20-29)
2 " 568 87 7" 18 [18-19)
3 " 298 37 4" 25 [20-29)
4 " 289 95 2" 23 [20-29)
5 " 337 25 2" 34 [30-39)
6 " 500 95 4" 20 [20-29)
7 " 25121 2" 21 [20-29)
8 " 327 18 2" 30 [30-39)
9 " 473 90 2" 22 [20-29)
10 " 473 69 2" 25 [20-29)
# ℹ 7,876 more rows
# Print the data sorted by v002 in ascending order.
bdhs2014 |>
  arrange(v002)
# A tibble: 7,886 × 76
caseid hidx v000 v001 v002 v003 v004 v008 v011 v012 v013 v015
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+l> <dbl+l>
1 " … 2 BD6 370 1 2 370 1377 1117 21 2 [20-… 1 [com…
2 " … 1 BD6 36 1 3 36 1376 1115 21 2 [20-… 1 [com…
3 " … 1 BD6 59 1 2 59 1377 1073 25 3 [25-… 1 [com…
4 " … 1 BD6 9 1 3 9 1376 1135 20 2 [20-… 1 [com…
5 " … 1 BD6 160 1 2 160 1377 931 37 5 [35-… 1 [com…
6 " … 1 BD6 397 1 2 397 1378 999 31 4 [30-… 1 [com…
7 " … 1 BD6 370 1 2 370 1377 1117 21 2 [20-… 1 [com…
8 " … 1 BD6 510 1 6 510 1378 999 31 4 [30-… 1 [com…
9 " … 1 BD6 89 1 2 89 1375 956 34 4 [30-… 1 [com…
10 " … 1 BD6 87 1 11 87 1375 1142 19 1 [15-… 1 [com…
# ℹ 7,876 more rows
# ℹ 64 more variables: v020 <dbl+lbl>, v024 <dbl+lbl>, v025 <dbl+lbl>,
# v102 <dbl+lbl>, v106 <dbl+lbl>, v107 <dbl+lbl>, v113 <dbl+lbl>,
# v116 <dbl+lbl>, v119 <dbl+lbl>, v120 <dbl+lbl>, v121 <dbl+lbl>,
# v122 <dbl+lbl>, v123 <dbl+lbl>, v124 <dbl+lbl>, v125 <dbl+lbl>,
# v127 <dbl+lbl>, v128 <dbl+lbl>, v129 <dbl+lbl>, v130 <dbl+lbl>,
# v133 <dbl+lbl>, v135 <dbl+lbl>, v136 <dbl>, v137 <dbl>, v138 <dbl>, …
# One-row summary of v012: centre, range and spread.
# NOTE(review): base R's mode() reports the storage mode of the vector (the
# printed result is the string "numeric"), not the most frequent value. A
# statistical mode needs a frequency count, e.g.
# as.numeric(names(which.max(table(v012)))).
bdhs2014 |>
  select(v012) |>
  summarise(
    mean_age = mean(v012),
    median_age = median(v012),
    mode_age = mode(v012),
    min_age = min(v012),
    max_age = max(v012),
    sd_age = sd(v012),
    iqr_age = IQR(v012)
  )
# A tibble: 1 × 7
mean_age median_age mode_age min_age max_age sd_age iqr_age
<dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 25.5 25 numeric 15 49 5.87 8
# Count how many cases fall on each side of the v212 < 18 flag.
bdhs2014 |>
  mutate(
    ecb = if_else(v212 < 18, 1, 0)
  ) |>
  count(ecb)
# A tibble: 2 × 2
ecb n
<dbl> <int>
1 0 4310
2 1 3576
# Frequency table of the v012 age categories.
# NOTE(review): values 17, 19, 29 and 39 match no branch below, which is why
# the printed table has an <NA> row; confirm the intended cut points
# (e.g. `v012 < 18`, `< 20`, `< 30`, `< 40`) and relabel accordingly.
bdhs2014 |>
  mutate(
    age_category = case_when(
      v012 >= 15 & v012 < 17 ~ "[15-17)",
      v012 >= 18 & v012 < 19 ~ "[18-19)",
      v012 >= 20 & v012 < 29 ~ "[20-29)",
      v012 >= 30 & v012 < 39 ~ "[30-39)",
      v012 >= 40 & v012 < 50 ~ "[40-50)"
    )
  ) |>
  count(age_category)
# A tibble: 6 × 2
age_category n
<chr> <int>
1 [15-17) 163
2 [18-19) 329
3 [20-29) 4517
4 [30-39) 1650
5 [40-50) 152
6 <NA> 1075
# Frequency of each v130 value, with the column shown as `religion`.
bdhs2014 |>
  rename(religion = v130) |>
  count(religion)
# A tibble: 5 × 2
religion n
<dbl+lbl> <int>
1 1 [islam] 7249
2 2 [hinduism] 576
3 3 [buddhism] 47
4 4 [christianity] 13
5 96 [other] 1
# Frequency of each v190 value, with the column shown as `wealth_index`.
bdhs2014 |>
  rename(wealth_index = v190) |>
  count(wealth_index)
# A tibble: 5 × 2
wealth_index n
<dbl+lbl> <int>
1 1 [poorest] 1737
2 2 [poorer] 1503
3 3 [middle] 1516
4 4 [richer] 1602
5 5 [richest] 1528
# Cross-tabulate v106 against v025: one row per observed combination.
bdhs2014 |>
  count(v106, v025)
# A tibble: 8 × 3
v106 v025 n
<dbl+lbl> <dbl+lbl> <int>
1 0 [no education] 1 [urban] 309
2 0 [no education] 2 [rural] 924
3 1 [primary] 1 [urban] 592
4 1 [primary] 2 [rural] 1614
5 2 [secondary] 1 [urban] 1138
6 2 [secondary] 2 [rural] 2483
7 3 [higher] 1 [urban] 449
8 3 [higher] 2 [rural] 377
library(dplyr)
# Bring the starwars example data into the workspace and show its structure.
data(starwars)
starwars |>
  glimpse()
Rows: 87
Columns: 14
$ name <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or…
$ height <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2…
$ mass <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.…
$ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N…
$ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "…
$ eye_color <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",…
$ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, …
$ sex <chr> "male", "none", "none", "male", "female", "male", "female",…
$ gender <chr> "masculine", "masculine", "masculine", "masculine", "femini…
$ homeworld <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T…
$ species <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma…
$ films <list> <"A New Hope", "The Empire Strikes Back", "Return of the J…
$ vehicles <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp…
$ starships <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",…
####1
# Sum the per-name row counts of starwars.
# NOTE(review): despite the column name, nothing here restricts the data to
# humans — the result (87) equals the total number of rows. To count humans,
# filter first, e.g. starwars |> filter(species == "Human") |> count().
starwars |>
  count(name) |>
  summarise(total_humans = sum(n))
# A tibble: 1 × 1
total_humans
<int>
1 87
# Number of characters per gender value (missing values get their own row).
starwars |>
  count(gender)
# A tibble: 3 × 2
gender n
<chr> <int>
1 feminine 17
2 masculine 66
3 <NA> 4
# NOTE(review): max() on a character column returns the value that sorts
# last alphabetically, not the most frequent one, so this does not answer
# "which homeworld has the most individuals". A frequency-based version
# would be: starwars |> count(homeworld, sort = TRUE) |> slice_head(n = 1).
starwars |>
  summarise("Most individuals" = max(homeworld, na.rm = TRUE))
# A tibble: 1 × 1
`Most individuals`
<chr>
1 Zolan
# Average height of the characters whose eye_color is exactly "orange".
# NOTE(review): mean() is called without na.rm = TRUE, so a missing height
# in this subset would make the result NA.
starwars |>
  filter(eye_color == "orange") |>
  summarise("Mean height of all individuals with orange eyes" = mean(height))
# A tibble: 1 × 1
`Mean height of all individuals with orange eyes`
<dbl>
1 180.
# Median, mean and standard deviation of height for rows whose species is
# "Droid", ignoring missing heights.
starwars |>
  filter(species == "Droid") |>
  summarise(
    "Median height for all droids" = median(height, na.rm = TRUE),
    "Mean height for all droids" = mean(height, na.rm = TRUE),
    "standard deviation of height for all droids" = sd(height, na.rm = TRUE)
  )
# A tibble: 1 × 3
`Median height for all droids` Mean height for all dr…¹ standard deviation o…²
<int> <dbl> <dbl>
1 97 131. 49.1
# ℹ abbreviated names: ¹`Mean height for all droids`,
# ²`standard deviation of height for all droids`