# Packages: ipumsr reads the IPUMS extract; stringr supplies str_pad(), which
# is used further down when the PUMA identifier is built.
library(ipumsr)
library(stringr)

# Parse the DDI codebook first, then read the microdata file it describes.
usa_00013 <- read_ipums_ddi("usa_00013.xml")
tx_00013 <- read_ipums_micro(
  usa_00013,
  data_file = "usa_00013.dat.gz",
  verbose = FALSE
)

# Lower-case every column name, then print the names.
tx_00013 <- setNames(tx_00013, tolower(names(tx_00013)))
names(tx_00013)
## [1] "year" "multyear" "sample" "serial" "cbserial" "hhwt"
## [7] "cluster" "statefip" "puma" "strata" "gq" "ownershp"
## [13] "ownershpd" "mortgage" "multgen" "multgend" "pernum" "perwt"
## [19] "sex" "age" "fertyr" "race" "raced" "hispan"
## [25] "hispand" "hcovany" "educ" "educd" "empstat" "empstatd"
## [31] "labforce" "occ" "ind" "uhrswork" "inctot" "poverty"
## [37] "presgl" "migrate1" "migrate1d"
#CS Code
# Drop the value labels attached by ipumsr so the columns are plain vectors.
tx_00013 <- zap_labels(tx_00013)

# Build the PUMA identifier: state FIPS left-padded with zeros to 2 characters,
# followed by the PUMA code left-padded with zeros to 5 characters.
tx_00013$newpuma <- paste0(
  str_pad(tx_00013$statefip, width = 2, side = "left", pad = "0"),
  str_pad(tx_00013$puma, width = 5, side = "left", pad = "0")
)

# Record count per PUMA identifier.
table(tx_00013$newpuma)
##
## 4800100 4800200 4800300 4800400 4800501 4800502 4800600 4800700 4800800 4800900
## 8976 4362 4674 6757 3800 5870 7094 4974 8381 6331
## 4801000 4801100 4801200 4801300 4801400 4801501 4801502 4801600 4801700 4801800
## 6359 4275 3594 4239 4289 2823 3451 3754 4852 5583
## 4801901 4801902 4801903 4801904 4801905 4801906 4801907 4802001 4802002 4802003
## 4402 4245 3728 3195 4205 6061 4681 4436 4103 6084
## 4802004 4802005 4802006 4802101 4802102 4802200 4802301 4802302 4802303 4802304
## 4873 4049 5078 5882 5309 4226 2988 3015 3388 2921
## 4802305 4802306 4802307 4802308 4802309 4802310 4802311 4802312 4802313 4802314
## 3255 3718 3948 3274 3816 3944 4200 4436 5082 3627
## 4802315 4802316 4802317 4802318 4802319 4802320 4802321 4802322 4802400 4802501
## 3899 2974 2488 3253 3424 3566 3905 4536 4496 3737
## 4802502 4802503 4802504 4802505 4802506 4802507 4802508 4802509 4802510 4802511
## 4185 3696 2676 3742 3053 3873 3622 4406 4376 2785
## 4802512 4802513 4802514 4802515 4802516 4802600 4802700 4802800 4802900 4803000
## 3138 4605 4330 3849 3494 9343 5077 4847 3644 3894
## 4803100 4803200 4803301 4803302 4803303 4803304 4803305 4803306 4803400 4803501
## 4029 4379 4575 4536 3704 2996 3661 4159 6975 4733
## 4803502 4803601 4803602 4803700 4803801 4803802 4803900 4804000 4804100 4804200
## 5277 5462 7410 8434 3518 5522 6597 5112 3565 4323
## 4804301 4804302 4804400 4804501 4804502 4804503 4804504 4804601 4804602 4804603
## 3282 4948 3389 3376 3264 2591 2489 3635 3768 4783
## 4804604 4804605 4804606 4804607 4804608 4804609 4804610 4804611 4804612 4804613
## 4719 3325 3548 3001 3208 3701 2969 2641 3627 3319
## 4804614 4804615 4804616 4804617 4804618 4804619 4804620 4804621 4804622 4804623
## 3146 2923 3399 2564 2998 2644 2778 3648 2757 2811
## 4804624 4804625 4804626 4804627 4804628 4804629 4804630 4804631 4804632 4804633
## 3105 2733 3005 2988 3085 2941 2632 3105 3428 2002
## 4804634 4804635 4804636 4804637 4804638 4804701 4804702 4804801 4804802 4804803
## 2120 2990 2901 3075 2706 5956 3790 2101 3777 3715
## 4804901 4804902 4804903 4804904 4804905 4805000 4805100 4805201 4805202 4805203
## 3025 3696 2939 2716 4373 5532 5005 4511 3605 3445
## 4805204 4805301 4805302 4805303 4805304 4805305 4805306 4805307 4805308 4805309
## 4498 3153 3689 4928 4124 3561 6162 4390 4123 4819
## 4805400 4805500 4805600 4805700 4805800 4805901 4805902 4805903 4805904 4805905
## 5755 6315 3466 5226 3730 3654 3649 3516 3355 3693
## 4805906 4805907 4805908 4805909 4805910 4805911 4805912 4805913 4805914 4805915
## 3386 4084 3118 3324 3944 3566 3237 2997 4406 3570
## 4805916 4806000 4806100 4806200 4806301 4806302 4806400 4806500 4806601 4806602
## 3317 3527 3634 4059 4778 3826 3097 4814 3861 3349
## 4806603 4806701 4806702 4806703 4806801 4806802 4806803 4806804 4806805 4806806
## 3786 3088 4194 4537 2613 2169 2594 2336 3324 2414
## 4806807 4806900
## 2775 2869
# Border-PUMA lookup table. GEOID10 is read as character so that it has the
# same type as the zero-padded character key `newpuma` it is merged against;
# when left to readr's guessing it is parsed as a double and the join only
# works through implicit coercion. All other columns are still guessed.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs on the original machine -- consider a project-relative path.
bordp <- readr::read_csv(
  "C:/Users/codar/OneDrive/Documents/Stats II/Data/border_100mi_pumas_table.csv",
  col_types = readr::cols(GEOID10 = readr::col_character())
)
## Parsed with column specification:
## cols(
## fid = col_double(),
## STATEFP10 = col_double(),
## PUMACE10 = col_character(),
## AFFGEOID10 = col_character(),
## GEOID10 = col_double(),
## NAME10 = col_character(),
## LSAD10 = col_character(),
## ALAND10 = col_double(),
## AWATER10 = col_double()
## )
# Inner join (merge() keeps only matching rows by default) of the person
# records to the border-PUMA table. The key is `newpuma` in the microdata and
# `GEOID10` in the lookup table.
mdat <- merge(
  x = tx_00013,
  y = bordp,
  by.x = "newpuma",
  by.y = "GEOID10"
)

# Record count per PUMA among the matched rows.
table(mdat$newpuma)
##
## 4802800 4803200 4803301 4803302 4803303 4803304 4803305 4803306 4806000 4806100
## 4847 4379 4575 4536 3704 2996 3661 4159 3527 3634
## 4806200 4806301 4806302 4806400 4806701 4806702 4806703 4806801 4806802 4806803
## 4059 4778 3826 3097 3088 4194 4537 2613 2169 2594
## 4806804 4806805 4806806 4806807 4806900
## 2336 3324 2414 2775 2869
library(dplyr)
# Keep only the person records from the eleven study PUMAs listed below.
tx_00013 <- tx_00013 %>%
  filter(
    newpuma %in% c(
      "4802800", "4803200", "4806000", "4806100", "4806200", "4806301",
      "4806302", "4806701", "4806702", "4806703", "4806900"
    )
  )

# View() opens a data viewer, so only call it in an interactive session; a
# non-interactive run (Rscript, knitting) may have no display to open it on.
if (interactive()) {
  View(tx_00013)
}
names(tx_00013)
## [1] "year" "multyear" "sample" "serial" "cbserial" "hhwt"
## [7] "cluster" "statefip" "puma" "strata" "gq" "ownershp"
## [13] "ownershpd" "mortgage" "multgen" "multgend" "pernum" "perwt"
## [19] "sex" "age" "fertyr" "race" "raced" "hispan"
## [25] "hispand" "hcovany" "educ" "educd" "empstat" "empstatd"
## [31] "labforce" "occ" "ind" "uhrswork" "inctot" "poverty"
## [37] "presgl" "migrate1" "migrate1d" "newpuma"
# describe(tx_00012$income)
# summary(tx_00012$educ)
# class(tx_00012$educ)
#Recodes
# Derive the analysis variables from the raw IPUMS codes. Any code that is not
# listed in a case_when() falls through to NA.
#
# Fixes relative to the previous version:
#   * `sexb` is now derived from the raw `sex` codes BEFORE `sex` is
#     overwritten. mutate() evaluates its arguments in order, so `sexb` used to
#     see the 0/1 recode: women were labelled 'male' and men became NA.
#   * `hisp`: the `hispan == 9` branch was unreachable because `hispan != 0`
#     matched first. Codes 0 and 9 are now tested first.
#   * `migrate1`: in the IPUMS codebook code 3 is "moved between states" and
#     code 4 is "abroad one year ago"; code 3 used to be labelled 'abroad1yr'
#     and code 4 was dropped to NA.
tx_5 <- tx_00013 %>%
  mutate(
    # Text label from the raw codes (1 = male, 2 = female).
    sexb = case_when(
      sex == 1 ~ "male",
      sex == 2 ~ "female",
      TRUE ~ NA_character_
    ),
    # Indicator version of sex: raw 1 -> 0, raw 2 -> 1.
    sex = case_when(
      sex == 1 ~ 0,
      sex == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Labor force participation indicator: labforce 1 -> 0, labforce 2 -> 1.
    lfpart = case_when(
      labforce == 1 ~ 0,
      labforce == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Education as a four-level text category.
    edu = case_when(
      educ == 0 ~ "none",
      educ %in% 1:5 ~ "hs incomplete",
      educ %in% 6 ~ "hs complete",
      educ %in% 7:11 ~ "college",
      TRUE ~ NA_character_
    ),
    # Education as a 1-3 score. educ == 0 is NA here (it is 'none' in `edu`).
    edu3 = case_when(
      educ %in% 1:5 ~ 1,
      educ %in% 6 ~ 2,
      educ %in% 7:11 ~ 3,
      TRUE ~ NA_real_
    ),
    # Race labels. Code 3 is deliberately left out (kept commented, as in the
    # original), so it becomes NA.
    race = case_when(
      race == 1 ~ "white",
      race == 2 ~ "black",
      # race == 3 ~ 'aian',
      race %in% 4:5 ~ "asian",
      race == 6 ~ "oapi",
      race == 7 ~ "other",
      race == 8 ~ "twomajor",
      race == 9 ~ "threemoremaj",
      TRUE ~ NA_character_
    ),
    # Hispanic origin: codes 0 and 9 -> 'NL', any other non-missing code ->
    # 'Latino'.
    # NOTE(review): 9 appears to be "not reported" in IPUMS; confirm that
    # treating it as 'NL' rather than NA is intended.
    hisp = case_when(
      hispan %in% c(0, 9) ~ "NL",
      !is.na(hispan) ~ "Latino",
      TRUE ~ NA_character_
    ),
    # Migration status one year ago.
    migrate1 = case_when(
      migrate1 == 1 ~ "same house",
      migrate1 == 2 ~ "movinstate",
      migrate1 == 3 ~ "movbetweenstate",
      migrate1 == 4 ~ "abroad1yr",
      TRUE ~ NA_character_
    ),
    # Fertility indicator: fertyr 1 -> 0, fertyr 2 -> 1.
    fertyr = case_when(
      fertyr == 1 ~ 0,
      fertyr == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Only the two extreme poverty codes are labelled; everything else is NA.
    poverty1 = case_when(
      poverty == 1 ~ "1% or less",
      poverty == 501 ~ "501% or more",
      TRUE ~ NA_character_
    ),
    # Health coverage indicator: hcovany 1 -> 0, hcovany 2 -> 1.
    hcov = case_when(
      hcovany == 1 ~ 0,
      hcovany == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Home ownership indicator: ownershp 1 -> 1, ownershp 2 -> 0.
    ownhome = case_when(
      ownershp == 1 ~ 1,
      ownershp == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    # Multigenerational household code: 1, 2 and 3 are kept, others are NA.
    multgen1 = case_when(
      multgen == 1 ~ 1,
      multgen == 2 ~ 2,
      multgen == 3 ~ 3,
      TRUE ~ NA_real_
    )
  )
# mgmt = if_else(occ %in% c(10:160) | occ %in% c(220:730), 1, 0)) #occupational prestige
# occ=case_when(occ %in% 10:160 ~ 'Mgmt/Biz',
# occ %in% 220:730 ~ 'Mgmt/Biz',
# occ %in% 800:950 ~ 'Finance',
# # occ %in% 1000:1240 ~ 'STEM',
# occ %in% 1300:1540 ~ 'Arch/Eng',
# occ %in% 1550:1560 ~ 'Technical',
# # occ %in% 1600:1760 ~ 'STEM',
# occ %in% 1800:1840 ~ 'SocSTEM',
# occ %in% 1900:1980 ~ 'Technical',
# occ %in% 2000:2060 ~ 'PublicServ',
# occ == 2100 ~ 'Law',
# occ %in% 2140:2150 ~ 'Technical',
# occ %in% 2200:2430 ~ 'Education',
# occ %in% 2440:2550 ~ 'Technical',
# occ %in% 2600:2910 ~ 'A&E/Sports/Media',
# occ == 2920 ~ 'Technical',
# occ %in% 3000:3500 ~ 'Health/Med',
# occ %in% 3510:3650 ~ 'Technical',
# occ %in% 3700:3950 ~ 'PublicServ',
# occ == 4000 ~ 'A&E/Sports/Media',
# occ %in% 4010:4965 ~ 'Sales/Service',
# occ %in% 5000: 5940 ~ 'Office/Admin',
# occ %in% 6200:8965 ~ 'SkilledTrade',
# occ %in% 9000:9750 ~ 'Transport',
# TRUE~ NA_character_))
# View() opens a data viewer, so only call it in an interactive session; a
# non-interactive run (Rscript, knitting) may have no display to open it on.
if (interactive()) {
  View(tx_5)
}
#Summary statistics and correlation matrix
# Analysis sample: rows with no missing value in any of the twelve columns
# passed to complete.cases(). `fertyr` is kept in the output but is not part
# of the completeness check, so it can still contain NA.
tx_5a <- tx_5 %>%
  filter(
    complete.cases(
      perwt, strata, newpuma, edu3, edu, lfpart,
      sex, race, presgl, empstat, inctot, age
    )
  ) %>%
  select(
    perwt, strata, newpuma, edu3, edu, lfpart,
    sex, race, presgl, empstat, fertyr, inctot, age
  )
summary(tx_5a)
## perwt strata newpuma edu3
## Min. : 1.00 Min. :280048 Length:41392 Min. :1.000
## 1st Qu.: 10.00 1st Qu.:600048 Class :character 1st Qu.:2.000
## Median : 16.00 Median :630148 Mode :character Median :2.000
## Mean : 20.98 Mean :569347 Mean :2.144
## 3rd Qu.: 27.00 3rd Qu.:670248 3rd Qu.:3.000
## Max. :331.00 Max. :690048 Max. :3.000
##
## edu lfpart sex race
## Length:41392 Min. :0.0000 Min. :0.0000 Length:41392
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Mode :character Median :1.0000 Median :0.0000 Mode :character
## Mean :0.6412 Mean :0.4969
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
##
## presgl empstat fertyr inctot
## Min. : 0.00 Min. :1.000 Min. :0.000 Min. : -6600
## 1st Qu.: 0.00 1st Qu.:1.000 1st Qu.:0.000 1st Qu.: 1058
## Median :32.10 Median :1.000 Median :0.000 Median : 15878
## Mean :28.75 Mean :1.755 Mean :0.065 Mean : 28905
## 3rd Qu.:46.90 3rd Qu.:3.000 3rd Qu.:0.000 3rd Qu.: 39800
## Max. :81.50 Max. :3.000 Max. :1.000 Max. :816000
## NA's :27330
## age
## Min. :16.00
## 1st Qu.:27.00
## Median :40.00
## Mean :39.99
## 3rd Qu.:53.00
## Max. :64.00
##
# View() opens a data viewer, so only call it in an interactive session; a
# non-interactive run (Rscript, knitting) may have no display to open it on.
if (interactive()) {
  View(tx_5a)
}
# my_summary1 <-
# list("Labor Force Participation" =
# list("min" = ~ min(lfpart),
# "max" = ~ max(lfpart),
# "mean (sd)" = ~ qwraps2::mean_sd(lfpart)),
# "Fertility" =
# list("min" = ~ min(fertyr),
# "max" = ~ max(fertyr),
# "mean (sd)" = ~ qwraps2::mean_sd(fertyr)),
# "Educational Attainment" =
# list("min" = ~ min(edu3),
# "max" = ~ max(edu3),
# "mean (sd)" = ~ qwraps2::mean_sd(edu3))
# )
#
#
# library(qwraps2)
# bysex <- summary_table(dplyr::group_by(tx_hw71, sex), my_summary1)
# print(bysex)
# Overall descriptive table of every column in the analysis sample.
# The call is namespace-qualified because library(tableone) is only attached
# further down in this script; a bare CreateTableOne() here fails with
# "could not find function" in a fresh session.
tableone::CreateTableOne(data = tx_5a)
##
## Overall
## n 41392
## perwt (mean (SD)) 20.98 (17.79)
## strata (mean (SD)) 569347.12 (143178.43)
## newpuma (%)
## 4802800 4635 (11.2)
## 4803200 4169 (10.1)
## 4806000 3368 ( 8.1)
## 4806100 3498 ( 8.5)
## 4806200 3809 ( 9.2)
## 4806301 4504 (10.9)
## 4806302 3590 ( 8.7)
## 4806701 2889 ( 7.0)
## 4806702 3933 ( 9.5)
## 4806703 4251 (10.3)
## 4806900 2746 ( 6.6)
## edu3 (mean (SD)) 2.14 (0.77)
## edu (%)
## college 15684 (37.9)
## hs complete 16005 (38.7)
## hs incomplete 9703 (23.4)
## lfpart (mean (SD)) 0.64 (0.48)
## sex (mean (SD)) 0.50 (0.50)
## race (%)
## asian 55 ( 0.1)
## black 718 ( 1.7)
## oapi 275 ( 0.7)
## other 2397 ( 5.8)
## threemoremaj 30 ( 0.1)
## twomajor 542 ( 1.3)
## white 37375 (90.3)
## presgl (mean (SD)) 28.76 (20.62)
## empstat (mean (SD)) 1.76 (0.95)
## fertyr (mean (SD)) 0.06 (0.25)
## inctot (mean (SD)) 28904.83 (45658.90)
## age (mean (SD)) 39.99 (14.59)
# library(tableone)
# t2<-CreateTableOne(vars = c(edu3, fertyr,empstat, strata = "sex", test = T, data = tx_hw71)
# Research question: how does labor force participation among women in border PUMAs vary by race, education, occupation, and fertility?
library(tableone)
# # Create a variable list which we want in Table 1
# listVars <- c("empstat", "fertyr", "edu", "edu3", "sex", "presgl", "inctot", "age")
#
# # Define categorical variables
# catVars <- c("sex","edu","empstat", "fertyr")
#
# table1 <- CreateTableOne(vars = listVars, data = tx_5a, factorVars = catVars, strata = c("sex"))
#
#
# a <- print(table1, quote = TRUE, noSpaces = TRUE)
#
# as.data.frame(a)
# my_skim <- skim_with(
# numeric = sfl(iqr = IQR, mad = mad, p99 = ~ quantile(., probs = .99)),
# append = FALSE
# )
# my_skim(iris, Sepal.Length)
library(skimr)
## Warning: package 'skimr' was built under R version 4.0.4
# Per-column skimr summary of the whole analysis sample.
skim(tx_5a)
Data summary
Name |
tx_5a |
Number of rows |
41392 |
Number of columns |
13 |
_______________________ |
|
Column type frequency: |
|
character |
3 |
numeric |
10 |
________________________ |
|
Group variables |
None |
Variable type: character
newpuma |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
edu |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
race |
0 |
1 |
4 |
12 |
0 |
7 |
0 |
Variable type: numeric
perwt |
0 |
1.00 |
20.98 |
17.79 |
1 |
10 |
16.0 |
27.0 |
331.0 |
▇▁▁▁▁ |
strata |
0 |
1.00 |
569347.12 |
143178.43 |
280048 |
600048 |
630148.0 |
670248.0 |
690048.0 |
▂▁▁▁▇ |
edu3 |
0 |
1.00 |
2.14 |
0.77 |
1 |
2 |
2.0 |
3.0 |
3.0 |
▅▁▇▁▇ |
lfpart |
0 |
1.00 |
0.64 |
0.48 |
0 |
0 |
1.0 |
1.0 |
1.0 |
▅▁▁▁▇ |
sex |
0 |
1.00 |
0.50 |
0.50 |
0 |
0 |
0.0 |
1.0 |
1.0 |
▇▁▁▁▇ |
presgl |
0 |
1.00 |
28.76 |
20.62 |
0 |
0 |
32.1 |
46.9 |
81.5 |
▇▆▇▆▁ |
empstat |
0 |
1.00 |
1.76 |
0.95 |
1 |
1 |
1.0 |
3.0 |
3.0 |
▇▁▁▁▅ |
fertyr |
27330 |
0.34 |
0.06 |
0.25 |
0 |
0 |
0.0 |
0.0 |
1.0 |
▇▁▁▁▁ |
inctot |
0 |
1.00 |
28904.83 |
45658.90 |
-6600 |
1058 |
15878.0 |
39800.0 |
816000.0 |
▇▁▁▁▁ |
age |
0 |
1.00 |
39.99 |
14.59 |
16 |
27 |
40.0 |
53.0 |
64.0 |
▇▆▆▇▇ |
# Same skimr summary, computed separately for each value of the recoded `sex`
# column (0 and 1). In the output that follows, fertyr is entirely missing for
# the sex == 0 group.
tx_5a %>%
dplyr::group_by(sex) %>%
skim()
Data summary
Name |
Piped data |
Number of rows |
41392 |
Number of columns |
13 |
_______________________ |
|
Column type frequency: |
|
character |
3 |
numeric |
9 |
________________________ |
|
Group variables |
sex |
Variable type: character
newpuma |
0 |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
newpuma |
1 |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
edu |
0 |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
edu |
1 |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
race |
0 |
0 |
1 |
4 |
12 |
0 |
7 |
0 |
race |
1 |
0 |
1 |
4 |
12 |
0 |
7 |
0 |
Variable type: numeric
perwt |
0 |
0 |
1.00 |
21.12 |
18.36 |
1 |
10.0 |
16.0 |
27.0 |
331.0 |
▇▁▁▁▁ |
perwt |
1 |
0 |
1.00 |
20.84 |
17.18 |
1 |
10.0 |
16.0 |
27.0 |
237.0 |
▇▁▁▁▁ |
strata |
0 |
0 |
1.00 |
561866.90 |
147650.07 |
280048 |
600048.0 |
630148.0 |
670248.0 |
690048.0 |
▃▁▁▁▇ |
strata |
1 |
0 |
1.00 |
576921.91 |
138094.51 |
280048 |
600048.0 |
630148.0 |
670248.0 |
690048.0 |
▂▁▁▁▇ |
edu3 |
0 |
0 |
1.00 |
2.09 |
0.77 |
1 |
1.0 |
2.0 |
3.0 |
3.0 |
▅▁▇▁▇ |
edu3 |
1 |
0 |
1.00 |
2.20 |
0.77 |
1 |
2.0 |
2.0 |
3.0 |
3.0 |
▅▁▇▁▇ |
lfpart |
0 |
0 |
1.00 |
0.68 |
0.47 |
0 |
0.0 |
1.0 |
1.0 |
1.0 |
▃▁▁▁▇ |
lfpart |
1 |
0 |
1.00 |
0.61 |
0.49 |
0 |
0.0 |
1.0 |
1.0 |
1.0 |
▅▁▁▁▇ |
presgl |
0 |
0 |
1.00 |
28.78 |
19.32 |
0 |
17.5 |
32.1 |
45.3 |
81.5 |
▆▇▆▅▁ |
presgl |
1 |
0 |
1.00 |
28.73 |
21.86 |
0 |
0.0 |
32.8 |
47.8 |
81.5 |
▇▃▇▅▁ |
empstat |
0 |
0 |
1.00 |
1.69 |
0.93 |
1 |
1.0 |
1.0 |
3.0 |
3.0 |
▇▁▁▁▅ |
empstat |
1 |
0 |
1.00 |
1.82 |
0.97 |
1 |
1.0 |
1.0 |
3.0 |
3.0 |
▇▁▁▁▆ |
fertyr |
0 |
20826 |
0.00 |
NaN |
NA |
NA |
NA |
NA |
NA |
NA |
|
fertyr |
1 |
6504 |
0.68 |
0.06 |
0.25 |
0 |
0.0 |
0.0 |
0.0 |
1.0 |
▇▁▁▁▁ |
inctot |
0 |
0 |
1.00 |
36630.61 |
55275.11 |
-6242 |
3176.0 |
21170.0 |
50000.0 |
816000.0 |
▇▁▁▁▁ |
inctot |
1 |
0 |
1.00 |
21081.38 |
31309.88 |
-6600 |
0.0 |
12296.0 |
30000.0 |
587116.0 |
▇▁▁▁▁ |
age |
0 |
0 |
1.00 |
39.30 |
14.59 |
16 |
26.0 |
39.0 |
52.0 |
64.0 |
▇▆▆▆▇ |
age |
1 |
0 |
1.00 |
40.69 |
14.56 |
16 |
28.0 |
41.0 |
54.0 |
64.0 |
▇▆▆▇▇ |
# Skim only the listed columns instead of the whole data frame.
skim(tx_5a, sex, empstat, fertyr, edu, presgl, inctot, age)
Data summary
Name |
tx_5a |
Number of rows |
41392 |
Number of columns |
13 |
_______________________ |
|
Column type frequency: |
|
character |
1 |
numeric |
6 |
________________________ |
|
Group variables |
None |
Variable type: character
Variable type: numeric
sex |
0 |
1.00 |
0.50 |
0.50 |
0 |
0 |
0.0 |
1.0 |
1.0 |
▇▁▁▁▇ |
empstat |
0 |
1.00 |
1.76 |
0.95 |
1 |
1 |
1.0 |
3.0 |
3.0 |
▇▁▁▁▅ |
fertyr |
27330 |
0.34 |
0.06 |
0.25 |
0 |
0 |
0.0 |
0.0 |
1.0 |
▇▁▁▁▁ |
presgl |
0 |
1.00 |
28.76 |
20.62 |
0 |
0 |
32.1 |
46.9 |
81.5 |
▇▆▇▆▁ |
inctot |
0 |
1.00 |
28904.83 |
45658.90 |
-6600 |
1058 |
15878.0 |
39800.0 |
816000.0 |
▇▁▁▁▁ |
age |
0 |
1.00 |
39.99 |
14.59 |
16 |
27 |
40.0 |
53.0 |
64.0 |
▇▆▆▇▇ |
# Skim the female respondents only (sex == 1 after the recode), with a separate
# summary for each employment-status code in `empstat`. Note that this does
# not restrict to employed respondents; every empstat group is reported.
# `sex` is numeric, so it is compared with the number 1 rather than the
# string "1" (which only worked through implicit coercion).
tx_5a %>%
  dplyr::filter(sex == 1) %>%
  dplyr::group_by(empstat) %>%
  skim()
Data summary
Name |
Piped data |
Number of rows |
20566 |
Number of columns |
13 |
_______________________ |
|
Column type frequency: |
|
character |
3 |
numeric |
9 |
________________________ |
|
Group variables |
empstat |
Variable type: character
newpuma |
1 |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
newpuma |
2 |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
newpuma |
3 |
0 |
1 |
7 |
7 |
0 |
11 |
0 |
edu |
1 |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
edu |
2 |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
edu |
3 |
0 |
1 |
7 |
13 |
0 |
3 |
0 |
race |
1 |
0 |
1 |
4 |
12 |
0 |
7 |
0 |
race |
2 |
0 |
1 |
4 |
12 |
0 |
6 |
0 |
race |
3 |
0 |
1 |
4 |
12 |
0 |
7 |
0 |
Variable type: numeric
perwt |
1 |
0 |
1.00 |
20.78 |
17.42 |
1 |
10.00 |
16.0 |
26.00 |
237.0 |
▇▁▁▁▁ |
perwt |
2 |
0 |
1.00 |
22.17 |
16.95 |
1 |
10.00 |
16.0 |
29.00 |
86.0 |
▇▃▂▁▁ |
perwt |
3 |
0 |
1.00 |
20.80 |
16.85 |
1 |
10.00 |
16.0 |
27.00 |
170.0 |
▇▁▁▁▁ |
strata |
1 |
0 |
1.00 |
573787.09 |
139793.96 |
280048 |
600048.00 |
630148.0 |
670248.00 |
690048.0 |
▂▁▁▁▇ |
strata |
2 |
0 |
1.00 |
596106.65 |
124566.60 |
280048 |
610048.00 |
630148.0 |
670248.00 |
690048.0 |
▂▁▁▁▇ |
strata |
3 |
0 |
1.00 |
579733.09 |
136584.15 |
280048 |
600048.00 |
630148.0 |
670248.00 |
690048.0 |
▂▁▁▁▇ |
edu3 |
1 |
0 |
1.00 |
2.38 |
0.70 |
1 |
2.00 |
3.0 |
3.00 |
3.0 |
▂▁▆▁▇ |
edu3 |
2 |
0 |
1.00 |
2.13 |
0.72 |
1 |
2.00 |
2.0 |
3.00 |
3.0 |
▃▁▇▁▆ |
edu3 |
3 |
0 |
1.00 |
1.94 |
0.79 |
1 |
1.00 |
2.0 |
3.00 |
3.0 |
▇▁▇▁▆ |
lfpart |
1 |
0 |
1.00 |
1.00 |
0.00 |
1 |
1.00 |
1.0 |
1.00 |
1.0 |
▁▁▇▁▁ |
lfpart |
2 |
0 |
1.00 |
1.00 |
0.00 |
1 |
1.00 |
1.0 |
1.00 |
1.0 |
▁▁▇▁▁ |
lfpart |
3 |
0 |
1.00 |
0.00 |
0.00 |
0 |
0.00 |
0.0 |
0.00 |
0.0 |
▁▁▇▁▁ |
sex |
1 |
0 |
1.00 |
1.00 |
0.00 |
1 |
1.00 |
1.0 |
1.00 |
1.0 |
▁▁▇▁▁ |
sex |
2 |
0 |
1.00 |
1.00 |
0.00 |
1 |
1.00 |
1.0 |
1.00 |
1.0 |
▁▁▇▁▁ |
sex |
3 |
0 |
1.00 |
1.00 |
0.00 |
1 |
1.00 |
1.0 |
1.00 |
1.0 |
▁▁▇▁▁ |
presgl |
1 |
0 |
1.00 |
41.10 |
14.29 |
0 |
32.80 |
41.9 |
50.30 |
81.5 |
▂▃▇▆▁ |
presgl |
2 |
0 |
1.00 |
25.97 |
18.55 |
0 |
0.00 |
30.9 |
36.30 |
78.3 |
▇▆▇▃▁ |
presgl |
3 |
0 |
1.00 |
11.09 |
18.77 |
0 |
0.00 |
0.0 |
20.30 |
78.3 |
▇▁▁▁▁ |
fertyr |
1 |
3680 |
0.69 |
0.05 |
0.23 |
0 |
0.00 |
0.0 |
0.00 |
1.0 |
▇▁▁▁▁ |
fertyr |
2 |
96 |
0.87 |
0.06 |
0.23 |
0 |
0.00 |
0.0 |
0.00 |
1.0 |
▇▁▁▁▁ |
fertyr |
3 |
2728 |
0.66 |
0.08 |
0.27 |
0 |
0.00 |
0.0 |
0.00 |
1.0 |
▇▁▁▁▁ |
inctot |
1 |
0 |
1.00 |
32279.97 |
33676.35 |
-6600 |
13223.25 |
24333.0 |
43035.00 |
587116.0 |
▇▁▁▁▁ |
inctot |
2 |
0 |
1.00 |
7775.86 |
16956.35 |
-2618 |
0.00 |
1952.0 |
9545.25 |
328138.0 |
▇▁▁▁▁ |
inctot |
3 |
0 |
1.00 |
6081.29 |
19948.83 |
-5184 |
0.00 |
0.0 |
6284.00 |
437801.0 |
▇▁▁▁▁ |
age |
1 |
0 |
1.00 |
41.91 |
13.04 |
16 |
31.00 |
43.0 |
53.00 |
64.0 |
▅▆▆▇▇ |
age |
2 |
0 |
1.00 |
33.23 |
13.12 |
16 |
21.00 |
30.0 |
44.00 |
64.0 |
▇▅▃▃▂ |
age |
3 |
0 |
1.00 |
39.58 |
16.36 |
16 |
24.00 |
40.0 |
55.00 |
64.0 |
▇▅▅▅▇ |
# Descriptive statistics by sex, using psych.
# describe.by() is deprecated (psych emits a warning telling callers to use
# describeBy()); describeBy() is the replacement and accepts the same
# arguments.
psych::describeBy(tx_5a, group = tx_5a$sex, digits = 2)
## Warning: describe.by is deprecated. Please use the describeBy function
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min
## perwt 1 20826 21.12 18.36 16.0 17.98 11.86 1
## strata 2 20826 561866.90 147650.07 630148.0 581822.58 59304.00 280048
## newpuma* 3 20826 5.64 3.20 6.0 5.58 4.45 1
## edu3 4 20826 2.09 0.77 2.0 2.12 1.48 1
## edu* 5 20826 1.91 0.77 2.0 1.88 1.48 1
## lfpart 6 20826 0.68 0.47 1.0 0.72 0.00 0
## sex 7 20826 0.00 0.00 0.0 0.00 0.00 0
## race* 8 20826 6.64 1.11 7.0 6.99 0.00 1
## presgl 9 20826 28.78 19.32 32.1 28.63 21.65 0
## empstat 10 20826 1.69 0.93 1.0 1.61 0.00 1
## fertyr 11 0 NaN NA NA NaN NA Inf
## inctot 12 20826 36630.61 55275.11 21170.0 26690.66 30772.10 -6242
## age 13 20826 39.30 14.59 39.0 39.22 19.27 16
## max range skew kurtosis se
## perwt 331.0 330.0 2.78 14.79 0.13
## strata 690048.0 410000.0 -1.17 -0.44 1023.13
## newpuma* 11.0 10.0 0.11 -1.23 0.02
## edu3 3.0 2.0 -0.16 -1.29 0.01
## edu* 3.0 2.0 0.16 -1.29 0.01
## lfpart 1.0 1.0 -0.75 -1.43 0.00
## sex 0.0 0.0 NaN NaN 0.00
## race* 7.0 6.0 -3.09 8.52 0.01
## presgl 81.5 81.5 -0.08 -0.82 0.13
## empstat 3.0 2.0 0.66 -1.52 0.01
## fertyr -Inf -Inf NA NA NA
## inctot 816000.0 822242.0 4.82 36.03 383.02
## age 64.0 48.0 0.02 -1.26 0.10
## ------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min
## perwt 1 20566 20.84 17.18 16.0 17.97 11.86 1
## strata 2 20566 576921.91 138094.51 630148.0 600764.15 59304.00 280048
## newpuma* 3 20566 5.94 3.13 6.0 5.97 4.45 1
## edu3 4 20566 2.20 0.77 2.0 2.24 1.48 1
## edu* 5 20566 1.80 0.77 2.0 1.76 1.48 1
## lfpart 6 20566 0.61 0.49 1.0 0.63 0.00 0
## sex 7 20566 1.00 0.00 1.0 1.00 0.00 1
## race* 8 20566 6.74 0.91 7.0 7.00 0.00 1
## presgl 9 20566 28.73 21.86 32.8 28.20 25.95 0
## empstat 10 20566 1.82 0.97 1.0 1.78 0.00 1
## fertyr 11 14062 0.06 0.25 0.0 0.00 0.00 0
## inctot 12 20566 21081.38 31309.88 12296.0 15685.76 18230.05 -6600
## age 13 20566 40.69 14.56 41.0 40.88 19.27 16
## max range skew kurtosis se
## perwt 237.0 236.0 2.38 9.69 0.12
## strata 690048.0 410000.0 -1.44 0.33 962.95
## newpuma* 11.0 10.0 -0.03 -1.21 0.02
## edu3 3.0 2.0 -0.35 -1.24 0.01
## edu* 3.0 2.0 0.35 -1.24 0.01
## lfpart 1.0 1.0 -0.43 -1.81 0.00
## sex 1.0 0.0 NaN NaN 0.00
## race* 7.0 6.0 -3.59 12.33 0.01
## presgl 81.5 81.5 -0.08 -1.29 0.15
## empstat 3.0 2.0 0.36 -1.83 0.01
## fertyr 1.0 1.0 3.54 10.50 0.00
## inctot 587116.0 593716.0 5.50 57.65 218.33
## age 64.0 48.0 -0.10 -1.23 0.10
library(table1)
## Warning: package 'table1' was built under R version 4.0.4
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
#Frequency Table
#random visualizations
# ggplot2 has to be attached before the first plot: aes(), geom_bar(), xlab(),
# ylab(), theme() and element_text() are all called without a namespace
# prefix. Previously library(ggplot2) came after the first two plots.
library(ggplot2)

# Bar chart: number of records per employment-status code.
# The x-axis label replaces the leftover placeholder text "X-axis label".
ggplot(data = tx_5a, aes(x = empstat)) +
  geom_bar() +
  xlab("Employment status (empstat)") +
  ylab("Frequency")

# Bar chart: number of records per PUMA; tick labels are rotated 45 degrees.
ggplot(tx_5a, aes(x = newpuma)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram of age (one bar per distinct age value).
ggplot(tx_5a, aes(x = age)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Print the class of every column in the analysis sample.
sapply(tx_5a, class)
## perwt strata newpuma edu3 edu lfpart
## "numeric" "numeric" "character" "numeric" "character" "numeric"
## sex race presgl empstat fertyr inctot
## "numeric" "character" "numeric" "integer" "numeric" "numeric"
## age
## "integer"
# library(summarytools)
# summarytools::freq(tx_5a$Type, order = "freq")
# options(survey.lonely.psu = "adjust")
#
# des<-svydesign(ids=~1, strata=~strata, weights=~perwt, data = tx_5a )
# Cross-tabulation of the recoded sex indicator (rows) by education category
# (columns).
table(tx_5a$sex, tx_5a$edu)
##
## college hs complete hs incomplete
## 0 7198 8394 5234
## 1 8486 7611 4469