Blog Entry #2

library(ipumsr)
# Parse the DDI codebook for extract 00013, then read the microdata file it
# describes (progress output suppressed).
usa_00013 <- read_ipums_ddi("usa_00013.xml")
tx_00013 <- read_ipums_micro(
  usa_00013,
  data_file = "usa_00013.dat.gz",
  verbose = FALSE
)

library(stringr)
# Lower-case every column name so later code can use lower-case variables.
tx_00013 <- setNames(tx_00013, tolower(names(tx_00013)))

# Print the resulting column names.
names(tx_00013)

##  [1] "year"      "multyear"  "sample"    "serial"    "cbserial"  "hhwt"     
##  [7] "cluster"   "statefip"  "puma"      "strata"    "gq"        "ownershp" 
## [13] "ownershpd" "mortgage"  "multgen"   "multgend"  "pernum"    "perwt"    
## [19] "sex"       "age"       "fertyr"    "race"      "raced"     "hispan"   
## [25] "hispand"   "hcovany"   "educ"      "educd"     "empstat"   "empstatd" 
## [31] "labforce"  "occ"       "ind"       "uhrswork"  "inctot"    "poverty"  
## [37] "presgl"    "migrate1"  "migrate1d"

#CS Code
# Remove the value labels attached by ipumsr so the coded columns can be
# padded and compared as plain values.
tx_00013 <- zap_labels(tx_00013)

# Build a 7-character PUMA identifier: state FIPS left-padded to 2 characters
# followed by the PUMA code left-padded to 5 characters (e.g. "4800100").
tx_00013$newpuma <- paste0(
  str_pad(tx_00013$statefip, 2, "left", "0"),
  str_pad(tx_00013$puma, 5, "left", "0")
)

# Person-record counts per PUMA identifier.
table(tx_00013$newpuma)

## 
## 4800100 4800200 4800300 4800400 4800501 4800502 4800600 4800700 4800800 4800900 
##    8976    4362    4674    6757    3800    5870    7094    4974    8381    6331 
## 4801000 4801100 4801200 4801300 4801400 4801501 4801502 4801600 4801700 4801800 
##    6359    4275    3594    4239    4289    2823    3451    3754    4852    5583 
## 4801901 4801902 4801903 4801904 4801905 4801906 4801907 4802001 4802002 4802003 
##    4402    4245    3728    3195    4205    6061    4681    4436    4103    6084 
## 4802004 4802005 4802006 4802101 4802102 4802200 4802301 4802302 4802303 4802304 
##    4873    4049    5078    5882    5309    4226    2988    3015    3388    2921 
## 4802305 4802306 4802307 4802308 4802309 4802310 4802311 4802312 4802313 4802314 
##    3255    3718    3948    3274    3816    3944    4200    4436    5082    3627 
## 4802315 4802316 4802317 4802318 4802319 4802320 4802321 4802322 4802400 4802501 
##    3899    2974    2488    3253    3424    3566    3905    4536    4496    3737 
## 4802502 4802503 4802504 4802505 4802506 4802507 4802508 4802509 4802510 4802511 
##    4185    3696    2676    3742    3053    3873    3622    4406    4376    2785 
## 4802512 4802513 4802514 4802515 4802516 4802600 4802700 4802800 4802900 4803000 
##    3138    4605    4330    3849    3494    9343    5077    4847    3644    3894 
## 4803100 4803200 4803301 4803302 4803303 4803304 4803305 4803306 4803400 4803501 
##    4029    4379    4575    4536    3704    2996    3661    4159    6975    4733 
## 4803502 4803601 4803602 4803700 4803801 4803802 4803900 4804000 4804100 4804200 
##    5277    5462    7410    8434    3518    5522    6597    5112    3565    4323 
## 4804301 4804302 4804400 4804501 4804502 4804503 4804504 4804601 4804602 4804603 
##    3282    4948    3389    3376    3264    2591    2489    3635    3768    4783 
## 4804604 4804605 4804606 4804607 4804608 4804609 4804610 4804611 4804612 4804613 
##    4719    3325    3548    3001    3208    3701    2969    2641    3627    3319 
## 4804614 4804615 4804616 4804617 4804618 4804619 4804620 4804621 4804622 4804623 
##    3146    2923    3399    2564    2998    2644    2778    3648    2757    2811 
## 4804624 4804625 4804626 4804627 4804628 4804629 4804630 4804631 4804632 4804633 
##    3105    2733    3005    2988    3085    2941    2632    3105    3428    2002 
## 4804634 4804635 4804636 4804637 4804638 4804701 4804702 4804801 4804802 4804803 
##    2120    2990    2901    3075    2706    5956    3790    2101    3777    3715 
## 4804901 4804902 4804903 4804904 4804905 4805000 4805100 4805201 4805202 4805203 
##    3025    3696    2939    2716    4373    5532    5005    4511    3605    3445 
## 4805204 4805301 4805302 4805303 4805304 4805305 4805306 4805307 4805308 4805309 
##    4498    3153    3689    4928    4124    3561    6162    4390    4123    4819 
## 4805400 4805500 4805600 4805700 4805800 4805901 4805902 4805903 4805904 4805905 
##    5755    6315    3466    5226    3730    3654    3649    3516    3355    3693 
## 4805906 4805907 4805908 4805909 4805910 4805911 4805912 4805913 4805914 4805915 
##    3386    4084    3118    3324    3944    3566    3237    2997    4406    3570 
## 4805916 4806000 4806100 4806200 4806301 4806302 4806400 4806500 4806601 4806602 
##    3317    3527    3634    4059    4778    3826    3097    4814    3861    3349 
## 4806603 4806701 4806702 4806703 4806801 4806802 4806803 4806804 4806805 4806806 
##    3786    3088    4194    4537    2613    2169    2594    2336    3324    2414 
## 4806807 4806900 
##    2775    2869

# Read the table of PUMAs used for the merge below.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where this exact file location exists.
bordp <- readr::read_csv(
  file = "C:/Users/codar/OneDrive/Documents/Stats II/Data/border_100mi_pumas_table.csv"
)

## Parsed with column specification:
## cols(
##   fid = col_double(),
##   STATEFP10 = col_double(),
##   PUMACE10 = col_character(),
##   AFFGEOID10 = col_character(),
##   GEOID10 = col_double(),
##   NAME10 = col_character(),
##   LSAD10 = col_character(),
##   ALAND10 = col_double(),
##   AWATER10 = col_double()
## )

# Inner join (merge's default): keep only person records whose newpuma value
# matches a GEOID10 in the PUMA table.
mdat <- merge(
  x = tx_00013,
  y = bordp,
  by.x = "newpuma",
  by.y = "GEOID10"
)

# Person-record counts for the PUMAs that survived the merge.
table(mdat[["newpuma"]])

## 
## 4802800 4803200 4803301 4803302 4803303 4803304 4803305 4803306 4806000 4806100 
##    4847    4379    4575    4536    3704    2996    3661    4159    3527    3634 
## 4806200 4806301 4806302 4806400 4806701 4806702 4806703 4806801 4806802 4806803 
##    4059    4778    3826    3097    3088    4194    4537    2613    2169    2594 
## 4806804 4806805 4806806 4806807 4806900 
##    2336    3324    2414    2775    2869

library(dplyr)
# Keep only the person records that fall in the eleven selected PUMAs.
tx_00013 <- filter(
  tx_00013,
  newpuma %in% c(
    "4802800", "4803200", "4806000", "4806100", "4806200", "4806301",
    "4806302", "4806701", "4806702", "4806703", "4806900"
  )
)

# Inspect the filtered data and confirm that newpuma is now a column.
View(tx_00013)
names(tx_00013)

##  [1] "year"      "multyear"  "sample"    "serial"    "cbserial"  "hhwt"     
##  [7] "cluster"   "statefip"  "puma"      "strata"    "gq"        "ownershp" 
## [13] "ownershpd" "mortgage"  "multgen"   "multgend"  "pernum"    "perwt"    
## [19] "sex"       "age"       "fertyr"    "race"      "raced"     "hispan"   
## [25] "hispand"   "hcovany"   "educ"      "educd"     "empstat"   "empstatd" 
## [31] "labforce"  "occ"       "ind"       "uhrswork"  "inctot"    "poverty"  
## [37] "presgl"    "migrate1"  "migrate1d" "newpuma"

# describe(tx_00012$income)
# summary(tx_00012$educ)
# class(tx_00012$educ)

#Recodes
# Recode the raw IPUMS codes into analysis variables.
#
# mutate() evaluates its arguments in order, and each one sees the columns
# already modified by the ones before it. Any variable derived from the raw
# codes of `sex` must therefore be created BEFORE `sex` itself is overwritten.
# Unmatched codes (and missing values) become NA in every recode.
tx_5 <- tx_00013 %>%
  mutate(
    # Text label for sex, built from the raw codes (1 / 2). It has to come
    # before the 0/1 recode of `sex` below; otherwise recoded 1 (originally 2)
    # would be labelled "male" and recoded 0 would fall through to NA.
    sexb = case_when(
      sex == 1 ~ "male",
      sex == 2 ~ "female",
      TRUE ~ NA_character_
    ),
    # Indicator version of sex: raw 1 -> 0, raw 2 -> 1.
    sex = case_when(
      sex == 1 ~ 0,
      sex == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Labor force participation indicator: labforce 1 -> 0, labforce 2 -> 1.
    lfpart = case_when(
      labforce == 1 ~ 0,
      labforce == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Education as a four-level label.
    edu = case_when(
      educ == 0 ~ "none",
      educ %in% 1:5 ~ "hs incomplete",
      educ %in% 6 ~ "hs complete",
      educ %in% 7:11 ~ "college",
      TRUE ~ NA_character_
    ),
    # Education as a 1-3 score; educ == 0 has no score and becomes NA.
    edu3 = case_when(
      educ %in% 1:5 ~ 1,
      educ %in% 6 ~ 2,
      educ %in% 7:11 ~ 3,
      TRUE ~ NA_real_
    ),
    # Race labels. Code 3 is intentionally left unmapped (commented out in the
    # original), so those records become NA.
    race = case_when(
      race == 1 ~ "white",
      race == 2 ~ "black",
      # race == 3 ~ "aian",
      race %in% 4:5 ~ "asian",
      race == 6 ~ "oapi",
      race == 7 ~ "other",
      race == 8 ~ "twomajor",
      race == 9 ~ "threemoremaj",
      TRUE ~ NA_character_
    ),
    # Hispanic origin. The "NL" codes are tested first: case_when() stops at
    # the first match, so testing `hispan != 0` first would classify code 9 as
    # "Latino" and make the explicit 9 -> "NL" rule unreachable.
    # NOTE(review): code 9 is presumably "not reported"; confirm whether NA
    # would be more appropriate than "NL".
    hisp = case_when(
      hispan %in% c(0, 9) ~ "NL",
      hispan != 0 ~ "Latino",
      TRUE ~ NA_character_
    ),
    # Migration status one year ago.
    # NOTE(review): confirm code 3 against the IPUMS MIGRATE1 codebook; the
    # label "abroad1yr" may belong to a different code.
    migrate1 = case_when(
      migrate1 == 1 ~ "same house",
      migrate1 == 2 ~ "movinstate",
      migrate1 == 3 ~ "abroad1yr",
      TRUE ~ NA_character_
    ),
    # Birth in the last year indicator: fertyr 1 -> 0, fertyr 2 -> 1.
    fertyr = case_when(
      fertyr == 1 ~ 0,
      fertyr == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Labels only the two extreme poverty codes; everything else is NA.
    poverty1 = case_when(
      poverty == 001 ~ "1% or less",
      poverty == 501 ~ "501% or more",
      TRUE ~ NA_character_
    ),
    # Health coverage indicator: hcovany 1 -> 0, hcovany 2 -> 1.
    hcov = case_when(
      hcovany == 1 ~ 0,
      hcovany == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    # Home ownership indicator: ownershp 1 -> 1, ownershp 2 -> 0.
    ownhome = case_when(
      ownershp == 1 ~ 1,
      ownershp == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    # Multigenerational household code, keeping 1-3 and setting the rest to NA.
    multgen1 = case_when(
      multgen == 1 ~ 1,
      multgen == 2 ~ 2,
      multgen == 3 ~ 3,
      TRUE ~ NA_real_
    )
  )
         # mgmt = if_else(occ %in% c(10:160) | occ %in% c(220:730), 1, 0))    #occupational prestige
        
         # occ=case_when(occ %in% 10:160 ~ 'Mgmt/Biz',
         #                    occ %in% 220:730 ~ 'Mgmt/Biz',
         #                    occ %in% 800:950 ~ 'Finance',
         #                    # occ %in% 1000:1240 ~ 'STEM',
         #                    occ %in% 1300:1540 ~ 'Arch/Eng',
         #                    occ %in% 1550:1560 ~ 'Technical',
         #                    # occ %in% 1600:1760 ~ 'STEM',
         #                    occ %in% 1800:1840 ~ 'SocSTEM',
         #                    occ %in% 1900:1980 ~ 'Technical',
         #                    occ %in% 2000:2060 ~ 'PublicServ',
         #                    occ == 2100 ~ 'Law',
         #                    occ %in% 2140:2150 ~ 'Technical',
         #                    occ %in% 2200:2430 ~ 'Education',
         #                    occ %in% 2440:2550 ~ 'Technical',
         #                    occ %in% 2600:2910 ~ 'A&E/Sports/Media',
         #                    occ == 2920 ~ 'Technical',
         #                    occ %in% 3000:3500 ~ 'Health/Med',
         #                    occ %in% 3510:3650 ~ 'Technical',
         #                    occ %in% 3700:3950 ~ 'PublicServ',
         #                    occ == 4000 ~ 'A&E/Sports/Media',
         #                    occ %in% 4010:4965 ~ 'Sales/Service',
         #                    occ %in% 5000: 5940 ~ 'Office/Admin',
         #                    occ %in% 6200:8965 ~ 'SkilledTrade',
         #                    occ %in% 9000:9750 ~ 'Transport',
         #                    TRUE~ NA_character_))
# Open the recoded data frame in the data viewer for a visual check.
View(tx_5)

#Summary statistics and correlation matrix 

# Analytic sample: keep the thirteen analysis columns, then drop every row
# that is missing any of them except fertyr (fertyr is allowed to be NA).
tx_5a <- tx_5 %>%
  select(
    perwt, strata, newpuma, edu3, edu, lfpart, sex, race, presgl, empstat,
    fertyr, inctot, age
  ) %>%
  filter(
    complete.cases(
      perwt, strata, newpuma, edu3, edu, lfpart, sex, race, presgl, empstat,
      inctot, age
    )
  )

# Column-by-column summary of the analytic sample.
summary(tx_5a)

##      perwt            strata         newpuma               edu3      
##  Min.   :  1.00   Min.   :280048   Length:41392       Min.   :1.000  
##  1st Qu.: 10.00   1st Qu.:600048   Class :character   1st Qu.:2.000  
##  Median : 16.00   Median :630148   Mode  :character   Median :2.000  
##  Mean   : 20.98   Mean   :569347                      Mean   :2.144  
##  3rd Qu.: 27.00   3rd Qu.:670248                      3rd Qu.:3.000  
##  Max.   :331.00   Max.   :690048                      Max.   :3.000  
##                                                                      
##      edu                lfpart            sex             race          
##  Length:41392       Min.   :0.0000   Min.   :0.0000   Length:41392      
##  Class :character   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Mode  :character   Median :1.0000   Median :0.0000   Mode  :character  
##                     Mean   :0.6412   Mean   :0.4969                     
##                     3rd Qu.:1.0000   3rd Qu.:1.0000                     
##                     Max.   :1.0000   Max.   :1.0000                     
##                                                                         
##      presgl         empstat          fertyr          inctot      
##  Min.   : 0.00   Min.   :1.000   Min.   :0.000   Min.   : -6600  
##  1st Qu.: 0.00   1st Qu.:1.000   1st Qu.:0.000   1st Qu.:  1058  
##  Median :32.10   Median :1.000   Median :0.000   Median : 15878  
##  Mean   :28.75   Mean   :1.755   Mean   :0.065   Mean   : 28905  
##  3rd Qu.:46.90   3rd Qu.:3.000   3rd Qu.:0.000   3rd Qu.: 39800  
##  Max.   :81.50   Max.   :3.000   Max.   :1.000   Max.   :816000  
##                                  NA's   :27330                   
##       age       
##  Min.   :16.00  
##  1st Qu.:27.00  
##  Median :40.00  
##  Mean   :39.99  
##  3rd Qu.:53.00  
##  Max.   :64.00  
##

# Open the analytic sample in the data viewer for a visual check.
View(tx_5a)

# my_summary1 <-
#   list("Labor Force Participation" =
#        list("min"       = ~ min(lfpart),
#             "max"       = ~ max(lfpart),
#             "mean (sd)" = ~ qwraps2::mean_sd(lfpart)),
#        "Fertility" =
#        list("min"       = ~ min(fertyr),
#             "max"       = ~ max(fertyr),
#             "mean (sd)" = ~ qwraps2::mean_sd(fertyr)),
#        "Educational Attainment" =
#        list("min"       = ~ min(edu3),
#             "max"       = ~ max(edu3),
#             "mean (sd)" = ~ qwraps2::mean_sd(edu3))
#        )
# 
# 
# library(qwraps2)
# bysex <- summary_table(dplyr::group_by(tx_hw71, sex), my_summary1)
# print(bysex)


# Overall descriptive table for every column in the analytic sample.
# The call is namespace-qualified because library(tableone) is only attached
# further down the script; an unqualified call here fails in a fresh session.
tableone::CreateTableOne(data = tx_5a)

##                      
##                       Overall              
##   n                       41392            
##   perwt (mean (SD))       20.98 (17.79)    
##   strata (mean (SD))  569347.12 (143178.43)
##   newpuma (%)                              
##      4802800               4635 (11.2)     
##      4803200               4169 (10.1)     
##      4806000               3368 ( 8.1)     
##      4806100               3498 ( 8.5)     
##      4806200               3809 ( 9.2)     
##      4806301               4504 (10.9)     
##      4806302               3590 ( 8.7)     
##      4806701               2889 ( 7.0)     
##      4806702               3933 ( 9.5)     
##      4806703               4251 (10.3)     
##      4806900               2746 ( 6.6)     
##   edu3 (mean (SD))         2.14 (0.77)     
##   edu (%)                                  
##      college              15684 (37.9)     
##      hs complete          16005 (38.7)     
##      hs incomplete         9703 (23.4)     
##   lfpart (mean (SD))       0.64 (0.48)     
##   sex (mean (SD))          0.50 (0.50)     
##   race (%)                                 
##      asian                   55 ( 0.1)     
##      black                  718 ( 1.7)     
##      oapi                   275 ( 0.7)     
##      other                 2397 ( 5.8)     
##      threemoremaj            30 ( 0.1)     
##      twomajor               542 ( 1.3)     
##      white                37375 (90.3)     
##   presgl (mean (SD))      28.76 (20.62)    
##   empstat (mean (SD))      1.76 (0.95)     
##   fertyr (mean (SD))       0.06 (0.25)     
##   inctot (mean (SD))   28904.83 (45658.90) 
##   age (mean (SD))         39.99 (14.59)

# library(tableone)
# t2<-CreateTableOne(vars = c(edu3, fertyr,empstat, strata = "sex", test = T, data = tx_hw71)

# The research question to be examined is: How does labor force participation among women living in border PUMAs vary by race, education, occupation, and fertility?

library(tableone)

# # Create a variable list which we want in Table 1
# listVars <- c("empstat", "fertyr", "edu", "edu3", "sex", "presgl", "inctot", "age")
# 
# # Define categorical variables
# catVars <- c("sex","edu","empstat", "fertyr")
# 
# table1 <- CreateTableOne(vars = listVars, data = tx_5a, factorVars = catVars, strata = c("sex"))
# 
# 
# a <- print(table1, quote = TRUE, noSpaces = TRUE)
# 
# as.data.frame(a)

# my_skim <- skim_with(
#   numeric = sfl(iqr = IQR, mad = mad, p99 = ~ quantile(., probs = .99)),
#   append = FALSE
# )
# my_skim(iris, Sepal.Length)


library(skimr)

## Warning: package 'skimr' was built under R version 4.0.4

# Summarise every column of the analytic sample, split by column type.
skim(tx_5a)

Data summary
Name	tx_5a
Number of rows	41392
Number of columns	13
_______________________
Column type frequency:
character	3
numeric	10
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
newpuma	1	7	7	11
edu	1	7	13	3
race	1	4	12	7

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
perwt	0	1.00	20.98	17.79	1	10	16.0	27.0	331.0	▇▁▁▁▁
strata	0	1.00	569347.12	143178.43	280048	600048	630148.0	670248.0	690048.0	▂▁▁▁▇
edu3	0	1.00	2.14	0.77	1	2	2.0	3.0	3.0	▅▁▇▁▇
lfpart	0	1.00	0.64	0.48	0	0	1.0	1.0	1.0	▅▁▁▁▇
sex	0	1.00	0.50	0.50	0	0	0.0	1.0	1.0	▇▁▁▁▇
presgl	0	1.00	28.76	20.62	0	0	32.1	46.9	81.5	▇▆▇▆▁
empstat	0	1.00	1.76	0.95	1	1	1.0	3.0	3.0	▇▁▁▁▅
fertyr	27330	0.34	0.06	0.25	0	0	0.0	0.0	1.0	▇▁▁▁▁
inctot	0	1.00	28904.83	45658.90	-6600	1058	15878.0	39800.0	816000.0	▇▁▁▁▁
age	0	1.00	39.99	14.59	16	27	40.0	53.0	64.0	▇▆▆▇▇

# Same summary, computed separately for each value of sex (0 / 1).
tx_5a %>%
  dplyr::group_by(sex) %>%
  skim()

Data summary
Name	Piped data
Number of rows	41392
Number of columns	13
_______________________
Column type frequency:
character	3
numeric	9
________________________
Group variables	sex

Variable type: character

skim_variable	sex	complete_rate	min	max	n_unique
newpuma	0	1	7	7	11
newpuma	1	1	7	7	11
edu	0	1	7	13	3
edu	1	1	7	13	3
race	0	1	4	12	7
race	1	1	4	12	7

Variable type: numeric

skim_variable	sex	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
perwt	0	0	1.00	21.12	18.36	1	10.0	16.0	27.0	331.0	▇▁▁▁▁
perwt	1	0	1.00	20.84	17.18	1	10.0	16.0	27.0	237.0	▇▁▁▁▁
strata	0	0	1.00	561866.90	147650.07	280048	600048.0	630148.0	670248.0	690048.0	▃▁▁▁▇
strata	1	0	1.00	576921.91	138094.51	280048	600048.0	630148.0	670248.0	690048.0	▂▁▁▁▇
edu3	0	0	1.00	2.09	0.77	1	1.0	2.0	3.0	3.0	▅▁▇▁▇
edu3	1	0	1.00	2.20	0.77	1	2.0	2.0	3.0	3.0	▅▁▇▁▇
lfpart	0	0	1.00	0.68	0.47	0	0.0	1.0	1.0	1.0	▃▁▁▁▇
lfpart	1	0	1.00	0.61	0.49	0	0.0	1.0	1.0	1.0	▅▁▁▁▇
presgl	0	0	1.00	28.78	19.32	0	17.5	32.1	45.3	81.5	▆▇▆▅▁
presgl	1	0	1.00	28.73	21.86	0	0.0	32.8	47.8	81.5	▇▃▇▅▁
empstat	0	0	1.00	1.69	0.93	1	1.0	1.0	3.0	3.0	▇▁▁▁▅
empstat	1	0	1.00	1.82	0.97	1	1.0	1.0	3.0	3.0	▇▁▁▁▆
fertyr	0	20826	0.00	NaN	NA	NA	NA	NA	NA	NA
fertyr	1	6504	0.68	0.06	0.25	0	0.0	0.0	0.0	1.0	▇▁▁▁▁
inctot	0	0	1.00	36630.61	55275.11	-6242	3176.0	21170.0	50000.0	816000.0	▇▁▁▁▁
inctot	1	0	1.00	21081.38	31309.88	-6600	0.0	12296.0	30000.0	587116.0	▇▁▁▁▁
age	0	0	1.00	39.30	14.59	16	26.0	39.0	52.0	64.0	▇▆▆▆▇
age	1	0	1.00	40.69	14.56	16	28.0	41.0	54.0	64.0	▇▆▆▇▇

# Skim only the listed columns instead of the whole data frame.
skim(tx_5a, sex, empstat, fertyr, edu, presgl, inctot, age)

Data summary
Name	tx_5a
Number of rows	41392
Number of columns	13
_______________________
Column type frequency:
character	1
numeric	6
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
edu	0	1	7	13	0	3	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
sex	0	1.00	0.50	0.50	0	0	0.0	1.0	1.0	▇▁▁▁▇
empstat	0	1.00	1.76	0.95	1	1	1.0	3.0	3.0	▇▁▁▁▅
fertyr	27330	0.34	0.06	0.25	0	0	0.0	0.0	1.0	▇▁▁▁▁
presgl	0	1.00	28.76	20.62	0	0	32.1	46.9	81.5	▇▆▇▆▁
inctot	0	1.00	28904.83	45658.90	-6600	1058	15878.0	39800.0	816000.0	▇▁▁▁▁
age	0	1.00	39.99	14.59	16	27	40.0	53.0	64.0	▇▆▆▇▇

# Skim females only (sex == 1 after the recode), with a separate summary for
# each employment status. sex is numeric, so it is compared with the number 1
# rather than the string "1".
tx_5a %>%
  filter(sex == 1) %>%
  dplyr::group_by(empstat) %>%
  skim()

Data summary
Name	Piped data
Number of rows	20566
Number of columns	13
_______________________
Column type frequency:
character	3
numeric	9
________________________
Group variables	empstat

Variable type: character

skim_variable	empstat	complete_rate	min	max	n_unique
newpuma	1	1	7	7	11
newpuma	2	1	7	7	11
newpuma	3	1	7	7	11
edu	1	1	7	13	3
edu	2	1	7	13	3
edu	3	1	7	13	3
race	1	1	4	12	7
race	2	1	4	12	6
race	3	1	4	12	7

Variable type: numeric

skim_variable	empstat	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
perwt	1	0	1.00	20.78	17.42	1	10.00	16.0	26.00	237.0	▇▁▁▁▁
perwt	2	0	1.00	22.17	16.95	1	10.00	16.0	29.00	86.0	▇▃▂▁▁
perwt	3	0	1.00	20.80	16.85	1	10.00	16.0	27.00	170.0	▇▁▁▁▁
strata	1	0	1.00	573787.09	139793.96	280048	600048.00	630148.0	670248.00	690048.0	▂▁▁▁▇
strata	2	0	1.00	596106.65	124566.60	280048	610048.00	630148.0	670248.00	690048.0	▂▁▁▁▇
strata	3	0	1.00	579733.09	136584.15	280048	600048.00	630148.0	670248.00	690048.0	▂▁▁▁▇
edu3	1	0	1.00	2.38	0.70	1	2.00	3.0	3.00	3.0	▂▁▆▁▇
edu3	2	0	1.00	2.13	0.72	1	2.00	2.0	3.00	3.0	▃▁▇▁▆
edu3	3	0	1.00	1.94	0.79	1	1.00	2.0	3.00	3.0	▇▁▇▁▆
lfpart	1	0	1.00	1.00	0.00	1	1.00	1.0	1.00	1.0	▁▁▇▁▁
lfpart	2	0	1.00	1.00	0.00	1	1.00	1.0	1.00	1.0	▁▁▇▁▁
lfpart	3	0	1.00	0.00	0.00	0	0.00	0.0	0.00	0.0	▁▁▇▁▁
sex	1	0	1.00	1.00	0.00	1	1.00	1.0	1.00	1.0	▁▁▇▁▁
sex	2	0	1.00	1.00	0.00	1	1.00	1.0	1.00	1.0	▁▁▇▁▁
sex	3	0	1.00	1.00	0.00	1	1.00	1.0	1.00	1.0	▁▁▇▁▁
presgl	1	0	1.00	41.10	14.29	0	32.80	41.9	50.30	81.5	▂▃▇▆▁
presgl	2	0	1.00	25.97	18.55	0	0.00	30.9	36.30	78.3	▇▆▇▃▁
presgl	3	0	1.00	11.09	18.77	0	0.00	0.0	20.30	78.3	▇▁▁▁▁
fertyr	1	3680	0.69	0.05	0.23	0	0.00	0.0	0.00	1.0	▇▁▁▁▁
fertyr	2	96	0.87	0.06	0.23	0	0.00	0.0	0.00	1.0	▇▁▁▁▁
fertyr	3	2728	0.66	0.08	0.27	0	0.00	0.0	0.00	1.0	▇▁▁▁▁
inctot	1	0	1.00	32279.97	33676.35	-6600	13223.25	24333.0	43035.00	587116.0	▇▁▁▁▁
inctot	2	0	1.00	7775.86	16956.35	-2618	0.00	1952.0	9545.25	328138.0	▇▁▁▁▁
inctot	3	0	1.00	6081.29	19948.83	-5184	0.00	0.0	6284.00	437801.0	▇▁▁▁▁
age	1	0	1.00	41.91	13.04	16	31.00	43.0	53.00	64.0	▅▆▆▇▇
age	2	0	1.00	33.23	13.12	16	21.00	30.0	44.00	64.0	▇▅▃▃▂
age	3	0	1.00	39.58	16.36	16	24.00	40.0	55.00	64.0	▇▅▅▅▇

# Using psych: descriptive statistics for every column, split by sex.
# describeBy() replaces the deprecated describe.by().

psych::describeBy(tx_5a, group = tx_5a$sex, digits = 2)

## Warning: describe.by is deprecated. Please use the describeBy function

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## 
##  Descriptive statistics by group 
## group: 0
##          vars     n      mean        sd   median   trimmed      mad    min
## perwt       1 20826     21.12     18.36     16.0     17.98    11.86      1
## strata      2 20826 561866.90 147650.07 630148.0 581822.58 59304.00 280048
## newpuma*    3 20826      5.64      3.20      6.0      5.58     4.45      1
## edu3        4 20826      2.09      0.77      2.0      2.12     1.48      1
## edu*        5 20826      1.91      0.77      2.0      1.88     1.48      1
## lfpart      6 20826      0.68      0.47      1.0      0.72     0.00      0
## sex         7 20826      0.00      0.00      0.0      0.00     0.00      0
## race*       8 20826      6.64      1.11      7.0      6.99     0.00      1
## presgl      9 20826     28.78     19.32     32.1     28.63    21.65      0
## empstat    10 20826      1.69      0.93      1.0      1.61     0.00      1
## fertyr     11     0       NaN        NA       NA       NaN       NA    Inf
## inctot     12 20826  36630.61  55275.11  21170.0  26690.66 30772.10  -6242
## age        13 20826     39.30     14.59     39.0     39.22    19.27     16
##               max    range  skew kurtosis      se
## perwt       331.0    330.0  2.78    14.79    0.13
## strata   690048.0 410000.0 -1.17    -0.44 1023.13
## newpuma*     11.0     10.0  0.11    -1.23    0.02
## edu3          3.0      2.0 -0.16    -1.29    0.01
## edu*          3.0      2.0  0.16    -1.29    0.01
## lfpart        1.0      1.0 -0.75    -1.43    0.00
## sex           0.0      0.0   NaN      NaN    0.00
## race*         7.0      6.0 -3.09     8.52    0.01
## presgl       81.5     81.5 -0.08    -0.82    0.13
## empstat       3.0      2.0  0.66    -1.52    0.01
## fertyr       -Inf     -Inf    NA       NA      NA
## inctot   816000.0 822242.0  4.82    36.03  383.02
## age          64.0     48.0  0.02    -1.26    0.10
## ------------------------------------------------------------ 
## group: 1
##          vars     n      mean        sd   median   trimmed      mad    min
## perwt       1 20566     20.84     17.18     16.0     17.97    11.86      1
## strata      2 20566 576921.91 138094.51 630148.0 600764.15 59304.00 280048
## newpuma*    3 20566      5.94      3.13      6.0      5.97     4.45      1
## edu3        4 20566      2.20      0.77      2.0      2.24     1.48      1
## edu*        5 20566      1.80      0.77      2.0      1.76     1.48      1
## lfpart      6 20566      0.61      0.49      1.0      0.63     0.00      0
## sex         7 20566      1.00      0.00      1.0      1.00     0.00      1
## race*       8 20566      6.74      0.91      7.0      7.00     0.00      1
## presgl      9 20566     28.73     21.86     32.8     28.20    25.95      0
## empstat    10 20566      1.82      0.97      1.0      1.78     0.00      1
## fertyr     11 14062      0.06      0.25      0.0      0.00     0.00      0
## inctot     12 20566  21081.38  31309.88  12296.0  15685.76 18230.05  -6600
## age        13 20566     40.69     14.56     41.0     40.88    19.27     16
##               max    range  skew kurtosis     se
## perwt       237.0    236.0  2.38     9.69   0.12
## strata   690048.0 410000.0 -1.44     0.33 962.95
## newpuma*     11.0     10.0 -0.03    -1.21   0.02
## edu3          3.0      2.0 -0.35    -1.24   0.01
## edu*          3.0      2.0  0.35    -1.24   0.01
## lfpart        1.0      1.0 -0.43    -1.81   0.00
## sex           1.0      0.0   NaN      NaN   0.00
## race*         7.0      6.0 -3.59    12.33   0.01
## presgl       81.5     81.5 -0.08    -1.29   0.15
## empstat       3.0      2.0  0.36    -1.83   0.01
## fertyr        1.0      1.0  3.54    10.50   0.00
## inctot   587116.0 593716.0  5.50    57.65 218.33
## age          64.0     48.0 -0.10    -1.23   0.10

library(table1)

## Warning: package 'table1' was built under R version 4.0.4

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

#Frequency Table

# Random visualizations
# ggplot2 is attached before the first plot: aes(), geom_bar(), theme() and
# element_text() are called unqualified below and are not found otherwise.
library(ggplot2)

# Bar chart: number of records in each employment status code.
ggplot(data = tx_5a, aes(x = empstat)) +
  geom_bar() +
  xlab("Employment status (empstat)") +
  ylab("Frequency")

# Bar chart: number of records in each PUMA; tick labels are rotated so the
# 7-character identifiers do not overlap.
ggplot(tx_5a, aes(x = newpuma)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram of age (one bar per distinct age value).
ggplot(tx_5a, aes(x = age)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Report the class of every column in the analytic sample.
sapply(X = tx_5a, FUN = class)

##       perwt      strata     newpuma        edu3         edu      lfpart 
##   "numeric"   "numeric" "character"   "numeric" "character"   "numeric" 
##         sex        race      presgl     empstat      fertyr      inctot 
##   "numeric" "character"   "numeric"   "integer"   "numeric"   "numeric" 
##         age 
##   "integer"

# library(summarytools)
# summarytools::freq(tx_5a$Type, order = "freq")

# options(survey.lonely.psu = "adjust")
# 
# des<-svydesign(ids=~1, strata=~strata, weights=~perwt, data = tx_5a )

# Cross-tabulate sex (rows) against education level (columns).
table(tx_5a[["sex"]], tx_5a[["edu"]])

##    
##     college hs complete hs incomplete
##   0    7198        8394          5234
##   1    8486        7611          4469

Blog Entry #2

Coda Rayo-Garza

4/4/2021