# Load the CFPS 2016 Stata files, declaring the file encoding as GB2312.
cfps2016child <- read_dta(
  file = "../cfps/data/2016AllData/cfps2016child_201807.dta",
  encoding = "GB2312"
)
# cfps2016adult <- read_dta(
#   file = "../cfps/data/2016AllData/cfps2016adult_201808.dta",
#   encoding = "GB2312"
# )
cfps2016familyconf <- read_dta(
  file = "../cfps/data/2016AllData/cfps2016famconf_201804.dta",
  encoding = "GB2312"
)
# Note: `cfps2016familycon` (no trailing "f") holds the famecon file.
cfps2016familycon <- read_dta(
  file = "../cfps/data/2016AllData/cfps2016famecon_201807.dta",
  encoding = "GB2312"
)
#cfps2016familycon %>% sjPlot::view_df()
# Tabulate the raw values of wd402, including its negative survey codes.
count(cfps2016child, wd402)
## # A tibble: 106 x 2
## wd402 n
## <dbl+lbl> <int>
## 1 -8 [不适用] 7186
## 2 -2 [拒绝回答] 5
## 3 -1 [不知道] 44
## 4 0 40
## 5 1 2
## 6 9 1
## 7 30 1
## 8 50 3
## 9 69 1
## 10 100 6
## # ... with 96 more rows
# Build the child-level analysis sample: keep the variables of interest and
# drop every row that carries a survey missing-value code in any of them.
# Codes seen in the labelled data: -1 = don't know, -2 = refused,
# -8 = not applicable; 79 = "situation unknown" on the observation items.
child <- cfps2016child %>%
  dplyr::select(
    fid16,
    wf601m, # gave up watching TV for the sake of the child's study
    wf602m, # often talks with the child about things at school
    wf603m, # requires the child to finish homework
    wf604m, # checks the child's homework
    wf605m, # stops the child from watching TV
    wf606m, # restricts the programmes the child watches
    wh9, # expected level of education
    pd501b, # school education expenditure, past 12 months (yuan)
    pd503r, # extracurricular tutoring fees, past 12 months (yuan)
    pd577r, # other expenses, past 12 months (yuan)
    # pd5ckp, # confirmation of total education expenditure
    pa301, # child's current hukou status
    wz301, # parents care about the child's education
    wz302, # parents actively communicate with the child
    cfps_age, cfps_gender, # urban16
    wd4 # whether the family saves for education
  ) %>%
  # Parental-involvement items: drop don't know / not applicable / unknown.
  filter(
    !wf601m %in% c(-1, -8, 79),
    !wf602m %in% c(-1, -8, 79),
    !wf603m %in% c(-1, -8, 79),
    !wf604m %in% c(-1, -8, 79),
    !wf605m %in% c(-1, -8, 79),
    !wf606m %in% c(-1, -8, 79)
  ) %>%
  filter(!wh9 %in% c(-1, -8, -2)) %>%
  filter(!pd501b %in% c(-1, -8, -2)) %>%
  # pd503r is an amount in yuan, so it takes the same missing codes as the
  # other expenditure variables. Filtering on 79 here removed genuine
  # 79-yuan answers and let the -2 (refused) code through.
  filter(!pd503r %in% c(-1, -8, -2)) %>%
  filter(!pd577r %in% c(-1, -8, -2)) %>%
  # filter(pd5ckp == 5) %>%
  filter(pa301 %in% c(1, 3)) %>%
  filter(!wz301 %in% c(-8, 79)) %>%
  # wz302 gets the same screening as wz301 (the original filtered wz301
  # twice and never screened wz302).
  filter(!wz302 %in% c(-8, 79), cfps_age != -1, wd4 %in% c(1, 5)) %>%
  # filter(cfps_age %in% c(10 ~15)) %>%
  drop_na()
# Log-transform an expenditure amount.
# Returns log(amount) where amount > 0, 0 for any other non-missing value and
# NA where amount is NA. log() is only evaluated on the positive entries, so
# no NaN warning is raised, and the result is a plain double vector without
# the value labels of the source column.
log_or_zero <- function(amount) {
  amount <- as.numeric(amount) # strip haven value labels
  logged <- rep(0, length(amount))
  logged[is.na(amount)] <- NA_real_
  is_positive <- !is.na(amount) & amount > 0
  logged[is_positive] <- log(amount[is_positive])
  logged
}

# Add log expenditures (*_c) and expected education in years (wh9_c).
child_c <- child %>%
  mutate(
    pd501b_c = log_or_zero(pd501b),
    pd503r_c = log_or_zero(pd503r),
    pd577r_c = log_or_zero(pd577r),
    # Recode the expected education level into years of schooling.
    # Codes not listed here (including 1) become NA.
    wh9_c = case_when(
      wh9 == 9 ~ 0,
      wh9 == 2 ~ 6,
      wh9 == 3 ~ 9,
      wh9 == 4 ~ 12, # senior high school
      wh9 == 5 ~ 15, # junior college
      wh9 == 6 ~ 16, # bachelor's degree
      wh9 == 7 ~ 19,
      wh9 == 8 ~ 23,
      TRUE ~ NA_real_
    )
  )
#' Reverse-code a five-point item (1 <-> 5, 2 <-> 4, 3 stays 3).
#'
#' @param x Numeric (or labelled numeric) vector of item codes.
#' @return A plain double vector the same length as `x`. Any value other
#'   than 1, 2, 3, 4 or 5, including `NA`, is returned as `NA`.
change <- function(x) {
  reversed <- c(5, 4, 3, 2, 1)
  # as.numeric() drops value labels; match() gives NA for unlisted codes,
  # and indexing with NA yields NA_real_.
  reversed[match(as.numeric(x), 1:5)]
}
# Reverse-code the six wf60x parental-involvement items and the two wz30x
# items with change(), then average the six reversed wf60x items.
child_change <- mutate(
  child_c,
  wf601m_c = change(wf601m),
  wf602m_c = change(wf602m),
  wf603m_c = change(wf603m),
  wf604m_c = change(wf604m),
  wf605m_c = change(wf605m),
  wf606m_c = change(wf606m),
  wz301_c = change(wz301),
  wz302_c = change(wz302),
  wf_average = (
    wf601m_c + wf602m_c + wf603m_c + wf604m_c + wf605m_c + wf606m_c
  ) / 6
)
# Preview the first rows.
head(child_change)
## # A tibble: 6 x 30
## fid16 wf601m wf602m wf603m wf604m wf605m wf606m wh9 pd501b
## <dbl+> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+>
## 1 110011 2 [经常(~ 4 [很少(~ 5 [从不]~ 5 [从不]~ 2 [经常(~ 5 [从不]~ 4 [高中]~ 3000
## 2 110020 3 [偶尔(~ 2 [经常(~ 2 [经常(~ 3 [偶尔(~ 3 [偶尔(~ 3 [偶尔(~ 6 [大学本~ 3500
## 3 110043 3 [偶尔(~ 4 [很少(~ 3 [偶尔(~ 2 [经常(~ 3 [偶尔(~ 4 [很少(~ 5 [大专]~ 3000
## 4 120032 5 [从不]~ 2 [经常(~ 5 [从不]~ 4 [很少(~ 4 [很少(~ 4 [很少(~ 6 [大学本~ 98
## 5 120072 3 [偶尔(~ 2 [经常(~ 2 [经常(~ 3 [偶尔(~ 2 [经常(~ 4 [很少(~ 4 [高中]~ 250
## 6 120095 5 [从不]~ 4 [很少(~ 1 [很经常~ 5 [从不]~ 1 [很经常~ 5 [从不]~ 6 [大学本~ 1000
## # ... with 21 more variables: pd503r <dbl+lbl>, pd577r <dbl+lbl>,
## # pa301 <dbl+lbl>, wz301 <dbl+lbl>, wz302 <dbl+lbl>, cfps_age <dbl+lbl>,
## # cfps_gender <dbl+lbl>, wd4 <dbl+lbl>, pd501b_c <dbl+lbl>,
## # pd503r_c <dbl+lbl>, pd577r_c <dbl+lbl>, wh9_c <dbl>, wf601m_c <dbl>,
## # wf602m_c <dbl>, wf603m_c <dbl>, wf604m_c <dbl>, wf605m_c <dbl>,
## # wf606m_c <dbl>, wz301_c <dbl>, wz302_c <dbl>, wf_average <dbl>
#family %>% count(tb4_a16_f)
# Parents' highest education level per famconf row, plus the code of the
# better-educated parent (max_edu).
familyconf <- cfps2016familyconf %>%
  dplyr::select(
    fid16,
    tb4_a16_m, # mother's highest education level
    tb4_a16_f # father's highest education level
  ) %>%
  filter(
    !tb4_a16_m %in% c(-8, -1, -2, -9),
    !tb4_a16_f %in% c(-8, -1, -2, -9)
  ) %>%
  mutate(
    # This script recodes level 9 to 0 years of schooling, so 9 must rank
    # below every other level. Comparing the raw codes made a parent coded
    # 9 win every comparison. max_edu still holds the original code.
    max_edu = if_else(
      if_else(tb4_a16_f == 9, 0, as.numeric(tb4_a16_f)) >
        if_else(tb4_a16_m == 9, 0, as.numeric(tb4_a16_m)),
      tb4_a16_f,
      tb4_a16_m
    )
  ) %>%
  drop_na()
#' Recode an education-level code into years of schooling.
#'
#' Mapping: 9 -> 0, 2 -> 6, 3 -> 9, 4 -> 12, 5 -> 15, 6 -> 16, 7 -> 19,
#' 8 -> 23.
#'
#' @param x Numeric (or labelled numeric) vector of education-level codes.
#' @return A plain double vector the same length as `x`. Codes outside the
#'   mapping, including `NA`, are returned as `NA`.
change_2 <- function(x) {
  # NOTE(review): code 1 is not mapped and therefore becomes NA, so callers
  # that drop_na() afterwards lose those rows. Confirm against the codebook
  # whether level 1 should be given a number of years instead.
  level_codes <- c(9, 2, 3, 4, 5, 6, 7, 8)
  school_years <- c(0, 6, 9, 12, 15, 16, 19, 23)
  # as.numeric() drops value labels; match() gives NA for unlisted codes.
  school_years[match(as.numeric(x), level_codes)]
}
# Convert both parents' education codes to years of schooling, then drop
# rows where either recode (or any other column) is missing.
family_change <- mutate(
  familyconf,
  tb4_a16_m_c = change_2(tb4_a16_m),
  tb4_a16_f_c = change_2(tb4_a16_f)
)
family_change <- drop_na(family_change)
# Number of rows per household id.
count(family_change, fid16)
## # A tibble: 9,044 x 2
## fid16 n
## <dbl> <int>
## 1 100051 1
## 2 100160 1
## 3 100376 1
## 4 100435 3
## 5 100453 3
## 6 100551 2
## 7 100724 1
## 8 100810 1
## 9 100879 1
## 10 101021 1
## # ... with 9,034 more rows
# Keep the first row for each household id, retaining all columns.
bbbb <- dplyr::distinct(family_change, fid16, .keep_all = TRUE)
# Tabulate the raw values of fincome1 in the famecon data.
count(cfps2016familycon, fincome1)
## # A tibble: 3,999 x 2
## fincome1 n
## <dbl+lbl> <int>
## 1 5 3
## 2 50 1
## 3 85 1
## 4 160 2
## 5 200 2
## 6 230 1
## 7 300 1
## 8 400 3
## 9 429 1
## 10 500 7
## # ... with 3,989 more rows
# Household income (finc) per household id, without the -8 / -1 / -2 codes
# and without missing values.
familycon <- dplyr::select(cfps2016familycon, fid16, finc)
familycon <- filter(familycon, !finc %in% c(-8, -1, -2))
familycon <- drop_na(familycon)
# Number of rows per household id.
count(familycon, fid16)
## # A tibble: 13,842 x 2
## fid16 n
## <dbl+lbl> <int>
## 1 100051 1
## 2 100160 1
## 3 100286 1
## 4 100376 1
## 5 100435 1
## 6 100453 1
## 7 100531 1
## 8 100551 1
## 9 100569 1
## 10 100724 1
## # ... with 13,832 more rows
# Keep the first income row for each household id, retaining all columns.
aaaa <- dplyr::distinct(familycon, fid16, .keep_all = TRUE)
library(visdat)
## Warning: package 'visdat' was built under R version 3.6.1
#adult %>%vis_dat()
# Plot column types and missingness for both prepared tables.
vis_dat(child)
vis_dat(family_change)
# Merge the child-level and family-level tables.
# First row of family_change for each household id.
fam <- family_change[!duplicated(family_change$fid16), ]
# Rows per household id in `fam` (each n should be 1). The original called
# group_by("fid16"), which groups by a constant string column rather than by
# the fid16 column, so it has been removed.
bbb <- fam %>% count(fid16)
# NOTE(review): family_change has several rows for some household ids and
# only the first is kept, while the join key is the household id alone.
# Confirm that the retained row describes the child's own parents; a
# person-level key may be required.
all <- child_change %>%
  left_join(bbbb, by = "fid16") %>%
  # Dropping rows with NA also removes children with no household match.
  drop_na()
## Warning: Column `fid16` has different attributes on LHS and RHS of join
#all <-child_change %>% left_join(fam,by ="fid16")%>%
# drop_na()
#all %>% is.na()
#all_1 <-all %>% left_join(aaaa,by ="fid16")%>%
# drop_na()
#all_1 %>% colnames
#ready_cor <- all_1 %>%
#dplyr::select(finc_c,max_edu,wf_average,wz302_c,wz301_c,cfps_gender,pd503r_c,wh9_c,pd501b_c,
# pd577r_c)
#cor_m <- all_1 %>%
# cor(starts_with("wf60"))
#write.csv(ready_cor,"all_5.csv")