# Build `dp`, a synthetic data frame of `n` rows whose numeric columns are
# drawn uniformly from the observed ranges of the matching columns of `d`.
# `d` is defined outside this section. The order of the random draws is kept
# stable so that a fixed seed keeps giving the same values for the
# unaffected columns.
cn <- colnames(d)
n <- 10
id <- seq_len(n)
gender <- sample(c("m", "f"), n, replace = TRUE)

# Birth dates are drawn on the numeric scale and converted back to Date when
# the frame is assembled. Assumes `d$bdate` is of class Date -- TODO confirm.
bdate <- runif(n, min(d$bdate), max(d$bdate))

# Index into the candidate values instead of calling sample() on them
# directly: sample(x, ...) treats a single number x as 1:x, which would give
# wrong values if every `d$educ` entry were identical.
educ_values <- min(d$educ):max(d$educ)
educ <- educ_values[sample.int(length(educ_values), n, replace = TRUE)]

jobcat <- sample(1:3, n, replace = TRUE)

# Continuous draws are rounded up to whole units.
salbegin <- ceiling(runif(n, min(d$salbegin), max(d$salbegin)))

# Use the range of `prevexp` itself; it was previously drawn from the
# `jobtime` range, which looks like a copy-paste slip.
# NOTE(review): assumes `d` has a `prevexp` column -- confirm.
prevexp <- ceiling(runif(n, min(d$prevexp), max(d$prevexp)))

jobtime <- ceiling(runif(n, min(d$jobtime), max(d$jobtime)))
minority <- sample(c(0, 1), n, replace = TRUE)

dp <- data.frame(
  id,
  gender,
  # An explicit origin keeps the numeric -> Date conversion working on
  # R versions older than 4.3, where `origin` has no default.
  bdate = as.Date(bdate, origin = "1970-01-01"),
  educ,
  jobcat,
  salbegin,
  jobtime,
  prevexp,
  minority
)
dp
# Derive `dp1` from `dp`: add `age` and a numeric gender indicator `gn`
# (1 for "m", 0 for "f"), then drop the identifier and the raw source columns.
dp1 <- dp |>
  mutate(
    # m1 = f1(bdate, 30),
    # `f2` is defined outside this section; presumably it turns a birth date
    # into an age -- confirm against its definition.
    age = f2(bdate),
    gn = recode(gender, "m" = 1, "f" = 0)
  ) |>
  # Drop by name rather than by position (previously columns 1, 2 and 3) so
  # that a change in column order cannot silently remove the wrong variables.
  select(-c(id, gender, bdate))

# Commented-out steps that operate on `d1` (left disabled):
# d1$bdate = f1(d1$bdate, 30)
# write.csv(d1, "mycleaned.csv")
# d1$age = f2(d1$bdate)
# d1$gn = recode(d1$gender, "m" = 1, "f" = 0)
head(dp1)
# Example console output for dp1 (str()-style listing) from one run; the
# values differ between runs because the data are randomly generated:
# 'data.frame': 10 obs. of 8 variables:
# $ educ : int 11 21 20 12 21 11 9 8 17 17
# $ jobcat : int 1 3 2 3 2 2 3 1 2 3
# $ salbegin: num 27442 46762 31907 19337 74095 ...
# $ jobtime : num 96 72 84 68 74 67 86 90 73 97
# $ prevexp : num 88 88 90 66 67 92 70 67 93 93
# $ minority: num 0 1 1 0 0 1 1 1 1 1
# $ age : num 78 80 54 83 61 68 70 55 81 91
# $ gn : num 1 0 0 0 1 0 0 0 1 0