You are going to compile a dataset that have variables: 1) id, 2) age, 3) sex_gen, 4) no. of children ever born (nkids), 5) satisfaction with work (sat1i1), 6) satisfaction with family (sat1i4).
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(Hmisc) # Weighted statistics.
library(skimr) #provide summary for multiple variables
library(splitstackshape) #transform wide data (with stacked variables) to long datawave1 <- read_dta("anchor1_50percent_Eng.dta")
wave2 <- read_dta("anchor2_50percent_Eng.dta")
wave3 <- read_dta("anchor3_50percent_Eng.dta")
wave4 <- read_dta("anchor4_50percent_Eng.dta")
wave5 <- read_dta("anchor5_50percent_Eng.dta")
wave6 <- read_dta("anchor6_50percent_Eng.dta")#check coding across 6 waves
sex_fun <- function(df) {
  table(as_factor(df$sex_gen))
        }
sapply(mget(paste0("wave", 1:6)), sex_fun)##                                   wave1 wave2 wave3 wave4 wave5 wave6
## -10 not in demodiff                   0     0     0     0     0     0
## -7 Incomplete data                    0     0     0     0     0     0
## -4 Filter error / Incorrect entry     0     0     0     0     0     0
## -3 Does not apply                     0     0     0     0     0     0
## 1 Male                             3029  2197  1905  1668  1493  1342
## 2 Female                           3172  2339  2050  1813  1626  1477#same coding for gender
child_fun <- function(df) {
  table(as_factor(df$nkids))
        }
sapply(mget(paste0("wave", 1:6)), child_fun)##                    wave1 wave2 wave3 wave4 wave5 wave6
## -7 Incomplete data     0     4     0     0     0     0
## 0                   4091  2951  2521  2120  1844  1598
## 1                    856   613   558   494   443   421
## 2                    877   665   587   572   549   529
## 3                    287   226   224   225   217   212
## 4                     62    56    47    48    42    40
## 5                     19    13    13    13    17    16
## 6                      6     5     2     6     4     1
## 7                      1     2     2     2     2     1
## 10                     2     1     1     1     1     1#same coding for no. of children
worksat_fun <- function(df) {
  table(as_factor(df$sat1i1))
        }
sapply(mget(paste0("wave", 1:6)), worksat_fun)##                                   wave1 wave2 wave3 wave4 wave5 wave6
## -5 Inconsistent value                 0     0     0     0     0     0
## -4 Filter error / Incorrect entry     0     0     0     0     0     0
## -3 Does not apply                     0     0     0     0     0     0
## -2 No answer                         13    10     9    25    17    16
## -1 Don't know                        10     4     7     3     4     2
## 0 Very dissatisfied                 103    90    58    55    46    40
## 1                                    50    39    31    28    21    25
## 2                                    89    92    63    66    54    56
## 3                                   208   127   142   102    78    87
## 4                                   219   169   114   123    93    75
## 5                                   593   382   330   247   225   192
## 6                                   494   346   325   273   249   239
## 7                                  1154   892   774   648   586   509
## 8                                  1490  1166  1043   966   885   822
## 9                                   773   622   554   480   469   415
## 10 Very satisfied                  1005   597   505   465   392   341familysat_fun <- function(df) {
  table(as_factor(df$sat1i1))
        }
sapply(mget(paste0("wave", 1:6)), familysat_fun)##                                   wave1 wave2 wave3 wave4 wave5 wave6
## -5 Inconsistent value                 0     0     0     0     0     0
## -4 Filter error / Incorrect entry     0     0     0     0     0     0
## -3 Does not apply                     0     0     0     0     0     0
## -2 No answer                         13    10     9    25    17    16
## -1 Don't know                        10     4     7     3     4     2
## 0 Very dissatisfied                 103    90    58    55    46    40
## 1                                    50    39    31    28    21    25
## 2                                    89    92    63    66    54    56
## 3                                   208   127   142   102    78    87
## 4                                   219   169   114   123    93    75
## 5                                   593   382   330   247   225   192
## 6                                   494   346   325   273   249   239
## 7                                  1154   892   774   648   586   509
## 8                                  1490  1166  1043   966   885   822
## 9                                   773   622   554   480   469   415
## 10 Very satisfied                  1005   597   505   465   392   341#coding of all variables across years are the sameclean_fun <- function(df) {
df %>% 
  transmute(
    id, 
    age, 
    wave=as.numeric(wave),
    sex=as_factor(sex_gen), #make sex_gen as a factor
    kidno=case_when(nkids<0 ~ as.numeric(NA),  #specify when hlt1 is missing 
                   TRUE ~ as.numeric(nkids)),
    worksat=case_when(sat1i1<0 ~ as.numeric(NA), #specify when sat1i1 is missing
                   TRUE ~ as.numeric(sat1i1)),
    famsat=case_when(sat1i1<0 ~ as.numeric(NA), #specify when sat1i4 is missing
                   TRUE ~ as.numeric(sat1i4))
                   ) %>% drop_na()
}
wave1a <- clean_fun(wave1)
wave2a <- clean_fun(wave2)
wave3a <- clean_fun(wave3)
wave4a <- clean_fun(wave4)
wave5a <- clean_fun(wave5)
wave6a <- clean_fun(wave6)allwave <- rbind(wave1a, wave2a, wave3a, wave4a, wave5a, wave6a) #rbind is combine dataset by rowsallwave_towide <- pivot_wider(
  allwave, #dataset to transform
  id_cols = id, #to identify the id column, in this case, "id" is the column
  names_from = wave, #i want use "wave" to create columns that reflect variables in each wave
  values_from = c(age, sex, kidno, worksat, famsat) #specify where the value comes from
)allwave_tolong<- merged.stack(allwave_towide, #dataset for transfrom
                            var.stubs = c("age", "sex","kidno", "worksat", "famsat"), 
#var.stubs is to specify the prefixes of the variable groups
                            sep = "_")   
#sep is to specify the character that separates the "variable name" from the "times" in the source
#use head() to have a look at the first several observations
head(allwave_tolong, 20)##         id .time_1 age      sex kidno worksat famsat
##  1: 174000       1  25   1 Male     0       7      7
##  2: 174000       2  NA     <NA>    NA      NA     NA
##  3: 174000       3  27   1 Male     0       8      7
##  4: 174000       4  NA     <NA>    NA      NA     NA
##  5: 174000       5  NA     <NA>    NA      NA     NA
##  6: 174000       6  NA     <NA>    NA      NA     NA
##  7: 309000       1  27 2 Female     1       0     10
##  8: 309000       2  NA     <NA>    NA      NA     NA
##  9: 309000       3  NA     <NA>    NA      NA     NA
## 10: 309000       4  NA     <NA>    NA      NA     NA
## 11: 309000       5  NA     <NA>    NA      NA     NA
## 12: 309000       6  NA     <NA>    NA      NA     NA
## 13: 423000       1  16   1 Male     0       8     10
## 14: 423000       2  17   1 Male     0       8      9
## 15: 423000       3  18   1 Male     0       9      9
## 16: 423000       4  19   1 Male     0       7     10
## 17: 423000       5  20   1 Male     0       7     10
## 18: 423000       6  22   1 Male     0       9     10
## 19: 828000       1  16   1 Male     0       9      9
## 20: 828000       2  17   1 Male     0       8      9allwave_tolong_v1 <- allwave_tolong %>% drop_na(sex)
#drop the observations which are missing in some waves
#Why i use drop_na(sex): sex has no missing values across all waves (step1), if it is missing, then it means the person does not participate in that wave
#we have 23983 person-year observationswave1b <- wave1a %>% 
rename(age.1=age, wave.1=wave,  sex.1=sex, kidno.1=kidno, worksat.1=worksat, famsat.1=famsat) 
#rename variables, with variable name.1 to specify it is from wave1
wave2b <- wave2a %>% 
rename(age.2=age, wave.2=wave,  sex.2=sex, kidno.2=kidno, worksat.2=worksat, famsat.2=famsat)
#rename variables, with variable name.2 to specify it is from wave2
wave3b <- wave3a %>% 
rename(age.3=age, wave.3=wave,  sex.3=sex, kidno.3=kidno, worksat.3=worksat, famsat.3=famsat)
wave4b <- wave4a %>% 
rename(age.4=age, wave.4=wave,  sex.4=sex, kidno.4=kidno, worksat.4=worksat, famsat.4=famsat)
wave5b <- wave5a %>% 
rename(age.5=age, wave.5=wave,  sex.5=sex, kidno.5=kidno, worksat.5=worksat, famsat.5=famsat)
wave6b <- wave6a %>% 
rename(age.6=age, wave.6=wave,  sex.6=sex, kidno.6=kidno, worksat.6=worksat, famsat.6=famsat)
joindata <- inner_join(wave1b, wave2b, by = "id") %>%  # inner join wave1b and wave2b
  inner_join(wave3b, by = "id") %>% # inner join with wave3b
  inner_join(wave4b, by = "id") %>% # inner join with wave4b
  inner_join(wave5b, by = "id") %>% # inner join with wave5b
  inner_join(wave6b, by = "id") # inner join with wave6bjoindata_tolong<- merged.stack(joindata, #dataset for transfrom
                            var.stubs = c("age", "wave", "sex","kidno", "worksat", "famsat"), 
#var.stubs is to specify the prefixes of the variable groups
                            sep = ".")   table(joindata_tolong$wave)## 
##    1    2    3    4    5    6 
## 2385 2385 2385 2385 2385 2385