2 DATA PREPARATION

2.1 Creating and checking the panel data format

2.1.1 Integrating panel_data() with dplyr

# Load the WageData example dataset and print its first 20 rows as a table.
data("WageData")
WageData %>%
  head(20) %>%
  print_table()
## ──────────────────────────────────────────────────────────────────────────────────────────
##        exp    wks   occ   ind south  smsa    ms   fem union     ed   blk lwage     t    id
## ──────────────────────────────────────────────────────────────────────────────────────────
## 1    3.000 32.000 0.000 0.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 5.561 1.000 1.000
## 2    4.000 43.000 0.000 0.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 5.720 2.000 1.000
## 3    5.000 40.000 0.000 0.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 5.996 3.000 1.000
## 4    6.000 39.000 0.000 0.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 5.996 4.000 1.000
## 5    7.000 42.000 0.000 1.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 6.061 5.000 1.000
## 6    8.000 35.000 0.000 1.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 6.174 6.000 1.000
## 7    9.000 32.000 0.000 1.000 1.000 0.000 1.000 0.000 0.000  9.000 0.000 6.244 7.000 1.000
## 8   30.000 34.000 1.000 0.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.163 1.000 2.000
## 9   31.000 27.000 1.000 0.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.215 2.000 2.000
## 10  32.000 33.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 11.000 0.000 6.263 3.000 2.000
## 11  33.000 30.000 1.000 1.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.544 4.000 2.000
## 12  34.000 30.000 1.000 1.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.697 5.000 2.000
## 13  35.000 37.000 1.000 1.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.791 6.000 2.000
## 14  36.000 30.000 1.000 1.000 0.000 0.000 1.000 0.000 0.000 11.000 0.000 6.816 7.000 2.000
## 15   6.000 50.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 12.000 0.000 5.652 1.000 3.000
## 16   7.000 51.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 12.000 0.000 6.436 2.000 3.000
## 17   8.000 50.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 12.000 0.000 6.548 3.000 3.000
## 18   9.000 52.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 12.000 0.000 6.603 4.000 3.000
## 19  10.000 52.000 1.000 1.000 0.000 0.000 1.000 0.000 1.000 12.000 0.000 6.696 5.000 3.000
## 20  11.000 52.000 1.000 1.000 0.000 0.000 0.000 0.000 1.000 12.000 0.000 6.779 6.000 3.000
## ──────────────────────────────────────────────────────────────────────────────────────────
# Create a `panel_data` object and derive the wage variables.
#
# mutate() and lag() are namespace-qualified on purpose: if another attached
# package masks them (for example plyr::mutate() or stats::lag()), the
# per-entity grouping of the panel is ignored. The "individual" mean then
# becomes a grand mean and the "lagged" wage equals the current wage.
# NOTE(review): output rendered before this change (constant
# mean_wage_individual, lag_wage identical to wage) should be re-knit.
wages <- panel_data(WageData, id = id, wave = t) %>%
  # Grouped mutate: statistics are calculated within each entity (id)
  dplyr::mutate(
    wage = exp(lwage), # reverse transform the log wage variable
    mean_wage_individual = mean(wage), # mean calculated separately by entity
    lag_wage = dplyr::lag(wage) # previous wave's wage; NA in an entity's first wave
  ) %>%
  # Use panelr's complete_data() to keep only entities with enough
  # observations: drop an entity if it has fewer than 5 complete waves of
  # wage and union
  complete_data(wage, union, min.waves = 5) %>%
  # unpanel() drops the panel structure, so the next mutate() works on all
  # rows at once instead of entity by entity
  unpanel() %>%
  dplyr::mutate(
    mean_wage_grand = mean(wage) # one grand mean across all rows
  ) %>%
  # Convert back to panel_data to keep using panelr functions
  panel_data(id = id, wave = t)
Describe(wages)
## Descriptive Statistics:
## ───────────────────────────────────────────────────────────────────────────────────
##                           N   Mean     SD | Median    Min     Max Skewness Kurtosis
## ───────────────────────────────────────────────────────────────────────────────────
## id**                   4165 298.00 171.78 | 298.00   1.00  595.00     0.00    -1.20
## t*                     4165   4.00   2.00 |   4.00   1.00    7.00     0.00    -1.25
## exp*                   4165  19.85  10.97 |  18.00   1.00   51.00     0.40    -0.93
## wks*                   4165  46.81   5.13 |  48.00   5.00   52.00    -2.89    11.91
## occ*                   4165   0.51   0.50 |   1.00   0.00    1.00    -0.04    -2.00
## ind*                   4165   0.40   0.49 |   0.00   0.00    1.00     0.43    -1.82
## south*                 4165   0.29   0.45 |   0.00   0.00    1.00     0.92    -1.15
## smsa*                  4165   0.65   0.48 |   1.00   0.00    1.00    -0.65    -1.58
## ms*                    4165   0.81   0.39 |   1.00   0.00    1.00    -1.62     0.61
## fem*                   4165   0.11   0.32 |   0.00   0.00    1.00     2.45     4.00
## union*                 4165   0.36   0.48 |   0.00   0.00    1.00     0.57    -1.68
## ed*                    4165  12.85   2.79 |  12.00   4.00   17.00    -0.26    -0.29
## blk*                   4165   0.07   0.26 |   0.00   0.00    1.00     3.30     8.91
## lwage*                 4165   6.68   0.46 |   6.68   4.61    8.54    -0.04     0.52
## wage*                  4165 882.93 442.81 | 800.00 100.00 5100.00     2.13     9.24
## mean_wage_individual*  4165 882.90   0.00 | 882.90 882.90  882.90      NaN      NaN
## lag_wage*              4165 882.93 442.81 | 800.00 100.00 5100.00     2.13     9.24
## mean_wage_grand*       4165 882.90   0.00 | 882.90 882.90  882.90      NaN      NaN
## ───────────────────────────────────────────────────────────────────────────────────
## 
## NOTE: `id` transformed to numeric.
# Select the last four columns by name; the printed table below also shows
# the id and wave columns alongside them.
last_four_variables <- wages[, tail(names(wages), n = 4)]
last_four_variables %>%
  head(20) %>%
  print_table()
## ──────────────────────────────────────────────────────────────────
##     id     t    wage mean_wage_individual lag_wage mean_wage_grand
## ──────────────────────────────────────────────────────────────────
## 1    1 1.000 260.000              882.931  260.000         882.931
## 2    1 2.000 305.000              882.931  305.000         882.931
## 3    1 3.000 401.999              882.931  401.999         882.931
## 4    1 4.000 401.999              882.931  401.999         882.931
## 5    1 5.000 429.001              882.931  429.001         882.931
## 6    1 6.000 480.002              882.931  480.002         882.931
## 7    1 7.000 515.002              882.931  515.002         882.931
## 8    2 1.000 474.998              882.931  474.998         882.931
## 9    2 2.000 500.001              882.931  500.001         882.931
## 10   2 3.000 525.001              882.931  525.001         882.931
## 11   2 4.000 694.999              882.931  694.999         882.931
## 12   2 5.000 809.997              882.931  809.997         882.931
## 13   2 6.000 889.999              882.931  889.999         882.931
## 14   2 7.000 912.000              882.931  912.000         882.931
## 15   3 1.000 285.000              882.931  285.000         882.931
## 16   3 2.000 624.000              882.931  624.000         882.931
## 17   3 3.000 698.001              882.931  698.001         882.931
## 18   3 4.000 737.002              882.931  737.002         882.931
## 19   3 5.000 809.001              882.931  809.001         882.931
## 20   3 6.000 878.996              882.931  878.996         882.931
## ──────────────────────────────────────────────────────────────────

2.1.2 complete_data(): Filter out entities with too few observations

# Optional: rebuild the panel from the raw data first.
# wages <- panel_data(WageData, id = id, wave = t)
# Keep entities that have complete wks and lwage in at least 3 waves.
Cdata <- complete_data(wages, wks, lwage, min.waves = 3)
Cdata[, 1:5] %>%
  head(20) %>%
  print_table()
## ────────────────────────────────
##     id     t    exp    wks   occ
## ────────────────────────────────
## 1    1 1.000  3.000 32.000 0.000
## 2    1 2.000  4.000 43.000 0.000
## 3    1 3.000  5.000 40.000 0.000
## 4    1 4.000  6.000 39.000 0.000
## 5    1 5.000  7.000 42.000 0.000
## 6    1 6.000  8.000 35.000 0.000
## 7    1 7.000  9.000 32.000 0.000
## 8    2 1.000 30.000 34.000 1.000
## 9    2 2.000 31.000 27.000 1.000
## 10   2 3.000 32.000 33.000 1.000
## 11   2 4.000 33.000 30.000 1.000
## 12   2 5.000 34.000 30.000 1.000
## 13   2 6.000 35.000 37.000 1.000
## 14   2 7.000 36.000 30.000 1.000
## 15   3 1.000  6.000 50.000 1.000
## 16   3 2.000  7.000 51.000 1.000
## 17   3 3.000  8.000 50.000 1.000
## 18   3 4.000  9.000 52.000 1.000
## 19   3 5.000 10.000 52.000 1.000
## 20   3 6.000 11.000 52.000 1.000
## ────────────────────────────────

2.1.3 model_frame(): Build a data frame from an lm()-style formula

# Build the data frame implied by an lm()-style formula. lag(union) is empty
# in each entity's first wave, as the table below shows.
Mdf <- model_frame(lwage ~ lag(union) + wks * blk, data = wages)
Mdf %>%
  head(20) %>%
  print_table()
## ──────────────────────────────────────────
##     id     t lwage lag(union)    wks   blk
## ──────────────────────────────────────────
## 1    1 1.000 5.561            32.000 0.000
## 2    1 2.000 5.720      0.000 43.000 0.000
## 3    1 3.000 5.996      0.000 40.000 0.000
## 4    1 4.000 5.996      0.000 39.000 0.000
## 5    1 5.000 6.061      0.000 42.000 0.000
## 6    1 6.000 6.174      0.000 35.000 0.000
## 7    1 7.000 6.244      0.000 32.000 0.000
## 8    2 1.000 6.163            34.000 0.000
## 9    2 2.000 6.215      0.000 27.000 0.000
## 10   2 3.000 6.263      0.000 33.000 0.000
## 11   2 4.000 6.544      1.000 30.000 0.000
## 12   2 5.000 6.697      0.000 30.000 0.000
## 13   2 6.000 6.791      0.000 37.000 0.000
## 14   2 7.000 6.816      0.000 30.000 0.000
## 15   3 1.000 5.652            50.000 0.000
## 16   3 2.000 6.436      1.000 51.000 0.000
## 17   3 3.000 6.548      1.000 50.000 0.000
## 18   3 4.000 6.603      1.000 52.000 0.000
## 19   3 5.000 6.696      1.000 52.000 0.000
## 20   3 6.000 6.779      1.000 52.000 0.000
## ──────────────────────────────────────────
# Descriptive statistics of the model frame; the (NA) column reports the
# missing values introduced by lag(union).
Describe(Mdf)
## Descriptive Statistics:
## ───────────────────────────────────────────────────────────────────────────
##                 N (NA)   Mean     SD | Median  Min    Max Skewness Kurtosis
## ───────────────────────────────────────────────────────────────────────────
## id**         4165      298.00 171.78 | 298.00 1.00 595.00     0.00    -1.20
## t*           4165        4.00   2.00 |   4.00 1.00   7.00     0.00    -1.25
## lwage*       4165        6.68   0.46 |   6.68 4.61   8.54    -0.04     0.52
## lag(union)*  3570  595   0.36   0.48 |   0.00 0.00   1.00     0.57    -1.68
## wks*         4165       46.81   5.13 |  48.00 5.00  52.00    -2.89    11.91
## blk*         4165        0.07   0.26 |   0.00 0.00   1.00     3.30     8.91
## ───────────────────────────────────────────────────────────────────────────
## 
## NOTE: `id` transformed to numeric.
# Descriptive statistics of Cdata, the complete_data() result from 2.1.2.
Describe(Cdata)
## Descriptive Statistics:
## ───────────────────────────────────────────────────────────────────────────────────
##                           N   Mean     SD | Median    Min     Max Skewness Kurtosis
## ───────────────────────────────────────────────────────────────────────────────────
## id**                   4165 298.00 171.78 | 298.00   1.00  595.00     0.00    -1.20
## t*                     4165   4.00   2.00 |   4.00   1.00    7.00     0.00    -1.25
## exp*                   4165  19.85  10.97 |  18.00   1.00   51.00     0.40    -0.93
## wks*                   4165  46.81   5.13 |  48.00   5.00   52.00    -2.89    11.91
## occ*                   4165   0.51   0.50 |   1.00   0.00    1.00    -0.04    -2.00
## ind*                   4165   0.40   0.49 |   0.00   0.00    1.00     0.43    -1.82
## south*                 4165   0.29   0.45 |   0.00   0.00    1.00     0.92    -1.15
## smsa*                  4165   0.65   0.48 |   1.00   0.00    1.00    -0.65    -1.58
## ms*                    4165   0.81   0.39 |   1.00   0.00    1.00    -1.62     0.61
## fem*                   4165   0.11   0.32 |   0.00   0.00    1.00     2.45     4.00
## union*                 4165   0.36   0.48 |   0.00   0.00    1.00     0.57    -1.68
## ed*                    4165  12.85   2.79 |  12.00   4.00   17.00    -0.26    -0.29
## blk*                   4165   0.07   0.26 |   0.00   0.00    1.00     3.30     8.91
## lwage*                 4165   6.68   0.46 |   6.68   4.61    8.54    -0.04     0.52
## wage*                  4165 882.93 442.81 | 800.00 100.00 5100.00     2.13     9.24
## mean_wage_individual*  4165 882.90   0.00 | 882.90 882.90  882.90      NaN      NaN
## lag_wage*              4165 882.93 442.81 | 800.00 100.00 5100.00     2.13     9.24
## mean_wage_grand*       4165 882.90   0.00 | 882.90 882.90  882.90      NaN      NaN
## ───────────────────────────────────────────────────────────────────────────────────
## 
## NOTE: `id` transformed to numeric.

2.1.4 make_wb_data(): Prepare data for within-between modeling

# Prepare within-between data from a three-part formula:
# time-varying terms | time-invariant terms | cross-level interactions and
# random effects.
wb.data <- make_wb_data(
  lwage ~ lag(union) + wks | blk | blk * wks + (lag(union) | id),
  data = wages
)
wb.data %>%
  head(20) %>%
  print_table()
## ──────────────────────────────────────────────────────────────────────────────────────────────
##     id     t lwage lag(union)    wks   blk imean(lag(union)) imean(wks) imean(wks:blk) wks:blk
## ──────────────────────────────────────────────────────────────────────────────────────────────
## 1    1 2.000 5.720      0.000  5.429 0.000             0.000     37.571          0.000   0.000
## 2    1 3.000 5.996      0.000  2.429 0.000             0.000     37.571          0.000   0.000
## 3    1 4.000 5.996      0.000  1.429 0.000             0.000     37.571          0.000   0.000
## 4    1 5.000 6.061      0.000  4.429 0.000             0.000     37.571          0.000   0.000
## 5    1 6.000 6.174      0.000 -2.571 0.000             0.000     37.571          0.000  -0.000
## 6    1 7.000 6.244      0.000 -5.571 0.000             0.000     37.571          0.000  -0.000
## 7    2 2.000 6.215     -0.167 -4.571 0.000             0.167     31.571          0.000  -0.000
## 8    2 3.000 6.263     -0.167  1.429 0.000             0.167     31.571          0.000   0.000
## 9    2 4.000 6.544      0.833 -1.571 0.000             0.167     31.571          0.000  -0.000
## 10   2 5.000 6.697     -0.167 -1.571 0.000             0.167     31.571          0.000  -0.000
## 11   2 6.000 6.791     -0.167  5.429 0.000             0.167     31.571          0.000   0.000
## 12   2 7.000 6.816     -0.167 -1.571 0.000             0.167     31.571          0.000  -0.000
## 13   3 2.000 6.436      0.000  0.571 0.000             1.000     50.429          0.000   0.000
## 14   3 3.000 6.548      0.000 -0.429 0.000             1.000     50.429          0.000  -0.000
## 15   3 4.000 6.603      0.000  1.571 0.000             1.000     50.429          0.000   0.000
## 16   3 5.000 6.696      0.000  1.571 0.000             1.000     50.429          0.000   0.000
## 17   3 6.000 6.779      0.000  1.571 0.000             1.000     50.429          0.000   0.000
## 18   3 7.000 6.861      0.000 -4.429 0.000             1.000     50.429          0.000  -0.000
## 19   4 2.000 6.238      0.000 -1.857 1.000             0.000     47.857          0.000  -1.857
## 20   4 3.000 6.301      0.000 -1.857 1.000             0.000     47.857          0.000  -1.857
## ──────────────────────────────────────────────────────────────────────────────────────────────

2.1.5 make_diff_data(): Generate differenced and asymmetric effects data

# Build the differenced data for the formula's variables; the table below
# starts at wave 2 for every entity.
dif.data <- make_diff_data(wks ~ lwage + union, data = wages)
dif.data %>%
  head(20) %>%
  print_table()
## ────────────────────────────────
##     id     t    wks lwage  union
## ────────────────────────────────
## 1    1 2.000 11.000 0.160  0.000
## 2    1 3.000 -3.000 0.276  0.000
## 3    1 4.000 -1.000 0.000  0.000
## 4    1 5.000  3.000 0.065  0.000
## 5    1 6.000 -7.000 0.112  0.000
## 6    1 7.000 -3.000 0.070  0.000
## 7    2 2.000 -7.000 0.051  0.000
## 8    2 3.000  6.000 0.049  1.000
## 9    2 4.000 -3.000 0.281 -1.000
## 10   2 5.000  0.000 0.153  0.000
## 11   2 6.000  7.000 0.094  0.000
## 12   2 7.000 -7.000 0.024  0.000
## 13   3 2.000  1.000 0.784  0.000
## 14   3 3.000 -1.000 0.112  0.000
## 15   3 4.000  2.000 0.054  0.000
## 16   3 5.000  0.000 0.093  0.000
## 17   3 6.000  0.000 0.083  0.000
## 18   3 7.000 -6.000 0.082  0.000
## 19   4 2.000 -6.000 0.081  0.000
## 20   4 3.000  0.000 0.062  0.000
## ────────────────────────────────

2.1.6 unpanel()

# Build a fresh panel from the raw data, then strip the panel structure again.
Nwages <- WageData %>% panel_data(id = id, wave = t)
wages_non_panel <- Nwages %>% unpanel()

2.2 Converting between wide and long formats

2.2.1 widen_panel()

# Long-format preview of the first three columns (id, t, exp).
wages[, 1:3] %>%
  head(20) %>%
  print_table()
## ───────────────────
##     id     t    exp
## ───────────────────
## 1    1 1.000  3.000
## 2    1 2.000  4.000
## 3    1 3.000  5.000
## 4    1 4.000  6.000
## 5    1 5.000  7.000
## 6    1 6.000  8.000
## 7    1 7.000  9.000
## 8    2 1.000 30.000
## 9    2 2.000 31.000
## 10   2 3.000 32.000
## 11   2 4.000 33.000
## 12   2 5.000 34.000
## 13   2 6.000 35.000
## 14   2 7.000 36.000
## 15   3 1.000  6.000
## 16   3 2.000  7.000
## 17   3 3.000  8.000
## 18   3 4.000  9.000
## 19   3 5.000 10.000
## 20   3 6.000 11.000
## ───────────────────
# Reshape to wide format: one row per id, one exp_<wave> column per wave.
wide_wages <- wages[, 1:3] %>% widen_panel()
wide_wages %>%
  head(20) %>%
  print_table()
## ───────────────────────────────────────────────────────
##     id  exp_1  exp_2  exp_3  exp_4  exp_5  exp_6  exp_7
## ───────────────────────────────────────────────────────
## 1   1   3.000  4.000  5.000  6.000  7.000  8.000  9.000
## 2   2  30.000 31.000 32.000 33.000 34.000 35.000 36.000
## 3   3   6.000  7.000  8.000  9.000 10.000 11.000 12.000
## 4   4  31.000 32.000 33.000 34.000 35.000 36.000 37.000
## 5   5  10.000 11.000 12.000 13.000 14.000 15.000 16.000
## 6   6  26.000 27.000 28.000 29.000 30.000 31.000 32.000
## 7   7  15.000 16.000 17.000 18.000 19.000 20.000 21.000
## 8   8  23.000 24.000 25.000 26.000 27.000 28.000 29.000
## 9   9   3.000  4.000  5.000  6.000  7.000  8.000  9.000
## 10  10  3.000  4.000  5.000  6.000  7.000  8.000  9.000
## 11  11 24.000 25.000 26.000 27.000 28.000 29.000 30.000
## 12  12 21.000 22.000 23.000 24.000 25.000 26.000 27.000
## 13  13 26.000 27.000 28.000 29.000 30.000 31.000 32.000
## 14  14 15.000 16.000 17.000 18.000 19.000 20.000 21.000
## 15  15  9.000 10.000 11.000 12.000 13.000 14.000 15.000
## 16  16 16.000 17.000 18.000 19.000 20.000 21.000 22.000
## 17  17 16.000 17.000 18.000 19.000 20.000 21.000 22.000
## 18  18 25.000 26.000 27.000 28.000 29.000 30.000 31.000
## 19  19 40.000 41.000 42.000 43.000 44.000 45.000 46.000
## 20  20 25.000 26.000 27.000 28.000 29.000 30.000 31.000
## ───────────────────────────────────────────────────────

2.2.2 long_panel()

# Reshape back to long format. The wave label sits at the end of each wide
# column name, separated by "_" (exp_1 ... exp_7).
long_wages <- long_panel(
  wide_wages,
  prefix = "_",
  begin = 1,
  end = 7,
  id = "id",
  label_location = "end"
)
long_wages %>%
  head(20) %>%
  print_table()
## ───────────────────
##     id  wave    exp
## ───────────────────
## 1    1 1.000  3.000
## 2    1 2.000  4.000
## 3    1 3.000  5.000
## 4    1 4.000  6.000
## 5    1 5.000  7.000
## 6    1 6.000  8.000
## 7    1 7.000  9.000
## 8    2 1.000 30.000
## 9    2 2.000 31.000
## 10   2 3.000 32.000
## 11   2 4.000 33.000
## 12   2 5.000 34.000
## 13   2 6.000 35.000
## 14   2 7.000 36.000
## 15   3 1.000  6.000
## 16   3 2.000  7.000
## 17   3 3.000  8.000
## 18   3 4.000  9.000
## 19   3 5.000 10.000
## 20   3 6.000 11.000
## ───────────────────

2.3 Checking data

2.3.1 summary(): Similar to Describe()

# Per-wave summary of the selected variables; the skim_* columns in the
# output below come from skimr.
library(skimr)
wages %>%
  summary(lwage, exp, wks) %>%
  print_table()
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##     skim_type skim_variable     t n_missing complete_rate numeric.mean numeric.sd numeric.p0 numeric.p25 numeric.p50 numeric.p75 numeric.p100 numeric.hist
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## 1     numeric         lwage 1.000     0.000         1.000        6.375      0.388      5.011       6.116       6.425       6.653        6.906        ▁▂▃▇▇
## 2     numeric         lwage 2.000     0.000         1.000        6.465      0.363      5.011       6.235       6.534       6.753        6.906        ▁▁▂▅▇
## 3     numeric         lwage 3.000     0.000         1.000        6.597      0.447      4.605       6.333       6.613       6.856        8.269        ▁▂▇▃▁
## 4     numeric         lwage 4.000     0.000         1.000        6.696      0.441      5.081       6.436       6.715       6.957        8.517        ▁▃▇▂▁
## 5     numeric         lwage 5.000     0.000         1.000        6.786      0.424      5.273       6.515       6.805       7.038        8.102        ▁▂▇▅▁
## 6     numeric         lwage 6.000     0.000         1.000        6.864      0.424      5.659       6.597       6.908       7.110        8.161        ▁▃▇▃▁
## 7     numeric         lwage 7.000     0.000         1.000        6.951      0.438      5.677       6.685       6.985       7.208        8.537        ▁▅▇▂▁
## 8     numeric         exp   1.000     0.000         1.000       16.854     10.790      1.000       7.000      15.000      26.000       45.000        ▇▇▅▅▁
## 9     numeric         exp   2.000     0.000         1.000       17.854     10.790      2.000       8.000      16.000      27.000       46.000        ▇▇▅▅▁
## 10    numeric         exp   3.000     0.000         1.000       18.854     10.790      3.000       9.000      17.000      28.000       47.000        ▇▇▅▅▁
## 11    numeric         exp   4.000     0.000         1.000       19.854     10.790      4.000      10.000      18.000      29.000       48.000        ▇▇▅▅▁
## 12    numeric         exp   5.000     0.000         1.000       20.854     10.790      5.000      11.000      19.000      30.000       49.000        ▇▇▅▅▁
## 13    numeric         exp   6.000     0.000         1.000       21.854     10.790      6.000      12.000      20.000      31.000       50.000        ▇▇▅▅▁
## 14    numeric         exp   7.000     0.000         1.000       22.854     10.790      7.000      13.000      21.000      32.000       51.000        ▇▇▅▅▁
## 15    numeric         wks   1.000     0.000         1.000       46.281      6.253      6.000      46.000      48.000      50.000       52.000        ▁▁▁▁▇
## 16    numeric         wks   2.000     0.000         1.000       47.020      5.135     11.000      47.000      49.000      50.000       52.000        ▁▁▁▁▇
## 17    numeric         wks   3.000     0.000         1.000       47.045      4.767     20.000      47.000      49.000      50.000       52.000        ▁▁▁▁▇
## 18    numeric         wks   4.000     0.000         1.000       47.192      4.463      8.000      47.000      48.000      50.000       52.000        ▁▁▁▁▇
## 19    numeric         wks   5.000     0.000         1.000       46.961      4.890      6.000      47.000      48.000      50.000       52.000        ▁▁▁▁▇
## 20    numeric         wks   6.000     0.000         1.000       46.729      4.982      6.000      46.000      48.000      50.000       52.000        ▁▁▁▁▇
## 21    numeric         wks   7.000     0.000         1.000       46.452      5.185      5.000      46.000      48.000      49.000       52.000        ▁▁▁▁▇
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

2.3.2 are_varying(): Check whether variables are constant or vary over time

# TRUE means the variable changes over time within at least one entity.
are_varying(wages, occ, ind, fem, blk)
##   occ   ind   fem   blk 
##  TRUE  TRUE FALSE FALSE

2.3.3 is_panel()

# is_panel() reports whether an object is a panel_data object.
is_panel(wages) 
## [1] TRUE
# NOTE(review): Nwages is also a panel_data object, so this only repeats the
# TRUE case. wages_non_panel (created with unpanel() in 2.1.6) is presumably
# the object meant to show the FALSE case — confirm the intent.
is_panel(Nwages)
## [1] TRUE

2.3.4 get cluster information

# Name of the wave (time) column
get_wave(wages)
## [1] "t"
# Name of the entity id column
get_id(wages)
## [1] "id"
# The wave values present in the panel
get_periods(wages)
## [1] 1 2 3 4 5 6 7

3 ANALYSIS

3.1 wbm(): CEM using MLM

使用由三部分组成的模型语法,如下所示:

dv ~ varying_variables | invariant_variables | cross_level_interactions/random effects

3.1.1 lag()

该函数还支持用 lag() 构造滞后变量。与基础 R 不同,panelr 会在每个个体内部正确地计算滞后:第 1 期观测的滞后值为 NA,而不会取上一个个体最后一期的值。

# Within-between model with a random slope for lag(union) across entities.
model <- wbm(
  lwage ~ lag(union) + wks | blk | blk * wks + (lag(union) | id),
  data = wages
)
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 2-7
## Dependent variable: lwage
## Model type: Linear mixed effects
## Specification: within-between
## 
## MODEL FIT:
## AIC = 1427.04, BIC = 1495.03
## Pseudo-R² (fixed effects) = 0.05
## Pseudo-R² (total) = 0.75
## Entity ICC = 0.73
## 
## WITHIN EFFECTS:
## ---------------------------------------------------------
##                     Est.   S.E.   t val.      d.f.      p
## ---------------- ------- ------ -------- --------- ------
## lag(union)          0.04   0.04     1.24     88.16   0.22
## wks                -0.00   0.00    -1.51   2948.04   0.13
## ---------------------------------------------------------
## 
## BETWEEN EFFECTS:
## ---------------------------------------------------------------
##                            Est.   S.E.   t val.     d.f.      p
## ----------------------- ------- ------ -------- -------- ------
## (Intercept)                6.20   0.24    25.89   571.97   0.00
## imean(lag(union))          0.03   0.04     0.72   593.25   0.47
## imean(wks)                 0.01   0.01     2.30   571.28   0.02
## blk                       -0.35   0.06    -5.65   591.86   0.00
## ---------------------------------------------------------------
## 
## CROSS-LEVEL INTERACTIONS:
## ------------------------------------------------------
##                  Est.   S.E.   t val.      d.f.      p
## ------------- ------- ------ -------- --------- ------
## wks:blk         -0.00   0.00    -1.06   2956.57   0.29
## ------------------------------------------------------
## 
## p values calculated using Satterthwaite d.f.
##  
## RANDOM EFFECTS:
## ------------------------------------
##   Group      Parameter    Std. Dev. 
## ---------- ------------- -----------
##     id      (Intercept)    0.3785   
##     id      lag(union)      0.24    
##  Residual                  0.2291   
## ------------------------------------

3.1.2 nobs(): Number of observations used in wbm models

# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned,
# which would silently change these calls.
# Number of entities used in the fit
nobs(model, entities = TRUE)
## [1] 595
# Number of entity-wave observations used in the fit
nobs(model, entities = FALSE)
## [1] 3570

3.1.3 formula(): Get the formula from analysis

# Recover the formula the model was fitted with.
formula(model)
## lwage ~ lag(union) + wks | blk | blk * wks + (lag(union) | id)
## <environment: 0x000001a8cdfb48c0>

3.1.4 Simple version of wbm()

# Same kind of model from a one-part formula; the tidy table below still
# separates within, between and interaction terms.
model <- wbm(lwage ~ lag(union) + wks * blk, data = wages)
model %>%
  tidy() %>%
  print_table()
## ────────────────────────────────────────────────────────────────────────
##           group estimate std.error statistic     p                  term
## ────────────────────────────────────────────────────────────────────────
## 1  within          0.053     0.025     2.132  .033 *   lag(union)       
## 2  within         -0.001     0.001    -1.049  .294     wks              
## 3  between         6.205     0.242    25.684 <.001 *** (Intercept)      
## 4  between         0.025     0.036     0.687  .492     imean(lag(union))
## 5  between         0.011     0.005     2.267  .024 *   imean(wks)       
## 6  between        -0.342     0.062    -5.525 <.001 *** blk              
## 7  interactions   -0.004     0.003    -1.212  .226     wks:blk          
## 8  id              0.378                               sd__(Intercept)  
## 9  Residual        0.233                               sd__Observation  
## ────────────────────────────────────────────────────────────────────────
# Full summary of the simple model; its RANDOM EFFECTS table has only the
# entity intercept (no random slope for lag(union)).
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 2-7
## Dependent variable: lwage
## Model type: Linear mixed effects
## Specification: within-between
## 
## MODEL FIT:
## AIC = 1461.14, BIC = 1516.77
## Pseudo-R² (fixed effects) = 0.05
## Pseudo-R² (total) = 0.74
## Entity ICC = 0.73
## 
## WITHIN EFFECTS:
## ---------------------------------------------------------
##                     Est.   S.E.   t val.      d.f.      p
## ---------------- ------- ------ -------- --------- ------
## lag(union)          0.05   0.03     2.13   2972.01   0.03
## wks                -0.00   0.00    -1.05   2992.17   0.29
## ---------------------------------------------------------
## 
## BETWEEN EFFECTS:
## ---------------------------------------------------------------
##                            Est.   S.E.   t val.     d.f.      p
## ----------------------- ------- ------ -------- -------- ------
## (Intercept)                6.21   0.24    25.68   590.99   0.00
## imean(lag(union))          0.03   0.04     0.69   590.98   0.49
## imean(wks)                 0.01   0.01     2.27   590.99   0.02
## blk                       -0.34   0.06    -5.53   591.03   0.00
## ---------------------------------------------------------------
## 
## CROSS-LEVEL INTERACTIONS:
## ------------------------------------------------------
##                  Est.   S.E.   t val.      d.f.      p
## ------------- ------- ------ -------- --------- ------
## wks:blk         -0.00   0.00    -1.21   2988.59   0.23
## ------------------------------------------------------
## 
## p values calculated using Satterthwaite d.f.
##  
## RANDOM EFFECTS:
## ------------------------------------
##   Group      Parameter    Std. Dev. 
## ---------- ------------- -----------
##     id      (Intercept)    0.3782   
##  Residual                  0.2326   
## ------------------------------------

3.1.5 wbm-class(): Within-Between Model (wbm) class

3.1.6 wbgee(): GEE counterpart of wbm() (linear GEE), using the short formula

library(geepack)
# Rebuild wages from the raw data; this replaces the object from 2.1.1, so
# the derived wage columns are no longer present.
wages <- WageData %>% panel_data(id = id, wave = t)
# Within-between specification estimated by GEE instead of a mixed model.
model <- wbgee(lwage ~ lag(union) + wks * blk, data = wages)
model %>%
  tidy() %>%
  print_table()
## ────────────────────────────────────────────────────────────────────────
##           group estimate std.error statistic     p                  term
## ────────────────────────────────────────────────────────────────────────
## 1  within          0.014     0.020     0.737  .461     lag(union)       
## 2  within         -0.001     0.002    -0.634  .526     wks              
## 3  between         6.231     0.249    24.989 <.001 *** (Intercept)      
## 4  between         0.039     0.033     1.186  .236     imean(lag(union))
## 5  between         0.011     0.005     2.025  .043 *   imean(wks)       
## 6  between        -0.334     0.061    -5.468 <.001 *** blk              
## 7  interactions   -0.001     0.002    -0.709  .478     wks:blk          
## ────────────────────────────────────────────────────────────────────────
# GEE summary: reports z statistics and QIC-type fit measures.
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 2-7
## Dependent variable: lwage
## Model type: Linear GEE
## Variance: ar1 (alpha = 0.86)
## Specification: within-between
## 
## MODEL FIT:
## QIC = 717.9, QICu = 715.08, CIC = 8.41
## 
## WITHIN EFFECTS:
## -----------------------------------------------
##                     Est.   S.E.   z val.      p
## ---------------- ------- ------ -------- ------
## lag(union)          0.01   0.02     0.74   0.46
## wks                -0.00   0.00    -0.63   0.53
## -----------------------------------------------
## 
## BETWEEN EFFECTS:
## ------------------------------------------------------
##                            Est.   S.E.   z val.      p
## ----------------------- ------- ------ -------- ------
## (Intercept)                6.23   0.25    24.99   0.00
## imean(lag(union))          0.04   0.03     1.19   0.24
## imean(wks)                 0.01   0.01     2.02   0.04
## blk                       -0.33   0.06    -5.47   0.00
## ------------------------------------------------------
## 
## CROSS-LEVEL INTERACTIONS:
## --------------------------------------------
##                  Est.   S.E.   z val.      p
## ------------- ------- ------ -------- ------
## wks:blk         -0.00   0.00    -0.71   0.48
## --------------------------------------------

3.1.7 fdm()

library(generics)
# First-difference model; tidy() is called through generics:: explicitly.
model <- fdm(lwage ~ lag(union) + wks * blk, data = wages)
model %>%
  generics::tidy() %>%
  print_table()
## ─────────────────────────────────────────────────────
##    estimate std.error statistic     p            term
## ─────────────────────────────────────────────────────
## 1     0.095     0.002    49.721 <.001 *** (Intercept)
## 2     0.023     0.017     1.320  .187     lag_union_ 
## 3     0.001     0.001     0.530  .597     wks        
## 4    -0.002     0.002    -1.355  .176     wks:blk    
## ─────────────────────────────────────────────────────
# Summary of the first-difference model (time periods 3-7, CR2 standard
# errors, per the output below).
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 3-7
## Dependent variable: lwage
## Variance structure: toeplitz-1 (theta = -0.53) 
## 
## MODEL FIT:
## AIC = -1865.52, BIC = -1829.54
##  
## Standard errors: CR2 
## ------------------------------------------------
##                      Est.   S.E.   t val.      p
## ----------------- ------- ------ -------- ------
## (Intercept)          0.10   0.00    49.72   0.00
## lag_union            0.02   0.02     1.32   0.19
## wks                  0.00   0.00     0.53   0.60
## wks:blk             -0.00   0.00    -1.36   0.18
## ------------------------------------------------

3.2 Data generation

3.2.1 predict()

# Refit the simple within-between model; `model` last held the fdm() fit.
model <- wbm(lwage ~ lag(union) + wks * blk, data = wages)
# With no newdata supplied, predict() assumes the model's processed data
Pdata <- predict(model)
# Printed directly rather than through print_table(): the result is a named
# numeric vector
head(Pdata, n = 20)
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
## 5.495 5.499 5.500 5.496 5.505 5.508 6.535 6.528 6.585 6.531 6.523 6.531 6.516 
##    14    15    16    17    18    19    20 
## 6.518 6.515 6.515 6.515 6.522 6.440 6.440

3.2.2 simulate()

# Simulate one response vector (column sim_1) from the fitted model.
# NOTE(review): no seed is set in the visible code, so these values will
# presumably differ between runs.
Sdata <- simulate(model)
Sdata %>%
  head(20) %>%
  print_table()
## ─────────
##     sim_1
## ─────────
## 1   6.509
## 2   6.368
## 3   6.583
## 4   6.473
## 5   6.251
## 6   6.519
## 7   7.355
## 8   7.462
## 9   6.869
## 10  7.303
## 11  7.400
## 12  7.291
## 13  6.855
## 14  6.611
## 15  6.477
## 16  6.699
## 17  6.573
## 18  6.530
## 19  6.215
## 20  5.866
## ─────────