packagea loading

library(panelr)
library(tidyverse)
library(DataExplorer)

Prepare the dataset

# id는 대상, t는 시간 
# exp(years of full-time work experience.)
# wks(weeks worked) 
# occ 
# ind(works in a manufacturing industry?)
# south(resides in the south?)
# smsa(resides in a standard metropolitan statistical area?)
# ms, fem, union(individual's wage set by a union contract?) 
# ed(years of education.)
# blk(is the individual black?)
# lwage(logarithm of wage)

data("WageData")
head(WageData)
##   exp wks occ ind south smsa ms fem union ed blk   lwage t id
## 1   3  32   0   0     1    0  1   0     0  9   0 5.56068 1  1
## 2   4  43   0   0     1    0  1   0     0  9   0 5.72031 2  1
## 3   5  40   0   0     1    0  1   0     0  9   0 5.99645 3  1
## 4   6  39   0   0     1    0  1   0     0  9   0 5.99645 4  1
## 5   7  42   0   1     1    0  1   0     0  9   0 6.06146 5  1
## 6   8  35   0   1     1    0  1   0     0  9   0 6.17379 6  1

EDA of dataset

str(WageData)
## 'data.frame':    4165 obs. of  14 variables:
##  $ exp  : num  3 4 5 6 7 8 9 30 31 32 ...
##  $ wks  : num  32 43 40 39 42 35 32 34 27 33 ...
##  $ occ  : num  0 0 0 0 0 0 0 1 1 1 ...
##  $ ind  : num  0 0 0 0 1 1 1 0 0 1 ...
##  $ south: num  1 1 1 1 1 1 1 0 0 0 ...
##  $ smsa : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ms   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ fem  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ union: num  0 0 0 0 0 0 0 0 0 1 ...
##  $ ed   : num  9 9 9 9 9 9 9 11 11 11 ...
##  $ blk  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ lwage: num  5.56 5.72 6 6 6.06 ...
##  $ t    : num  1 2 3 4 5 6 7 1 2 3 ...
##  $ id   : num  1 1 1 1 1 1 1 2 2 2 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "10 Jun 2016 08:48"
##  - attr(*, "formats")= chr  "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int  254 254 254 254 254 254 254 254 254 254 ...
##  - attr(*, "val.labels")= chr  "" "" "" "" ...
##  - attr(*, "var.labels")= chr  "" "" "" "" ...
##  - attr(*, "expansion.fields")=List of 6
##   ..$ : chr  "_dta" "iis" "id"
##   ..$ : chr  "_dta" "tis" "t"
##   ..$ : chr  "_dta" "_TSitrvl" "1"
##   ..$ : chr  "_dta" "_TSdelta" "+1.0000000000000X+000"
##   ..$ : chr  "_dta" "_TSpanel" "id"
##   ..$ : chr  "_dta" "_TStvar" "t"
##  - attr(*, "version")= int 12
     #595명(id) 7년(t) 자료, Total : 4165 obs. of  14 variables

# plot_missing(WageData)
# create_report(WageData)

Analysis

wages <- panel_data(WageData, id = id, wave = t)
head(wages)
## # Panel data:    6 x 14
## # entities:      id [1]
## # wave variable: t [1, 2, 3, ... (6 waves)]
##   id        t   exp   wks   occ   ind south  smsa    ms   fem union    ed
##   <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1         1     3    32     0     0     1     0     1     0     0     9
## 2 1         2     4    43     0     0     1     0     1     0     0     9
## 3 1         3     5    40     0     0     1     0     1     0     0     9
## 4 1         4     6    39     0     0     1     0     1     0     0     9
## 5 1         5     7    42     0     1     1     0     1     0     0     9
## 6 1         6     8    35     0     1     1     0     1     0     0     9
## # ... with 2 more variables: blk <dbl>, lwage <dbl>
str(wages)
## Classes 'panel_data', 'grouped_df', 'tbl_df', 'tbl' and 'data.frame':    4165 obs. of  14 variables:
##  $ id   : Factor w/ 595 levels "1","2","3","4",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ t    : num  1 2 3 4 5 6 7 1 2 3 ...
##  $ exp  : num  3 4 5 6 7 8 9 30 31 32 ...
##  $ wks  : num  32 43 40 39 42 35 32 34 27 33 ...
##  $ occ  : num  0 0 0 0 0 0 0 1 1 1 ...
##  $ ind  : num  0 0 0 0 1 1 1 0 0 1 ...
##  $ south: num  1 1 1 1 1 1 1 0 0 0 ...
##  $ smsa : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ms   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ fem  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ union: num  0 0 0 0 0 0 0 0 0 1 ...
##  $ ed   : num  9 9 9 9 9 9 9 11 11 11 ...
##  $ blk  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ lwage: num  5.56 5.72 6 6 6.06 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "10 Jun 2016 08:48"
##  - attr(*, "formats")= chr  "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int  254 254 254 254 254 254 254 254 254 254 ...
##  - attr(*, "val.labels")= chr  "" "" "" "" ...
##  - attr(*, "var.labels")= chr  "" "" "" "" ...
##  - attr(*, "expansion.fields")=List of 6
##   ..$ : chr  "_dta" "iis" "id"
##   ..$ : chr  "_dta" "tis" "t"
##   ..$ : chr  "_dta" "_TSitrvl" "1"
##   ..$ : chr  "_dta" "_TSdelta" "+1.0000000000000X+000"
##   ..$ : chr  "_dta" "_TSpanel" "id"
##   ..$ : chr  "_dta" "_TStvar" "t"
##  - attr(*, "version")= int 12
##  - attr(*, "groups")=Classes 'tbl_df', 'tbl' and 'data.frame':   595 obs. of  2 variables:
##   ..$ id   : Factor w/ 595 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ .rows:List of 595
##   .. ..$ : int  1 2 3 4 5 6 7
##   .. ..$ : int  8 9 10 11 12 13 14
##   .. ..$ : int  15 16 17 18 19 20 21
##   .. ..$ : int  22 23 24 25 26 27 28
##   .. ..$ : int  29 30 31 32 33 34 35
##   .. ..$ : int  36 37 38 39 40 41 42
##   .. ..$ : int  43 44 45 46 47 48 49
##   .. ..$ : int  50 51 52 53 54 55 56
##   .. ..$ : int  57 58 59 60 61 62 63
##   .. ..$ : int  64 65 66 67 68 69 70
##   .. ..$ : int  71 72 73 74 75 76 77
##   .. ..$ : int  78 79 80 81 82 83 84
##   .. ..$ : int  85 86 87 88 89 90 91
##   .. ..$ : int  92 93 94 95 96 97 98
##   .. ..$ : int  99 100 101 102 103 104 105
##   .. ..$ : int  106 107 108 109 110 111 112
##   .. ..$ : int  113 114 115 116 117 118 119
##   .. ..$ : int  120 121 122 123 124 125 126
##   .. ..$ : int  127 128 129 130 131 132 133
##   .. ..$ : int  134 135 136 137 138 139 140
##   .. ..$ : int  141 142 143 144 145 146 147
##   .. ..$ : int  148 149 150 151 152 153 154
##   .. ..$ : int  155 156 157 158 159 160 161
##   .. ..$ : int  162 163 164 165 166 167 168
##   .. ..$ : int  169 170 171 172 173 174 175
##   .. ..$ : int  176 177 178 179 180 181 182
##   .. ..$ : int  183 184 185 186 187 188 189
##   .. ..$ : int  190 191 192 193 194 195 196
##   .. ..$ : int  197 198 199 200 201 202 203
##   .. ..$ : int  204 205 206 207 208 209 210
##   .. ..$ : int  211 212 213 214 215 216 217
##   .. ..$ : int  218 219 220 221 222 223 224
##   .. ..$ : int  225 226 227 228 229 230 231
##   .. ..$ : int  232 233 234 235 236 237 238
##   .. ..$ : int  239 240 241 242 243 244 245
##   .. ..$ : int  246 247 248 249 250 251 252
##   .. ..$ : int  253 254 255 256 257 258 259
##   .. ..$ : int  260 261 262 263 264 265 266
##   .. ..$ : int  267 268 269 270 271 272 273
##   .. ..$ : int  274 275 276 277 278 279 280
##   .. ..$ : int  281 282 283 284 285 286 287
##   .. ..$ : int  288 289 290 291 292 293 294
##   .. ..$ : int  295 296 297 298 299 300 301
##   .. ..$ : int  302 303 304 305 306 307 308
##   .. ..$ : int  309 310 311 312 313 314 315
##   .. ..$ : int  316 317 318 319 320 321 322
##   .. ..$ : int  323 324 325 326 327 328 329
##   .. ..$ : int  330 331 332 333 334 335 336
##   .. ..$ : int  337 338 339 340 341 342 343
##   .. ..$ : int  344 345 346 347 348 349 350
##   .. ..$ : int  351 352 353 354 355 356 357
##   .. ..$ : int  358 359 360 361 362 363 364
##   .. ..$ : int  365 366 367 368 369 370 371
##   .. ..$ : int  372 373 374 375 376 377 378
##   .. ..$ : int  379 380 381 382 383 384 385
##   .. ..$ : int  386 387 388 389 390 391 392
##   .. ..$ : int  393 394 395 396 397 398 399
##   .. ..$ : int  400 401 402 403 404 405 406
##   .. ..$ : int  407 408 409 410 411 412 413
##   .. ..$ : int  414 415 416 417 418 419 420
##   .. ..$ : int  421 422 423 424 425 426 427
##   .. ..$ : int  428 429 430 431 432 433 434
##   .. ..$ : int  435 436 437 438 439 440 441
##   .. ..$ : int  442 443 444 445 446 447 448
##   .. ..$ : int  449 450 451 452 453 454 455
##   .. ..$ : int  456 457 458 459 460 461 462
##   .. ..$ : int  463 464 465 466 467 468 469
##   .. ..$ : int  470 471 472 473 474 475 476
##   .. ..$ : int  477 478 479 480 481 482 483
##   .. ..$ : int  484 485 486 487 488 489 490
##   .. ..$ : int  491 492 493 494 495 496 497
##   .. ..$ : int  498 499 500 501 502 503 504
##   .. ..$ : int  505 506 507 508 509 510 511
##   .. ..$ : int  512 513 514 515 516 517 518
##   .. ..$ : int  519 520 521 522 523 524 525
##   .. ..$ : int  526 527 528 529 530 531 532
##   .. ..$ : int  533 534 535 536 537 538 539
##   .. ..$ : int  540 541 542 543 544 545 546
##   .. ..$ : int  547 548 549 550 551 552 553
##   .. ..$ : int  554 555 556 557 558 559 560
##   .. ..$ : int  561 562 563 564 565 566 567
##   .. ..$ : int  568 569 570 571 572 573 574
##   .. ..$ : int  575 576 577 578 579 580 581
##   .. ..$ : int  582 583 584 585 586 587 588
##   .. ..$ : int  589 590 591 592 593 594 595
##   .. ..$ : int  596 597 598 599 600 601 602
##   .. ..$ : int  603 604 605 606 607 608 609
##   .. ..$ : int  610 611 612 613 614 615 616
##   .. ..$ : int  617 618 619 620 621 622 623
##   .. ..$ : int  624 625 626 627 628 629 630
##   .. ..$ : int  631 632 633 634 635 636 637
##   .. ..$ : int  638 639 640 641 642 643 644
##   .. ..$ : int  645 646 647 648 649 650 651
##   .. ..$ : int  652 653 654 655 656 657 658
##   .. ..$ : int  659 660 661 662 663 664 665
##   .. ..$ : int  666 667 668 669 670 671 672
##   .. ..$ : int  673 674 675 676 677 678 679
##   .. ..$ : int  680 681 682 683 684 685 686
##   .. ..$ : int  687 688 689 690 691 692 693
##   .. .. [list output truncated]
##   ..- attr(*, ".drop")= logi TRUE
##  - attr(*, "id")= chr "id"
##  - attr(*, "wave")= chr "t"
##  - attr(*, "periods")= num  1 2 3 4 5 6 7

Analysis

wages %>% 
  mutate(
    wks_mean = mean(wks), # this is the person-level mean
    wks_lag = lag(wks), # this will have a value of NA when t = 1
    cumu_wages = cumsum(exp(lwage)) # cumulative summation works within person
  ) %>%
  select(wks, wks_mean, wks_lag, lwage, cumu_wages) 
## # Panel data:    4,165 x 7
## # entities:      id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
##    id        t   wks wks_mean wks_lag lwage cumu_wages
##    <fct> <dbl> <dbl>    <dbl>   <dbl> <dbl>      <dbl>
##  1 1         1    32     37.6      NA  5.56       260.
##  2 1         2    43     37.6      32  5.72       565.
##  3 1         3    40     37.6      43  6.00       967.
##  4 1         4    39     37.6      40  6.00      1369.
##  5 1         5    42     37.6      39  6.06      1798.
##  6 1         6    35     37.6      42  6.17      2278.
##  7 1         7    32     37.6      35  6.24      2793.
##  8 2         1    34     31.6      NA  6.16       475.
##  9 2         2    27     31.6      34  6.21       975.
## 10 2         3    33     31.6      27  6.26      1500.
## # ... with 4,155 more rows
wages
## # Panel data:    4,165 x 14
## # entities:      id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
##    id        t   exp   wks   occ   ind south  smsa    ms   fem union    ed
##    <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 1         1     3    32     0     0     1     0     1     0     0     9
##  2 1         2     4    43     0     0     1     0     1     0     0     9
##  3 1         3     5    40     0     0     1     0     1     0     0     9
##  4 1         4     6    39     0     0     1     0     1     0     0     9
##  5 1         5     7    42     0     1     1     0     1     0     0     9
##  6 1         6     8    35     0     1     1     0     1     0     0     9
##  7 1         7     9    32     0     1     1     0     1     0     0     9
##  8 2         1    30    34     1     0     0     0     1     0     0    11
##  9 2         2    31    27     1     0     0     0     1     0     0    11
## 10 2         3    32    33     1     1     0     0     1     0     1    11
## # ... with 4,155 more rows, and 2 more variables: blk <dbl>, lwage <dbl>
wages["wks"]
## # Panel data:    4,165 x 3
## # entities:      id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
##    id        t   wks
##    <fct> <dbl> <dbl>
##  1 1         1    32
##  2 1         2    43
##  3 1         3    40
##  4 1         4    39
##  5 1         5    42
##  6 1         6    35
##  7 1         7    32
##  8 2         1    34
##  9 2         2    27
## 10 2         3    33
## # ... with 4,155 more rows
wages["wage"] <- exp(wages[["lwage"]]) # note double brackets 
wages["wage"]
## # Panel data:    4,165 x 3
## # entities:      id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
##    id        t  wage
##    <fct> <dbl> <dbl>
##  1 1         1  260.
##  2 1         2  305.
##  3 1         3  402.
##  4 1         4  402.
##  5 1         5  429.
##  6 1         6  480.
##  7 1         7  515.
##  8 2         1  475.
##  9 2         2  500.
## 10 2         3  525.
## # ... with 4,155 more rows
summary(wages, union, lwage)
## Loading required namespace: skimr
## Skim summary statistics
##  n obs: 4165 
##  n variables: 3 
##  group variables: t 
## 
## -- Variable type:numeric --------------------------------------------------------------
##  t variable missing complete   n mean   sd   p0  p25  p50  p75 p100
##  1    lwage       0      595 595 6.38 0.39 5.01 6.12 6.42 6.65 6.91
##  1    union       0      595 595 0.36 0.48 0    0    0    1    1   
##  2    lwage       0      595 595 6.47 0.36 5.01 6.24 6.53 6.75 6.91
##  2    union       0      595 595 0.35 0.48 0    0    0    1    1   
##  3    lwage       0      595 595 6.6  0.45 4.61 6.33 6.61 6.86 8.27
##  3    union       0      595 595 0.37 0.48 0    0    0    1    1   
##  4    lwage       0      595 595 6.7  0.44 5.08 6.44 6.72 6.96 8.52
##  4    union       0      595 595 0.37 0.48 0    0    0    1    1   
##  5    lwage       0      595 595 6.79 0.42 5.27 6.51 6.8  7.04 8.1 
##  5    union       0      595 595 0.37 0.48 0    0    0    1    1   
##  6    lwage       0      595 595 6.86 0.42 5.66 6.6  6.91 7.11 8.16
##  6    union       0      595 595 0.36 0.48 0    0    0    1    1   
##  7    lwage       0      595 595 6.95 0.44 5.68 6.68 6.98 7.21 8.54
##  7    union       0      595 595 0.37 0.48 0    0    0    1    1   
##      hist
##  <U+2581><U+2581><U+2582><U+2582><U+2585><U+2586><U+2587><U+2587>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2581><U+2581><U+2582><U+2582><U+2585><U+2586><U+2587>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2581><U+2581><U+2585><U+2587><U+2583><U+2581><U+2581>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2581><U+2583><U+2587><U+2586><U+2582><U+2581><U+2581>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2581><U+2582><U+2585><U+2587><U+2583><U+2581><U+2581>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2582><U+2583><U+2586><U+2587><U+2583><U+2581><U+2581>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
##  <U+2581><U+2582><U+2585><U+2587><U+2586><U+2582><U+2581><U+2581>
##  <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>

Plot

line_plot(wages, lwage)

line_plot(wages, lwage, add.mean = TRUE, alpha = 0.2)

***

Model

model <- wbm(lwage ~ wks + union + ms + occ | blk + fem, data = wages)
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 1-7
## Dependent variable: lwage
## Model type: Linear mixed effects
## Specification: within-between
## 
## MODEL FIT:
## AIC = 2036.78, BIC = 2119.13
## Pseudo-R² (fixed effects) = 0.27
## Pseudo-R² (total) = 0.69
## Entity ICC = 0.57
## 
## WITHIN EFFECTS:
## ----------------------------------------------------
##                Est.   S.E.   t val.      d.f.      p
## ----------- ------- ------ -------- --------- ------
## wks            0.00   0.00     1.06   3566.00   0.29
## union          0.06   0.03     2.53   3566.00   0.01
## ms            -0.08   0.03    -2.57   3566.00   0.01
## occ           -0.08   0.02    -3.32   3566.00   0.00
## ----------------------------------------------------
## 
## BETWEEN EFFECTS:
## ----------------------------------------------------------
##                       Est.   S.E.   t val.     d.f.      p
## ------------------ ------- ------ -------- -------- ------
## (Intercept)           6.30   0.20    30.85   588.00   0.00
## imean(wks)            0.01   0.00     2.25   588.00   0.02
## imean(union)          0.15   0.03     4.67   588.00   0.00
## imean(ms)             0.17   0.05     3.07   588.00   0.00
## imean(occ)           -0.41   0.03   -13.31   588.00   0.00
## blk                  -0.15   0.05    -2.81   588.00   0.01
## fem                  -0.32   0.06    -4.96   588.00   0.00
## ----------------------------------------------------------
## 
## p values calculated using Satterthwaite d.f.
##  
## RANDOM EFFECTS:
## ------------------------------------
##   Group      Parameter    Std. Dev. 
## ---------- ------------- -----------
##     id      (Intercept)    0.2992   
##  Residual                  0.2589   
## ------------------------------------