library(panelr)
library(tidyverse)
library(DataExplorer)
# id는 대상, t는 시간
# exp(years of full-time work experience.)
# wks(weeks worked)
# occ
# ind(works in a manufacturing industry?)
# south(resides in the south?)
# smsa(resides in a standard metropolitan statistical area?)
# ms, fem, union(individual's wage set by a union contract?)
# ed(years of education.)
# blk(is the individual black?)
# lwage(logarithm of wage)
data("WageData")
head(WageData)
## exp wks occ ind south smsa ms fem union ed blk lwage t id
## 1 3 32 0 0 1 0 1 0 0 9 0 5.56068 1 1
## 2 4 43 0 0 1 0 1 0 0 9 0 5.72031 2 1
## 3 5 40 0 0 1 0 1 0 0 9 0 5.99645 3 1
## 4 6 39 0 0 1 0 1 0 0 9 0 5.99645 4 1
## 5 7 42 0 1 1 0 1 0 0 9 0 6.06146 5 1
## 6 8 35 0 1 1 0 1 0 0 9 0 6.17379 6 1
str(WageData)
## 'data.frame': 4165 obs. of 14 variables:
## $ exp : num 3 4 5 6 7 8 9 30 31 32 ...
## $ wks : num 32 43 40 39 42 35 32 34 27 33 ...
## $ occ : num 0 0 0 0 0 0 0 1 1 1 ...
## $ ind : num 0 0 0 0 1 1 1 0 0 1 ...
## $ south: num 1 1 1 1 1 1 1 0 0 0 ...
## $ smsa : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ms : num 1 1 1 1 1 1 1 1 1 1 ...
## $ fem : num 0 0 0 0 0 0 0 0 0 0 ...
## $ union: num 0 0 0 0 0 0 0 0 0 1 ...
## $ ed : num 9 9 9 9 9 9 9 11 11 11 ...
## $ blk : num 0 0 0 0 0 0 0 0 0 0 ...
## $ lwage: num 5.56 5.72 6 6 6.06 ...
## $ t : num 1 2 3 4 5 6 7 1 2 3 ...
## $ id : num 1 1 1 1 1 1 1 2 2 2 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "10 Jun 2016 08:48"
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 254 254 254 254 254 254 254 254 254 254 ...
## - attr(*, "val.labels")= chr "" "" "" "" ...
## - attr(*, "var.labels")= chr "" "" "" "" ...
## - attr(*, "expansion.fields")=List of 6
## ..$ : chr "_dta" "iis" "id"
## ..$ : chr "_dta" "tis" "t"
## ..$ : chr "_dta" "_TSitrvl" "1"
## ..$ : chr "_dta" "_TSdelta" "+1.0000000000000X+000"
## ..$ : chr "_dta" "_TSpanel" "id"
## ..$ : chr "_dta" "_TStvar" "t"
## - attr(*, "version")= int 12
#595명(id) 7년(t) 자료, Total : 4165 obs. of 14 variables
# plot_missing(WageData)
# create_report(WageData)
wages <- panel_data(WageData, id = id, wave = t)
head(wages)
## # Panel data: 6 x 14
## # entities: id [1]
## # wave variable: t [1, 2, 3, ... (6 waves)]
## id t exp wks occ ind south smsa ms fem union ed
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 3 32 0 0 1 0 1 0 0 9
## 2 1 2 4 43 0 0 1 0 1 0 0 9
## 3 1 3 5 40 0 0 1 0 1 0 0 9
## 4 1 4 6 39 0 0 1 0 1 0 0 9
## 5 1 5 7 42 0 1 1 0 1 0 0 9
## 6 1 6 8 35 0 1 1 0 1 0 0 9
## # ... with 2 more variables: blk <dbl>, lwage <dbl>
str(wages)
## Classes 'panel_data', 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 4165 obs. of 14 variables:
## $ id : Factor w/ 595 levels "1","2","3","4",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ t : num 1 2 3 4 5 6 7 1 2 3 ...
## $ exp : num 3 4 5 6 7 8 9 30 31 32 ...
## $ wks : num 32 43 40 39 42 35 32 34 27 33 ...
## $ occ : num 0 0 0 0 0 0 0 1 1 1 ...
## $ ind : num 0 0 0 0 1 1 1 0 0 1 ...
## $ south: num 1 1 1 1 1 1 1 0 0 0 ...
## $ smsa : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ms : num 1 1 1 1 1 1 1 1 1 1 ...
## $ fem : num 0 0 0 0 0 0 0 0 0 0 ...
## $ union: num 0 0 0 0 0 0 0 0 0 1 ...
## $ ed : num 9 9 9 9 9 9 9 11 11 11 ...
## $ blk : num 0 0 0 0 0 0 0 0 0 0 ...
## $ lwage: num 5.56 5.72 6 6 6.06 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "10 Jun 2016 08:48"
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 254 254 254 254 254 254 254 254 254 254 ...
## - attr(*, "val.labels")= chr "" "" "" "" ...
## - attr(*, "var.labels")= chr "" "" "" "" ...
## - attr(*, "expansion.fields")=List of 6
## ..$ : chr "_dta" "iis" "id"
## ..$ : chr "_dta" "tis" "t"
## ..$ : chr "_dta" "_TSitrvl" "1"
## ..$ : chr "_dta" "_TSdelta" "+1.0000000000000X+000"
## ..$ : chr "_dta" "_TSpanel" "id"
## ..$ : chr "_dta" "_TStvar" "t"
## - attr(*, "version")= int 12
## - attr(*, "groups")=Classes 'tbl_df', 'tbl' and 'data.frame': 595 obs. of 2 variables:
## ..$ id : Factor w/ 595 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..$ .rows:List of 595
## .. ..$ : int 1 2 3 4 5 6 7
## .. ..$ : int 8 9 10 11 12 13 14
## .. ..$ : int 15 16 17 18 19 20 21
## .. ..$ : int 22 23 24 25 26 27 28
## .. ..$ : int 29 30 31 32 33 34 35
## .. ..$ : int 36 37 38 39 40 41 42
## .. ..$ : int 43 44 45 46 47 48 49
## .. ..$ : int 50 51 52 53 54 55 56
## .. ..$ : int 57 58 59 60 61 62 63
## .. ..$ : int 64 65 66 67 68 69 70
## .. ..$ : int 71 72 73 74 75 76 77
## .. ..$ : int 78 79 80 81 82 83 84
## .. ..$ : int 85 86 87 88 89 90 91
## .. ..$ : int 92 93 94 95 96 97 98
## .. ..$ : int 99 100 101 102 103 104 105
## .. ..$ : int 106 107 108 109 110 111 112
## .. ..$ : int 113 114 115 116 117 118 119
## .. ..$ : int 120 121 122 123 124 125 126
## .. ..$ : int 127 128 129 130 131 132 133
## .. ..$ : int 134 135 136 137 138 139 140
## .. ..$ : int 141 142 143 144 145 146 147
## .. ..$ : int 148 149 150 151 152 153 154
## .. ..$ : int 155 156 157 158 159 160 161
## .. ..$ : int 162 163 164 165 166 167 168
## .. ..$ : int 169 170 171 172 173 174 175
## .. ..$ : int 176 177 178 179 180 181 182
## .. ..$ : int 183 184 185 186 187 188 189
## .. ..$ : int 190 191 192 193 194 195 196
## .. ..$ : int 197 198 199 200 201 202 203
## .. ..$ : int 204 205 206 207 208 209 210
## .. ..$ : int 211 212 213 214 215 216 217
## .. ..$ : int 218 219 220 221 222 223 224
## .. ..$ : int 225 226 227 228 229 230 231
## .. ..$ : int 232 233 234 235 236 237 238
## .. ..$ : int 239 240 241 242 243 244 245
## .. ..$ : int 246 247 248 249 250 251 252
## .. ..$ : int 253 254 255 256 257 258 259
## .. ..$ : int 260 261 262 263 264 265 266
## .. ..$ : int 267 268 269 270 271 272 273
## .. ..$ : int 274 275 276 277 278 279 280
## .. ..$ : int 281 282 283 284 285 286 287
## .. ..$ : int 288 289 290 291 292 293 294
## .. ..$ : int 295 296 297 298 299 300 301
## .. ..$ : int 302 303 304 305 306 307 308
## .. ..$ : int 309 310 311 312 313 314 315
## .. ..$ : int 316 317 318 319 320 321 322
## .. ..$ : int 323 324 325 326 327 328 329
## .. ..$ : int 330 331 332 333 334 335 336
## .. ..$ : int 337 338 339 340 341 342 343
## .. ..$ : int 344 345 346 347 348 349 350
## .. ..$ : int 351 352 353 354 355 356 357
## .. ..$ : int 358 359 360 361 362 363 364
## .. ..$ : int 365 366 367 368 369 370 371
## .. ..$ : int 372 373 374 375 376 377 378
## .. ..$ : int 379 380 381 382 383 384 385
## .. ..$ : int 386 387 388 389 390 391 392
## .. ..$ : int 393 394 395 396 397 398 399
## .. ..$ : int 400 401 402 403 404 405 406
## .. ..$ : int 407 408 409 410 411 412 413
## .. ..$ : int 414 415 416 417 418 419 420
## .. ..$ : int 421 422 423 424 425 426 427
## .. ..$ : int 428 429 430 431 432 433 434
## .. ..$ : int 435 436 437 438 439 440 441
## .. ..$ : int 442 443 444 445 446 447 448
## .. ..$ : int 449 450 451 452 453 454 455
## .. ..$ : int 456 457 458 459 460 461 462
## .. ..$ : int 463 464 465 466 467 468 469
## .. ..$ : int 470 471 472 473 474 475 476
## .. ..$ : int 477 478 479 480 481 482 483
## .. ..$ : int 484 485 486 487 488 489 490
## .. ..$ : int 491 492 493 494 495 496 497
## .. ..$ : int 498 499 500 501 502 503 504
## .. ..$ : int 505 506 507 508 509 510 511
## .. ..$ : int 512 513 514 515 516 517 518
## .. ..$ : int 519 520 521 522 523 524 525
## .. ..$ : int 526 527 528 529 530 531 532
## .. ..$ : int 533 534 535 536 537 538 539
## .. ..$ : int 540 541 542 543 544 545 546
## .. ..$ : int 547 548 549 550 551 552 553
## .. ..$ : int 554 555 556 557 558 559 560
## .. ..$ : int 561 562 563 564 565 566 567
## .. ..$ : int 568 569 570 571 572 573 574
## .. ..$ : int 575 576 577 578 579 580 581
## .. ..$ : int 582 583 584 585 586 587 588
## .. ..$ : int 589 590 591 592 593 594 595
## .. ..$ : int 596 597 598 599 600 601 602
## .. ..$ : int 603 604 605 606 607 608 609
## .. ..$ : int 610 611 612 613 614 615 616
## .. ..$ : int 617 618 619 620 621 622 623
## .. ..$ : int 624 625 626 627 628 629 630
## .. ..$ : int 631 632 633 634 635 636 637
## .. ..$ : int 638 639 640 641 642 643 644
## .. ..$ : int 645 646 647 648 649 650 651
## .. ..$ : int 652 653 654 655 656 657 658
## .. ..$ : int 659 660 661 662 663 664 665
## .. ..$ : int 666 667 668 669 670 671 672
## .. ..$ : int 673 674 675 676 677 678 679
## .. ..$ : int 680 681 682 683 684 685 686
## .. ..$ : int 687 688 689 690 691 692 693
## .. .. [list output truncated]
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "id")= chr "id"
## - attr(*, "wave")= chr "t"
## - attr(*, "periods")= num 1 2 3 4 5 6 7
wages %>%
mutate(
wks_mean = mean(wks), # this is the person-level mean
wks_lag = lag(wks), # this will have a value of NA when t = 1
cumu_wages = cumsum(exp(lwage)) # cumulative summation works within person
) %>%
select(wks, wks_mean, wks_lag, lwage, cumu_wages)
## # Panel data: 4,165 x 7
## # entities: id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
## id t wks wks_mean wks_lag lwage cumu_wages
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 32 37.6 NA 5.56 260.
## 2 1 2 43 37.6 32 5.72 565.
## 3 1 3 40 37.6 43 6.00 967.
## 4 1 4 39 37.6 40 6.00 1369.
## 5 1 5 42 37.6 39 6.06 1798.
## 6 1 6 35 37.6 42 6.17 2278.
## 7 1 7 32 37.6 35 6.24 2793.
## 8 2 1 34 31.6 NA 6.16 475.
## 9 2 2 27 31.6 34 6.21 975.
## 10 2 3 33 31.6 27 6.26 1500.
## # ... with 4,155 more rows
wages
## # Panel data: 4,165 x 14
## # entities: id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
## id t exp wks occ ind south smsa ms fem union ed
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 3 32 0 0 1 0 1 0 0 9
## 2 1 2 4 43 0 0 1 0 1 0 0 9
## 3 1 3 5 40 0 0 1 0 1 0 0 9
## 4 1 4 6 39 0 0 1 0 1 0 0 9
## 5 1 5 7 42 0 1 1 0 1 0 0 9
## 6 1 6 8 35 0 1 1 0 1 0 0 9
## 7 1 7 9 32 0 1 1 0 1 0 0 9
## 8 2 1 30 34 1 0 0 0 1 0 0 11
## 9 2 2 31 27 1 0 0 0 1 0 0 11
## 10 2 3 32 33 1 1 0 0 1 0 1 11
## # ... with 4,155 more rows, and 2 more variables: blk <dbl>, lwage <dbl>
wages["wks"]
## # Panel data: 4,165 x 3
## # entities: id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
## id t wks
## <fct> <dbl> <dbl>
## 1 1 1 32
## 2 1 2 43
## 3 1 3 40
## 4 1 4 39
## 5 1 5 42
## 6 1 6 35
## 7 1 7 32
## 8 2 1 34
## 9 2 2 27
## 10 2 3 33
## # ... with 4,155 more rows
wages["wage"] <- exp(wages[["lwage"]]) # note double brackets
wages["wage"]
## # Panel data: 4,165 x 3
## # entities: id [595]
## # wave variable: t [1, 2, 3, ... (7 waves)]
## id t wage
## <fct> <dbl> <dbl>
## 1 1 1 260.
## 2 1 2 305.
## 3 1 3 402.
## 4 1 4 402.
## 5 1 5 429.
## 6 1 6 480.
## 7 1 7 515.
## 8 2 1 475.
## 9 2 2 500.
## 10 2 3 525.
## # ... with 4,155 more rows
summary(wages, union, lwage)
## Loading required namespace: skimr
## Skim summary statistics
## n obs: 4165
## n variables: 3
## group variables: t
##
## -- Variable type:numeric --------------------------------------------------------------
## t variable missing complete n mean sd p0 p25 p50 p75 p100
## 1 lwage 0 595 595 6.38 0.39 5.01 6.12 6.42 6.65 6.91
## 1 union 0 595 595 0.36 0.48 0 0 0 1 1
## 2 lwage 0 595 595 6.47 0.36 5.01 6.24 6.53 6.75 6.91
## 2 union 0 595 595 0.35 0.48 0 0 0 1 1
## 3 lwage 0 595 595 6.6 0.45 4.61 6.33 6.61 6.86 8.27
## 3 union 0 595 595 0.37 0.48 0 0 0 1 1
## 4 lwage 0 595 595 6.7 0.44 5.08 6.44 6.72 6.96 8.52
## 4 union 0 595 595 0.37 0.48 0 0 0 1 1
## 5 lwage 0 595 595 6.79 0.42 5.27 6.51 6.8 7.04 8.1
## 5 union 0 595 595 0.37 0.48 0 0 0 1 1
## 6 lwage 0 595 595 6.86 0.42 5.66 6.6 6.91 7.11 8.16
## 6 union 0 595 595 0.36 0.48 0 0 0 1 1
## 7 lwage 0 595 595 6.95 0.44 5.68 6.68 6.98 7.21 8.54
## 7 union 0 595 595 0.37 0.48 0 0 0 1 1
## hist
## <U+2581><U+2581><U+2582><U+2582><U+2585><U+2586><U+2587><U+2587>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2581><U+2581><U+2582><U+2582><U+2585><U+2586><U+2587>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2581><U+2581><U+2585><U+2587><U+2583><U+2581><U+2581>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2581><U+2583><U+2587><U+2586><U+2582><U+2581><U+2581>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2581><U+2582><U+2585><U+2587><U+2583><U+2581><U+2581>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2582><U+2583><U+2586><U+2587><U+2583><U+2581><U+2581>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
## <U+2581><U+2582><U+2585><U+2587><U+2586><U+2582><U+2581><U+2581>
## <U+2587><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2585>
line_plot(wages, lwage)
line_plot(wages, lwage, add.mean = TRUE, alpha = 0.2)
***
model <- wbm(lwage ~ wks + union + ms + occ | blk + fem, data = wages)
summary(model)
## MODEL INFO:
## Entities: 595
## Time periods: 1-7
## Dependent variable: lwage
## Model type: Linear mixed effects
## Specification: within-between
##
## MODEL FIT:
## AIC = 2036.78, BIC = 2119.13
## Pseudo-R² (fixed effects) = 0.27
## Pseudo-R² (total) = 0.69
## Entity ICC = 0.57
##
## WITHIN EFFECTS:
## ----------------------------------------------------
## Est. S.E. t val. d.f. p
## ----------- ------- ------ -------- --------- ------
## wks 0.00 0.00 1.06 3566.00 0.29
## union 0.06 0.03 2.53 3566.00 0.01
## ms -0.08 0.03 -2.57 3566.00 0.01
## occ -0.08 0.02 -3.32 3566.00 0.00
## ----------------------------------------------------
##
## BETWEEN EFFECTS:
## ----------------------------------------------------------
## Est. S.E. t val. d.f. p
## ------------------ ------- ------ -------- -------- ------
## (Intercept) 6.30 0.20 30.85 588.00 0.00
## imean(wks) 0.01 0.00 2.25 588.00 0.02
## imean(union) 0.15 0.03 4.67 588.00 0.00
## imean(ms) 0.17 0.05 3.07 588.00 0.00
## imean(occ) -0.41 0.03 -13.31 588.00 0.00
## blk -0.15 0.05 -2.81 588.00 0.01
## fem -0.32 0.06 -4.96 588.00 0.00
## ----------------------------------------------------------
##
## p values calculated using Satterthwaite d.f.
##
## RANDOM EFFECTS:
## ------------------------------------
## Group Parameter Std. Dev.
## ---------- ------------- -----------
## id (Intercept) 0.2992
## Residual 0.2589
## ------------------------------------