Análise 2: Penalized Generalized Estimating Equations in High-Dimension longitudinal data https://cran.r-project.org/web/packages/PGEE/PGEE.pdf https://journal.r-project.org/archive/2017/RJ-2017-030/RJ-2017-030.pdf

#install.packages("PGEE")
require(PGEE)
require(DT) #para a fun??o datatable
require(tidyverse)
require(readxl)
require(tidyr)
require(corrplot)
require(MASS)
require(mvtnorm)
dados=read_excel("dados_renato.xlsx",sheet=1)
names(dados)=c("Ano","UF","Cod.UF","tx.latrc","tx.pres","gini.ibge","perc.jov.1524","perc.hom","pbf","densidade.urbana1","densidade.urbana2","taxa.casamentos","Taxa.desligamentos","raz.2020","raz.1040")

#Precisa estar nesta ordem
dados2=dados[complete.cases(dados),] %>%
  select(Cod.UF,tx.latrc,Ano,tx.pres,gini.ibge,perc.jov.1524,perc.hom,pbf,densidade.urbana1,Taxa.desligamentos,taxa.casamentos)%>%
  rename(id=Cod.UF,y=tx.latrc,time=Ano)

Passo 1: we needed to compute the cross-validated value of tuning paremeter over a grid via the function CVfit

https://civil.colorado.edu/~balajir/CVEN6833/lectures/GammaGLM-01.pdf

formula1 =   "y ~.-id"
lambda.vec <- seq(0.01,0.2,0.01)
 cv <-  CVfit(formula = formula1, id = id, data = dados2, family = gaussian(link = "identity"), scale.fix = TRUE, scale.value = 1, fold = 8, lambda.vec = lambda.vec, eps = 10^-6, maxiter = 30, tol = 10^-6)
cv$lam.opt
## [1] 0.2

Passo 2: Penalizada After selecting the tuning parameter λ via the function CVfit, we apply the PGEE function with the working correlation matrix type corstr = “independence” as follows

myfit1 <- PGEE(formula = formula1, id = id, data = dados2, na.action = NULL,
 family = gaussian(link="identity"), corstr = "independence" , Mv = NULL,
 beta_int = c(rep(0,dim(dados2)[2]-1)), R = NULL, scale.fix = TRUE,
 scale.value = 1, lambda = cv$lam.opt, eps = 10^-6,
 maxiter = 30, tol = 10^-6, silent = TRUE)
## Warning in model.matrix.default(Terms, m, contrasts): non-list contrasts
## argument ignored
# see a portion of the results returned by coef(summary(myfit1))
coef(summary(myfit1))
##                         Estimate   Naive S.E.       Naive z  Robust S.E.
## (Intercept)        -1.200851e-09 0.0004305898 -2.788851e-06 1.714589e-09
## time                3.675895e-04 0.0002753788  1.334850e+00 1.755748e-04
## tx.pres             3.208556e-04 0.0006381484  5.027915e-01 6.157490e-04
## gini.ibge          -1.695845e-08 0.0004339650 -3.907791e-05 1.609202e-07
## perc.jov.1524       5.592343e-03 0.0239470503  2.335295e-01 1.241823e-02
## perc.hom            6.035025e-05 0.0033672548  1.792269e-02 2.219919e-04
## pbf                 1.010506e-04 0.0007786820  1.297714e-01 8.183733e-04
## densidade.urbana1   1.714984e-03 0.0026420954  6.490998e-01 2.994008e-03
## Taxa.desligamentos -4.416234e-08 0.0004397310 -1.004303e-04 7.693049e-08
## taxa.casamentos    -2.690528e-09 0.0004309100 -6.243828e-06 3.010371e-09
##                      Robust z
## (Intercept)        -0.7003723
## time                2.0936348
## tx.pres             0.5210818
## gini.ibge          -0.1053842
## perc.jov.1524       0.4503335
## perc.hom            0.2718578
## pbf                 0.1234774
## densidade.urbana1   0.5728052
## Taxa.desligamentos -0.5740550
## taxa.casamentos    -0.8937528

Any regression estimate less than 10−3 in magnitude can be considered as equal to 0 (and thus not selected) in PGEE. In this sense, we obtained the variables whose regression estimates greater than 10−3 and their summary statistics as follows:

index1 <- which(abs(coef(summary(myfit1))[,"Estimate"]) > 10^-3)
names(which(abs(coef(summary(myfit1))[index1,"Robust z"]) > 1.96))
## character(0)
coef(summary(myfit1))[index1,]
##                      Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
## perc.jov.1524     0.005592343 0.023947050 0.2335295 0.012418225 0.4503335
## densidade.urbana1 0.001714984 0.002642095 0.6490998 0.002994008 0.5728052

Não penalizado: For comparison, we fitted the unpenalized GEEs via MGEE function under the same settings defined above as follows:

# fit the GEE model
myfit2 <- MGEE(formula = formula1, id = id, data = dados2, na.action = NULL,
family = gaussian(link="identity"), corstr = "independence", Mv = NULL,
beta_int = c(rep(0,dim(dados2)[2]-1)), R = NULL, scale.fix = TRUE,
scale.value = 1, maxiter = 30, tol = 10^-6, silent = TRUE)
## Warning in model.matrix.default(Terms, m, contrasts): non-list contrasts
## argument ignored
index2 <- which(abs(coef(summary(myfit2))[,"Estimate"]) > 10^-3)
names(which(abs(coef(summary(myfit1))[index2,"Robust z"]) > 1.96))
## character(0)
coef(summary(myfit2))[index2,]
##                      Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
## perc.jov.1524     0.005286642 0.042122408 0.1255066 0.037964744 0.1392513
## perc.hom          0.027630307 0.085121154 0.3245998 0.135967215 0.2032130
## densidade.urbana1 0.002241512 0.003153488 0.7108041 0.003797535 0.5902545
names(myfit1)
##  [1] "title"               "version"             "model"              
##  [4] "call"                "terms"               "nobs"               
##  [7] "iterations"          "coefficients"        "nas"                
## [10] "linear.predictors"   "fitted.values"       "residuals"          
## [13] "family"              "y"                   "id"                 
## [16] "max.id"              "working.correlation" "scale"              
## [19] "epsilon"             "lambda.value"        "robust.variance"    
## [22] "naive.variance"      "xnames"              "error"

Diagnostico ainda vago: https://myweb.uiowa.edu/pbreheny/uk/teaching/760-s13/notes/3-26.pdf

hist(myfit1$residuals)

plot(myfit1$residuals)

qqnorm(myfit1$residuals)
qqline(myfit1$residuals)

shapiro.test(myfit1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  myfit1$residuals
## W = 0.88497, p-value = 4.642e-12
plot(dados2$y,myfit1$fitted.values,xlim=c(0,4),ylim=c(0,4))
abline(0,1)

cbind(dados2$y,myfit1$fitted.values)
##              [,1]      [,2]
##   [1,] 1.30327631 1.0261946
##   [2,] 1.60008500 1.0232028
##   [3,] 1.51332136 1.0426194
##   [4,] 1.13821552 1.0561372
##   [5,] 0.99738817 1.0808237
##   [6,] 0.76120156 1.0587079
##   [7,] 0.88049705 1.0821805
##   [8,] 0.57863204 1.0703937
##   [9,] 0.14931256 1.0770079
##  [10,] 0.15258207 1.0796324
##  [11,] 0.14704304 1.1096289
##  [12,] 1.73628193 1.1173911
##  [13,] 1.47376826 1.1323079
##  [14,] 1.31789464 1.1281714
##  [15,] 1.67425879 1.1628404
##  [16,] 0.88596268 1.1557058
##  [17,] 0.61874870 1.0288709
##  [18,] 0.74489283 1.0351666
##  [19,] 0.83804835 1.0428261
##  [20,] 0.82513867 1.0451197
##  [21,] 1.21524299 1.0648006
##  [22,] 1.11390050 1.0801757
##  [23,] 0.94539776 1.0945947
##  [24,] 1.16166715 1.0984036
##  [25,] 0.25554729 1.0204645
##  [26,] 0.25270074 1.0398956
##  [27,] 0.24225804 1.0465967
##  [28,] 0.47449697 1.0484650
##  [29,] 0.21731335 1.0552674
##  [30,] 0.61466341 1.0497324
##  [31,] 0.60369947 1.0496263
##  [32,] 2.02278546 1.0350778
##  [33,] 3.42505838 1.0387552
##  [34,] 2.82729219 1.0351992
##  [35,] 2.12622224 1.0414125
##  [36,] 1.82087932 1.0529391
##  [37,] 1.69392322 1.0496043
##  [38,] 1.95742500 1.0507694
##  [39,] 2.22939924 1.0495433
##  [40,] 0.67273587 1.0521301
##  [41,] 0.85133771 1.0572161
##  [42,] 3.58794711 1.0565381
##  [43,] 2.71301561 1.0551391
##  [44,] 1.00200114 1.0703295
##  [45,] 1.22449646 1.1029963
##  [46,] 1.99757095 1.0946849
##  [47,] 0.91902755 0.9713458
##  [48,] 0.96491955 0.9883083
##  [49,] 0.54665762 0.9839161
##  [50,] 1.00615223 0.9761908
##  [51,] 1.14212944 0.9907638
##  [52,] 0.56429667 0.9903998
##  [53,] 1.35302984 1.0074575
##  [54,] 0.93527871 1.0543298
##  [55,] 0.85199433 1.0557584
##  [56,] 1.01323829 1.0482950
##  [57,] 1.01498064 1.0435036
##  [58,] 0.98945555 1.0453367
##  [59,] 1.47462420 1.0483240
##  [60,] 0.80425193 1.0471195
##  [61,] 0.92724770 1.0525151
##  [62,] 1.05095926 1.0558082
##  [63,] 0.23279906 1.0523062
##  [64,] 0.06595360 1.0556662
##  [65,] 0.09616319 1.0556277
##  [66,] 0.19075930 1.0476172
##  [67,] 0.12737523 1.0441018
##  [68,] 0.63276161 1.0526501
##  [69,] 0.91075654 1.0525451
##  [70,] 1.09555836 1.0537858
##  [71,] 1.23498322 1.1317831
##  [72,] 1.53935433 1.1260161
##  [73,] 1.05318875 1.1325014
##  [74,] 0.98270797 1.1267986
##  [75,] 0.89095685 1.1313699
##  [76,] 1.17359913 1.1382955
##  [77,] 1.21887650 1.1474804
##  [78,] 0.84814851 1.1390976
##  [79,] 0.73257951 1.1439583
##  [80,] 1.29407314 1.1277635
##  [81,] 0.70820846 1.1276483
##  [82,] 0.60556978 1.1321867
##  [83,] 0.53147305 1.1356286
##  [84,] 0.27879331 1.1401513
##  [85,] 0.47422034 1.1227391
##  [86,] 1.81897662 1.1312813
##  [87,] 0.80647718 1.1308174
##  [88,] 0.87878361 1.1377127
##  [89,] 1.01533530 1.1390460
##  [90,] 0.71618474 1.1343030
##  [91,] 0.71215396 1.1202310
##  [92,] 0.55043404 1.1250842
##  [93,] 0.71530374 1.1251897
##  [94,] 0.48175847 1.1322708
##  [95,] 4.78986801 1.1830680
##  [96,] 2.11699876 1.1683984
##  [97,] 1.28437197 1.1791960
##  [98,] 1.45405518 1.1804178
##  [99,] 1.27124569 1.1830151
## [100,] 0.87987397 1.1846537
## [101,] 0.94054122 1.1860545
## [102,] 0.79274153 1.1963337
## [103,] 0.87305867 1.1637315
## [104,] 0.92840905 1.1412606
## [105,] 0.85604289 1.1185919
## [106,] 1.31092735 1.1196110
## [107,] 0.95053781 1.1316803
## [108,] 1.14526256 1.1287186
## [109,] 2.74840529 1.1350121
## [110,] 2.39326130 1.1415999
## [111,] 1.83639248 1.1456389
## [112,] 0.50818405 1.1853784
## [113,] 0.61873977 1.1511433
## [114,] 0.75023482 1.1400765
## [115,] 1.33684610 1.1381656
## [116,] 1.14842482 1.1248535
## [117,] 1.70546036 1.1373129
## [118,] 1.59405227 1.1329075
## [119,] 1.48677179 1.1257313
## [120,] 1.00000000 1.0866850
## [121,] 0.80000000 1.0967604
## [122,] 0.79000000 1.0897386
## [123,] 0.83000000 1.0903125
## [124,] 0.83000000 1.0848337
## [125,] 0.89000000 1.0680166
## [126,] 1.23000000 1.0694539
## [127,] 1.01000000 1.0704970
## [128,] 1.32000000 1.0688514
## [129,] 0.77972912 1.0133314
## [130,] 0.54416583 1.0298453
## [131,] 0.42545391 1.0258430
## [132,] 0.31737920 1.0365016
## [133,] 0.17470593 1.0381937
## [134,] 0.45618817 1.0355213
## [135,] 0.64466311 1.0399409
## [136,] 0.40789855 1.0458307
## [137,] 0.30384733 1.0486403
## [138,] 0.20537706 1.0402194
## [139,] 0.43298978 1.0378410
## [140,] 0.29835882 1.0392305
## [141,] 0.57909781 1.0547494
## [142,] 0.77426037 1.0534721
## [143,] 0.62023284 1.0672179
## [144,] 1.28561036 1.0834346
## [145,] 0.91160884 1.0842260
## [146,] 1.28698505 1.0922625
## [147,] 1.39000000 1.0284284
## [148,] 1.36000000 1.0289637
## [149,] 1.24000000 1.0196553
## [150,] 1.50000000 1.0195059
## [151,] 1.40000000 1.0220072
## [152,] 0.73000000 1.0254181
## [153,] 0.87000000 1.0330423
## [154,] 0.90000000 1.0402746
## [155,] 0.92000000 1.0439423
## [156,] 0.80000000 1.0651883
## [157,] 0.70000000 1.0665092
## [158,] 0.52890000 1.0678173
## [159,] 0.63860000 1.0680850
## [160,] 0.72010000 1.0709482
## [161,] 0.73680000 1.0782262
## [162,] 0.79480000 1.0875703
## [163,] 0.87030000 1.0949017
## [164,] 0.87430000 1.1011192
## [165,] 0.92575846 0.9920024
## [166,] 0.47213600 1.0407516
## [167,] 0.42110200 1.0428477
## [168,] 0.83711072 1.0319816
## [169,] 0.39705968 1.0210605
## [170,] 0.41827821 0.9880268
## [171,] 0.94750874 1.0018175
## [172,] 0.47728075 0.9671545
## [173,] 0.31888472 0.9646020
## [174,] 0.49433332 0.9693475
## [175,] 0.64435257 0.9736378
## [176,] 0.94790711 0.9786611
## [177,] 0.60154623 0.9818729
## [178,] 0.79896154 0.9891089
## [179,] 0.82903066 0.9895055
## [180,] 0.84731301 0.9801937
## [181,] 1.25402406 1.0067387
## [182,] 1.29524002 1.0077505
## [183,] 1.12445687 1.0080526
## [184,] 0.76460952 1.0167087
## [185,] 0.60472078 1.0189228
## [186,] 0.78263081 1.0150296
## [187,] 0.85417687 0.9950276
## [188,] 1.18236736 1.0079919
## [189,] 1.30272535 1.0070030
## [190,] 0.44160483 1.0516093
## [191,] 0.43516461 1.0813341
## [192,] 0.97095391 1.0776889
## [193,] 1.24140753 1.0955748
## [194,] 0.63545913 1.0689019
## [195,] 0.76688912 1.0687485
## [196,] 1.11772521 1.0449076
## [197,] 1.00492063 1.1236196
## [198,] 1.60326333 1.0842026
## [199,] 1.71228357 1.0145486
## [200,] 1.19005992 1.0122477
## [201,] 1.50631848 1.0213212
## [202,] 1.89334260 1.0390588
## [203,] 1.83229992 1.0348875
## [204,] 1.59301104 1.0383884
## [205,] 1.28397065 0.9848446
## [206,] 1.41415468 1.0263246
## [207,] 1.61272465 1.0099256
## [208,] 1.17439457 0.9817660
## [209,] 1.25637940 0.9836212
## [210,] 1.00937926 0.9784175
## [211,] 0.87254123 0.9781986
## [212,] 1.01243609 0.9817337
## [213,] 0.74004443 0.9822878
## [214,] 1.36474500 0.9538162
## [215,] 1.92724705 0.9825813
## [216,] 2.56008457 0.9965969
## [217,] 2.22878667 1.0380283
## [218,] 2.34920614 1.0378651
## [219,] 1.99519281 1.0374921
## [220,] 2.34635482 1.0273295
## [221,] 1.99471783 1.0381777
## [222,] 1.68582505 1.0544859
## [223,] 1.77456795 1.0260272
## [224,] 1.03951557 1.0725195
## [225,] 1.75292704 1.0921957
myfit1$terms
## y ~ (id + time + tx.pres + gini.ibge + perc.jov.1524 + perc.hom + 
##     pbf + densidade.urbana1 + Taxa.desligamentos + taxa.casamentos) - 
##     id
## attr(,"variables")
## list(y, id, time, tx.pres, gini.ibge, perc.jov.1524, perc.hom, 
##     pbf, densidade.urbana1, Taxa.desligamentos, taxa.casamentos)
## attr(,"factors")
##                    time tx.pres gini.ibge perc.jov.1524 perc.hom pbf
## y                     0       0         0             0        0   0
## id                    0       0         0             0        0   0
## time                  1       0         0             0        0   0
## tx.pres               0       1         0             0        0   0
## gini.ibge             0       0         1             0        0   0
## perc.jov.1524         0       0         0             1        0   0
## perc.hom              0       0         0             0        1   0
## pbf                   0       0         0             0        0   1
## densidade.urbana1     0       0         0             0        0   0
## Taxa.desligamentos    0       0         0             0        0   0
## taxa.casamentos       0       0         0             0        0   0
##                    densidade.urbana1 Taxa.desligamentos taxa.casamentos
## y                                  0                  0               0
## id                                 0                  0               0
## time                               0                  0               0
## tx.pres                            0                  0               0
## gini.ibge                          0                  0               0
## perc.jov.1524                      0                  0               0
## perc.hom                           0                  0               0
## pbf                                0                  0               0
## densidade.urbana1                  1                  0               0
## Taxa.desligamentos                 0                  1               0
## taxa.casamentos                    0                  0               1
## attr(,"term.labels")
## [1] "time"               "tx.pres"            "gini.ibge"         
## [4] "perc.jov.1524"      "perc.hom"           "pbf"               
## [7] "densidade.urbana1"  "Taxa.desligamentos" "taxa.casamentos"   
## attr(,"order")
## [1] 1 1 1 1 1 1 1 1 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: 0x00000000199a4c50>
## attr(,"predvars")
## list(y, id, time, tx.pres, gini.ibge, perc.jov.1524, perc.hom, 
##     pbf, densidade.urbana1, Taxa.desligamentos, taxa.casamentos)
## attr(,"dataClasses")
##                  y                 id               time            tx.pres 
##          "numeric"          "numeric"          "numeric"          "numeric" 
##          gini.ibge      perc.jov.1524           perc.hom                pbf 
##          "numeric"          "numeric"          "numeric"          "numeric" 
##  densidade.urbana1 Taxa.desligamentos    taxa.casamentos               (id) 
##          "numeric"          "numeric"          "numeric"          "numeric"
summary(myfit1)
## 
##  PGEE: PENALIZED GENERALIZED ESTIMATING EQUATIONS FOR LONGITUDINAL DATA
##  Version: 1.5 
## 
## Model:
##  Link:                      Identity 
##  Variance to Mean Relation: Gaussian 
##  Correlation Structure:     Independent 
## 
## Call:
## PGEE(formula = formula1, id = id, data = dados2, na.action = NULL, 
##     family = gaussian(link = "identity"), corstr = "independence", 
##     Mv = NULL, beta_int = c(rep(0, dim(dados2)[2] - 1)), R = NULL, 
##     scale.fix = TRUE, scale.value = 1, lambda = cv$lam.opt, eps = 10^-6, 
##     maxiter = 30, tol = 10^-6, silent = TRUE)
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.9897126 -0.4098859 -0.1328806  0.2727582  3.6068000 
## 
## 
## Coefficients:
##                         Estimate   Naive S.E.       Naive z  Robust S.E.
## (Intercept)        -1.200851e-09 0.0004305898 -2.788851e-06 1.714589e-09
## time                3.675895e-04 0.0002753788  1.334850e+00 1.755748e-04
## tx.pres             3.208556e-04 0.0006381484  5.027915e-01 6.157490e-04
## gini.ibge          -1.695845e-08 0.0004339650 -3.907791e-05 1.609202e-07
## perc.jov.1524       5.592343e-03 0.0239470503  2.335295e-01 1.241823e-02
## perc.hom            6.035025e-05 0.0033672548  1.792269e-02 2.219919e-04
## pbf                 1.010506e-04 0.0007786820  1.297714e-01 8.183733e-04
## densidade.urbana1   1.714984e-03 0.0026420954  6.490998e-01 2.994008e-03
## Taxa.desligamentos -4.416234e-08 0.0004397310 -1.004303e-04 7.693049e-08
## taxa.casamentos    -2.690528e-09 0.0004309100 -6.243828e-06 3.010371e-09
##                      Robust z
## (Intercept)        -0.7003723
## time                2.0936348
## tx.pres             0.5210818
## gini.ibge          -0.1053842
## perc.jov.1524       0.4503335
## perc.hom            0.2718578
## pbf                 0.1234774
## densidade.urbana1   0.5728052
## Taxa.desligamentos -0.5740550
## taxa.casamentos    -0.8937528
## 
## Estimated Scale Parameter:  1
## Lambda value:  0.2
## Number of Iterations:  30
## 
## Working Correlation
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    0    0    0    0    0    0    0    0
##  [2,]    0    1    0    0    0    0    0    0    0
##  [3,]    0    0    1    0    0    0    0    0    0
##  [4,]    0    0    0    1    0    0    0    0    0
##  [5,]    0    0    0    0    1    0    0    0    0
##  [6,]    0    0    0    0    0    1    0    0    0
##  [7,]    0    0    0    0    0    0    1    0    0
##  [8,]    0    0    0    0    0    0    0    1    0
##  [9,]    0    0    0    0    0    0    0    0    1
summary(myfit1$residuals)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.989713 -0.409886 -0.132881 -0.000029  0.272758  3.606800