Análise 2: Penalized Generalized Estimating Equations in High-Dimension longitudinal data https://cran.r-project.org/web/packages/PGEE/PGEE.pdf https://journal.r-project.org/archive/2017/RJ-2017-030/RJ-2017-030.pdf
#install.packages("PGEE")
require(PGEE)
require(DT) #para a fun??o datatable
require(tidyverse)
require(readxl)
require(tidyr)
require(corrplot)
require(MASS)
require(mvtnorm)
dados=read_excel("dados_renato.xlsx",sheet=1)
names(dados)=c("Ano","UF","Cod.UF","tx.latrc","tx.pres","gini.ibge","perc.jov.1524","perc.hom","pbf","densidade.urbana1","densidade.urbana2","taxa.casamentos","Taxa.desligamentos","raz.2020","raz.1040")
#Precisa estar nesta ordem
dados2=dados[complete.cases(dados),] %>%
select(Cod.UF,tx.latrc,Ano,tx.pres,gini.ibge,perc.jov.1524,perc.hom,pbf,densidade.urbana1,Taxa.desligamentos,taxa.casamentos)%>%
rename(id=Cod.UF,y=tx.latrc,time=Ano)
Passo 1: we needed to compute the cross-validated value of tuning paremeter over a grid via the function CVfit
https://civil.colorado.edu/~balajir/CVEN6833/lectures/GammaGLM-01.pdf
formula1 = "y ~.-id"
lambda.vec <- seq(0.01,0.2,0.01)
cv <- CVfit(formula = formula1, id = id, data = dados2, family = gaussian(link = "identity"), scale.fix = TRUE, scale.value = 1, fold = 8, lambda.vec = lambda.vec, eps = 10^-6, maxiter = 30, tol = 10^-6)
cv$lam.opt
## [1] 0.2
Passo 2: Penalizada After selecting the tuning parameter λ via the function CVfit, we apply the PGEE function with the working correlation matrix type corstr = “independence” as follows
myfit1 <- PGEE(formula = formula1, id = id, data = dados2, na.action = NULL,
family = gaussian(link="identity"), corstr = "independence" , Mv = NULL,
beta_int = c(rep(0,dim(dados2)[2]-1)), R = NULL, scale.fix = TRUE,
scale.value = 1, lambda = cv$lam.opt, eps = 10^-6,
maxiter = 30, tol = 10^-6, silent = TRUE)
## Warning in model.matrix.default(Terms, m, contrasts): non-list contrasts
## argument ignored
# see a portion of the results returned by coef(summary(myfit1))
coef(summary(myfit1))
## Estimate Naive S.E. Naive z Robust S.E.
## (Intercept) -1.200851e-09 0.0004305898 -2.788851e-06 1.714589e-09
## time 3.675895e-04 0.0002753788 1.334850e+00 1.755748e-04
## tx.pres 3.208556e-04 0.0006381484 5.027915e-01 6.157490e-04
## gini.ibge -1.695845e-08 0.0004339650 -3.907791e-05 1.609202e-07
## perc.jov.1524 5.592343e-03 0.0239470503 2.335295e-01 1.241823e-02
## perc.hom 6.035025e-05 0.0033672548 1.792269e-02 2.219919e-04
## pbf 1.010506e-04 0.0007786820 1.297714e-01 8.183733e-04
## densidade.urbana1 1.714984e-03 0.0026420954 6.490998e-01 2.994008e-03
## Taxa.desligamentos -4.416234e-08 0.0004397310 -1.004303e-04 7.693049e-08
## taxa.casamentos -2.690528e-09 0.0004309100 -6.243828e-06 3.010371e-09
## Robust z
## (Intercept) -0.7003723
## time 2.0936348
## tx.pres 0.5210818
## gini.ibge -0.1053842
## perc.jov.1524 0.4503335
## perc.hom 0.2718578
## pbf 0.1234774
## densidade.urbana1 0.5728052
## Taxa.desligamentos -0.5740550
## taxa.casamentos -0.8937528
Any regression estimate less than 10−3 in magnitude can be considered as equal to 0 (and thus not selected) in PGEE. In this sense, we obtained the variables whose regression estimates greater than 10−3 and their summary statistics as follows:
index1 <- which(abs(coef(summary(myfit1))[,"Estimate"]) > 10^-3)
names(which(abs(coef(summary(myfit1))[index1,"Robust z"]) > 1.96))
## character(0)
coef(summary(myfit1))[index1,]
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## perc.jov.1524 0.005592343 0.023947050 0.2335295 0.012418225 0.4503335
## densidade.urbana1 0.001714984 0.002642095 0.6490998 0.002994008 0.5728052
Não penalizado: For comparison, we fitted the unpenalized GEEs via MGEE function under the same settings defined above as follows:
# fit the GEE model
myfit2 <- MGEE(formula = formula1, id = id, data = dados2, na.action = NULL,
family = gaussian(link="identity"), corstr = "independence", Mv = NULL,
beta_int = c(rep(0,dim(dados2)[2]-1)), R = NULL, scale.fix = TRUE,
scale.value = 1, maxiter = 30, tol = 10^-6, silent = TRUE)
## Warning in model.matrix.default(Terms, m, contrasts): non-list contrasts
## argument ignored
index2 <- which(abs(coef(summary(myfit2))[,"Estimate"]) > 10^-3)
names(which(abs(coef(summary(myfit1))[index2,"Robust z"]) > 1.96))
## character(0)
coef(summary(myfit2))[index2,]
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## perc.jov.1524 0.005286642 0.042122408 0.1255066 0.037964744 0.1392513
## perc.hom 0.027630307 0.085121154 0.3245998 0.135967215 0.2032130
## densidade.urbana1 0.002241512 0.003153488 0.7108041 0.003797535 0.5902545
names(myfit1)
## [1] "title" "version" "model"
## [4] "call" "terms" "nobs"
## [7] "iterations" "coefficients" "nas"
## [10] "linear.predictors" "fitted.values" "residuals"
## [13] "family" "y" "id"
## [16] "max.id" "working.correlation" "scale"
## [19] "epsilon" "lambda.value" "robust.variance"
## [22] "naive.variance" "xnames" "error"
Diagnostico ainda vago: https://myweb.uiowa.edu/pbreheny/uk/teaching/760-s13/notes/3-26.pdf
hist(myfit1$residuals)
plot(myfit1$residuals)
qqnorm(myfit1$residuals)
qqline(myfit1$residuals)
shapiro.test(myfit1$residuals)
##
## Shapiro-Wilk normality test
##
## data: myfit1$residuals
## W = 0.88497, p-value = 4.642e-12
plot(dados2$y,myfit1$fitted.values,xlim=c(0,4),ylim=c(0,4))
abline(0,1)
cbind(dados2$y,myfit1$fitted.values)
## [,1] [,2]
## [1,] 1.30327631 1.0261946
## [2,] 1.60008500 1.0232028
## [3,] 1.51332136 1.0426194
## [4,] 1.13821552 1.0561372
## [5,] 0.99738817 1.0808237
## [6,] 0.76120156 1.0587079
## [7,] 0.88049705 1.0821805
## [8,] 0.57863204 1.0703937
## [9,] 0.14931256 1.0770079
## [10,] 0.15258207 1.0796324
## [11,] 0.14704304 1.1096289
## [12,] 1.73628193 1.1173911
## [13,] 1.47376826 1.1323079
## [14,] 1.31789464 1.1281714
## [15,] 1.67425879 1.1628404
## [16,] 0.88596268 1.1557058
## [17,] 0.61874870 1.0288709
## [18,] 0.74489283 1.0351666
## [19,] 0.83804835 1.0428261
## [20,] 0.82513867 1.0451197
## [21,] 1.21524299 1.0648006
## [22,] 1.11390050 1.0801757
## [23,] 0.94539776 1.0945947
## [24,] 1.16166715 1.0984036
## [25,] 0.25554729 1.0204645
## [26,] 0.25270074 1.0398956
## [27,] 0.24225804 1.0465967
## [28,] 0.47449697 1.0484650
## [29,] 0.21731335 1.0552674
## [30,] 0.61466341 1.0497324
## [31,] 0.60369947 1.0496263
## [32,] 2.02278546 1.0350778
## [33,] 3.42505838 1.0387552
## [34,] 2.82729219 1.0351992
## [35,] 2.12622224 1.0414125
## [36,] 1.82087932 1.0529391
## [37,] 1.69392322 1.0496043
## [38,] 1.95742500 1.0507694
## [39,] 2.22939924 1.0495433
## [40,] 0.67273587 1.0521301
## [41,] 0.85133771 1.0572161
## [42,] 3.58794711 1.0565381
## [43,] 2.71301561 1.0551391
## [44,] 1.00200114 1.0703295
## [45,] 1.22449646 1.1029963
## [46,] 1.99757095 1.0946849
## [47,] 0.91902755 0.9713458
## [48,] 0.96491955 0.9883083
## [49,] 0.54665762 0.9839161
## [50,] 1.00615223 0.9761908
## [51,] 1.14212944 0.9907638
## [52,] 0.56429667 0.9903998
## [53,] 1.35302984 1.0074575
## [54,] 0.93527871 1.0543298
## [55,] 0.85199433 1.0557584
## [56,] 1.01323829 1.0482950
## [57,] 1.01498064 1.0435036
## [58,] 0.98945555 1.0453367
## [59,] 1.47462420 1.0483240
## [60,] 0.80425193 1.0471195
## [61,] 0.92724770 1.0525151
## [62,] 1.05095926 1.0558082
## [63,] 0.23279906 1.0523062
## [64,] 0.06595360 1.0556662
## [65,] 0.09616319 1.0556277
## [66,] 0.19075930 1.0476172
## [67,] 0.12737523 1.0441018
## [68,] 0.63276161 1.0526501
## [69,] 0.91075654 1.0525451
## [70,] 1.09555836 1.0537858
## [71,] 1.23498322 1.1317831
## [72,] 1.53935433 1.1260161
## [73,] 1.05318875 1.1325014
## [74,] 0.98270797 1.1267986
## [75,] 0.89095685 1.1313699
## [76,] 1.17359913 1.1382955
## [77,] 1.21887650 1.1474804
## [78,] 0.84814851 1.1390976
## [79,] 0.73257951 1.1439583
## [80,] 1.29407314 1.1277635
## [81,] 0.70820846 1.1276483
## [82,] 0.60556978 1.1321867
## [83,] 0.53147305 1.1356286
## [84,] 0.27879331 1.1401513
## [85,] 0.47422034 1.1227391
## [86,] 1.81897662 1.1312813
## [87,] 0.80647718 1.1308174
## [88,] 0.87878361 1.1377127
## [89,] 1.01533530 1.1390460
## [90,] 0.71618474 1.1343030
## [91,] 0.71215396 1.1202310
## [92,] 0.55043404 1.1250842
## [93,] 0.71530374 1.1251897
## [94,] 0.48175847 1.1322708
## [95,] 4.78986801 1.1830680
## [96,] 2.11699876 1.1683984
## [97,] 1.28437197 1.1791960
## [98,] 1.45405518 1.1804178
## [99,] 1.27124569 1.1830151
## [100,] 0.87987397 1.1846537
## [101,] 0.94054122 1.1860545
## [102,] 0.79274153 1.1963337
## [103,] 0.87305867 1.1637315
## [104,] 0.92840905 1.1412606
## [105,] 0.85604289 1.1185919
## [106,] 1.31092735 1.1196110
## [107,] 0.95053781 1.1316803
## [108,] 1.14526256 1.1287186
## [109,] 2.74840529 1.1350121
## [110,] 2.39326130 1.1415999
## [111,] 1.83639248 1.1456389
## [112,] 0.50818405 1.1853784
## [113,] 0.61873977 1.1511433
## [114,] 0.75023482 1.1400765
## [115,] 1.33684610 1.1381656
## [116,] 1.14842482 1.1248535
## [117,] 1.70546036 1.1373129
## [118,] 1.59405227 1.1329075
## [119,] 1.48677179 1.1257313
## [120,] 1.00000000 1.0866850
## [121,] 0.80000000 1.0967604
## [122,] 0.79000000 1.0897386
## [123,] 0.83000000 1.0903125
## [124,] 0.83000000 1.0848337
## [125,] 0.89000000 1.0680166
## [126,] 1.23000000 1.0694539
## [127,] 1.01000000 1.0704970
## [128,] 1.32000000 1.0688514
## [129,] 0.77972912 1.0133314
## [130,] 0.54416583 1.0298453
## [131,] 0.42545391 1.0258430
## [132,] 0.31737920 1.0365016
## [133,] 0.17470593 1.0381937
## [134,] 0.45618817 1.0355213
## [135,] 0.64466311 1.0399409
## [136,] 0.40789855 1.0458307
## [137,] 0.30384733 1.0486403
## [138,] 0.20537706 1.0402194
## [139,] 0.43298978 1.0378410
## [140,] 0.29835882 1.0392305
## [141,] 0.57909781 1.0547494
## [142,] 0.77426037 1.0534721
## [143,] 0.62023284 1.0672179
## [144,] 1.28561036 1.0834346
## [145,] 0.91160884 1.0842260
## [146,] 1.28698505 1.0922625
## [147,] 1.39000000 1.0284284
## [148,] 1.36000000 1.0289637
## [149,] 1.24000000 1.0196553
## [150,] 1.50000000 1.0195059
## [151,] 1.40000000 1.0220072
## [152,] 0.73000000 1.0254181
## [153,] 0.87000000 1.0330423
## [154,] 0.90000000 1.0402746
## [155,] 0.92000000 1.0439423
## [156,] 0.80000000 1.0651883
## [157,] 0.70000000 1.0665092
## [158,] 0.52890000 1.0678173
## [159,] 0.63860000 1.0680850
## [160,] 0.72010000 1.0709482
## [161,] 0.73680000 1.0782262
## [162,] 0.79480000 1.0875703
## [163,] 0.87030000 1.0949017
## [164,] 0.87430000 1.1011192
## [165,] 0.92575846 0.9920024
## [166,] 0.47213600 1.0407516
## [167,] 0.42110200 1.0428477
## [168,] 0.83711072 1.0319816
## [169,] 0.39705968 1.0210605
## [170,] 0.41827821 0.9880268
## [171,] 0.94750874 1.0018175
## [172,] 0.47728075 0.9671545
## [173,] 0.31888472 0.9646020
## [174,] 0.49433332 0.9693475
## [175,] 0.64435257 0.9736378
## [176,] 0.94790711 0.9786611
## [177,] 0.60154623 0.9818729
## [178,] 0.79896154 0.9891089
## [179,] 0.82903066 0.9895055
## [180,] 0.84731301 0.9801937
## [181,] 1.25402406 1.0067387
## [182,] 1.29524002 1.0077505
## [183,] 1.12445687 1.0080526
## [184,] 0.76460952 1.0167087
## [185,] 0.60472078 1.0189228
## [186,] 0.78263081 1.0150296
## [187,] 0.85417687 0.9950276
## [188,] 1.18236736 1.0079919
## [189,] 1.30272535 1.0070030
## [190,] 0.44160483 1.0516093
## [191,] 0.43516461 1.0813341
## [192,] 0.97095391 1.0776889
## [193,] 1.24140753 1.0955748
## [194,] 0.63545913 1.0689019
## [195,] 0.76688912 1.0687485
## [196,] 1.11772521 1.0449076
## [197,] 1.00492063 1.1236196
## [198,] 1.60326333 1.0842026
## [199,] 1.71228357 1.0145486
## [200,] 1.19005992 1.0122477
## [201,] 1.50631848 1.0213212
## [202,] 1.89334260 1.0390588
## [203,] 1.83229992 1.0348875
## [204,] 1.59301104 1.0383884
## [205,] 1.28397065 0.9848446
## [206,] 1.41415468 1.0263246
## [207,] 1.61272465 1.0099256
## [208,] 1.17439457 0.9817660
## [209,] 1.25637940 0.9836212
## [210,] 1.00937926 0.9784175
## [211,] 0.87254123 0.9781986
## [212,] 1.01243609 0.9817337
## [213,] 0.74004443 0.9822878
## [214,] 1.36474500 0.9538162
## [215,] 1.92724705 0.9825813
## [216,] 2.56008457 0.9965969
## [217,] 2.22878667 1.0380283
## [218,] 2.34920614 1.0378651
## [219,] 1.99519281 1.0374921
## [220,] 2.34635482 1.0273295
## [221,] 1.99471783 1.0381777
## [222,] 1.68582505 1.0544859
## [223,] 1.77456795 1.0260272
## [224,] 1.03951557 1.0725195
## [225,] 1.75292704 1.0921957
myfit1$terms
## y ~ (id + time + tx.pres + gini.ibge + perc.jov.1524 + perc.hom +
## pbf + densidade.urbana1 + Taxa.desligamentos + taxa.casamentos) -
## id
## attr(,"variables")
## list(y, id, time, tx.pres, gini.ibge, perc.jov.1524, perc.hom,
## pbf, densidade.urbana1, Taxa.desligamentos, taxa.casamentos)
## attr(,"factors")
## time tx.pres gini.ibge perc.jov.1524 perc.hom pbf
## y 0 0 0 0 0 0
## id 0 0 0 0 0 0
## time 1 0 0 0 0 0
## tx.pres 0 1 0 0 0 0
## gini.ibge 0 0 1 0 0 0
## perc.jov.1524 0 0 0 1 0 0
## perc.hom 0 0 0 0 1 0
## pbf 0 0 0 0 0 1
## densidade.urbana1 0 0 0 0 0 0
## Taxa.desligamentos 0 0 0 0 0 0
## taxa.casamentos 0 0 0 0 0 0
## densidade.urbana1 Taxa.desligamentos taxa.casamentos
## y 0 0 0
## id 0 0 0
## time 0 0 0
## tx.pres 0 0 0
## gini.ibge 0 0 0
## perc.jov.1524 0 0 0
## perc.hom 0 0 0
## pbf 0 0 0
## densidade.urbana1 1 0 0
## Taxa.desligamentos 0 1 0
## taxa.casamentos 0 0 1
## attr(,"term.labels")
## [1] "time" "tx.pres" "gini.ibge"
## [4] "perc.jov.1524" "perc.hom" "pbf"
## [7] "densidade.urbana1" "Taxa.desligamentos" "taxa.casamentos"
## attr(,"order")
## [1] 1 1 1 1 1 1 1 1 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: 0x00000000199a4c50>
## attr(,"predvars")
## list(y, id, time, tx.pres, gini.ibge, perc.jov.1524, perc.hom,
## pbf, densidade.urbana1, Taxa.desligamentos, taxa.casamentos)
## attr(,"dataClasses")
## y id time tx.pres
## "numeric" "numeric" "numeric" "numeric"
## gini.ibge perc.jov.1524 perc.hom pbf
## "numeric" "numeric" "numeric" "numeric"
## densidade.urbana1 Taxa.desligamentos taxa.casamentos (id)
## "numeric" "numeric" "numeric" "numeric"
summary(myfit1)
##
## PGEE: PENALIZED GENERALIZED ESTIMATING EQUATIONS FOR LONGITUDINAL DATA
## Version: 1.5
##
## Model:
## Link: Identity
## Variance to Mean Relation: Gaussian
## Correlation Structure: Independent
##
## Call:
## PGEE(formula = formula1, id = id, data = dados2, na.action = NULL,
## family = gaussian(link = "identity"), corstr = "independence",
## Mv = NULL, beta_int = c(rep(0, dim(dados2)[2] - 1)), R = NULL,
## scale.fix = TRUE, scale.value = 1, lambda = cv$lam.opt, eps = 10^-6,
## maxiter = 30, tol = 10^-6, silent = TRUE)
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -0.9897126 -0.4098859 -0.1328806 0.2727582 3.6068000
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E.
## (Intercept) -1.200851e-09 0.0004305898 -2.788851e-06 1.714589e-09
## time 3.675895e-04 0.0002753788 1.334850e+00 1.755748e-04
## tx.pres 3.208556e-04 0.0006381484 5.027915e-01 6.157490e-04
## gini.ibge -1.695845e-08 0.0004339650 -3.907791e-05 1.609202e-07
## perc.jov.1524 5.592343e-03 0.0239470503 2.335295e-01 1.241823e-02
## perc.hom 6.035025e-05 0.0033672548 1.792269e-02 2.219919e-04
## pbf 1.010506e-04 0.0007786820 1.297714e-01 8.183733e-04
## densidade.urbana1 1.714984e-03 0.0026420954 6.490998e-01 2.994008e-03
## Taxa.desligamentos -4.416234e-08 0.0004397310 -1.004303e-04 7.693049e-08
## taxa.casamentos -2.690528e-09 0.0004309100 -6.243828e-06 3.010371e-09
## Robust z
## (Intercept) -0.7003723
## time 2.0936348
## tx.pres 0.5210818
## gini.ibge -0.1053842
## perc.jov.1524 0.4503335
## perc.hom 0.2718578
## pbf 0.1234774
## densidade.urbana1 0.5728052
## Taxa.desligamentos -0.5740550
## taxa.casamentos -0.8937528
##
## Estimated Scale Parameter: 1
## Lambda value: 0.2
## Number of Iterations: 30
##
## Working Correlation
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 0 0 0 0 0 0 0 0
## [2,] 0 1 0 0 0 0 0 0 0
## [3,] 0 0 1 0 0 0 0 0 0
## [4,] 0 0 0 1 0 0 0 0 0
## [5,] 0 0 0 0 1 0 0 0 0
## [6,] 0 0 0 0 0 1 0 0 0
## [7,] 0 0 0 0 0 0 1 0 0
## [8,] 0 0 0 0 0 0 0 1 0
## [9,] 0 0 0 0 0 0 0 0 1
summary(myfit1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.989713 -0.409886 -0.132881 -0.000029 0.272758 3.606800