# Método
###Estudo Transversal
require(RcmdrMisc)
require(dplyr)
library(colorspace, pos=33)
#notifcações match
banco <- read.csv("C:/Users/edson/Downloads/Base_de_dados_match (1).csv", comment.char="#")
banco$recorrencia2<-0
banco$recorrencia2[banco$recorrencia!="1" & banco$diff<365]<-1
banco$recorrencia2<-as.factor(banco$recorrencia2)
banco$CS_ESCOL_N<-as.factor(banco$CS_ESCOL_N)
#repetição
## Load survival package
library(survival)
#remoção de duplicatas
dataset <- banco
dataset$NU_IDADE_N<-dataset$NU_IDADE_N-4000
#dataset$recorrencia2<-0
#dataset$recorrencia2[dataset$recorrencia>2 & dataset$diff<60]<-1
#banco$recorrencia2<-as.factor(banco$recorrencia2)
dataset<-arrange(dataset,desc(dataset$DT_NOTIFIC))
dataset<-dplyr::distinct(dataset,dataset$study_ID,.keep_all=T)
tabela <- within(dataset, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
recorrencia2 <- as.factor(recorrencia2)
})
dataset$TRA_ESQUEM<-as.factor(dataset$TRA_ESQUEM)
#criar variável recorrência
tabela<-subset(dataset, select=c(NU_IDADE_N , CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, RESULT,
PMM , PCRUZ , TRA_ESQUEM , MES , recorrencia2,
diff, LVC , Tempo_class))
#teste de hiotese em banco bruto as.it is
require(gtsummary)
gtsummary::tbl_summary(tabela)
Characteristic |
N = 6,234 |
NU_IDADE_N |
36 (27, 48) |
CS_SEXO |
|
F |
1,374 (22%) |
M |
4,860 (78%) |
CS_GESTANT |
|
1 |
9 (0.1%) |
2 |
18 (0.3%) |
3 |
9 (0.1%) |
4 |
2 (<0.1%) |
5 |
971 (16%) |
6 |
5,079 (81%) |
9 |
146 (2.3%) |
CS_RACA |
|
1 |
2,859 (47%) |
2 |
313 (5.1%) |
3 |
50 (0.8%) |
4 |
2,419 (40%) |
5 |
125 (2.0%) |
9 |
354 (5.8%) |
Unknown |
114 |
CS_ESCOL_N |
|
0 |
85 (1.5%) |
1 |
571 (10.0%) |
2 |
271 (4.7%) |
3 |
721 (13%) |
4 |
514 (9.0%) |
5 |
424 (7.4%) |
6 |
869 (15%) |
7 |
181 (3.2%) |
8 |
629 (11%) |
9 |
1,285 (22%) |
10 |
165 (2.9%) |
Unknown |
519 |
RESULT |
|
4 |
5,841 (94%) |
5 |
272 (4.4%) |
6 |
121 (1.9%) |
PMM |
501 (118, 4,088) |
Unknown |
1,887 |
PCRUZ |
|
1 |
1,192 (19%) |
2 |
759 (12%) |
3 |
935 (15%) |
4 |
2,577 (41%) |
5 |
581 (9.3%) |
6 |
190 (3.0%) |
TRA_ESQUEM |
|
1 |
4,499 (75%) |
2 |
45 (0.8%) |
3 |
80 (1.3%) |
4 |
65 (1.1%) |
5 |
19 (0.3%) |
6 |
13 (0.2%) |
7 |
18 (0.3%) |
8 |
5 (<0.1%) |
9 |
36 (0.6%) |
10 |
34 (0.6%) |
11 |
43 (0.7%) |
12 |
21 (0.4%) |
99 |
1,096 (18%) |
Unknown |
260 |
MES |
7 (4, 10) |
Unknown |
64 |
recorrencia2 |
|
0 |
5,542 (89%) |
1 |
692 (11%) |
diff |
0 (0, 0) |
LVC |
|
LVC |
635 (10%) |
Ñ_LVC |
5,599 (90%) |
Tempo_class |
|
maior_60_dias |
475 (7.6%) |
menor_igual_60_dias |
5,759 (92%) |
gtsummary::tbl_summary(tabela,by="recorrencia2")%>%add_p(simulate.p.value=TRUE)
Characteristic |
0, N = 5,542 |
1, N = 692 |
p-value |
NU_IDADE_N |
36 (26, 48) |
37 (28, 50) |
0.037 |
CS_SEXO |
|
|
0.017 |
F |
1,246 (22%) |
128 (18%) |
|
M |
4,296 (78%) |
564 (82%) |
|
CS_GESTANT |
|
|
0.7 |
1 |
9 (0.2%) |
0 (0%) |
|
2 |
17 (0.3%) |
1 (0.1%) |
|
3 |
9 (0.2%) |
0 (0%) |
|
4 |
2 (<0.1%) |
0 (0%) |
|
5 |
873 (16%) |
98 (14%) |
|
6 |
4,499 (81%) |
580 (84%) |
|
9 |
133 (2.4%) |
13 (1.9%) |
|
CS_RACA |
|
|
0.3 |
1 |
2,534 (47%) |
325 (48%) |
|
2 |
272 (5.0%) |
41 (6.0%) |
|
3 |
48 (0.9%) |
2 (0.3%) |
|
4 |
2,151 (40%) |
268 (39%) |
|
5 |
116 (2.1%) |
9 (1.3%) |
|
9 |
315 (5.8%) |
39 (5.7%) |
|
Unknown |
106 |
8 |
|
CS_ESCOL_N |
|
|
0.3 |
0 |
80 (1.6%) |
5 (0.8%) |
|
1 |
509 (10%) |
62 (9.6%) |
|
2 |
244 (4.8%) |
27 (4.2%) |
|
3 |
647 (13%) |
74 (12%) |
|
4 |
447 (8.8%) |
67 (10%) |
|
5 |
385 (7.6%) |
39 (6.1%) |
|
6 |
756 (15%) |
113 (18%) |
|
7 |
161 (3.2%) |
20 (3.1%) |
|
8 |
554 (11%) |
75 (12%) |
|
9 |
1,138 (22%) |
147 (23%) |
|
10 |
151 (3.0%) |
14 (2.2%) |
|
Unknown |
470 |
49 |
|
RESULT |
|
|
0.4 |
4 |
5,189 (94%) |
652 (94%) |
|
5 |
248 (4.5%) |
24 (3.5%) |
|
6 |
105 (1.9%) |
16 (2.3%) |
|
PMM |
500 (100, 4,000) |
501 (200, 5,158) |
0.003 |
Unknown |
1,649 |
238 |
|
PCRUZ |
|
|
0.022 |
1 |
1,089 (20%) |
103 (15%) |
|
2 |
684 (12%) |
75 (11%) |
|
3 |
829 (15%) |
106 (15%) |
|
4 |
2,258 (41%) |
319 (46%) |
|
5 |
512 (9.2%) |
69 (10.0%) |
|
6 |
170 (3.1%) |
20 (2.9%) |
|
TRA_ESQUEM |
|
|
|
1 |
4,077 (77%) |
422 (64%) |
|
2 |
45 (0.8%) |
0 (0%) |
|
3 |
68 (1.3%) |
12 (1.8%) |
|
4 |
56 (1.1%) |
9 (1.4%) |
|
5 |
19 (0.4%) |
0 (0%) |
|
6 |
13 (0.2%) |
0 (0%) |
|
7 |
15 (0.3%) |
3 (0.5%) |
|
8 |
3 (<0.1%) |
2 (0.3%) |
|
9 |
30 (0.6%) |
6 (0.9%) |
|
10 |
16 (0.3%) |
18 (2.7%) |
|
11 |
43 (0.8%) |
0 (0%) |
|
12 |
21 (0.4%) |
0 (0%) |
|
99 |
908 (17%) |
188 (28%) |
|
Unknown |
228 |
32 |
|
MES |
7 (4, 10) |
7 (4, 10) |
0.082 |
Unknown |
58 |
6 |
|
diff |
0 (0, 0) |
70 (38, 111) |
<0.001 |
LVC |
|
|
<0.001 |
LVC |
387 (7.0%) |
248 (36%) |
|
Ñ_LVC |
5,155 (93%) |
444 (64%) |
|
Tempo_class |
|
|
<0.001 |
maior_60_dias |
65 (1.2%) |
410 (59%) |
|
menor_igual_60_dias |
5,477 (99%) |
282 (41%) |
|
#levando em conta subset de recorrências
#Script de Cox Exaustivo
options(digits=3)
dataset <- within(dataset, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
TRA_ESQUEM <- as.factor(TRA_ESQUEM)
})
## Create outcome variable, then survival object
dataset <- within(dataset, {
status.dichotomous <- recorrencia2 != 0
survival.vector <- Surv(diff, recorrencia2)
})
dataset$TRA_ESQUEM2<-0
dataset$TRA_ESQUEM2[dataset$TRA_ESQUEM=="4"]<-1
dataset$TRA_ESQUEM2<-as.factor(dataset$TRA_ESQUEM2)
dataset$gestante2<-0
dataset$gestante2[dataset$CS_GESTANT!=1]<-1
dataset$gestante2<-as.factor(dataset$gestante2)
dataset$raca2<-0
dataset$raca2[dataset$CS_RACA!=1]<-1
dataset$raca2<-as.factor(dataset$raca2)
dataset$escola2<-0
dataset$escola2[dataset$CS_ESCOL_N=="7"|dataset$CS_ESCOL_N=="8"|dataset$CS_ESCOL_N=="9"|dataset$CS_ESCOL_N=="10"]<-1
dataset$escola2<-as.factor(dataset$escola2)
## Create vectors for outcome and predictors
outcome <- c("survival.vector")
#predictors <- c("esquema2","cd8_cd4", "idade","cv","COR","gon","Grupo6")
predictors <- c('NU_IDADE_N' , 'CS_SEXO', 'raca2', 'escola2', 'RESULT', 'PMM' , 'PCRUZ' , 'TRA_ESQUEM2' , 'LVC','TRA_ESQUEM','CS_GESTANT')
## The lines below should not need modification.
## Create list of models
list.of.models <- lapply(seq_along((predictors)), function(n) {
left.hand.side <- outcome
right.hand.side <- apply(X = combn(predictors, n), MARGIN = 2, paste, collapse = " + ")
paste(left.hand.side, right.hand.side, sep = " ~ ")
})
## Convert to a
vector.of.models <- unlist(list.of.models)
## Fit coxph to all models
list.of.fits <- lapply(vector.of.models, function(x) {
formula <- as.formula(x)
fit <- coxph(formula, data = dataset, id=study_ID)
suma<- summary(fit)
result.AIC <- extractAIC(fit)
data.frame(num.predictors = result.AIC[1],
AIC = result.AIC[2],
model = x,
suma= suma[["coefficients"]])
})
## Collapse to a data frame
result <- do.call(rbind, list.of.fits)
result$ciup<-exp(result$suma.coef + 2*result$suma.se.coef.)
result$cidown<-exp(result$suma.coef - 2*result$suma.se.coef.)
result$var<- row.names(result)
## Sort and print
library(doBy)
a<-orderBy(~ AIC + model, result)
a<-as.data.frame(a, digits=2)
result$pvalorsig<-0
result$pvalorsig[result$suma.Pr...z..<0.05]<-1
f<-result %>%
group_by(model) %>%
summarise(somamodelo= sum(pvalorsig))
a<-merge(a,f)
a$dif<-a$num.predictors- a$somamodelo
#filtrar modelos com todas as var em todos os fatores < 0.05
modelocomtudosig<-subset(a,a$dif<1 & a$suma.Pr...z..<0.05)
#filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas as var em todos os fatores < 0.05
modelomulticomtudosig<-subset(a,a$dif<1 & a$num.predictors>1)
##filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas -1 as var em todos os fatores < 0.05, para pegar multinomial
modelomulticomtudosigmenos1<-subset(a,a$dif<2 & a$num.predictors>1)
modelomulticomtudosigmenos3<-subset(a,a$dif<2 & a$num.predictors>4)
modelo_uni<-subset(a,a$num.predictors==1)
htmlTable::htmlTable(modelo_uni)
|
model
|
num.predictors
|
AIC
|
suma.coef
|
suma.exp.coef.
|
suma.se.coef.
|
suma.z
|
suma.Pr…z..
|
ciup
|
cidown
|
var
|
somamodelo
|
dif
|
7
|
survival.vector ~ CS_SEXO
|
1
|
8113.55292267763
|
-0.103139783866269
|
0.902000879498455
|
0.0979299138115787
|
-1.05319998611164
|
0.292249344846739
|
1.09715453700676
|
0.741559697538738
|
CS_SEXOM
|
0
|
1
|
8199
|
survival.vector ~ escola2
|
1
|
8107.9681115605
|
0.205578300869994
|
1.22823514907753
|
0.0788376332971222
|
2.6076163409829
|
0.00911750812918678
|
1.43800044314189
|
1.04906892666419
|
escola21
|
1
|
0
|
10119
|
survival.vector ~ LVC
|
1
|
8087.40690013913
|
-0.429209114383688
|
0.651023776509997
|
0.0804735255129662
|
-5.33354431345912
|
9.63140449580789e-08
|
0.764707353064944
|
0.554240724746037
|
LVCÑ_LVC
|
1
|
0
|
10159
|
survival.vector ~ NU_IDADE_N
|
1
|
8113.75140960879
|
-0.000205568896110861
|
0.999794452231727
|
0.000204435357995372
|
-1.0055447263458
|
0.314634624068129
|
1.0002033224871
|
0.999385749117261
|
NU_IDADE_N
|
0
|
1
|
27295
|
survival.vector ~ PMM
|
1
|
4929.10360520894
|
3.30684913161125e-09
|
1.00000000330685
|
5.00462821210673e-09
|
0.660758200501614
|
0.50876739291489
|
1.00000001331611
|
0.999999993297593
|
PMM
|
0
|
1
|
27727
|
survival.vector ~ raca2
|
1
|
8111.86850534762
|
-0.127091866259103
|
0.880652766605711
|
0.0762807088131721
|
-1.66610756817138
|
0.0956919914192787
|
1.02579667168878
|
0.756045829290417
|
raca21
|
0
|
1
|
32685
|
survival.vector ~ TRA_ESQUEM2
|
1
|
8109.34327382061
|
-0.687735578891047
|
0.50271313541725
|
0.335879502956417
|
-2.04756638269852
|
0.0406025061529627
|
0.984150375497512
|
0.256790529997294
|
TRA_ESQUEM21
|
1
|
0
|
htmlTable::htmlTable(modelomulticomtudosig)
|
model
|
num.predictors
|
AIC
|
suma.coef
|
suma.exp.coef.
|
suma.se.coef.
|
suma.z
|
suma.Pr…z..
|
ciup
|
cidown
|
var
|
somamodelo
|
dif
|
8207
|
survival.vector ~ escola2 + LVC
|
2
|
8082.66182637341
|
0.206802330960217
|
1.22973946633484
|
0.0788617230066802
|
2.62234101761509
|
0.00873280009688099
|
1.43983104518785
|
1.05030320058434
|
escola218
|
2
|
0
|
8208
|
survival.vector ~ escola2 + LVC
|
2
|
8082.66182637341
|
-0.42994122992774
|
0.650547326313245
|
0.0805008409165198
|
-5.34082905262559
|
9.25224727302636e-08
|
0.764189450960208
|
0.553804849362346
|
LVCÑ_LVC4
|
2
|
0
|
10027
|
survival.vector ~ escola2 + TRA_ESQUEM2
|
2
|
8104.66023155941
|
0.205785445001466
|
1.22848959713353
|
0.0788401135718815
|
2.61016170168049
|
0.00904994334530299
|
1.4383054821161
|
1.04928105262097
|
escola217
|
2
|
0
|
10028
|
survival.vector ~ escola2 + TRA_ESQUEM2
|
2
|
8104.66023155941
|
-0.688532574587842
|
0.502312634831422
|
0.335897141135911
|
-2.04983160100564
|
0.0403808667018732
|
0.983401014562777
|
0.256576899326737
|
TRA_ESQUEM214
|
2
|
0
|
#htmlTable::htmlTable(modelomulticomtudosig)
#dataset
.Survfit <- survfit(Surv(diff, recorrencia2) ~ LVC ,
error="greenwood",
data=dataset,id=study_ID)
plot(.Survfit)

table(dataset$LVC)
##
## LVC Ñ_LVC
## 635 5599
#a<-dataset$diff[dataset$recorrencia3==1]
#edu::resumosnumericos(a)
#a<-as.data.frame(a)
#edu::resumosnumericos(a)
#a<-dataset$diff[dataset$recorrencia2==1]
#a<-as.data.frame(a)
#table(dataset$recorrencia3)
#table(dataset$recorrencia3)
#a<-dataset$diff[dataset$recorrencia2==1]
#edu::resumosnumericos(a)
#a<-dataset$diff[dataset$recorrencia2==1]
#a<-as.data.frame(a)
#edu::resumosnumericos(a)