karina

# Método
###Estudo Transversal
require(RcmdrMisc)
require(dplyr)
library(colorspace, pos=33)

#notifcações match
banco <- read.csv("C:/Users/edson/Downloads/Base_de_dados_match  (1).csv", comment.char="#")
banco$recorrencia2<-0
banco$recorrencia2[banco$recorrencia!="1" & banco$diff<365]<-1
banco$recorrencia2<-as.factor(banco$recorrencia2)

banco$CS_ESCOL_N<-as.factor(banco$CS_ESCOL_N)
#repetição

## Load survival package
library(survival)

#remoção de duplicatas

dataset    <- banco
dataset$NU_IDADE_N<-dataset$NU_IDADE_N-4000
#dataset$recorrencia2<-0
#dataset$recorrencia2[dataset$recorrencia>2 & dataset$diff<60]<-1
#banco$recorrencia2<-as.factor(banco$recorrencia2)

dataset<-arrange(dataset,desc(dataset$DT_NOTIFIC))

dataset<-dplyr::distinct(dataset,dataset$study_ID,.keep_all=T)




tabela <- within(dataset, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
recorrencia2 <- as.factor(recorrencia2)

})

dataset$TRA_ESQUEM<-as.factor(dataset$TRA_ESQUEM)

#criar variável recorrência

tabela<-subset(dataset, select=c(NU_IDADE_N , CS_SEXO,     CS_GESTANT,  CS_RACA,     CS_ESCOL_N,  RESULT,     
PMM       ,  PCRUZ   ,    TRA_ESQUEM , MES     ,      recorrencia2,
diff,        LVC ,        Tempo_class))
#teste de hiotese em banco bruto as.it is
require(gtsummary)

gtsummary::tbl_summary(tabela)

Characteristic	N = 6,234¹
NU_IDADE_N	36 (27, 48)
CS_SEXO
F	1,374 (22%)
M	4,860 (78%)
CS_GESTANT
1	9 (0.1%)
2	18 (0.3%)
3	9 (0.1%)
4	2 (<0.1%)
5	971 (16%)
6	5,079 (81%)
9	146 (2.3%)
CS_RACA
1	2,859 (47%)
2	313 (5.1%)
3	50 (0.8%)
4	2,419 (40%)
5	125 (2.0%)
9	354 (5.8%)
Unknown	114
CS_ESCOL_N
0	85 (1.5%)
1	571 (10.0%)
2	271 (4.7%)
3	721 (13%)
4	514 (9.0%)
5	424 (7.4%)
6	869 (15%)
7	181 (3.2%)
8	629 (11%)
9	1,285 (22%)
10	165 (2.9%)
Unknown	519
RESULT
4	5,841 (94%)
5	272 (4.4%)
6	121 (1.9%)
PMM	501 (118, 4,088)
Unknown	1,887
PCRUZ
1	1,192 (19%)
2	759 (12%)
3	935 (15%)
4	2,577 (41%)
5	581 (9.3%)
6	190 (3.0%)
TRA_ESQUEM
1	4,499 (75%)
2	45 (0.8%)
3	80 (1.3%)
4	65 (1.1%)
5	19 (0.3%)
6	13 (0.2%)
7	18 (0.3%)
8	5 (<0.1%)
9	36 (0.6%)
10	34 (0.6%)
11	43 (0.7%)
12	21 (0.4%)
99	1,096 (18%)
Unknown	260
MES	7 (4, 10)
Unknown	64
recorrencia2
0	5,542 (89%)
1	692 (11%)
diff	0 (0, 0)
LVC
LVC	635 (10%)
Ñ_LVC	5,599 (90%)
Tempo_class
maior_60_dias	475 (7.6%)
menor_igual_60_dias	5,759 (92%)
¹ Median (IQR); n (%)

gtsummary::tbl_summary(tabela,by="recorrencia2")%>%add_p(simulate.p.value=TRUE)

Characteristic	0, N = 5,542¹	1, N = 692¹	p-value²
NU_IDADE_N	36 (26, 48)	37 (28, 50)	0.037
CS_SEXO			0.017
F	1,246 (22%)	128 (18%)
M	4,296 (78%)	564 (82%)
CS_GESTANT			0.7
1	9 (0.2%)	0 (0%)
2	17 (0.3%)	1 (0.1%)
3	9 (0.2%)	0 (0%)
4	2 (<0.1%)	0 (0%)
5	873 (16%)	98 (14%)
6	4,499 (81%)	580 (84%)
9	133 (2.4%)	13 (1.9%)
CS_RACA			0.3
1	2,534 (47%)	325 (48%)
2	272 (5.0%)	41 (6.0%)
3	48 (0.9%)	2 (0.3%)
4	2,151 (40%)	268 (39%)
5	116 (2.1%)	9 (1.3%)
9	315 (5.8%)	39 (5.7%)
Unknown	106	8
CS_ESCOL_N			0.3
0	80 (1.6%)	5 (0.8%)
1	509 (10%)	62 (9.6%)
2	244 (4.8%)	27 (4.2%)
3	647 (13%)	74 (12%)
4	447 (8.8%)	67 (10%)
5	385 (7.6%)	39 (6.1%)
6	756 (15%)	113 (18%)
7	161 (3.2%)	20 (3.1%)
8	554 (11%)	75 (12%)
9	1,138 (22%)	147 (23%)
10	151 (3.0%)	14 (2.2%)
Unknown	470	49
RESULT			0.4
4	5,189 (94%)	652 (94%)
5	248 (4.5%)	24 (3.5%)
6	105 (1.9%)	16 (2.3%)
PMM	500 (100, 4,000)	501 (200, 5,158)	0.003
Unknown	1,649	238
PCRUZ			0.022
1	1,089 (20%)	103 (15%)
2	684 (12%)	75 (11%)
3	829 (15%)	106 (15%)
4	2,258 (41%)	319 (46%)
5	512 (9.2%)	69 (10.0%)
6	170 (3.1%)	20 (2.9%)
TRA_ESQUEM
1	4,077 (77%)	422 (64%)
2	45 (0.8%)	0 (0%)
3	68 (1.3%)	12 (1.8%)
4	56 (1.1%)	9 (1.4%)
5	19 (0.4%)	0 (0%)
6	13 (0.2%)	0 (0%)
7	15 (0.3%)	3 (0.5%)
8	3 (<0.1%)	2 (0.3%)
9	30 (0.6%)	6 (0.9%)
10	16 (0.3%)	18 (2.7%)
11	43 (0.8%)	0 (0%)
12	21 (0.4%)	0 (0%)
99	908 (17%)	188 (28%)
Unknown	228	32
MES	7 (4, 10)	7 (4, 10)	0.082
Unknown	58	6
diff	0 (0, 0)	70 (38, 111)	<0.001
LVC			<0.001
LVC	387 (7.0%)	248 (36%)
Ñ_LVC	5,155 (93%)	444 (64%)
Tempo_class			<0.001
maior_60_dias	65 (1.2%)	410 (59%)
menor_igual_60_dias	5,477 (99%)	282 (41%)
¹ Median (IQR); n (%) ² Wilcoxon rank sum test; Pearson's Chi-squared test; Fisher's exact test

#levando em conta subset de recorrências
#Script de Cox Exaustivo

options(digits=3)





dataset <- within(dataset, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
TRA_ESQUEM <- as.factor(TRA_ESQUEM)
})


## Create outcome variable, then survival object
dataset <- within(dataset, {
  status.dichotomous <- recorrencia2 != 0
  survival.vector    <- Surv(diff, recorrencia2)
})

dataset$TRA_ESQUEM2<-0
dataset$TRA_ESQUEM2[dataset$TRA_ESQUEM=="4"]<-1
dataset$TRA_ESQUEM2<-as.factor(dataset$TRA_ESQUEM2)

dataset$gestante2<-0
dataset$gestante2[dataset$CS_GESTANT!=1]<-1
dataset$gestante2<-as.factor(dataset$gestante2)

dataset$raca2<-0
dataset$raca2[dataset$CS_RACA!=1]<-1
dataset$raca2<-as.factor(dataset$raca2)

dataset$escola2<-0
dataset$escola2[dataset$CS_ESCOL_N=="7"|dataset$CS_ESCOL_N=="8"|dataset$CS_ESCOL_N=="9"|dataset$CS_ESCOL_N=="10"]<-1
dataset$escola2<-as.factor(dataset$escola2)


## Create vectors for outcome and predictors
outcome    <- c("survival.vector")
#predictors <- c("esquema2","cd8_cd4", "idade","cv","COR","gon","Grupo6")
predictors <- c('NU_IDADE_N' , 'CS_SEXO',  'raca2',     'escola2',  'RESULT',     'PMM'       ,  'PCRUZ'   ,    'TRA_ESQUEM2'   ,   'LVC','TRA_ESQUEM','CS_GESTANT')





## The lines below should not need modification.

## Create list of models
list.of.models <- lapply(seq_along((predictors)), function(n) {
  
  left.hand.side  <- outcome
  right.hand.side <- apply(X = combn(predictors, n), MARGIN = 2, paste, collapse = " + ")
  
  paste(left.hand.side, right.hand.side, sep = "  ~  ")
})

## Convert to a 

vector.of.models <- unlist(list.of.models)

## Fit coxph to all models
list.of.fits <- lapply(vector.of.models, function(x) {
  
  formula    <- as.formula(x)
  fit        <- coxph(formula, data = dataset, id=study_ID)
  suma<- summary(fit)
  result.AIC <- extractAIC(fit)
  
  data.frame(num.predictors = result.AIC[1],
             AIC            = result.AIC[2],
             model          = x,
             suma= suma[["coefficients"]])
})


## Collapse to a data frame
result <- do.call(rbind, list.of.fits)
result$ciup<-exp(result$suma.coef + 2*result$suma.se.coef.)
result$cidown<-exp(result$suma.coef - 2*result$suma.se.coef.)
result$var<- row.names(result)


## Sort and print
library(doBy)
a<-orderBy(~ AIC + model, result)
a<-as.data.frame(a, digits=2)
result$pvalorsig<-0
result$pvalorsig[result$suma.Pr...z..<0.05]<-1

f<-result %>%
group_by(model) %>%
summarise(somamodelo= sum(pvalorsig))
a<-merge(a,f)

a$dif<-a$num.predictors- a$somamodelo




#filtrar modelos com todas as var em todos os fatores < 0.05
modelocomtudosig<-subset(a,a$dif<1 & a$suma.Pr...z..<0.05)
#filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas as var em todos os fatores < 0.05
modelomulticomtudosig<-subset(a,a$dif<1 & a$num.predictors>1)
##filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas -1 as var em todos os fatores < 0.05, para pegar multinomial
modelomulticomtudosigmenos1<-subset(a,a$dif<2 & a$num.predictors>1)
modelomulticomtudosigmenos3<-subset(a,a$dif<2 & a$num.predictors>4)
modelo_uni<-subset(a,a$num.predictors==1)

htmlTable::htmlTable(modelo_uni)

	model	num.predictors	AIC	suma.coef	suma.exp.coef.	suma.se.coef.	suma.z	suma.Pr…z..	ciup	cidown	var	somamodelo	dif
7	survival.vector ~ CS_SEXO	1	8113.55292267763	-0.103139783866269	0.902000879498455	0.0979299138115787	-1.05319998611164	0.292249344846739	1.09715453700676	0.741559697538738	CS_SEXOM	0	1
8199	survival.vector ~ escola2	1	8107.9681115605	0.205578300869994	1.22823514907753	0.0788376332971222	2.6076163409829	0.00911750812918678	1.43800044314189	1.04906892666419	escola21	1	0
10119	survival.vector ~ LVC	1	8087.40690013913	-0.429209114383688	0.651023776509997	0.0804735255129662	-5.33354431345912	9.63140449580789e-08	0.764707353064944	0.554240724746037	LVCÑ_LVC	1	0
10159	survival.vector ~ NU_IDADE_N	1	8113.75140960879	-0.000205568896110861	0.999794452231727	0.000204435357995372	-1.0055447263458	0.314634624068129	1.0002033224871	0.999385749117261	NU_IDADE_N	0	1
27295	survival.vector ~ PMM	1	4929.10360520894	3.30684913161125e-09	1.00000000330685	5.00462821210673e-09	0.660758200501614	0.50876739291489	1.00000001331611	0.999999993297593	PMM	0	1
27727	survival.vector ~ raca2	1	8111.86850534762	-0.127091866259103	0.880652766605711	0.0762807088131721	-1.66610756817138	0.0956919914192787	1.02579667168878	0.756045829290417	raca21	0	1
32685	survival.vector ~ TRA_ESQUEM2	1	8109.34327382061	-0.687735578891047	0.50271313541725	0.335879502956417	-2.04756638269852	0.0406025061529627	0.984150375497512	0.256790529997294	TRA_ESQUEM21	1	0

htmlTable::htmlTable(modelomulticomtudosig)

	model	num.predictors	AIC	suma.coef	suma.exp.coef.	suma.se.coef.	suma.z	suma.Pr…z..	ciup	cidown	var	somamodelo
8207	survival.vector ~ escola2 + LVC	2	8082.66182637341	0.206802330960217	1.22973946633484	0.0788617230066802	2.62234101761509	0.00873280009688099	1.43983104518785	1.05030320058434	escola218	2
8208	survival.vector ~ escola2 + LVC	2	8082.66182637341	-0.42994122992774	0.650547326313245	0.0805008409165198	-5.34082905262559	9.25224727302636e-08	0.764189450960208	0.553804849362346	LVCÑ_LVC4	2
10027	survival.vector ~ escola2 + TRA_ESQUEM2	2	8104.66023155941	0.205785445001466	1.22848959713353	0.0788401135718815	2.61016170168049	0.00904994334530299	1.4383054821161	1.04928105262097	escola217	2
10028	survival.vector ~ escola2 + TRA_ESQUEM2	2	8104.66023155941	-0.688532574587842	0.502312634831422	0.335897141135911	-2.04983160100564	0.0403808667018732	0.983401014562777	0.256576899326737	TRA_ESQUEM214	2

#htmlTable::htmlTable(modelomulticomtudosig)

#dataset

.Survfit <- survfit(Surv(diff, recorrencia2) ~ LVC , 
              error="greenwood", 
                    data=dataset,id=study_ID)
plot(.Survfit)

table(dataset$LVC)

## 
##   LVC Ñ_LVC 
##   635  5599

#a<-dataset$diff[dataset$recorrencia3==1]
#edu::resumosnumericos(a)
#a<-as.data.frame(a)
#edu::resumosnumericos(a)
#a<-dataset$diff[dataset$recorrencia2==1]
#a<-as.data.frame(a)
#table(dataset$recorrencia3)
#table(dataset$recorrencia3)
#a<-dataset$diff[dataset$recorrencia2==1]
#edu::resumosnumericos(a)
#a<-dataset$diff[dataset$recorrencia2==1]
#a<-as.data.frame(a)
#edu::resumosnumericos(a)

karina

Eduardo Peixoto

13/07/2021