# Método
###Estudo Transversal
require(RcmdrMisc)
require(dplyr)
library(colorspace, pos=33)
#notifcações match
banco <- read.csv("C:/Users/edson/Downloads/Base_de_dados_match (1).csv", comment.char="#")
banco$recorrencia2<-0
banco$recorrencia2[banco$recorrencia!="1" & banco$diff<60]<-1
banco$recorrencia2<-as.factor(banco$recorrencia2)
#repetição
table(banco$recorrencia)
##
## 1 2 3 4 5
## 6234 820 142 28 6
banco<-arrange(banco,desc(banco$DT_NOTIFIC))
banco_umavez<-dplyr::distinct(banco, banco$study_ID,.keep_all=T)
banco_repete<-subset(banco,banco$recorrencia>1)
banco_repete<-dplyr::arrange(banco_repete,banco_repete$DT_NOTIFIC, banco_repete$study_ID)
banco_repete2<-dplyr::distinct(banco_repete, banco_repete$study_ID,.keep_all=T)
banco3<-dplyr::full_join(banco_umavez,banco_repete2)
tabela<-subset(banco_umavez, select=c(NU_IDADE_N , CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, RESULT,
PMM , PCRUZ , TRA_ESQUEM , MES , ANO , recorrencia,
diff, LVC , Tempo_class))
tabela <- within(tabela, {
ANO <- as.factor(ANO)
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
recorrencia <- as.factor(recorrencia)
RESULT <- as.factor(RESULT)
TRA_ESQUEM <- as.factor(TRA_ESQUEM)
})
#descritiva apenas
edu::resumonumericooucategorico(tabela)
Characteristic | N = 6,2341 |
---|---|
NU_IDADE_N | 4,036 (4,027, 4,048) |
CS_SEXO | |
F | 1,374 (22.04%) |
M | 4,860 (77.96%) |
CS_GESTANT | |
1 | 9 (0.14%) |
2 | 18 (0.29%) |
3 | 9 (0.14%) |
4 | 2 (0.03%) |
5 | 971 (15.58%) |
6 | 5,079 (81.47%) |
9 | 146 (2.34%) |
CS_RACA | |
1 | 2,859 (46.72%) |
2 | 313 (5.11%) |
3 | 50 (0.82%) |
4 | 2,419 (39.53%) |
5 | 125 (2.04%) |
9 | 354 (5.78%) |
Unknown | 114 |
CS_ESCOL_N | |
0 | 85 (1.49%) |
1 | 571 (9.99%) |
2 | 271 (4.74%) |
3 | 721 (12.62%) |
4 | 514 (8.99%) |
5 | 424 (7.42%) |
6 | 869 (15.21%) |
7 | 181 (3.17%) |
8 | 629 (11.01%) |
9 | 1,285 (22.48%) |
10 | 165 (2.89%) |
Unknown | 519 |
RESULT | |
4 | 5,841 (93.70%) |
5 | 272 (4.36%) |
6 | 121 (1.94%) |
PMM | 501 (118, 4,088) |
Unknown | 1,887 |
PCRUZ | |
1 | 1,192 (19.12%) |
2 | 759 (12.18%) |
3 | 935 (15.00%) |
4 | 2,577 (41.34%) |
5 | 581 (9.32%) |
6 | 190 (3.05%) |
TRA_ESQUEM | |
1 | 4,499 (75.31%) |
2 | 45 (0.75%) |
3 | 80 (1.34%) |
4 | 65 (1.09%) |
5 | 19 (0.32%) |
6 | 13 (0.22%) |
7 | 18 (0.30%) |
8 | 5 (0.08%) |
9 | 36 (0.60%) |
10 | 34 (0.57%) |
11 | 43 (0.72%) |
12 | 21 (0.35%) |
99 | 1,096 (18.35%) |
Unknown | 260 |
MES | |
1 | 521 (8.44%) |
2 | 437 (7.08%) |
3 | 533 (8.64%) |
4 | 510 (8.27%) |
5 | 524 (8.49%) |
6 | 515 (8.35%) |
7 | 495 (8.02%) |
8 | 521 (8.44%) |
9 | 525 (8.51%) |
10 | 547 (8.87%) |
11 | 536 (8.69%) |
12 | 506 (8.20%) |
Unknown | 64 |
ANO | |
1920 | 2 (0.03%) |
1922 | 1 (0.02%) |
1923 | 1 (0.02%) |
1925 | 4 (0.06%) |
1926 | 2 (0.03%) |
1927 | 4 (0.06%) |
1928 | 1 (0.02%) |
1929 | 1 (0.02%) |
1930 | 5 (0.08%) |
1931 | 1 (0.02%) |
1932 | 3 (0.05%) |
1933 | 7 (0.11%) |
1934 | 13 (0.21%) |
1935 | 9 (0.15%) |
1936 | 10 (0.16%) |
1937 | 15 (0.24%) |
1938 | 16 (0.26%) |
1939 | 12 (0.19%) |
1940 | 22 (0.36%) |
1941 | 15 (0.24%) |
1942 | 26 (0.42%) |
1943 | 22 (0.36%) |
1944 | 26 (0.42%) |
1945 | 27 (0.44%) |
1946 | 39 (0.63%) |
1947 | 41 (0.66%) |
1948 | 30 (0.49%) |
1949 | 34 (0.55%) |
1950 | 50 (0.81%) |
1951 | 49 (0.79%) |
1952 | 68 (1.10%) |
1953 | 72 (1.17%) |
1954 | 60 (0.97%) |
1955 | 77 (1.25%) |
1956 | 69 (1.12%) |
1957 | 90 (1.46%) |
1958 | 87 (1.41%) |
1959 | 118 (1.91%) |
1960 | 89 (1.44%) |
1961 | 90 (1.46%) |
1962 | 120 (1.94%) |
1963 | 125 (2.03%) |
1964 | 110 (1.78%) |
1965 | 127 (2.06%) |
1966 | 128 (2.07%) |
1967 | 113 (1.83%) |
1968 | 129 (2.09%) |
1969 | 130 (2.11%) |
1970 | 131 (2.12%) |
1971 | 133 (2.16%) |
1972 | 129 (2.09%) |
1973 | 125 (2.03%) |
1974 | 118 (1.91%) |
1975 | 148 (2.40%) |
1976 | 142 (2.30%) |
1977 | 156 (2.53%) |
1978 | 169 (2.74%) |
1979 | 169 (2.74%) |
1980 | 148 (2.40%) |
1981 | 159 (2.58%) |
1982 | 179 (2.90%) |
1983 | 171 (2.77%) |
1984 | 159 (2.58%) |
1985 | 165 (2.67%) |
1986 | 147 (2.38%) |
1987 | 154 (2.50%) |
1988 | 125 (2.03%) |
1989 | 117 (1.90%) |
1990 | 104 (1.69%) |
1991 | 85 (1.38%) |
1992 | 92 (1.49%) |
1993 | 60 (0.97%) |
1994 | 64 (1.04%) |
1995 | 66 (1.07%) |
1996 | 41 (0.66%) |
1997 | 44 (0.71%) |
1998 | 41 (0.66%) |
1999 | 43 (0.70%) |
2000 | 32 (0.52%) |
2001 | 30 (0.49%) |
2002 | 25 (0.41%) |
2003 | 22 (0.36%) |
2004 | 26 (0.42%) |
2005 | 29 (0.47%) |
2006 | 19 (0.31%) |
2007 | 19 (0.31%) |
2008 | 22 (0.36%) |
2009 | 14 (0.23%) |
2010 | 15 (0.24%) |
2011 | 9 (0.15%) |
2012 | 6 (0.10%) |
2013 | 7 (0.11%) |
2014 | 6 (0.10%) |
2015 | 5 (0.08%) |
2016 | 2 (0.03%) |
2017 | 3 (0.05%) |
2018 | 2 (0.03%) |
2019 | 3 (0.05%) |
Unknown | 64 |
recorrencia | |
1 | 5,477 (87.86%) |
2 | 620 (9.95%) |
3 | 111 (1.78%) |
4 | 20 (0.32%) |
5 | 6 (0.10%) |
diff | 0 (0, 0) |
LVC | |
LVC | 635 (10.19%) |
Ñ_LVC | 5,599 (89.81%) |
Tempo_class | |
maior_60_dias | 475 (7.62%) |
menor_igual_60_dias | 5,759 (92.38%) |
1
Median (IQR); n (%)
|
#notificações
dim(banco)
## [1] 7230 98
#casos
dim(banco3)
## [1] 6434 100
table(banco$recorrencia2)
##
## 0 1
## 6784 446
table(banco$recorrencia)
##
## 1 2 3 4 5
## 6234 820 142 28 6
tabela<-subset(banco, select=c(NU_IDADE_N , CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, RESULT,
PMM , PCRUZ , TRA_ESQUEM , MES,
diff , Tempo_class, recorrencia2))
tabela <- within(tabela, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
recorrencia2 <- as.factor(recorrencia2)
})
#criar variável recorrência
#teste de hiotese em banco bruto as.it is
require(gtsummary)
gtsummary::tbl_summary(tabela,by="recorrencia2")%>%add_p(simulate.p.value=TRUE)
Characteristic | 0, N = 6,7841 | 1, N = 4461 | p-value2 |
---|---|---|---|
NU_IDADE_N | 4,036 (4,027, 4,049) | 4,035 (4,027, 4,050) | 0.8 |
CS_SEXO | 0.3 | ||
F | 1,471 (21.68%) | 88 (19.73%) | |
M | 5,313 (78.32%) | 358 (80.27%) | |
CS_GESTANT | 0.3 | ||
1 | 10 (0.15%) | 1 (0.22%) | |
2 | 22 (0.32%) | 0 (0.00%) | |
3 | 10 (0.15%) | 1 (0.22%) | |
4 | 3 (0.04%) | 1 (0.22%) | |
5 | 1,024 (15.09%) | 58 (13.00%) | |
6 | 5,556 (81.90%) | 375 (84.08%) | |
9 | 159 (2.34%) | 10 (2.24%) | |
CS_RACA | |||
1 | 3,140 (47.16%) | 190 (43.28%) | |
2 | 339 (5.09%) | 28 (6.38%) | |
3 | 56 (0.84%) | 1 (0.23%) | |
4 | 2,616 (39.29%) | 176 (40.09%) | |
5 | 135 (2.03%) | 5 (1.14%) | |
9 | 372 (5.59%) | 39 (8.88%) | |
Unknown | 126 | 7 | |
CS_ESCOL_N | 0.006 | ||
0 | 89 (1.43%) | 2 (0.49%) | |
1 | 627 (10.08%) | 36 (8.78%) | |
2 | 312 (5.02%) | 16 (3.90%) | |
3 | 780 (12.54%) | 41 (10.00%) | |
4 | 555 (8.93%) | 41 (10.00%) | |
5 | 464 (7.46%) | 17 (4.15%) | |
6 | 955 (15.36%) | 69 (16.83%) | |
7 | 200 (3.22%) | 9 (2.20%) | |
8 | 680 (10.94%) | 45 (10.98%) | |
9 | 1,384 (22.26%) | 115 (28.05%) | |
10 | 172 (2.77%) | 19 (4.63%) | |
Unknown | 566 | 36 | |
RESULT | 0.3 | ||
4 | 6,354 (93.66%) | 424 (95.07%) | |
5 | 298 (4.39%) | 18 (4.04%) | |
6 | 132 (1.95%) | 4 (0.90%) | |
PMM | 501 (135, 4,500) | 600 (200, 7,220) | 0.017 |
Unknown | 2,076 | 159 | |
PCRUZ | 0.11 | ||
1 | 1,266 (18.66%) | 70 (15.70%) | |
2 | 804 (11.85%) | 46 (10.31%) | |
3 | 1,017 (14.99%) | 55 (12.33%) | |
4 | 2,831 (41.73%) | 208 (46.64%) | |
5 | 654 (9.64%) | 51 (11.43%) | |
6 | 212 (3.12%) | 16 (3.59%) | |
TRA_ESQUEM | 1 (1, 1) | 1 (1, 99) | <0.001 |
Unknown | 280 | 32 | |
MES | 0.059 | ||
1 | 558 (8.31%) | 20 (4.52%) | |
2 | 467 (6.95%) | 34 (7.69%) | |
3 | 573 (8.53%) | 32 (7.24%) | |
4 | 558 (8.31%) | 42 (9.50%) | |
5 | 575 (8.56%) | 48 (10.86%) | |
6 | 567 (8.44%) | 39 (8.82%) | |
7 | 544 (8.10%) | 34 (7.69%) | |
8 | 566 (8.43%) | 27 (6.11%) | |
9 | 583 (8.68%) | 40 (9.05%) | |
10 | 609 (9.07%) | 52 (11.76%) | |
11 | 573 (8.53%) | 33 (7.47%) | |
12 | 545 (8.11%) | 41 (9.28%) | |
Unknown | 66 | 4 | |
diff | 0 (0, 0) | 16 (1, 43) | <0.001 |
Tempo_class | <0.001 | ||
maior_60_dias | 547 (8.06%) | 0 (0.00%) | |
menor_igual_60_dias | 6,237 (91.94%) | 446 (100.00%) | |
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Pearson's Chi-squared test; Fisher's exact test
|
banco <- within(banco, {
recorrencia2 <- as.factor(recorrencia2)
})
with(banco, piechart(recorrencia2, xlab="", ylab="", main="recorrencia2", col=palette()[2:3],
scale="percent"))
#Casos por ano
library(ggplot2)
library(dplyr)
library(plotly)
library(hrbrthemes)
data<-banco
data$DT_NOTIFIC <- as.Date(data$DT_NOTIFIC)
a<-table(data$DT_NOTIFIC,data$NU_NOTIFIC)
a<-as.data.frame(a)
a<-subset(a,a$Freq==1)
a$Var1<-as.Date(a$Var1)
data<-a
# Usual area chart
p <- data %>%
ggplot( aes(x=Var1, y=Freq)) +
geom_area(fill="#69b3a4", alpha=0.5) +
geom_line(color="#69b3a4") +
theme_ipsum()
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
#tempo de recorrencia
data<-banco
data<-subset(data, banco$recorrencia2==1)
Boxplot( ~ diff, data=data, id=list(method="y"))
#recorrencia
# Load dataset from github
data$DT_NOTIFIC <- as.Date(data$DT_NOTIFIC)
a<-table(data$DT_NOTIFIC,data$NU_NOTIFIC)
a<-as.data.frame(a)
a<-subset(a,a$Freq==1)
a$Var1<-as.Date(a$Var1)
data<-a
# Usual area chart
p <- data %>%
ggplot( aes(x=Var1, y=Freq)) +
geom_area(fill="#69b3a4", alpha=0.5) +
geom_line(color="#69b3a4") +
theme_ipsum()
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
## Load survival package
library(survival)
#remoção de duplicatas
dataset <- banco
#dataset$recorrencia2<-0
#dataset$recorrencia2[dataset$recorrencia>2 & dataset$diff<60]<-1
#banco$recorrencia2<-as.factor(banco$recorrencia2)
dataset<-arrange(dataset,desc(dataset$DT_NOTIFIC))
dataset<-dplyr::distinct(dataset,dataset$study_ID,.keep_all=T)
#levando em conta subset de recorrências
#Script de Cox Exaustivo
options(digits=3)
dataset<-subset(dataset, select=c(NU_IDADE_N , CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, RESULT,
PMM , PCRUZ , TRA_ESQUEM , MES , recorrencia2,
diff, LVC , Tempo_class,study_ID))
dataset <- within(dataset, {
CS_ESCOL_N <- as.factor(CS_ESCOL_N)
CS_GESTANT <- as.factor(CS_GESTANT)
CS_RACA <- as.factor(CS_RACA)
MES <- as.factor(MES)
PCRUZ <- as.factor(PCRUZ)
RESULT <- as.factor(RESULT)
TRA_ESQUEM <- as.factor(TRA_ESQUEM)
})
dataset$diff[dataset$diff>60]<-NA
## Create outcome variable, then survival object
dataset <- within(dataset, {
status.dichotomous <- recorrencia2 != 0
survival.vector <- Surv(diff, recorrencia2)
})
dataset$TRA_ESQUEM2<-0
dataset$TRA_ESQUEM2[dataset$TRA_ESQUEM=="4"]<-1
dataset$TRA_ESQUEM2<-as.factor(dataset$TRA_ESQUEM2)
dataset$gestante2<-0
dataset$gestante2[dataset$CS_GESTANT!=1]<-1
dataset$gestante2<-as.factor(dataset$gestante2)
dataset$raca2<-0
dataset$raca2[dataset$CS_RACA!=1]<-1
dataset$raca2<-as.factor(dataset$raca2)
dataset$escola2<-0
dataset$escola2[dataset$CS_ESCOL_N=="7"|dataset$CS_ESCOL_N=="8"|dataset$CS_ESCOL_N=="9"|dataset$CS_ESCOL_N=="10"]<-1
dataset$escola2<-as.factor(dataset$escola2)
## Create vectors for outcome and predictors
outcome <- c("survival.vector")
#predictors <- c("esquema2","cd8_cd4", "idade","cv","COR","gon","Grupo6")
predictors <- c('NU_IDADE_N' , 'CS_SEXO', 'raca2', 'escola2', 'RESULT', 'PMM' , 'PCRUZ' , 'TRA_ESQUEM2' , 'LVC')
## The lines below should not need modification.
## Create list of models
list.of.models <- lapply(seq_along((predictors)), function(n) {
left.hand.side <- outcome
right.hand.side <- apply(X = combn(predictors, n), MARGIN = 2, paste, collapse = " + ")
paste(left.hand.side, right.hand.side, sep = " ~ ")
})
## Convert to a
vector.of.models <- unlist(list.of.models)
## Fit coxph to all models
list.of.fits <- lapply(vector.of.models, function(x) {
formula <- as.formula(x)
fit <- coxph(formula, data = dataset, id=study_ID)
suma<- summary(fit)
result.AIC <- extractAIC(fit)
data.frame(num.predictors = result.AIC[1],
AIC = result.AIC[2],
model = x,
suma= suma[["coefficients"]])
})
## Collapse to a data frame
result <- do.call(rbind, list.of.fits)
result$ciup<-exp(result$suma.coef + 2*result$suma.se.coef.)
result$cidown<-exp(result$suma.coef - 2*result$suma.se.coef.)
result$var<- row.names(result)
## Sort and print
library(doBy)
a<-orderBy(~ AIC + model, result)
a<-as.data.frame(a, digits=2)
result$pvalorsig<-0
result$pvalorsig[result$suma.Pr...z..<0.05]<-1
f<-result %>%
group_by(model) %>%
summarise(somamodelo= sum(pvalorsig))
a<-merge(a,f)
a$dif<-a$num.predictors- a$somamodelo
#filtrar modelos com todas as var em todos os fatores < 0.05
modelocomtudosig<-subset(a,a$dif<1 & a$suma.Pr...z..<0.05)
#filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas as var em todos os fatores < 0.05
modelomulticomtudosig<-subset(a,a$dif<1 & a$num.predictors>1)
##filtrar modelos com mais de um preditor, vai ser todos uni se binária ou numérica todas -1 as var em todos os fatores < 0.05, para pegar multinomial
modelomulticomtudosigmenos1<-subset(a,a$dif<2 & a$num.predictors>1)
modelomulticomtudosigmenos3<-subset(a,a$dif<2 & a$num.predictors>4)
modelo_uni<-subset(a,a$num.predictors==1)
htmlTable::htmlTable(modelo_uni)
model | num.predictors | AIC | suma.coef | suma.exp.coef. | suma.se.coef. | suma.z | suma.Pr…z.. | ciup | cidown | var | somamodelo | dif | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | survival.vector ~ CS_SEXO | 1 | 2624.51240894881 | -0.198627956944199 | 0.81985485790608 | 0.153000915033901 | -1.29821417669423 | 0.194213747926069 | 1.1133504284787 | 0.603729042392027 | CS_SEXOM | 0 | 1 |
897 | survival.vector ~ escola2 | 1 | 2625.58204930766 | 0.0905067280371467 | 1.09472887299339 | 0.12193085558174 | 0.742279118811506 | 0.457918240603139 | 1.39705777961419 | 0.857825154301308 | escola21 | 0 | 1 |
1089 | survival.vector ~ LVC | 1 | 2617.78600216494 | 0.350111564027484 | 1.41922587431581 | 0.122157881973692 | 2.86605791104731 | 0.0041561820825042 | 1.81199297009523 | 1.11159486574698 | LVCÑ_LVC | 1 | 0 |
1090 | survival.vector ~ NU_IDADE_N | 1 | 2625.26491675674 | -0.000427473246744527 | 0.999572618106926 | 0.000403955166237912 | -1.05821953145356 | 0.289955368791973 | 1.0003805094611 | 0.998765379192936 | NU_IDADE_N | 0 | 1 |
3034 | survival.vector ~ PMM | 1 | 1455.48734693847 | -7.51547722450545e-09 | 0.999999992484523 | 8.70143307734639e-09 | -0.863705685914141 | 0.387749589345111 | 1.00000000988739 | 0.999999975081657 | PMM | 0 | 1 |
3070 | survival.vector ~ raca2 | 1 | 2624.67023605154 | 0.146247352870244 | 1.15748245925289 | 0.121291573317825 | 1.20575031611658 | 0.227913777770747 | 1.47525447377728 | 0.908159010728327 | raca21 | 0 | 1 |
3582 | survival.vector ~ TRA_ESQUEM2 | 1 | 2624.56849671888 | 1.04018013365329 | 2.82972669745246 | 0.712677982580516 | 1.45953734937472 | 0.144417268576137 | 11.7697902315402 | 0.680331002061313 | TRA_ESQUEM21 | 0 | 1 |
#htmlTable::htmlTable(modelomulticomtudosig)
#dataset
.Survfit <- survfit(Surv(diff, recorrencia2) ~ LVC ,
error="greenwood",
data=dataset,id=study_ID)
plot(.Survfit)
table(dataset$LVC)
##
## LVC Ñ_LVC
## 635 5599