Listas
1 Lista 01
1.1 Questao 01
xq1 <- c(6,12,10,3,5,1,6,
8,1,5,2,2,5,8,1)1.1.1 A)
indices = 1:length(xq1)
plot_ly(x = ~indices, y = ~xq1)1.1.2 B) (duvida)
Caso exista seja possivel obter a informação de se a criança apresentou algum problema relacionado a falta de calcio, poderia ter uma nova coluna de 0 e 1 para essa informação. Além disso, adicionaria uma coluna de censura …
1.2 Questao 02
xq2 = c(2,4,29,6,3,1,1,
2,3,9,10,11,5,5,1)
labs = c(0,0,1,1,rep(0,5),1,
0,0,1,0,0)
df = data.frame(xq2,
labs)
df## xq2 labs
## 1 2 0
## 2 4 0
## 3 29 1
## 4 6 1
## 5 3 0
## 6 1 0
## 7 1 0
## 8 2 0
## 9 3 0
## 10 9 1
## 11 10 0
## 12 11 0
## 13 5 1
## 14 5 0
## 15 1 0
1.2.1 A)
indices = 1:(dim(df)[1])
plot_ly(df, x = ~indices, y = ~xq2,
color=df$labs)1.2.2 B) (Duvida)
Adicionaria uma coluna de censura, representando possivelmente se o individuo não conseguiu terminar no estudo, seja por obito ou outra causa.
1.3 Questao 03
df03 = data.frame(
Y = c(4,6,12,3,6,9),
cens = c(1, 0,0,0,1,0),
row.names = 1:6
)
df03## Y cens
## 1 4 1
## 2 6 0
## 3 12 0
## 4 3 0
## 5 6 1
## 6 9 0
indices = 1:(dim(df03)[1])
plot_ly(df03, x = ~indices, y = ~Y,
color=df03$cens)1.4 Questão 04
Curva de sobrevivência
1.4.1 A)
A probabilidade é inferior a 0.18 aproximadamente
1.4.2 B)
Ao analisarmos o gráfico, podemos dizer que a mediana encontra-se proximo do tempo 5.
1.4.3 C)
Por volta do tempo t=3.
1.5 Questão 05 (duvida)
sendo \(h(t) = c\), sabendo que:
\[ h(t) = \frac{f(t)}{S(t)} \]
então,
\[ S(t) = \frac{f(t)}{h(t)} = \frac{1}{c}\cdot f(t) \]
1.6 Questão 06
1.6.1 A)
Temos que \(S(100) = 0.7\), então seja P a probabilidade de sobreviver ate 100 dias (inclusive), então \(P = 1 - S(100)\) = 0.3.
1.6.2 B)
St = 0.3
Ht = -log(St)
Ht## [1] 1.203973
1.7 Questão 07
temos as seguintes formulas a serem aplicadas:
\[ \hat{f}(t) = \frac{N(t)}{n\cdot \Delta t} \]
\[ \hat{S}(t) = \frac{R(t)}{n} \]
\[ \hat{h}(t) = \frac{N(t)}{R(t)\cdot \Delta t} \] com \(t\geq 0\)
xt = seq(0,800, by=100)
yt = c(2,5,10,16,9,7,4,1)
intervals = cut(c(0,800), xt,right=F)
inter.freq = cbind(table(intervals), 'freq' = yt)
inter.freq = inter.freq[,-c(1)]
(freqs = cbind('freq' = inter.freq))## freq
## [0,100) 2
## [100,200) 5
## [200,300) 10
## [300,400) 16
## [400,500) 9
## [500,600) 7
## [600,700) 4
## [700,800) 1
rt = c(54, 52, 47, 37, 21, 12, 5, 1)
dt = rep(100, length.out = length(rt))
nt = yt
n = rt[1]
freqs.f = cbind(freqs,'rt' = rt, 'nt' = nt, 'dt' = dt)
fhatt = nt/(n*dt)
shatt = rt/(n)
hhatt = nt/(rt*dt)
cbind(freqs.f,
'fhatt' = round(fhatt, 5),
'shatt' = round(shatt, 5),
'hhatt' = round(hhatt, 5)
)## freq rt nt dt fhatt shatt hhatt
## [0,100) 2 54 2 100 0.00037 1.00000 0.00037
## [100,200) 5 52 5 100 0.00093 0.96296 0.00096
## [200,300) 10 47 10 100 0.00185 0.87037 0.00213
## [300,400) 16 37 16 100 0.00296 0.68519 0.00432
## [400,500) 9 21 9 100 0.00167 0.38889 0.00429
## [500,600) 7 12 7 100 0.00130 0.22222 0.00583
## [600,700) 4 5 4 100 0.00074 0.09259 0.00800
## [700,800) 1 1 1 100 0.00019 0.01852 0.01000
1.8 Questão 08
Demonstração da questão 08
1.9 Questão 09
Demonstração da questão 08
1.10 Questão 10
min = 0; max = 9;by = 1
interval = seq(min, max, by=by)
rt = c(1100,860,680,496,358,240,180,128,84,52)
nt = c(240,180,184,138,118,60,52,44,32,28)
dt = rep(by, length.out=length(rt))
n = rt[1]
intervals = cut(c(min, max), interval,right=F)
inter.freq = cbind(table(intervals))
inter.freq = rbind(inter.freq, '[9,+)' = 0)
inter.freq = inter.freq[,-c(1)]
(freqs.f = cbind(inter.freq,
'rt' = rt,
'nt' = nt,
'dt' = by))## rt nt dt
## [0,1) 1100 240 1
## [1,2) 860 180 1
## [2,3) 680 184 1
## [3,4) 496 138 1
## [4,5) 358 118 1
## [5,6) 240 60 1
## [6,7) 180 52 1
## [7,8) 128 44 1
## [8,9) 84 32 1
## [9,+) 52 28 1
fhatt = nt/(n*dt)
shatt = rt/(n)
hhatt = nt/(rt*dt)
cbind(freqs.f,
'fhatt' = round(fhatt, 5),
'shatt' = round(shatt, 5),
'hhatt' = round(hhatt, 5))## rt nt dt fhatt shatt hhatt
## [0,1) 1100 240 1 0.21818 1.00000 0.21818
## [1,2) 860 180 1 0.16364 0.78182 0.20930
## [2,3) 680 184 1 0.16727 0.61818 0.27059
## [3,4) 496 138 1 0.12545 0.45091 0.27823
## [4,5) 358 118 1 0.10727 0.32545 0.32961
## [5,6) 240 60 1 0.05455 0.21818 0.25000
## [6,7) 180 52 1 0.04727 0.16364 0.28889
## [7,8) 128 44 1 0.04000 0.11636 0.34375
## [8,9) 84 32 1 0.02909 0.07636 0.38095
## [9,+) 52 28 1 0.02545 0.04727 0.53846
1.11 Questão 11
Demonstração da questao 11
1.12 Questão 12
Demonstração da questao 12
1.13 Questão 13
Demonstração da questao 13
1.14 Questão 14 (Duvida)
1.14.1 A) e B)
### C)
### D)
Demonstração da questao 14, letra D
1.15 Questão 15 (Duvida)
1.15.1 A)
1.15.2 B)
Demonstração da questao 15, letra B
1.16 Questão 17 (Duvida)
df17 = data.frame('id'=c(1,1,2,2,3,3),
'inicio'=c(25,324,39,131,1,131),
'fim'=c(323,768,130,345,56,145),
'status'=c(0,1,0,0,0,1),
'CD4'=c(0,1,0,1,0,1))
df17## id inicio fim status CD4
## 1 1 25 323 0 0
## 2 1 324 768 1 1
## 3 2 39 130 0 0
## 4 2 131 345 0 1
## 5 3 1 56 0 0
## 6 3 131 145 1 1
1.17 Questão 18 (Duvida)
2 Lista 02
2.1 Questão 01
Demonstração da questão 01
2.2 Questão 02
n = 48
xt = c(36.3, 41.7, 43.9, 49.9, 50.1,
50.8, 51.9, 52.1, 52.3, 52.3,
52.4, 52.6, 52.7, 53.1, 53.6,
53.6, 53.9, 53.9, 54.1, 54.6,
54.8, 54.8, 55.1, 55.4, 55.9,
56.0, 56.1, 56.5, 56.9, 57.1,
57.1, 57.3, 57.7, 57.8, 58.1,
58.9, 59.0, 59.1, 59.6, 60.4,
60.7)
tps = c(26.8, 29.6, 33.4,
35,40,41.9, 42.5)
cens = NULL
m = length(xt) - length(tps)
for (i in 1:length(xt)){
if (i >= m){
cens[i] = 1
}else{
cens[i] = 0
}
}
df02 = data.frame(
xt, cens
)
df02## xt cens
## 1 36.3 0
## 2 41.7 0
## 3 43.9 0
## 4 49.9 0
## 5 50.1 0
## 6 50.8 0
## 7 51.9 0
## 8 52.1 0
## 9 52.3 0
## 10 52.3 0
## 11 52.4 0
## 12 52.6 0
## 13 52.7 0
## 14 53.1 0
## 15 53.6 0
## 16 53.6 0
## 17 53.9 0
## 18 53.9 0
## 19 54.1 0
## 20 54.6 0
## 21 54.8 0
## 22 54.8 0
## 23 55.1 0
## 24 55.4 0
## 25 55.9 0
## 26 56.0 0
## 27 56.1 0
## 28 56.5 0
## 29 56.9 0
## 30 57.1 0
## 31 57.1 0
## 32 57.3 0
## 33 57.7 0
## 34 57.8 1
## 35 58.1 1
## 36 58.9 1
## 37 59.0 1
## 38 59.1 1
## 39 59.6 1
## 40 60.4 1
## 41 60.7 1
indices = 1:(dim(df02)[1])
plot_ly(df, x = ~indices, y = ~xt,
color=df02$cens)2.3 Questão 03 (DUVIDA TOTAL)
xt = c(2,29,6,3,1,1,
2,3,9,10,5,5,1)
cens = c(0,1,1,rep(0,5),
1,0,1,0,0)
df03 = data.frame(xt,cens)indices = 1:(dim(df03)[1])
plot_ly(df03, x = ~indices, y = ~xt,
color=df03$cens)2.3.1 A)
ekm<- survfit(Surv(df03$xt,df03$cens)~1)
ss = summary(ekm)
plot(ekm,
xlab="Tempo (semanas)",ylab="S(t) estimada")ss$surv## [1] 0.8333333 0.6250000 0.4166667 0.0000000
2.3.2 B) (duvida)
2.3.3 C)
2.4 Questão 05
g1 = c(7,8,8,8,8,12,12,17,18,22,rep(30,6))
censg1 = c(rep(0,10), rep(1,6))
g2 = c(8,8,9,10,10,14,15,15,18,19,21,22,23,25)
censg2 = c(rep(0, 14))
g3 = c(rep(8,6), 9,10,10,10,11,17,19)
censg3 = c(rep(0, 13))2.4.1 A)
ekm1<- survfit(Surv(g1,censg1)~1)
ekm2<- survfit(Surv(g2,censg2)~1)
ekm3<- survfit(Surv(g3,censg3)~1)
par(mfrow=c(2,2))
plot(ekm1, xlab="Tempo (semanas)",ylab="S(t) estimada", conf.int=F,
main='Grupo 01')#sem intervalo de confianca
plot(ekm2, xlab="Tempo (semanas)",ylab="S(t) estimada", conf.int=F,
main='Grupo 02')#sem intervalo de confianca
plot(ekm3, xlab="Tempo (semanas)",ylab="S(t) estimada", conf.int=F,
main='Grupo 03')#sem intervalo de confianca2.4.2 B)
grupos = c(rep(1,length(g1)), rep(2, length(g2)), rep(3,length(g3)))
cens = c(censg1,censg2,censg3)
cens = cens + 1
cens[cens == 2] = 0
data = c(g1,g2,g3)
df03 = data.frame(grupos,cens,'temp'=data)survdiff(Surv(df03$temp,
df03$cens)~df03$grupos,rho=0)## Call:
## survdiff(formula = Surv(df03$temp, df03$cens) ~ df03$grupos,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## df03$grupos=1 16 10 17.1 2.9328 6.7157
## df03$grupos=2 14 14 13.2 0.0456 0.0829
## df03$grupos=3 13 13 6.7 5.9240 9.6123
##
## Chisq= 11.9 on 2 degrees of freedom, p= 0.003
subdf03 = df03[df03$grupos==1|df03$grupos==2,]
survdiff(Surv(subdf03$temp,
subdf03$cens)~subdf03$grupos,rho=0)## Call:
## survdiff(formula = Surv(subdf03$temp, subdf03$cens) ~ subdf03$grupos,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## subdf03$grupos=1 16 10 13.7 1.02 2.67
## subdf03$grupos=2 14 14 10.3 1.37 2.67
##
## Chisq= 2.7 on 1 degrees of freedom, p= 0.1
subdf03 = df03[df03$grupos==2|df03$grupos==3,]
survdiff(Surv(subdf03$temp,
subdf03$cens)~subdf03$grupos,rho=0)## Call:
## survdiff(formula = Surv(subdf03$temp, subdf03$cens) ~ subdf03$grupos,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## subdf03$grupos=2 14 14 19.09 1.36 6.63
## subdf03$grupos=3 13 13 7.91 3.27 6.63
##
## Chisq= 6.6 on 1 degrees of freedom, p= 0.01
subdf03 = df03[df03$grupos==1|df03$grupos==3,]
survdiff(Surv(subdf03$temp,
subdf03$cens)~subdf03$grupos,rho=0)## Call:
## survdiff(formula = Surv(subdf03$temp, subdf03$cens) ~ subdf03$grupos,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## subdf03$grupos=1 16 10 15.34 1.86 7.86
## subdf03$grupos=3 13 13 7.66 3.72 7.86
##
## Chisq= 7.9 on 1 degrees of freedom, p= 0.005
2.5 Questão 06
2.6 Questão 10 (Duvida)
2.7 Questão 12 (Duvida)
2.8 Questão 13 (Duvida)
3 Scripts
3.1 Relações
\[ S(t) = 1 - F(t) \] \[ h(t) = \frac{f(t)}{S(t)} \]
\[ H(t) = -ln(S(t)) \]
\[ S(t) = exp(-H(t)) \]
3.2 Tabelas
Tabela 01
3.3 Kaplan meier
\[
\hat{S}(t) = \Pi_{j:t_j \leq t}\bigg(1 - \frac{d_j}{n_j}\bigg)
\]
############## Kaplan meier s(t) ###########
ekm<- survfit(Surv(g1,censg1)~1)
summary(ekm)## Call: survfit(formula = Surv(g1, censg1) ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 30 6 6 0 NaN NA NA
plot(ekm, xlab="Tempo (semanas)",ylab="S(t) estimada")plot(ekm, xlab="Tempo (semanas)",ylab="S(t) estimada", conf.int=F)#sem intervalo de confian?a3.4 Teste de hipotese
############## Teste de hipotese ###########
##### Gehan
gehan.wilcoxon.test(Surv(df03$temp,df03$cens)~df03$grupos,
data=df03)##
## Gehan-Wilcoxon
##
## data:
## = 7.0584, p-value = 0.02933
## alternative hypothesis: two-sided
##### Log-rank
survdiff(Surv(df03$temp,df03$cens)~df03$grupos,rho=0)## Call:
## survdiff(formula = Surv(df03$temp, df03$cens) ~ df03$grupos,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## df03$grupos=1 16 10 17.1 2.9328 6.7157
## df03$grupos=2 14 14 13.2 0.0456 0.0829
## df03$grupos=3 13 13 6.7 5.9240 9.6123
##
## Chisq= 11.9 on 2 degrees of freedom, p= 0.003
##### Peto
survdiff(Surv(df03$temp,df03$cens)~df03$grupos,rho=1)## Call:
## survdiff(formula = Surv(df03$temp, df03$cens) ~ df03$grupos,
## rho = 1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## df03$grupos=1 16 6.91 9.63 0.769 2.201
## df03$grupos=2 14 6.79 8.37 0.299 0.764
## df03$grupos=3 13 9.74 5.44 3.401 6.965
##
## Chisq= 7.1 on 2 degrees of freedom, p= 0.03
3.5 Função de risco
h(t) table
############## Função de Risco #############
library(muhaz)
risco<- pehaz(df03$temp,df03$cens)##
## max.time= 30
## width= 1.82134
## nbins= 17
riscoS <- muhaz(df03$temp,df03$cens, bw.smooth=200, max.time=3647)
objects(risco)## [1] "At.Risk" "call" "Cuts" "Events" "F.U.Time" "Hazard" "Width"
risco$Hazard## [1] 0.00000000 0.00000000 0.00000000 0.01281522 0.22221343 0.10785783
## [7] 0.07763437 0.02788928 0.06285091 0.14230222 0.09255855 0.05214129
## [13] 0.21154839 0.08162850 0.00000000 0.00000000 0.00000000
plot(risco)
lines(riscoS, col="red")3.6 IC para S(t)
\(n_j\): Nº sob risco
\(d_j\): Nº Falhas
\(c_j\): Nº Censuras
\[ \hat{q}_j = \frac{d_j}{n_j - \frac{c_j}{2}} \] \[ \hat{S}(t) = \Pi(1 - \hat{q}_{i-1}) \]
ou,
\[ \hat{S}(t) = \Pi_{j:t\leq t}\bigg(1 - \frac{d_j}{n_j}\bigg) \]
\[
IC(\hat{S}(t) | 1-\alpha \%) = \bigg[\hat{S}(t) +- \sqrt{\hat{Var}(\hat{S}(t))}\bigg]
\]