Anggota Kelompok

  1. Sinta Dian Monica (164221018)

  2. Salma Ayu Hanifah (164221012)

  3. Amalika Ari Anindya (164221029)

  4. Bela Sonia (164221004)

Dataset diambil melalui : https://dmkd.cs.vt.edu/projects/crowdfunding/dataset.htm

Import Packages

library(readr)
## Warning: package 'readr' was built under R version 4.2.3
library(survival)
## Warning: package 'survival' was built under R version 4.2.3

Import Data

# Load the raw Kickstarter data from disk.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
df <- read.csv(
  "D:/SEMESTER 5/Analisis Ketahanan Hidup/UAS/data kickstarter.csv",
  header = TRUE,
  row.names = NULL
)
# Keep only the columns used in the rest of the analysis.
kept_columns <- c(
  "Currency", "Comments", "Facebook_Friends",
  "Has_Video", "State", "Duration_in_Days"
)
df <- df[, kept_columns]

1) PREPROCESSING

a) Missing Value Check

# Report the number of missing (NA) values in each column of df
cat("Jumlah missing values di setiap kolom:\n")
## Jumlah missing values di setiap kolom:
print(colSums(is.na(df)))
##         Currency         Comments Facebook_Friends        Has_Video 
##                0                0                0                0 
##            State Duration_in_Days 
##                0                0

b) Outlier Check

# Count the IQR-rule outliers in a single column.
#
# column: a vector, typically one column of a data frame.
# Returns the number of non-missing values that lie more than 1.5 * IQR below
# the first quartile or above the third quartile; returns NA when `column`
# is not numeric.
check_outliers <- function(column) {
  # Non-numeric columns have no quartiles, so they are reported as NA.
  if (!is.numeric(column)) {
    return(NA)
  }
  quartiles <- quantile(column, probs = c(0.25, 0.75), na.rm = TRUE)
  first_quartile <- quartiles[[1]]
  third_quartile <- quartiles[[2]]
  spread <- third_quartile - first_quartile
  fence_low <- first_quartile - 1.5 * spread
  fence_high <- third_quartile + 1.5 * spread
  beyond_fence <- column < fence_low | column > fence_high
  # Missing values are ignored in the count.
  sum(beyond_fence, na.rm = TRUE)
}
# Apply the outlier counter to every column of df and print the counts
# NOTE(review): the printed result also reports counts for Currency, Has_Video
# and State, which are used as grouping/event variables later in this file;
# confirm that IQR outlier counts are meaningful for them.
outlier_counts <- sapply(df, check_outliers)
cat("Jumlah outliers di setiap kolom:\n")
## Jumlah outliers di setiap kolom:
print(outlier_counts)
##         Currency         Comments Facebook_Friends        Has_Video 
##              948              744              363              761 
##            State Duration_in_Days 
##                0             1964

2) EDA

A) Variabel Kategorik

a) Variabel Currency

# Frequency of each Currency value, ordered from most to least common.
currency_count <- table(df$Currency)
currency_count_sorted <- sort(currency_count, decreasing = TRUE)

# barplot() returns the x-coordinates of the bar midpoints. These are not
# 1, 2, 3, ..., so the count labels must be positioned with that return value.
# The y-axis is extended by 10% so the label above the tallest bar is not
# clipped at the top of the plot region.
bar_midpoints <- barplot(
  currency_count_sorted,
  main = "Jumlah per Nilai Variabel Currency",
  xlab = "Currency", ylab = "Jumlah (n)",
  col = "lightblue", border = "lightblue",
  ylim = c(0, max(currency_count_sorted) * 1.1)
)
text(x = bar_midpoints, y = currency_count_sorted,
     labels = currency_count_sorted, pos = 3, cex = 0.8, col = "black")

b) Variabel Has_Video

# Frequency of each Has_Video value, ordered from most to least common.
# (The variable names are reused from the Currency plot.)
currency_count <- table(df$Has_Video)
currency_count_sorted <- sort(currency_count, decreasing = TRUE)

# barplot() returns the x-coordinates of the bar midpoints. These are not
# 1, 2, 3, ..., so the count labels must be positioned with that return value.
# The y-axis is extended by 10% so the label above the tallest bar is not
# clipped at the top of the plot region.
bar_midpoints <- barplot(
  currency_count_sorted,
  main = "Jumlah per Nilai Variabel Has_Video",
  xlab = "Has Video", ylab = "Jumlah (n)",
  col = "lightblue", border = "lightblue",
  ylim = c(0, max(currency_count_sorted) * 1.1)
)
text(x = bar_midpoints, y = currency_count_sorted,
     labels = currency_count_sorted, pos = 3, cex = 0.8, col = "black")

c) Variabel State

# Frequency of each State value, ordered from most to least common.
# (The variable names are reused from the Currency plot.)
currency_count <- table(df$State)
currency_count_sorted <- sort(currency_count, decreasing = TRUE)

# barplot() returns the x-coordinates of the bar midpoints. These are not
# 1, 2, 3, ..., so the count labels must be positioned with that return value.
# The y-axis is extended by 10% so the label above the tallest bar is not
# clipped at the top of the plot region.
bar_midpoints <- barplot(
  currency_count_sorted,
  main = "Jumlah per Nilai Variabel State",
  xlab = "State", ylab = "Jumlah (n)",
  col = "lightblue", border = "lightblue",
  ylim = c(0, max(currency_count_sorted) * 1.1)
)
text(x = bar_midpoints, y = currency_count_sorted,
     labels = currency_count_sorted, pos = 3, cex = 0.8, col = "black")

B) Variabel Numerik

a) Variabel Duration_in_Days

# Density-scaled histogram of Duration_in_Days with the sample mean marked
# and a normal curve (same mean and sd) overlaid for comparison.
duration_mean <- mean(df$Duration_in_Days, na.rm = TRUE)
duration_sd <- sd(df$Duration_in_Days, na.rm = TRUE)

hist(df$Duration_in_Days,
     main = "Histogram Durasi dalam Hari (Duration_in_Days)",
     xlab = "Durasi dalam Hari",
     # probability = TRUE draws densities, not counts, so the axis is
     # labelled as a density rather than a frequency.
     ylab = "Densitas",
     col = "lightblue",
     border = "blue",
     breaks = 20,
     probability = TRUE)

# Dashed red vertical line at the sample mean.
abline(v = duration_mean, col = "red", lwd = 2, lty = 2)

# Normal density on the same scale as the histogram.
curve(dnorm(x, mean = duration_mean, sd = duration_sd),
      add = TRUE, col = "black", lwd = 1.5)

b) Variabel Comments

# Density-scaled histogram of Comments with the sample mean marked
# and a normal curve (same mean and sd) overlaid for comparison.
comments_mean <- mean(df$Comments, na.rm = TRUE)
comments_sd <- sd(df$Comments, na.rm = TRUE)

hist(df$Comments,
     main = "Histogram Comments",
     xlab = "Comments",
     # probability = TRUE draws densities, not counts, so the axis is
     # labelled as a density rather than a frequency.
     ylab = "Densitas",
     col = "lightblue",
     border = "blue",
     breaks = 20,
     probability = TRUE)

# Dashed red vertical line at the sample mean.
abline(v = comments_mean, col = "red", lwd = 2, lty = 2)

# Normal density on the same scale as the histogram.
curve(dnorm(x, mean = comments_mean, sd = comments_sd),
      add = TRUE, col = "black", lwd = 1.5)

c) Variabel Facebook_Friends

# Density-scaled histogram of Facebook_Friends with the sample mean marked
# and a normal curve (same mean and sd) overlaid for comparison.
friends_mean <- mean(df$Facebook_Friends, na.rm = TRUE)
friends_sd <- sd(df$Facebook_Friends, na.rm = TRUE)

hist(df$Facebook_Friends,
     main = "Histogram Facebook_Friends",
     xlab = "Facebook_Friends",
     # probability = TRUE draws densities, not counts, so the axis is
     # labelled as a density rather than a frequency.
     ylab = "Densitas",
     col = "lightblue",
     border = "blue",
     breaks = 20,
     probability = TRUE)

# Dashed red vertical line at the sample mean.
abline(v = friends_mean, col = "red", lwd = 2, lty = 2)

# Normal density on the same scale as the histogram.
curve(dnorm(x, mean = friends_mean, sd = friends_sd),
      add = TRUE, col = "black", lwd = 1.5)

3) SURVIVAL ANALYSIS

A) Kaplan Meier Curve

# Survival response shared by the Kaplan-Meier and log-rank sections:
# Duration_in_Days is the follow-up time and State is the event indicator.
surv_object <- Surv(
  time = df$Duration_in_Days,
  event = df$State
)

a) Variabel Currency

# Kaplan-Meier estimate with one curve per Currency group.
km_fit <- survfit(surv_object ~ df$Currency)

# survfit() orders its curves by the sorted group levels, whereas unique()
# returns values in order of first appearance. The legend is therefore built
# from the sorted values so each label, colour and line type refers to the
# curve that is actually drawn with it.
unique_values <- sort(unique(df$Currency))
colors <- rainbow(length(unique_values))

# Plot the curves; colour and line type are assigned in group order.
plot(km_fit,
     main = "Kaplan-Meier Survival Curve",
     xlab = "Waktu (hari)",
     ylab = "Probabilitas Bertahan",
     col = colors,
     lty = seq_along(unique_values))
legend("topright",
       legend = paste("Currency =", unique_values),
       col = colors,
       lty = seq_along(unique_values),
       title = "Kelompok Currency")

b) Variabel Has_Video

# Kaplan-Meier estimate with one curve per Has_Video group.
km_fit <- survfit(surv_object ~ df$Has_Video)

# survfit() orders its curves by the sorted group levels, whereas unique()
# returns values in order of first appearance. The legend is therefore built
# from the sorted values so each label, colour and line type refers to the
# curve that is actually drawn with it.
unique_values <- sort(unique(df$Has_Video))
colors <- rainbow(length(unique_values))

# Plot the curves; colour and line type are assigned in group order.
plot(km_fit,
     main = "Kaplan-Meier Survival Curve",
     xlab = "Waktu (hari)",
     ylab = "Probabilitas Bertahan",
     col = colors,
     lty = seq_along(unique_values))
legend("topright",
       legend = paste("Has_Video =", unique_values),
       col = colors,
       lty = seq_along(unique_values),
       title = "Kelompok Has_Video")

B) Log-Rank Test

a) Variabel Currency

# Log-rank test comparing the survival curves of the Currency groups
logrank_test <- survdiff(surv_object ~ df$Currency)
print(logrank_test)
## Call:
## survdiff(formula = surv_object ~ df$Currency)
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## df$Currency=1  107       47     66.5    5.7331    6.7284
## df$Currency=2  239      110    147.5    9.5402   11.5073
## df$Currency=3  579      313    295.2    1.0764    1.3636
## df$Currency=4   23       12     11.2    0.0566    0.0656
## df$Currency=5 4577     2522   2483.6    0.5944    3.9284
## 
##  Chisq= 19.5  on 4 degrees of freedom, p= 6e-04

b) Variabel Has_Video

# Log-rank test comparing the survival curves of the Has_Video groups
logrank_test <- survdiff(surv_object ~ df$Has_Video)
print(logrank_test)
## Call:
## survdiff(formula = surv_object ~ df$Has_Video)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## df$Has_Video=1  761      314      430      31.2      41.8
## df$Has_Video=2 4764     2690     2574       5.2      41.8
## 
##  Chisq= 41.8  on 1 degrees of freedom, p= 1e-10

4) COX PH

a) Model

# Fit the Cox proportional-hazards model with all four covariates.
# NOTE(review): Currency and Has_Video enter as single numeric terms (one
# coefficient each); confirm that treating these codes as numeric is intended.
cox_model <- coxph(
  Surv(Duration_in_Days, State) ~
    Currency + Comments + Facebook_Friends + Has_Video,
  data = df
)

b) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios (exp(coef)) with 95% intervals, and the overall
# likelihood-ratio, Wald and score tests of the Cox model
summary(cox_model)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends + Has_Video, data = df)
## 
##   n= 5525, number of events= 3004 
## 
##                       coef exp(coef)  se(coef)     z Pr(>|z|)    
## Currency         4.060e-02 1.041e+00 1.935e-02 2.099  0.03584 *  
## Comments         7.046e-05 1.000e+00 2.180e-05 3.232  0.00123 ** 
## Facebook_Friends 1.682e-04 1.000e+00 1.935e-05 8.691  < 2e-16 ***
## Has_Video        3.739e-01 1.453e+00 5.978e-02 6.254 3.99e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## Currency             1.041     0.9602     1.003     1.082
## Comments             1.000     0.9999     1.000     1.000
## Facebook_Friends     1.000     0.9998     1.000     1.000
## Has_Video            1.453     0.6881     1.293     1.634
## 
## Concordance= 0.577  (se = 0.006 )
## Likelihood ratio test= 128.7  on 4 df,   p=<2e-16
## Wald test            = 136.1  on 4 df,   p=<2e-16
## Score (logrank) test = 138.7  on 4 df,   p=<2e-16

c) Uji Asumsi PH GOF

# Test the proportional-hazards assumption of the Cox model with cox.zph()
schoenfeld_test <- cox.zph(cox_model)

# Print the per-covariate and GLOBAL test results
print(schoenfeld_test)
##                  chisq df      p
## Currency         0.257  1 0.6120
## Comments         0.176  1 0.6748
## Facebook_Friends 0.156  1 0.6926
## Has_Video        7.233  1 0.0072
## GLOBAL           7.823  4 0.0983

d) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(cox_model)
## [1] 46746.72
# Bayesian information criterion of the fitted model
BIC(cox_model)
## [1] 46770.75
# Log-likelihood of the fitted model
logLik(cox_model)
## 'log Lik.' -23369.36 (df=4)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(cox_model)$concordance[1]
print(c_index)
##         C 
## 0.5774915

5) STRATIFIED COX

A) Tanpa Interaksi

a) Model 1 –> Tanpa Split

# Cox model stratified on Has_Video: Has_Video is moved out of the covariate
# list and into strata(), leaving three covariates with estimated coefficients.
stratified_cox_model <- coxph(
  Surv(Duration_in_Days, State) ~
    Currency + Comments + Facebook_Friends + strata(Has_Video),
  data = df
)

i) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios and overall tests of the stratified Cox model
summary(stratified_cox_model)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends + strata(Has_Video), data = df)
## 
##   n= 5525, number of events= 3004 
## 
##                       coef exp(coef)  se(coef)     z Pr(>|z|)    
## Currency         4.140e-02 1.042e+00 1.936e-02 2.139   0.0324 *  
## Comments         7.132e-05 1.000e+00 2.185e-05 3.264   0.0011 ** 
## Facebook_Friends 1.702e-04 1.000e+00 1.936e-05 8.793   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## Currency             1.042     0.9594     1.003     1.083
## Comments             1.000     0.9999     1.000     1.000
## Facebook_Friends     1.000     0.9998     1.000     1.000
## 
## Concordance= 0.579  (se = 0.006 )
## Likelihood ratio test= 83.71  on 3 df,   p=<2e-16
## Wald test            = 95.69  on 3 df,   p=<2e-16
## Score (logrank) test = 97.5  on 3 df,   p=<2e-16

ii) Uji Asumsi PH GOF

# Test the proportional-hazards assumption of the stratified model with cox.zph()
schoenfeld_test <- cox.zph(stratified_cox_model)

# Print the per-covariate and GLOBAL test results
print(schoenfeld_test)
##                  chisq df    p
## Currency         0.266  1 0.61
## Comments         0.121  1 0.73
## Facebook_Friends 0.188  1 0.66
## GLOBAL           0.624  3 0.89

iii) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(stratified_cox_model)
## [1] 44734.43
# Bayesian information criterion of the fitted model
BIC(stratified_cox_model)
## [1] 44752.45
# Log-likelihood of the fitted model
logLik(stratified_cox_model)
## 'log Lik.' -22364.21 (df=3)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(stratified_cox_model)$concordance[1]
print(c_index)
##         C 
## 0.5787145

b) Model 2 –> Dengan Split

# Fit the same three-covariate Cox model separately within each Has_Video group.
# The inner argument keeps the name `df` (shadowing the full data set) because
# that name is what appears as `data = df` in each printed Call.
video_groups <- split(df, df$Has_Video)
res.separate <- lapply(video_groups, function(df) {
  coxph(
    Surv(Duration_in_Days, State) ~ Currency + Comments + Facebook_Friends,
    data = df
  )
})

i) Uji Serentak, Parsial, & Hazard Ratio

# Print the fitted model of each Has_Video group
res.separate
## $`1`
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends, data = df)
## 
##                        coef  exp(coef)   se(coef)      z      p
## Currency         -2.595e-02  9.744e-01  5.362e-02 -0.484 0.6285
## Comments          1.523e-03  1.002e+00  8.547e-04  1.782 0.0748
## Facebook_Friends  1.419e-04  1.000e+00  6.516e-05  2.177 0.0294
## 
## Likelihood ratio test=7.35  on 3 df, p=0.06149
## n= 761, number of events= 314 
## 
## $`2`
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends, data = df)
## 
##                       coef exp(coef)  se(coef)     z       p
## Currency         5.058e-02 1.052e+00 2.078e-02 2.434 0.01494
## Comments         7.059e-05 1.000e+00 2.200e-05 3.209 0.00133
## Facebook_Friends 1.723e-04 1.000e+00 2.029e-05 8.488 < 2e-16
## 
## Likelihood ratio test=80.4  on 3 df, p=< 2.2e-16
## n= 4764, number of events= 2690

ii) Evaluasi Model

# Build a one-row summary (log-likelihood, AIC, BIC) for the model fitted to
# the named Has_Video group in res.separate.
fit_statistics <- function(group) {
  fit <- res.separate[[group]]
  data.frame(
    Group = group,
    LogLikelihood = logLik(fit),
    AIC = AIC(fit),
    BIC = BIC(fit)
  )
}

# Stack the per-group rows into a single table and display it.
loglik_table <- do.call(rbind, lapply(names(res.separate), fit_statistics))

print(loglik_table)
##   Group LogLikelihood       AIC       BIC
## 1     1     -1837.466  3680.931  3692.179
## 2     2    -20524.727 41055.454 41073.146

B) Interaksi

a) Model 1 –> Tanpa Split

# Stratified Cox model (strata on Has_Video) with all pairwise interactions
# among Currency, Comments and Facebook_Friends.
stratified_cox_model_interaction <- coxph(
  Surv(Duration_in_Days, State) ~
    Currency * Comments +
    Currency * Facebook_Friends +
    Facebook_Friends * Comments +
    strata(Has_Video),
  data = df
)

i) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios and overall tests of the stratified interaction model
summary(stratified_cox_model_interaction)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency * Comments + 
##     Currency * Facebook_Friends + Facebook_Friends * Comments + 
##     strata(Has_Video), data = df)
## 
##   n= 5525, number of events= 3004 
## 
##                                 coef  exp(coef)   se(coef)      z Pr(>|z|)  
## Currency                   3.903e-02  1.040e+00  2.503e-02  1.559   0.1189  
## Comments                   5.577e-04  1.001e+00  2.604e-04  2.142   0.0322 *
## Facebook_Friends           1.431e-04  1.000e+00  1.178e-04  1.215   0.2242  
## Currency:Comments         -8.291e-05  9.999e-01  5.070e-05 -1.635   0.1020  
## Currency:Facebook_Friends  6.320e-06  1.000e+00  2.439e-05  0.259   0.7956  
## Comments:Facebook_Friends -1.594e-07  1.000e+00  9.981e-08 -1.597   0.1103  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           exp(coef) exp(-coef) lower .95 upper .95
## Currency                     1.0398     0.9617    0.9900     1.092
## Comments                     1.0006     0.9994    1.0000     1.001
## Facebook_Friends             1.0001     0.9999    0.9999     1.000
## Currency:Comments            0.9999     1.0001    0.9998     1.000
## Currency:Facebook_Friends    1.0000     1.0000    1.0000     1.000
## Comments:Facebook_Friends    1.0000     1.0000    1.0000     1.000
## 
## Concordance= 0.579  (se = 0.006 )
## Likelihood ratio test= 87.86  on 6 df,   p=<2e-16
## Wald test            = 103.4  on 6 df,   p=<2e-16
## Score (logrank) test = 107  on 6 df,   p=<2e-16

ii) Uji Asumsi PH GOF

# Test the proportional-hazards assumption of the interaction model with cox.zph()
schoenfeld_test <- cox.zph(stratified_cox_model_interaction)

# Print the per-term and GLOBAL test results
print(schoenfeld_test)
##                              chisq df     p
## Currency                  3.21e-01  1 0.571
## Comments                  2.92e+00  1 0.088
## Facebook_Friends          1.64e-01  1 0.685
## Currency:Comments         2.36e+00  1 0.125
## Currency:Facebook_Friends 8.04e-05  1 0.993
## Comments:Facebook_Friends 2.52e+00  1 0.112
## GLOBAL                    1.29e+01  6 0.045

iii) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(stratified_cox_model_interaction)
## [1] 44736.27
# Bayesian information criterion of the fitted model
BIC(stratified_cox_model_interaction)
## [1] 44772.32
# Log-likelihood of the fitted model
logLik(stratified_cox_model_interaction)
## 'log Lik.' -22362.14 (df=6)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(stratified_cox_model_interaction)$concordance[1]
print(c_index)
##         C 
## 0.5793806

b) Model 2 –> Dengan Split

# Fit the pairwise-interaction Cox model separately within each Has_Video group.
# The inner argument keeps the name `df` (shadowing the full data set) because
# that name is what appears as `data = df` in each printed Call.
video_groups <- split(df, df$Has_Video)
res.separate <- lapply(video_groups, function(df) {
  coxph(
    Surv(Duration_in_Days, State) ~
      Currency * Comments +
      Currency * Facebook_Friends +
      Facebook_Friends * Comments,
    data = df
  )
})

i) Uji Serentak, Parsial, & Hazard Ratio

# Print the fitted interaction model of each Has_Video group
res.separate
## $`1`
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency * Comments + 
##     Currency * Facebook_Friends + Facebook_Friends * Comments, 
##     data = df)
## 
##                                 coef  exp(coef)   se(coef)      z     p
## Currency                  -1.747e-02  9.827e-01  7.205e-02 -0.242 0.808
## Comments                   6.189e-03  1.006e+00  1.169e-02  0.529 0.597
## Facebook_Friends           2.135e-04  1.000e+00  3.829e-04  0.558 0.577
## Currency:Comments         -4.191e-04  9.996e-01  2.338e-03 -0.179 0.858
## Currency:Facebook_Friends -1.125e-05  1.000e+00  8.035e-05 -0.140 0.889
## Comments:Facebook_Friends -2.099e-06  1.000e+00  1.904e-06 -1.103 0.270
## 
## Likelihood ratio test=8.96  on 6 df, p=0.1757
## n= 761, number of events= 314 
## 
## $`2`
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency * Comments + 
##     Currency * Facebook_Friends + Facebook_Friends * Comments, 
##     data = df)
## 
##                                 coef  exp(coef)   se(coef)      z      p
## Currency                   4.907e-02  1.050e+00  2.688e-02  1.826 0.0679
## Comments                   5.713e-04  1.001e+00  2.605e-04  2.193 0.0283
## Facebook_Friends           1.504e-04  1.000e+00  1.242e-04  1.211 0.2259
## Currency:Comments         -8.527e-05  9.999e-01  5.071e-05 -1.681 0.0927
## Currency:Facebook_Friends  5.260e-06  1.000e+00  2.568e-05  0.205 0.8377
## Comments:Facebook_Friends -1.653e-07  1.000e+00  1.013e-07 -1.632 0.1026
## 
## Likelihood ratio test=84.73  on 6 df, p=3.758e-16
## n= 4764, number of events= 2690

ii) Evaluasi Model

# Build a one-row summary (log-likelihood, AIC, BIC) for the model fitted to
# the named Has_Video group in res.separate.
fit_statistics <- function(group) {
  fit <- res.separate[[group]]
  data.frame(
    Group = group,
    LogLikelihood = logLik(fit),
    AIC = AIC(fit),
    BIC = BIC(fit)
  )
}

# Stack the per-group rows into a single table and display it.
loglik_table <- do.call(rbind, lapply(names(res.separate), fit_statistics))

print(loglik_table)
##   Group LogLikelihood      AIC       BIC
## 1     1      -1836.66  3685.32  3707.816
## 2     2     -20522.56 41057.12 41092.506

C) Likelihood Ratio Test

# Likelihood-ratio test of the stratified model without interactions against
# the stratified model with pairwise interactions.
loglik_no_interaction <- logLik(stratified_cox_model)
loglik_with_interaction <- logLik(stratified_cox_model_interaction)

# Test statistic: -2 * (logLik of smaller model - logLik of larger model)
LR_statistic <- -2 * (loglik_no_interaction - loglik_with_interaction)
# Degrees of freedom: difference in the "df" attribute of the two log-likelihoods
df1 <- attr(loglik_with_interaction, "df") - attr(loglik_no_interaction, "df")
# Upper-tail chi-square probability of the statistic
p_value <- pchisq(LR_statistic, df = df1, lower.tail = FALSE)

# Print each quantity of the test
cat("Log-Likelihood tanpa interaksi:", loglik_no_interaction, "\n")
## Log-Likelihood tanpa interaksi: -22364.21
cat("Log-Likelihood dengan interaksi:", loglik_with_interaction, "\n")
## Log-Likelihood dengan interaksi: -22362.14
cat("Statistik Likelihood Ratio:", LR_statistic, "\n")
## Statistik Likelihood Ratio: 4.151984
cat("Derajat kebebasan:", df1, "\n")
## Derajat kebebasan: 3
cat("P-value:", p_value, "\n")
## P-value: 0.2455134
# Decision at the 0.05 level: a small p-value means the interaction terms matter
if (p_value < 0.05) {
  cat("Terdapat interaksi yang signifikan antara variabel dalam model.\n")
} else {
  cat("Tidak terdapat interaksi yang signifikan antara variabel dalam model.\n")
}
## Tidak terdapat interaksi yang signifikan antara variabel dalam model.

6) COX EXTENDED

A) Fungsi linear t

a) Model

# Extended Cox model: keeps the four covariates and adds a time-dependent
# term for Has_Video through tt().
cox_extended_linear <- coxph(
  Surv(Duration_in_Days, State) ~ Currency + Comments + Facebook_Friends +
    Has_Video + tt(Has_Video),
  data = df,
  # Time transform: the tt() term is Has_Video multiplied by time t (linear in t)
  tt = function(x, t, ...) x * t
)

b) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios and overall tests of the linear-time extended model
summary(cox_extended_linear)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends + Has_Video + tt(Has_Video), data = df, 
##     tt = function(x, t, ...) x * t)
## 
##   n= 5525, number of events= 3004 
## 
##                        coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Currency          4.212e-02  1.043e+00  1.936e-02  2.176  0.02958 *  
## Comments          7.102e-05  1.000e+00  2.181e-05  3.257  0.00113 ** 
## Facebook_Friends  1.698e-04  1.000e+00  1.934e-05  8.777  < 2e-16 ***
## Has_Video        -1.966e-01  8.215e-01  1.967e-01 -1.000  0.31747    
## tt(Has_Video)     1.864e-02  1.019e+00  6.259e-03  2.979  0.00290 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## Currency            1.0430     0.9588    1.0042     1.083
## Comments            1.0001     0.9999    1.0000     1.000
## Facebook_Friends    1.0002     0.9998    1.0001     1.000
## Has_Video           0.8215     1.2173    0.5587     1.208
## tt(Has_Video)       1.0188     0.9815    1.0064     1.031
## 
## Concordance= 0.581  (se = 0.006 )
## Likelihood ratio test= 137.9  on 5 df,   p=<2e-16
## Wald test            = 143.3  on 5 df,   p=<2e-16
## Score (logrank) test = 146.5  on 5 df,   p=<2e-16

c) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(cox_extended_linear)
## [1] 46739.52
# Bayesian information criterion of the fitted model
BIC(cox_extended_linear)
## [1] 46769.56
# Log-likelihood of the fitted model
logLik(cox_extended_linear)
## 'log Lik.' -23364.76 (df=5)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(cox_extended_linear)$concordance[1]
print(c_index)
##         C 
## 0.5808586

B) Fungsi log(t)

a) Model

# Extended Cox model with a log-time term for Has_Video:
# the tt() term is Has_Video multiplied by log(t + 1).
cox_extended_log <- coxph(
  Surv(Duration_in_Days, State) ~ Currency + Comments + Facebook_Friends +
    Has_Video + tt(Has_Video),
  data = df,
  tt = function(x, t, ...) x * log(t + 1)
)

b) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios and overall tests of the log-time extended model
summary(cox_extended_log)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends + Has_Video + tt(Has_Video), data = df, 
##     tt = function(x, t, ...) x * log(t + 1))
## 
##   n= 5525, number of events= 3004 
## 
##                        coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Currency          4.205e-02  1.043e+00  1.936e-02  2.173 0.029813 *  
## Comments          7.118e-05  1.000e+00  2.182e-05  3.262 0.001107 ** 
## Facebook_Friends  1.699e-04  1.000e+00  1.934e-05  8.783  < 2e-16 ***
## Has_Video        -1.839e+00  1.589e-01  5.471e-01 -3.362 0.000775 ***
## tt(Has_Video)     6.519e-01  1.919e+00  1.616e-01  4.034 5.49e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## Currency            1.0429     0.9588   1.00412    1.0833
## Comments            1.0001     0.9999   1.00003    1.0001
## Facebook_Friends    1.0002     0.9998   1.00013    1.0002
## Has_Video           0.1589     6.2917   0.05439    0.4645
## tt(Has_Video)       1.9192     0.5210   1.39816    2.6345
## 
## Concordance= 0.583  (se = 0.006 )
## Likelihood ratio test= 143.9  on 5 df,   p=<2e-16
## Wald test            = 149  on 5 df,   p=<2e-16
## Score (logrank) test = 152.1  on 5 df,   p=<2e-16

c) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(cox_extended_log)
## [1] 46733.47
# Bayesian information criterion of the fitted model
BIC(cox_extended_log)
## [1] 46763.51
# Log-likelihood of the fitted model
logLik(cox_extended_log)
## 'log Lik.' -23361.74 (df=5)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(cox_extended_log)$concordance[1]
print(c_index)
##        C 
## 0.582824

C) Fungsi Heaviside

a) Model

# Extended Cox model with a step (Heaviside) time-dependent term for Has_Video.
cox_extended_heaviside <- coxph(
  Surv(Duration_in_Days, State) ~ Currency + Comments + Facebook_Friends +
    Has_Video + tt(Has_Video),
  data = df,
  # Time transform: the tt() term equals Has_Video when t > 30 and 0 otherwise
  tt = function(x, t, ...) ifelse(t > 30, x, 0)
)

b) Uji Serentak, Parsial, & Hazard Ratio

# Coefficients, hazard ratios and overall tests of the Heaviside extended model
summary(cox_extended_heaviside)
## Call:
## coxph(formula = Surv(Duration_in_Days, State) ~ Currency + Comments + 
##     Facebook_Friends + Has_Video + tt(Has_Video), data = df, 
##     tt = function(x, t, ...) ifelse(t > 30, x, 0))
## 
##   n= 5525, number of events= 3004 
## 
##                       coef exp(coef)  se(coef)     z Pr(>|z|)    
## Currency         4.139e-02 1.042e+00 1.935e-02 2.139  0.03247 *  
## Comments         7.059e-05 1.000e+00 2.181e-05 3.237  0.00121 ** 
## Facebook_Friends 1.691e-04 1.000e+00 1.935e-05 8.740  < 2e-16 ***
## Has_Video        3.007e-01 1.351e+00 6.989e-02 4.303 1.69e-05 ***
## tt(Has_Video)    2.529e-01 1.288e+00 1.347e-01 1.878  0.06035 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## Currency             1.042     0.9595    1.0035     1.083
## Comments             1.000     0.9999    1.0000     1.000
## Facebook_Friends     1.000     0.9998    1.0001     1.000
## Has_Video            1.351     0.7403    1.1779     1.549
## tt(Has_Video)        1.288     0.7765    0.9891     1.677
## 
## Concordance= 0.577  (se = 0.006 )
## Likelihood ratio test= 132.3  on 5 df,   p=<2e-16
## Wald test            = 139  on 5 df,   p=<2e-16
## Score (logrank) test = 141.8  on 5 df,   p=<2e-16

c) Evaluasi Model

# Akaike information criterion of the fitted model
AIC(cox_extended_heaviside)
## [1] 46745.09
# Bayesian information criterion of the fitted model
BIC(cox_extended_heaviside)
## [1] 46775.13
# Log-likelihood of the fitted model
logLik(cox_extended_heaviside)
## 'log Lik.' -23367.54 (df=5)
# Concordance (Harrell's C-index): first element of the summary's concordance vector
c_index <- summary(cox_extended_heaviside)$concordance[1]
print(c_index)
##         C 
## 0.5771243