library(readxl) 
datIND1 <- read_xlsx(path = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Analisis Data Kategorik/UAS ADK/DataWrangling/IDN_WVS7.xlsx", sheet = 1) 
head(datIND1)
## # A tibble: 6 × 611
##   version        doi   A_WAVE A_YEAR A_STUDY B_COUNTRY B_COUNTRY_ALPHA C_COW_NUM
##   <chr>          <chr> <chr>  <chr>  <chr>   <chr>     <chr>           <chr>    
## 1 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 2 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 3 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 4 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 5 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 6 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## # ℹ 603 more variables: C_COW_ALPHA <chr>, D_INTERVIEW <dbl>, S007 <dbl>,
## #   J_INTDATE <chr>, FW_START <dbl>, FW_END <dbl>, K_TIME_START <chr>,
## #   K_TIME_END <chr>, K_DURATION <chr>, Q_MODE <chr>, N_REGION_ISO <chr>,
## #   N_REGION_WVS <chr>, N_REGION_NUTS2 <chr>, N_REGION_NUTS1 <chr>,
## #   N_TOWN <chr>, G_TOWNSIZE <chr>, G_TOWNSIZE2 <chr>, H_SETTLEMENT <chr>,
## #   H_URBRURAL <chr>, I_PSU <chr>, O1_LONGITUDE <chr>, O2_LATITUDE <chr>,
## #   L_INTERVIEWER_NUMBER <chr>, S_INTLANGUAGE <chr>, LNGE_ISO <chr>, …
dim(datIND1)
## [1] 3200  611

Pada data WVS 7 Indonesia ada sebanyak 3200 pengamatan dan 611 variabel

df1 <- subset(datIND1, select = c("Q46P", "Q47P", "Q173P", "Q273", "Q260",  "Q262","Q275" )) 
sapply(df1, function(x) any(is.na(x))) 
##  Q46P  Q47P Q173P  Q273  Q260  Q262  Q275 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE
str(df1)
## tibble [3,200 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Q46P : chr [1:3200] "Very happy" "Very happy" "Quite happy" "Very happy" ...
##  $ Q47P : chr [1:3200] "Good" "Good" "Good" "Very good" ...
##  $ Q173P: chr [1:3200] "A religious person" "A religious person" "A religious person" "A religious person" ...
##  $ Q273 : chr [1:3200] "Married" "Married" "Married" "Married" ...
##  $ Q260 : chr [1:3200] "Male" "Male" "Male" "Male" ...
##  $ Q262 : chr [1:3200] "22" "35" "48" "46" ...
##  $ Q275 : chr [1:3200] "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Lower secondary education (ISCED 2)" ...
lapply(df1, table)
## $Q46P
## 
##        No answer Not at all happy   Not very happy      Quite happy 
##                1               34              161             1590 
##       Very happy 
##             1414 
## 
## $Q47P
## 
##      Fair      Good      Poor Very good Very poor 
##       720      1404       200       836        40 
## 
## $Q173P
## 
##     A religious person             Don't know              No answer 
##                   2924                     27                     14 
## Not a religious person 
##                    235 
## 
## $Q273
## 
##                   Divorced Living together as married 
##                        101                         23 
##                    Married                  Separated 
##                       2453                          9 
##                     Single                    Widowed 
##                        415                        199 
## 
## $Q260
## 
## Female   Male 
##   1754   1446 
## 
## $Q262
## 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37 
##  70  66  58  56  54  65  65  84  53  54  84  79  99  67  93 105  97  85  78  66 
##  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
## 108  72  93  82  89  82  64  94  66  68  67  61  60  39  56  55  45  46  35  41 
##  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  78 
##  52  31  53  32  27  26  21  28  26  13  16   9  19   1   5  10   4   6   6   3 
##  79  80  81  82  84  86  87 
##   1   3   1   1   1   2   2 
## 
## $Q275
## 
##                   Bachelor or equivalent (ISCED 6) 
##                                                221 
##                   Doctoral or equivalent (ISCED 8) 
##                                                  1 
##                                         Don't know 
##                                                  1 
## Early childhood education (ISCED 0) / no education 
##                                                311 
##                Lower secondary education (ISCED 2) 
##                                                659 
##                     Master or equivalent (ISCED 7) 
##                                                 17 
##                        Primary education (ISCED 1) 
##                                                842 
##           Short-cycle tertiary education (ISCED 5) 
##                                                 81 
##                Upper secondary education (ISCED 3) 
##                                               1067

PREPROCESSING DATA

# PREPROCESSING DATA
# Membuat faktor Happiness
df1$Happiness <- factor(df1$Q46P,
  levels = c("Not at all happy", "Not very happy", "Quite happy", "Very happy"),
  labels = c("Not at all happy", "Not very happy", "Quite happy", "Very happy")
)

# Membuat HappyStatus biner
df1$HappyStatus <- ifelse(
  is.na(df1$Happiness), NA,
  ifelse(df1$Happiness %in% c("Quite happy", "Very happy"), "Happy", "Unhappy")
)
df1$HappyStatus <- factor(df1$HappyStatus, levels = c("Unhappy", "Happy"))
table(df1$HappyStatus, useNA = "ifany")
## 
## Unhappy   Happy    <NA> 
##     195    3004       1
df1$Gender <- factor(df1$Q260)
sum(is.na(df1$Gender))
## [1] 0
levels(df1$Gender)
## [1] "Female" "Male"
df1$Age <- as.numeric(df1$Q262)
sum(is.na(df1$Age))
## [1] 0
df1$Marital <- factor(df1$Q273, levels = c("Single","Widowed","Separated","Divorced","Living together as married","Married"))
sum(is.na(df1$Marital))
## [1] 0
levels(df1$Marital)
## [1] "Single"                     "Widowed"                   
## [3] "Separated"                  "Divorced"                  
## [5] "Living together as married" "Married"
df1$Health <- factor(df1$Q47P, levels = c("Very poor", "Poor", "Fair","Good","Very good"))
sum(is.na(df1$Health))
## [1] 0
levels(df1$Health)
## [1] "Very poor" "Poor"      "Fair"      "Good"      "Very good"
df1$Religious <- factor(df1$Q173P, levels = c("Don't know","Not a religious person","A religious person"))
sum(is.na(df1$Religious))
## [1] 14
levels(df1$Religious)
## [1] "Don't know"             "Not a religious person" "A religious person"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df1 <- df1 %>% mutate(Education = case_when(Q275 %in% c("Early childhood education (ISCED 0) / no education","Primary education (ISCED 1)", "Lower secondary education (ISCED 2)") ~ "Low", Q275 %in% c("Upper secondary education (ISCED 3)") ~ "Middle", Q275 %in% c("Short-cycle tertiary education (ISCED 5)","Bachelor or equivalent (ISCED 6)", "Master or equivalent (ISCED 7)","Doctoral or equivalent (ISCED 8)") ~ "High"))
sum(is.na(df1$Education))
## [1] 1
#newdataset
df2 <- subset(df1,select = c("HappyStatus","Age","Gender","Marital","Education","Health","Religious"))
sapply(df2, function(x) sum(is.na(x)))
## HappyStatus         Age      Gender     Marital   Education      Health 
##           1           0           0           0           1           0 
##   Religious 
##          14
dim(df2)
## [1] 3200    7
colnames(df2)
## [1] "HappyStatus" "Age"         "Gender"      "Marital"     "Education"  
## [6] "Health"      "Religious"
df3 <- na.omit(df2)
dim(df3)
## [1] 3184    7
df3$Gender     <- as.factor(df3$Gender)
df3$Marital    <- as.factor(df3$Marital)
df3$Education  <- as.factor(df3$Education)
df3$Health     <- as.factor(df3$Health)
df3$Religious  <- as.factor(df3$Religious)
df3$HappyStatus <- factor(df3$HappyStatus, levels = c("Unhappy", "Happy"))

library(summarytools)
## Warning: package 'summarytools' was built under R version 4.4.3
dfSummary(df3)
## Data Frame Summary  
## df3  
## Dimensions: 3184 x 7  
## Duplicates: 1684  
## 
## ----------------------------------------------------------------------------------------------------------------
## No   Variable      Stats / Values                 Freqs (% of Valid)   Graph                Valid      Missing  
## ---- ------------- ------------------------------ -------------------- -------------------- ---------- ---------
## 1    HappyStatus   1. Unhappy                      195 ( 6.1%)         I                    3184       0        
##      [factor]      2. Happy                       2989 (93.9%)         IIIIIIIIIIIIIIIIII   (100.0%)   (0.0%)   
## 
## 2    Age           Mean (sd) : 40 (13.5)          67 distinct values       : .              3184       0        
##      [numeric]     min < med < max:                                    . : : : .            (100.0%)   (0.0%)   
##                    18 < 39 < 87                                        : : : : : .                              
##                    IQR (CV) : 19 (0.3)                                 : : : : : : .                            
##                                                                        : : : : : : : .                          
## 
## 3    Gender        1. Female                      1750 (55.0%)         IIIIIIIIII           3184       0        
##      [factor]      2. Male                        1434 (45.0%)         IIIIIIIII            (100.0%)   (0.0%)   
## 
## 4    Marital       1. Single                       415 (13.0%)         II                   3184       0        
##      [factor]      2. Widowed                      199 ( 6.2%)         I                    (100.0%)   (0.0%)   
##                    3. Separated                      9 ( 0.3%)                                                  
##                    4. Divorced                     101 ( 3.2%)                                                  
##                    5. Living together as marrie     23 ( 0.7%)                                                  
##                    6. Married                     2437 (76.5%)         IIIIIIIIIIIIIII                          
## 
## 5    Education     1. High                         319 (10.0%)         II                   3184       0        
##      [factor]      2. Low                         1804 (56.7%)         IIIIIIIIIII          (100.0%)   (0.0%)   
##                    3. Middle                      1061 (33.3%)         IIIIII                                   
## 
## 6    Health        1. Very poor                     40 ( 1.3%)                              3184       0        
##      [factor]      2. Poor                         199 ( 6.2%)         I                    (100.0%)   (0.0%)   
##                    3. Fair                         717 (22.5%)         IIII                                     
##                    4. Good                        1397 (43.9%)         IIIIIIII                                 
##                    5. Very good                    831 (26.1%)         IIIII                                    
## 
## 7    Religious     1. Don't know                    27 ( 0.8%)                              3184       0        
##      [factor]      2. Not a religious person       235 ( 7.4%)         I                    (100.0%)   (0.0%)   
##                    3. A religious person          2922 (91.8%)         IIIIIIIIIIIIIIIIII                       
## ----------------------------------------------------------------------------------------------------------------
dim(df3)
## [1] 3184    7
head(df3)
## # A tibble: 6 × 7
##   HappyStatus   Age Gender Marital Education Health    Religious             
##   <fct>       <dbl> <fct>  <fct>   <fct>     <fct>     <fct>                 
## 1 Happy          22 Male   Married Middle    Good      A religious person    
## 2 Happy          35 Male   Married Middle    Good      A religious person    
## 3 Happy          48 Male   Married Middle    Good      A religious person    
## 4 Happy          46 Male   Married Low       Very good A religious person    
## 5 Happy          46 Male   Married Middle    Fair      Not a religious person
## 6 Happy          46 Male   Married High      Good      A religious person

MODEL REGRESI LOGISTIK

library(dplyr)
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
RLog <- glm(HappyStatus ~ Age + Gender + Marital + Education + Health + Religious,
             data = df3, family = binomial)
summary(RLog)
## 
## Call:
## glm(formula = HappyStatus ~ Age + Gender + Marital + Education + 
##     Health + Religious, family = binomial, data = df3)
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         0.975897   0.777503   1.255 0.209418    
## Age                                -0.009563   0.006661  -1.436 0.151114    
## GenderMale                         -0.565114   0.164259  -3.440 0.000581 ***
## MaritalWidowed                      0.109606   0.405947   0.270 0.787159    
## MaritalSeparated                   -0.777390   0.920520  -0.845 0.398384    
## MaritalDivorced                    -0.942872   0.378326  -2.492 0.012695 *  
## MaritalLiving together as married  13.803299 481.871096   0.029 0.977148    
## MaritalMarried                      0.473267   0.262298   1.804 0.071183 .  
## EducationLow                       -0.632582   0.331600  -1.908 0.056435 .  
## EducationMiddle                    -0.327752   0.349214  -0.939 0.347965    
## HealthPoor                          0.077147   0.436467   0.177 0.859701    
## HealthFair                          0.966330   0.419068   2.306 0.021116 *  
## HealthGood                          1.857615   0.424406   4.377  1.2e-05 ***
## HealthVery good                     2.201755   0.456304   4.825  1.4e-06 ***
## ReligiousNot a religious person     0.867475   0.598834   1.449 0.147448    
## ReligiousA religious person         1.280936   0.551026   2.325 0.020091 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1467.0  on 3183  degrees of freedom
## Residual deviance: 1304.6  on 3168  degrees of freedom
## AIC: 1336.6
## 
## Number of Fisher Scoring iterations: 15

UJI ASUMSI REGRESI LOGISTIK 1. Tidak Ada MULTIKOLINEARITAS Antar Variabel Independen (VIF < 5)

vif(RLog)
##               GVIF Df GVIF^(1/(2*Df))
## Age       1.615195  1        1.270903
## Gender    1.129578  1        1.062816
## Marital   1.679766  5        1.053234
## Education 1.153073  2        1.036249
## Health    1.072077  4        1.008738
## Religious 1.091686  2        1.022173
  1. Uji linearitas logit dengan variabel numerik (Box-Tidwell)
library(car)
# Pastikan HappyStatus adalah biner (0/1), bukan faktor dengan label
df3$HappyStatus <- as.numeric(df3$HappyStatus) - 1

# Box-Tidwell
boxTidwell(HappyStatus ~ Age , data = df3)
##  MLE of lambda Score Statistic (t) Pr(>|t|)
##      0.0068794              1.0766   0.2817
## 
## iterations =  16
  1. outlier untuk variabel numerik
# Boxplot untuk visualisasi outlier pada Age
boxplot(df3$Age, main = "Boxplot Age", ylab = "Age")

# Deteksi nilai pencilan secara statistik
outlier_age <- boxplot.stats(df3$Age)$out
outlier_age 
##  [1] 80 78 87 87 78 86 82 81 80 78 79 80 84 86
# Plot pencar sederhana untuk Age
plot(df3$Age, 
     main = "Scatter Plot Age", 
     ylab = "Age", 
     xlab = "Index", 
     pch = 19, 
     col = ifelse(df3$Age %in% boxplot.stats(df3$Age)$out, "red", "black"))

# Tambahkan garis horizontal pada batas outlier
abline(h = boxplot.stats(df3$Age)$stats[c(1, 5)], col = "blue", lty = 2)
legend("topright", legend = c("Outlier", "Data Normal"), col = c("red", "black"), pch = 19)

# Transformasi
df3$Age_log <- log(df3$Age)
boxplot(df3$Age_log, main = "Boxplot Age (Log Transform)", ylab = "log(Age)")

boxplot.stats(df3$Age_log)$out  # Cek outlier setelah transformasi
## numeric(0)
# Scatter plot untuk Age_log, dengan garis batas outlier
plot(df3$Age_log, 
     main = "Scatter Plot log(Age)", 
     ylab = "log(Age)", 
     xlab = "Index", 
     pch = 19, 
     col = ifelse(df3$Age_log %in% boxplot.stats(df3$Age_log)$out, "red", "black"))

# Tambahkan garis horizontal pada batas bawah dan atas boxplot (Q1-1.5*IQR dan Q3+1.5*IQR)
abline(h = boxplot.stats(df3$Age_log)$stats[c(1, 5)], col = "blue", lty = 2)

# Tambahkan legenda
legend("topright", legend = c("Outlier", "Data Normal"), col = c("red", "black"), pch = 19)

library(dplyr)
library(car)

RLog2 <- glm(HappyStatus ~ Age_log + Gender + Marital + Education + Health + Religious,
             data = df3, family = binomial)
summary(RLog2)
## 
## Call:
## glm(formula = HappyStatus ~ Age_log + Gender + Marital + Education + 
##     Health + Religious, family = binomial, data = df3)
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         2.63719    1.17797   2.239  0.02517 *  
## Age_log                            -0.59573    0.28371  -2.100  0.03575 *  
## GenderMale                         -0.54045    0.16439  -3.288  0.00101 ** 
## MaritalWidowed                      0.29845    0.41716   0.715  0.47434    
## MaritalSeparated                   -0.62844    0.92228  -0.681  0.49562    
## MaritalDivorced                    -0.78705    0.39027  -2.017  0.04373 *  
## MaritalLiving together as married  13.87239  482.21747   0.029  0.97705    
## MaritalMarried                      0.61674    0.27739   2.223  0.02619 *  
## EducationLow                       -0.63561    0.33180  -1.916  0.05541 .  
## EducationMiddle                    -0.34879    0.34948  -0.998  0.31826    
## HealthPoor                          0.07272    0.43664   0.167  0.86774    
## HealthFair                          0.95616    0.41910   2.281  0.02252 *  
## HealthGood                          1.84326    0.42425   4.345 1.39e-05 ***
## HealthVery good                     2.17789    0.45620   4.774 1.81e-06 ***
## ReligiousNot a religious person     0.84931    0.59780   1.421  0.15540    
## ReligiousA religious person         1.29411    0.54949   2.355  0.01852 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1467.0  on 3183  degrees of freedom
## Residual deviance: 1302.2  on 3168  degrees of freedom
## AIC: 1334.2
## 
## Number of Fisher Scoring iterations: 15
  1. UJI ASUMSI INDEPENDENSI / TIDAK AUTOKORELASI
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
dwtest(RLog)
## 
##  Durbin-Watson test
## 
## data:  RLog
## DW = 2.0059, p-value = 0.5623
## alternative hypothesis: true autocorrelation is greater than 0
# Model dengan Age_log
RLog2 <- glm(HappyStatus ~ Age_log + Gender + Marital + Education + Health + Religious,
                data = df3, family = binomial)

# Uji autokorelasi 
library(lmtest)
dwtest(RLog2)
## 
##  Durbin-Watson test
## 
## data:  RLog2
## DW = 2.0051, p-value = 0.5531
## alternative hypothesis: true autocorrelation is greater than 0

Perbandingan Model1 dan model 2

summary(RLog)
## 
## Call:
## glm(formula = HappyStatus ~ Age + Gender + Marital + Education + 
##     Health + Religious, family = binomial, data = df3)
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         0.975897   0.777503   1.255 0.209418    
## Age                                -0.009563   0.006661  -1.436 0.151114    
## GenderMale                         -0.565114   0.164259  -3.440 0.000581 ***
## MaritalWidowed                      0.109606   0.405947   0.270 0.787159    
## MaritalSeparated                   -0.777390   0.920520  -0.845 0.398384    
## MaritalDivorced                    -0.942872   0.378326  -2.492 0.012695 *  
## MaritalLiving together as married  13.803299 481.871096   0.029 0.977148    
## MaritalMarried                      0.473267   0.262298   1.804 0.071183 .  
## EducationLow                       -0.632582   0.331600  -1.908 0.056435 .  
## EducationMiddle                    -0.327752   0.349214  -0.939 0.347965    
## HealthPoor                          0.077147   0.436467   0.177 0.859701    
## HealthFair                          0.966330   0.419068   2.306 0.021116 *  
## HealthGood                          1.857615   0.424406   4.377  1.2e-05 ***
## HealthVery good                     2.201755   0.456304   4.825  1.4e-06 ***
## ReligiousNot a religious person     0.867475   0.598834   1.449 0.147448    
## ReligiousA religious person         1.280936   0.551026   2.325 0.020091 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1467.0  on 3183  degrees of freedom
## Residual deviance: 1304.6  on 3168  degrees of freedom
## AIC: 1336.6
## 
## Number of Fisher Scoring iterations: 15
summary(RLog2)
## 
## Call:
## glm(formula = HappyStatus ~ Age_log + Gender + Marital + Education + 
##     Health + Religious, family = binomial, data = df3)
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         2.63719    1.17797   2.239  0.02517 *  
## Age_log                            -0.59573    0.28371  -2.100  0.03575 *  
## GenderMale                         -0.54045    0.16439  -3.288  0.00101 ** 
## MaritalWidowed                      0.29845    0.41716   0.715  0.47434    
## MaritalSeparated                   -0.62844    0.92228  -0.681  0.49562    
## MaritalDivorced                    -0.78705    0.39027  -2.017  0.04373 *  
## MaritalLiving together as married  13.87239  482.21747   0.029  0.97705    
## MaritalMarried                      0.61674    0.27739   2.223  0.02619 *  
## EducationLow                       -0.63561    0.33180  -1.916  0.05541 .  
## EducationMiddle                    -0.34879    0.34948  -0.998  0.31826    
## HealthPoor                          0.07272    0.43664   0.167  0.86774    
## HealthFair                          0.95616    0.41910   2.281  0.02252 *  
## HealthGood                          1.84326    0.42425   4.345 1.39e-05 ***
## HealthVery good                     2.17789    0.45620   4.774 1.81e-06 ***
## ReligiousNot a religious person     0.84931    0.59780   1.421  0.15540    
## ReligiousA religious person         1.29411    0.54949   2.355  0.01852 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1467.0  on 3183  degrees of freedom
## Residual deviance: 1302.2  on 3168  degrees of freedom
## AIC: 1334.2
## 
## Number of Fisher Scoring iterations: 15
AIC(RLog2)
## [1] 1334.162

4.UJI GOODNESS OF FIT (KESESUAIAN)

  1. Uji Likelihood Ratio (Overall Model Fit) Bandingkan model dengan hanya intercept vs dengan prediktor, menggunakan anova:
anova(RLog2, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: HappyStatus
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                       3183     1467.0              
## Age_log    1   15.383      3182     1451.7 8.777e-05 ***
## Gender     1    9.496      3181     1442.2  0.002059 ** 
## Marital    5   33.116      3176     1409.0 3.570e-06 ***
## Education  2   10.483      3174     1398.6  0.005293 ** 
## Health     4   89.652      3170     1308.9 < 2.2e-16 ***
## Religious  2    6.744      3168     1302.2  0.034324 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

jika <0.05, model dengan prediktor lebih baik daripada tanpa prediktor.

  1. Hosmer-Lemeshow Test (Khusus Goodness-of-fit)
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.4.3
## ResourceSelection 0.3-6   2023-06-27
# y harus 0/1, bukan faktor
hoslem.test(df3$HappyStatus, fitted(RLog2))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  df3$HappyStatus, fitted(RLog2)
## X-squared = 6.7558, df = 8, p-value = 0.5632

p-value > 0.05: model fit/baik.

  1. uji chi-square Pearson
# Hitung residual Pearson
pearson_resid <- residuals(RLog2, type = "pearson")
# Hitung statistik Pearson Chi-Square
pearson_chisq <- sum(pearson_resid^2)
# Hitung derajat bebas = n - k (n: jumlah data, k: jumlah parameter)
df <- RLog2$df.residual

# Hitung p-value
p_value <- 1 - pchisq(pearson_chisq, df)

# Tampilkan hasil
cat("Pearson Chi-square:", pearson_chisq, "\n")
## Pearson Chi-square: 3058.956
cat("Degrees of freedom:", df, "\n")
## Degrees of freedom: 3168
cat("p-value:", p_value, "\n")
## p-value: 0.9158294

p-value > 0.05 → Model cocok (fit) dengan data.