library(readxl) 
datIND <- read_xlsx(path = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Analisis Data Kategorik/UAS ADK/DataWrangling/IDN_WVS7.xlsx", sheet = 1) 
head(datIND) 
## # A tibble: 6 × 611
##   version        doi   A_WAVE A_YEAR A_STUDY B_COUNTRY B_COUNTRY_ALPHA C_COW_NUM
##   <chr>          <chr> <chr>  <chr>  <chr>   <chr>     <chr>           <chr>    
## 1 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 2 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 3 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 4 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 5 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## 6 6-0-0 (2024-0… doi.… WVS 2… 2018   WVS     Indonesia IDN             Indonesia
## # ℹ 603 more variables: C_COW_ALPHA <chr>, D_INTERVIEW <dbl>, S007 <dbl>,
## #   J_INTDATE <chr>, FW_START <dbl>, FW_END <dbl>, K_TIME_START <chr>,
## #   K_TIME_END <chr>, K_DURATION <chr>, Q_MODE <chr>, N_REGION_ISO <chr>,
## #   N_REGION_WVS <chr>, N_REGION_NUTS2 <chr>, N_REGION_NUTS1 <chr>,
## #   N_TOWN <chr>, G_TOWNSIZE <chr>, G_TOWNSIZE2 <chr>, H_SETTLEMENT <chr>,
## #   H_URBRURAL <chr>, I_PSU <chr>, O1_LONGITUDE <chr>, O2_LATITUDE <chr>,
## #   L_INTERVIEWER_NUMBER <chr>, S_INTLANGUAGE <chr>, LNGE_ISO <chr>, …
str(datIND)
## tibble [3,200 × 611] (S3: tbl_df/tbl/data.frame)
##  $ version                   : chr [1:3200] "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" ...
##  $ doi                       : chr [1:3200] "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" ...
##  $ A_WAVE                    : chr [1:3200] "WVS 2017-2022" "WVS 2017-2022" "WVS 2017-2022" "WVS 2017-2022" ...
##  $ A_YEAR                    : chr [1:3200] "2018" "2018" "2018" "2018" ...
##  $ A_STUDY                   : chr [1:3200] "WVS" "WVS" "WVS" "WVS" ...
##  $ B_COUNTRY                 : chr [1:3200] "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
##  $ B_COUNTRY_ALPHA           : chr [1:3200] "IDN" "IDN" "IDN" "IDN" ...
##  $ C_COW_NUM                 : chr [1:3200] "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
##  $ C_COW_ALPHA               : chr [1:3200] "INS" "INS" "INS" "INS" ...
##  $ D_INTERVIEW               : num [1:3200] 3.6e+08 3.6e+08 3.6e+08 3.6e+08 3.6e+08 ...
##  $ S007                      : num [1:3200] 3.61e+08 3.61e+08 3.61e+08 3.61e+08 3.61e+08 ...
##  $ J_INTDATE                 : chr [1:3200] "20180727" "20180802" "20180720" "20180720" ...
##  $ FW_START                  : num [1:3200] 201806 201806 201806 201806 201806 ...
##  $ FW_END                    : num [1:3200] 201808 201808 201808 201808 201808 ...
##  $ K_TIME_START              : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
##  $ K_TIME_END                : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
##  $ K_DURATION                : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
##  $ Q_MODE                    : chr [1:3200] "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" ...
##  $ N_REGION_ISO              : chr [1:3200] "ID-SM Sumatera" "ID-SM Sumatera" "ID-SM Sumatera" "ID-SM Sumatera" ...
##  $ N_REGION_WVS              : chr [1:3200] "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" ...
##  $ N_REGION_NUTS2            : chr [1:3200] "-4" "-4" "-4" "-4" ...
##  $ N_REGION_NUTS1            : chr [1:3200] "not applicable" "not applicable" "not applicable" "not applicable" ...
##  $ N_TOWN                    : chr [1:3200] "Not asked" "Not asked" "Not asked" "Not asked" ...
##  $ G_TOWNSIZE                : chr [1:3200] "Under 2,000" "5,000-10,000" "Under 2,000" "Under 2,000" ...
##  $ G_TOWNSIZE2               : chr [1:3200] "Under 5,000" "5000-20000" "Under 5,000" "Under 5,000" ...
##  $ H_SETTLEMENT              : chr [1:3200] "Village" "Village" "Village" "Village" ...
##  $ H_URBRURAL                : chr [1:3200] "Rural" "Rural" "Rural" "Rural" ...
##  $ I_PSU                     : chr [1:3200] "13" "9" "15" "15" ...
##  $ O1_LONGITUDE              : chr [1:3200] "97.13" "97.13" "97.13" "97.13" ...
##  $ O2_LATITUDE               : chr [1:3200] "3.91" "3.91" "3.91" "3.91" ...
##  $ L_INTERVIEWER_NUMBER      : chr [1:3200] "Item not included" "Item not included" "Item not included" "Item not included" ...
##  $ S_INTLANGUAGE             : chr [1:3200] "Indonesian" "Indonesian" "Indonesian" "Indonesian" ...
##  $ LNGE_ISO                  : chr [1:3200] "id" "id" "id" "id" ...
##  $ E_RESPINT                 : chr [1:3200] "Respondent was somewhat interested" "Respondent was very interested" "Respondent was very interested" "Respondent was very interested" ...
##  $ F_INTPRIVACY              : chr [1:3200] "There were other people around who could follow the interview" "There were other people around who could follow the interview" "There were no other people around who could follow the interview" "There were other people around who could follow the interview" ...
##  $ E1_LITERACY               : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
##  $ W_WEIGHT                  : chr [1:3200] "0.240456274" "1.053733478" "0.389967189" "0.819383714" ...
##  $ S018                      : chr [1:3200] "0.3125" "0.3125" "0.3125" "0.3125" ...
##  $ PWGHT                     : chr [1:3200] "6.3753281875" "6.3753281875" "6.3753281875" "6.3753281875" ...
##  $ S025                      : chr [1:3200] "Indonesia (2018)" "Indonesia (2018)" "Indonesia (2018)" "Indonesia (2018)" ...
##  $ Q1P                       : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
##  $ Q2P                       : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
##  $ Q3P                       : chr [1:3200] "Rather important" "Very important" "Very important" "Very important" ...
##  $ Q4P                       : chr [1:3200] "Not at all important" "Not very important" "Not very important" "Not very important" ...
##  $ Q5P                       : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
##  $ Q6P                       : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
##  $ Q7P                       : chr [1:3200] "Important" "Important" "Important" "Important" ...
##  $ Q8P                       : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Not mentioned" ...
##  $ Q9P                       : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q10P                      : chr [1:3200] "Important" "Important" "Important" "Important" ...
##  $ Q11P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q12P                      : chr [1:3200] "Not mentioned" "Important" "Important" "Important" ...
##  $ Q13P                      : chr [1:3200] "Important" "Not mentioned" "Not mentioned" "Important" ...
##  $ Q14P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q15P                      : chr [1:3200] "Important" "Important" "Important" "Not mentioned" ...
##  $ Q16P                      : chr [1:3200] "Not mentioned" "Important" "Not mentioned" "Important" ...
##  $ Q17P                      : chr [1:3200] "Important" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q18P                      : chr [1:3200] "Important" "Not mentioned" "Important" "Important" ...
##  $ Q19P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q20P                      : chr [1:3200] "Important" "Important" "Important" "Important" ...
##  $ Q21P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q22P                      : chr [1:3200] "Not mentioned" "Important" "Important" "Important" ...
##  $ Q23P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Important" ...
##  $ Q24P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Not mentioned" ...
##  $ Q25P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Important" ...
##  $ Q26P                      : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
##  $ Q27P                      : chr [1:3200] "Agree" "Agree strongly" "Agree strongly" "Agree strongly" ...
##  $ Q28P                      : chr [1:3200] "Strongly disagree" "Disagree" "Agree strongly" "Disagree" ...
##  $ Q29P                      : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree strongly" ...
##  $ Q30P                      : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
##  $ Q31P                      : chr [1:3200] "Agree" "Disagree" "Agree" "Agree strongly" ...
##  $ Q32P                      : chr [1:3200] "Disagree" "Disagree" "Agree strongly" "Agree strongly" ...
##  $ Q33_3                     : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
##  $ Q33P                      : chr [1:3200] "Agree" "Disagree" "Agree strongly" "Agree strongly" ...
##  $ Q34_3                     : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
##  $ Q34P                      : chr [1:3200] "Agree" "Disagree" "Agree strongly" "Agree strongly" ...
##  $ Q35_3                     : chr [1:3200] "Disagree" "Disagree" "Agree" "Agree" ...
##  $ Q35P                      : chr [1:3200] "Disagree" "Disagree" "Agree strongly" "Agree" ...
##  $ Q36P                      : chr [1:3200] "Disagree strongly" "Disagree" "Disagree strongly" "Disagree" ...
##  $ Q37P                      : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
##  $ Q38P                      : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
##  $ Q39P                      : chr [1:3200] "Disagree" "Agree" "Agree strongly" "Agree strongly" ...
##  $ Q40P                      : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
##  $ Q41P                      : chr [1:3200] "Agree" "Disagree" "Agree" "Agree strongly" ...
##  $ Q42                       : chr [1:3200] "Our society must be gradually improved by reforms" "Our society must be gradually improved by reforms" "The entire way our society is organized must be radically changed by revolutionary action" "The entire way our society is organized must be radically changed by revolutionary action" ...
##  $ Q43P                      : chr [1:3200] "Don’t mind" "Bad" "Bad" "Good" ...
##  $ Q44P                      : chr [1:3200] "Good" "Good" "Bad" "Good" ...
##  $ Q45P                      : chr [1:3200] "Good" "Don’t mind" "Bad" "Bad" ...
##  $ Q46P                      : chr [1:3200] "Very happy" "Very happy" "Quite happy" "Very happy" ...
##  $ Q47P                      : chr [1:3200] "Good" "Good" "Good" "Very good" ...
##  $ Q48                       : chr [1:3200] "A great deal" "A great deal" "8" "5" ...
##  $ Q49                       : chr [1:3200] "Completely satisfied" "Completely satisfied" "8" "Completely satisfied" ...
##  $ Q50                       : chr [1:3200] "Satisfied" "Satisfied" "8" "Satisfied" ...
##  $ Q51P                      : chr [1:3200] "Never" "Never" "Never" "Never" ...
##  $ Q52P                      : chr [1:3200] "Never" "Never" "Never" "Never" ...
##  $ Q53P                      : chr [1:3200] "Rarely" "Never" "Never" "Never" ...
##  $ Q54P                      : chr [1:3200] "Never" "Never" "Never" "Never" ...
##  $ Q55P                      : chr [1:3200] "Never" "Never" "Never" "Never" ...
##  $ Q56P                      : chr [1:3200] "Better off" "Or about the same" "Or about the same" "Better off" ...
##   [list output truncated]
dim(datIND)
## [1] 3200  611

Pada data WVS 7 Indonesia ada sebanyak 3200 pengamatan dan 611 variabel

df <- subset(datIND, select = c("Q46P","Q49", "Q47P", "Q173P", "Q273", "Q260",  "Q262","Q275" )) 
sapply(df, function(x) any(is.na(x))) 
##  Q46P   Q49  Q47P Q173P  Q273  Q260  Q262  Q275 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
str(df)
## tibble [3,200 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Q46P : chr [1:3200] "Very happy" "Very happy" "Quite happy" "Very happy" ...
##  $ Q49  : chr [1:3200] "Completely satisfied" "Completely satisfied" "8" "Completely satisfied" ...
##  $ Q47P : chr [1:3200] "Good" "Good" "Good" "Very good" ...
##  $ Q173P: chr [1:3200] "A religious person" "A religious person" "A religious person" "A religious person" ...
##  $ Q273 : chr [1:3200] "Married" "Married" "Married" "Married" ...
##  $ Q260 : chr [1:3200] "Male" "Male" "Male" "Male" ...
##  $ Q262 : chr [1:3200] "22" "35" "48" "46" ...
##  $ Q275 : chr [1:3200] "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Lower secondary education (ISCED 2)" ...
lapply(df, table)
## $Q46P
## 
##        No answer Not at all happy   Not very happy      Quite happy 
##                1               34              161             1590 
##       Very happy 
##             1414 
## 
## $Q49
## 
##                                          2 
##                                         50 
##                                          3 
##                                         63 
##                                          4 
##                                         97 
##                                          5 
##                                        333 
##                                          6 
##                                        245 
##                                          7 
##                                        413 
##                                          8 
##                                        554 
##                                          9 
##                                        341 
##                    Completely dissatisfied 
##                                        117 
##                       Completely satisfied 
##                                        983 
##                                 Don't know 
##                                          3 
## Other missing; Multiple answers Mail (EVS) 
##                                          1 
## 
## $Q47P
## 
##      Fair      Good      Poor Very good Very poor 
##       720      1404       200       836        40 
## 
## $Q173P
## 
##     A religious person             Don't know              No answer 
##                   2924                     27                     14 
## Not a religious person 
##                    235 
## 
## $Q273
## 
##                   Divorced Living together as married 
##                        101                         23 
##                    Married                  Separated 
##                       2453                          9 
##                     Single                    Widowed 
##                        415                        199 
## 
## $Q260
## 
## Female   Male 
##   1754   1446 
## 
## $Q262
## 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37 
##  70  66  58  56  54  65  65  84  53  54  84  79  99  67  93 105  97  85  78  66 
##  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
## 108  72  93  82  89  82  64  94  66  68  67  61  60  39  56  55  45  46  35  41 
##  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  78 
##  52  31  53  32  27  26  21  28  26  13  16   9  19   1   5  10   4   6   6   3 
##  79  80  81  82  84  86  87 
##   1   3   1   1   1   2   2 
## 
## $Q275
## 
##                   Bachelor or equivalent (ISCED 6) 
##                                                221 
##                   Doctoral or equivalent (ISCED 8) 
##                                                  1 
##                                         Don't know 
##                                                  1 
## Early childhood education (ISCED 0) / no education 
##                                                311 
##                Lower secondary education (ISCED 2) 
##                                                659 
##                     Master or equivalent (ISCED 7) 
##                                                 17 
##                        Primary education (ISCED 1) 
##                                                842 
##           Short-cycle tertiary education (ISCED 5) 
##                                                 81 
##                Upper secondary education (ISCED 3) 
##                                               1067

PREPROCESSING DATA

df$Happiness <- factor(df$Q46P, levels = c("Not at all happy","Not very happy","Quite happy","Very happy"),labels = c("Not at all happy","Not very happy","Quite happy","Very happy"))
sum(is.na(df$Happiness))
## [1] 1
levels(df$Happiness)
## [1] "Not at all happy" "Not very happy"   "Quite happy"      "Very happy"
df$Gender <- factor(df$Q260)
sum(is.na(df$Gender))
## [1] 0
levels(df$Gender)
## [1] "Female" "Male"
df$Age <- as.numeric(df$Q262)
sum(is.na(df$Age))
## [1] 0
df$Life <- as.numeric(factor(df$Q49,levels = c("Completely dissatisfied","2", "3","4","5","6","7","8","9","Completely satisfied"),labels = c("1","2", "3","4","5","6","7","8","9","10")))
sum(is.na(df$Life))
## [1] 4
df$Marital <- factor(df$Q273, levels = c("Single","Widowed","Separated","Divorced","Living together as married","Married"))
sum(is.na(df$Marital))
## [1] 0
levels(df$Marital)
## [1] "Single"                     "Widowed"                   
## [3] "Separated"                  "Divorced"                  
## [5] "Living together as married" "Married"
df$Health <- factor(df$Q47P, levels = c("Very poor", "Poor", "Fair","Good","Very good"))
sum(is.na(df$Health))
## [1] 0
levels(df$Health)
## [1] "Very poor" "Poor"      "Fair"      "Good"      "Very good"
df$Religious <- factor(df$Q173P, levels = c("Don't know","Not a religious person","A religious person"))
sum(is.na(df$Religious))
## [1] 14
levels(df$Religious)
## [1] "Don't know"             "Not a religious person" "A religious person"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df <- df %>% mutate(Education = case_when(Q275 %in% c("Early childhood education (ISCED 0) / no education","Primary education (ISCED 1)", "Lower secondary education (ISCED 2)") ~ "Low", Q275 %in% c("Upper secondary education (ISCED 3)") ~ "Middle", Q275 %in% c("Short-cycle tertiary education (ISCED 5)","Bachelor or equivalent (ISCED 6)", "Master or equivalent (ISCED 7)","Doctoral or equivalent (ISCED 8)") ~ "High"))
sum(is.na(df$Education))
## [1] 1
colnames(df)
##  [1] "Q46P"      "Q49"       "Q47P"      "Q173P"     "Q273"      "Q260"     
##  [7] "Q262"      "Q275"      "Happiness" "Gender"    "Age"       "Life"     
## [13] "Marital"   "Health"    "Religious" "Education"
#newdataset
df2 <- subset(df,select = c("Happiness","Age","Gender","Marital","Education","Health","Religious","Life"))
sapply(df2, function(x) sum(is.na(x)))
## Happiness       Age    Gender   Marital Education    Health Religious      Life 
##         1         0         0         0         1         0        14         4
dim(df2)
## [1] 3200    8
df3 <- na.omit(df2)
dim(df3)
## [1] 3180    8
library(summarytools)
## Warning: package 'summarytools' was built under R version 4.4.3
dfSummary(df3)
## Data Frame Summary  
## df3  
## Dimensions: 3180 x 8  
## Duplicates: 439  
## 
## ----------------------------------------------------------------------------------------------------------------
## No   Variable      Stats / Values                 Freqs (% of Valid)   Graph                Valid      Missing  
## ---- ------------- ------------------------------ -------------------- -------------------- ---------- ---------
## 1    Happiness     1. Not at all happy              33 ( 1.0%)                              3180       0        
##      [factor]      2. Not very happy               160 ( 5.0%)         I                    (100.0%)   (0.0%)   
##                    3. Quite happy                 1580 (49.7%)         IIIIIIIII                                
##                    4. Very happy                  1407 (44.2%)         IIIIIIII                                 
## 
## 2    Age           Mean (sd) : 40 (13.5)          67 distinct values       : .              3180       0        
##      [numeric]     min < med < max:                                    . : : : .            (100.0%)   (0.0%)   
##                    18 < 39 < 87                                        : : : : : .                              
##                    IQR (CV) : 19 (0.3)                                 : : : : : : .                            
##                                                                        : : : : : : : .                          
## 
## 3    Gender        1. Female                      1747 (54.9%)         IIIIIIIIII           3180       0        
##      [factor]      2. Male                        1433 (45.1%)         IIIIIIIII            (100.0%)   (0.0%)   
## 
## 4    Marital       1. Single                       415 (13.1%)         II                   3180       0        
##      [factor]      2. Widowed                      198 ( 6.2%)         I                    (100.0%)   (0.0%)   
##                    3. Separated                      9 ( 0.3%)                                                  
##                    4. Divorced                     100 ( 3.1%)                                                  
##                    5. Living together as marrie     23 ( 0.7%)                                                  
##                    6. Married                     2435 (76.6%)         IIIIIIIIIIIIIII                          
## 
## 5    Education     1. High                         319 (10.0%)         II                   3180       0        
##      [character]   2. Low                         1802 (56.7%)         IIIIIIIIIII          (100.0%)   (0.0%)   
##                    3. Middle                      1059 (33.3%)         IIIIII                                   
## 
## 6    Health        1. Very poor                     40 ( 1.3%)                              3180       0        
##      [factor]      2. Poor                         198 ( 6.2%)         I                    (100.0%)   (0.0%)   
##                    3. Fair                         716 (22.5%)         IIII                                     
##                    4. Good                        1395 (43.9%)         IIIIIIII                                 
##                    5. Very good                    831 (26.1%)         IIIII                                    
## 
## 7    Religious     1. Don't know                    27 ( 0.8%)                              3180       0        
##      [factor]      2. Not a religious person       235 ( 7.4%)         I                    (100.0%)   (0.0%)   
##                    3. A religious person          2918 (91.8%)         IIIIIIIIIIIIIIIIII                       
## 
## 8    Life          Mean (sd) : 7.6 (2.4)          1 : 117 ( 3.7%)                           3180       0        
##      [numeric]     min < med < max:               2 :  50 ( 1.6%)                           (100.0%)   (0.0%)   
##                    1 < 8 < 10                     3 :  63 ( 2.0%)                                               
##                    IQR (CV) : 4 (0.3)             4 :  97 ( 3.1%)                                               
##                                                   5 : 331 (10.4%)      II                                       
##                                                   6 : 244 ( 7.7%)      I                                        
##                                                   7 : 410 (12.9%)      II                                       
##                                                   8 : 553 (17.4%)      III                                      
##                                                   9 : 340 (10.7%)      II                                       
##                                                   10 : 975 (30.7%)     IIIIII                                   
## ----------------------------------------------------------------------------------------------------------------
# Kategorik
library(tidyverse)
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tibble::view()  masks summarytools::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
ct_gender <- xtabs(~Gender+Happiness, data=df3)
ct_marital <- xtabs(~Marital+Happiness, data=df3)
ct_education <- xtabs(~Education+Happiness, data=df3)
ct_health <- xtabs(~Health+Happiness, data=df3)
ct_religious <- xtabs(~Religious+Happiness, data=df3)

# Buat list hasil frekuensi dan uji chi-square
freq_result <- list(
  gender = list(
    round(ct_gender / rep(colSums(ct_gender), each = 2) * 100, 2),
    round(chisq.test(ct_gender)$statistic, 2),
    chisq.test(ct_gender)$p.value
  ),
  marital = list(
    round(ct_marital / rep(colSums(ct_marital), each = 2) * 100, 2),
    round(chisq.test(ct_marital)$statistic, 2),
    chisq.test(ct_marital)$p.value
  ),
  education = list(
    round(ct_education / rep(colSums(ct_education), each = 2) * 100, 2),
    round(chisq.test(ct_education)$statistic, 2),
    chisq.test(ct_education)$p.value
  ),
  health = list(
    round(ct_health / rep(colSums(ct_health), each = 2) * 100, 2),
    round(chisq.test(ct_health)$statistic, 2),
    chisq.test(ct_health)$p.value
  ),
  religious = list(
    round(ct_religious / rep(colSums(ct_religious), each = 2) * 100, 2),
    round(chisq.test(ct_religious)$statistic, 2),
    chisq.test(ct_religious)$p.value
  )
)
## Warning in chisq.test(ct_marital): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_marital): Chi-squared approximation may be incorrect
## Warning in ct_education/rep(colSums(ct_education), each = 2): longer object
## length is not a multiple of shorter object length
## Warning in chisq.test(ct_education): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_education): Chi-squared approximation may be incorrect
## Warning in ct_health/rep(colSums(ct_health), each = 2): longer object length is
## not a multiple of shorter object length
## Warning in chisq.test(ct_health): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_health): Chi-squared approximation may be incorrect
## Warning in ct_religious/rep(colSums(ct_religious), each = 2): longer object
## length is not a multiple of shorter object length
## Warning in chisq.test(ct_religious): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_religious): Chi-squared approximation may be incorrect
# Tampilkan hasil
freq_result
## $gender
## $gender[[1]]
##         Happiness
## Gender   Not at all happy Not very happy Quite happy Very happy
##   Female            51.52          40.62       51.58      60.41
##   Male              48.48          59.38       48.42      39.59
## 
## $gender[[2]]
## X-squared 
##     37.61 
## 
## $gender[[3]]
## [1] 3.410033e-08
## 
## 
## $marital
## $marital[[1]]
##                             Happiness
## Marital                      Not at all happy Not very happy Quite happy
##   Single                                 9.09           1.71       13.54
##   Widowed                               24.24           0.71        6.52
##   Separated                              0.62           3.03        0.28
##   Divorced                               2.50          42.42        3.55
##   Living together as married             0.00           0.00       15.15
##   Married                                1.08          69.38     3648.48
##                             Happiness
## Marital                      Very happy
##   Single                         108.75
##   Widowed                         48.12
##   Separated                        0.19
##   Divorced                         2.03
##   Living together as married       1.28
##   Married                         78.39
## 
## $marital[[2]]
## X-squared 
##      73.4 
## 
## $marital[[3]]
## [1] 1.097899e-09
## 
## 
## $education
## $education[[1]]
##          Happiness
## Education Not at all happy Not very happy Quite happy Very happy
##    High               3.03           6.25       10.95     466.67
##    Low               78.79           6.71       62.12     497.50
##    Middle             3.75           2.78     1672.73     285.62
## 
## $education[[2]]
## X-squared 
##     16.58 
## 
## $education[[3]]
## [1] 0.01095217
## 
## 
## $health
## $health[[1]]
##            Happiness
## Health      Not at all happy Not very happy Quite happy Very happy
##   Very poor             6.06           0.44        8.12       1.28
##   Poor                 27.27           2.27       55.62     206.06
##   Fair                  8.12           3.91       29.24     563.64
##   Good                  1.88         151.52       47.91     365.62
##   Very good             0.38          48.48       18.41     343.75
## 
## $health[[2]]
## X-squared 
##    385.18 
## 
## $health[[3]]
## [1] 5.185637e-75
## 
## 
## $religious
## $religious[[1]]
##                         Happiness
## Religious                Not at all happy Not very happy Quite happy Very happy
##   Don't know                         3.03           2.50        1.07      21.21
##   Not a religious person             3.03           1.20        9.88      47.50
##   A religious person                19.38           8.67     4321.21     827.50
## 
## $religious[[2]]
## X-squared 
##     28.09 
## 
## $religious[[3]]
## [1] 9.026728e-05
# Numerik
library(dplyr)
num.var <- df3 %>% select(where(is.numeric)) %>% group_by(df3$Happiness) %>% summarise_each(funs(mean, sd))%>% as.data.frame()
## Warning: `summarise_each()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
num.var
##      df3$Happiness Age_mean Life_mean   Age_sd  Life_sd
## 1 Not at all happy 45.90909  4.878788 15.42799 3.028589
## 2   Not very happy 42.89375  5.337500 12.52650 2.751529
## 3      Quite happy 41.16582  7.242405 13.77165 2.275814
## 4       Very happy 38.24876  8.215352 13.10362 2.256924

VISUALISASI

ggplot(df3, aes(x=Happiness,fill=Gender))+geom_bar(position = "dodge")+facet_grid(Life~Marital) +theme_bw()+theme(axis.text.x = element_text(angle = 45, hjust = 1))

MODEL REGRESI LOGISTIK ORDINAL

library(VGAM)
## Warning: package 'VGAM' was built under R version 4.4.3
## Loading required package: stats4
## Loading required package: splines
mod <- vglm(Happiness~Gender+Age+Marital+Education+Health+Religious+Life, family=cumulative(parallel = TRUE),data=df3)
## Warning in eval(slot(family, "initialize")): response should be ordinal---see
## ordered()
summary(mod)
## 
## Call:
## vglm(formula = Happiness ~ Gender + Age + Marital + Education + 
##     Health + Religious + Life, family = cumulative(parallel = TRUE), 
##     data = df3)
## 
## Coefficients: 
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1                     -2.713679   0.565949  -4.795 1.63e-06 ***
## (Intercept):2                     -0.818472   0.544437  -1.503 0.132752    
## (Intercept):3                      2.574708   0.546324   4.713 2.44e-06 ***
## GenderMale                         0.296301   0.077382   3.829 0.000129 ***
## Age                                0.018654   0.003364   5.546 2.93e-08 ***
## MaritalWidowed                    -0.317277   0.212311  -1.494 0.135071    
## MaritalSeparated                   0.120748   0.704120   0.171 0.863840    
## MaritalDivorced                    0.293943   0.238397   1.233 0.217577    
## MaritalLiving together as married -2.040387   0.559991  -3.644 0.000269 ***
## MaritalMarried                    -0.345076   0.125557  -2.748 0.005990 ** 
## EducationLow                       0.125722   0.126816   0.991 0.321503    
## EducationMiddle                    0.202279   0.132476   1.527 0.126783    
## HealthPoor                         0.305423   0.352499   0.866 0.386242    
## HealthFair                         0.410286   0.330219   1.242 0.214064    
## HealthGood                        -0.153111   0.326603  -0.469 0.639214    
## HealthVery good                   -1.001095   0.331208  -3.023 0.002506 ** 
## ReligiousNot a religious person   -0.669989   0.417634  -1.604 0.108659    
## ReligiousA religious person       -1.150799   0.396913  -2.899 0.003739 ** 
## Life                              -0.223535   0.016130 -13.858  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y<=1]), logitlink(P[Y<=2]), 
## logitlink(P[Y<=3])
## 
## Residual deviance: 5164.219 on 9521 degrees of freedom
## 
## Log-likelihood: -2582.11 on 9521 degrees of freedom
## 
## Number of Fisher scoring iterations: 6 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##                        GenderMale                               Age 
##                         1.3448752                         1.0188293 
##                    MaritalWidowed                  MaritalSeparated 
##                         0.7281287                         1.1283404 
##                   MaritalDivorced MaritalLiving together as married 
##                         1.3417072                         0.1299784 
##                    MaritalMarried                      EducationLow 
##                         0.7081669                         1.1339667 
##                   EducationMiddle                        HealthPoor 
##                         1.2241892                         1.3571996 
##                        HealthFair                        HealthGood 
##                         1.5072493                         0.8580346 
##                   HealthVery good   ReligiousNot a religious person 
##                         0.3674768                         0.5117141 
##       ReligiousA religious person                              Life 
##                         0.3163838                         0.7996866
anova(mod)
## Analysis of Deviance Table (Type II tests)
## 
## Model: 'cumulative', 'VGAMordinal', 'VGAMcategorical'
## 
## Links: 'logitlink'
## 
## Response: Happiness
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## Gender     1   14.679      9522     5178.9 0.0001275 ***
## Age        1   30.931      9522     5195.2 2.674e-08 ***
## Marital    5   28.981      9526     5193.2 2.339e-05 ***
## Education  2    2.508      9523     5166.7 0.2853593    
## Health     4  183.955      9525     5348.2 < 2.2e-16 ***
## Religious  2   19.265      9523     5183.5 6.557e-05 ***
## Life       1  192.321      9522     5356.5 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hitung odds ratio dan confidence interval
se <- sqrt(diag(vcov(mod)))
odds_ratios <- data.frame(
  Coeff = names(coef(mod)),
  OR = exp(coef(mod)),
  se = se,
  CI_lower = exp(coef(mod) - 1.96 * se),
  CI_upper = exp(coef(mod) + 1.96 * se)
)

# Tampilkan hasil tanpa kolom 'Coeff'
print(round(odds_ratios[,-1], 3))
##                                       OR    se CI_lower CI_upper
## (Intercept):1                      0.066 0.566    0.022    0.201
## (Intercept):2                      0.441 0.544    0.152    1.282
## (Intercept):3                     13.127 0.546    4.499   38.302
## GenderMale                         1.345 0.077    1.156    1.565
## Age                                1.019 0.003    1.012    1.026
## MaritalWidowed                     0.728 0.212    0.480    1.104
## MaritalSeparated                   1.128 0.704    0.284    4.485
## MaritalDivorced                    1.342 0.238    0.841    2.141
## MaritalLiving together as married  0.130 0.560    0.043    0.390
## MaritalMarried                     0.708 0.126    0.554    0.906
## EducationLow                       1.134 0.127    0.884    1.454
## EducationMiddle                    1.224 0.132    0.944    1.587
## HealthPoor                         1.357 0.352    0.680    2.708
## HealthFair                         1.507 0.330    0.789    2.879
## HealthGood                         0.858 0.327    0.452    1.627
## HealthVery good                    0.367 0.331    0.192    0.703
## ReligiousNot a religious person    0.512 0.418    0.226    1.160
## ReligiousA religious person        0.316 0.397    0.145    0.689
## Life                               0.800 0.016    0.775    0.825