library(readxl)
datIND <- read_xlsx(path = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Analisis Data Kategorik/UAS ADK/DataWrangling/IDN_WVS7.xlsx", sheet = 1)
head(datIND)
## # A tibble: 6 × 611
## version doi A_WAVE A_YEAR A_STUDY B_COUNTRY B_COUNTRY_ALPHA C_COW_NUM
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## 2 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## 3 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## 4 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## 5 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## 6 6-0-0 (2024-0… doi.… WVS 2… 2018 WVS Indonesia IDN Indonesia
## # ℹ 603 more variables: C_COW_ALPHA <chr>, D_INTERVIEW <dbl>, S007 <dbl>,
## # J_INTDATE <chr>, FW_START <dbl>, FW_END <dbl>, K_TIME_START <chr>,
## # K_TIME_END <chr>, K_DURATION <chr>, Q_MODE <chr>, N_REGION_ISO <chr>,
## # N_REGION_WVS <chr>, N_REGION_NUTS2 <chr>, N_REGION_NUTS1 <chr>,
## # N_TOWN <chr>, G_TOWNSIZE <chr>, G_TOWNSIZE2 <chr>, H_SETTLEMENT <chr>,
## # H_URBRURAL <chr>, I_PSU <chr>, O1_LONGITUDE <chr>, O2_LATITUDE <chr>,
## # L_INTERVIEWER_NUMBER <chr>, S_INTLANGUAGE <chr>, LNGE_ISO <chr>, …
str(datIND)
## tibble [3,200 × 611] (S3: tbl_df/tbl/data.frame)
## $ version : chr [1:3200] "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" "6-0-0 (2024-04-30)" ...
## $ doi : chr [1:3200] "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" "doi.org/10.14281/18241.24" ...
## $ A_WAVE : chr [1:3200] "WVS 2017-2022" "WVS 2017-2022" "WVS 2017-2022" "WVS 2017-2022" ...
## $ A_YEAR : chr [1:3200] "2018" "2018" "2018" "2018" ...
## $ A_STUDY : chr [1:3200] "WVS" "WVS" "WVS" "WVS" ...
## $ B_COUNTRY : chr [1:3200] "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
## $ B_COUNTRY_ALPHA : chr [1:3200] "IDN" "IDN" "IDN" "IDN" ...
## $ C_COW_NUM : chr [1:3200] "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
## $ C_COW_ALPHA : chr [1:3200] "INS" "INS" "INS" "INS" ...
## $ D_INTERVIEW : num [1:3200] 3.6e+08 3.6e+08 3.6e+08 3.6e+08 3.6e+08 ...
## $ S007 : num [1:3200] 3.61e+08 3.61e+08 3.61e+08 3.61e+08 3.61e+08 ...
## $ J_INTDATE : chr [1:3200] "20180727" "20180802" "20180720" "20180720" ...
## $ FW_START : num [1:3200] 201806 201806 201806 201806 201806 ...
## $ FW_END : num [1:3200] 201808 201808 201808 201808 201808 ...
## $ K_TIME_START : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
## $ K_TIME_END : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
## $ K_DURATION : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
## $ Q_MODE : chr [1:3200] "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" "Computer-Assisted Personal Interviewing (CAPI)" ...
## $ N_REGION_ISO : chr [1:3200] "ID-SM Sumatera" "ID-SM Sumatera" "ID-SM Sumatera" "ID-SM Sumatera" ...
## $ N_REGION_WVS : chr [1:3200] "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" "ID: ID-SM Sumatera" ...
## $ N_REGION_NUTS2 : chr [1:3200] "-4" "-4" "-4" "-4" ...
## $ N_REGION_NUTS1 : chr [1:3200] "not applicable" "not applicable" "not applicable" "not applicable" ...
## $ N_TOWN : chr [1:3200] "Not asked" "Not asked" "Not asked" "Not asked" ...
## $ G_TOWNSIZE : chr [1:3200] "Under 2,000" "5,000-10,000" "Under 2,000" "Under 2,000" ...
## $ G_TOWNSIZE2 : chr [1:3200] "Under 5,000" "5000-20000" "Under 5,000" "Under 5,000" ...
## $ H_SETTLEMENT : chr [1:3200] "Village" "Village" "Village" "Village" ...
## $ H_URBRURAL : chr [1:3200] "Rural" "Rural" "Rural" "Rural" ...
## $ I_PSU : chr [1:3200] "13" "9" "15" "15" ...
## $ O1_LONGITUDE : chr [1:3200] "97.13" "97.13" "97.13" "97.13" ...
## $ O2_LATITUDE : chr [1:3200] "3.91" "3.91" "3.91" "3.91" ...
## $ L_INTERVIEWER_NUMBER : chr [1:3200] "Item not included" "Item not included" "Item not included" "Item not included" ...
## $ S_INTLANGUAGE : chr [1:3200] "Indonesian" "Indonesian" "Indonesian" "Indonesian" ...
## $ LNGE_ISO : chr [1:3200] "id" "id" "id" "id" ...
## $ E_RESPINT : chr [1:3200] "Respondent was somewhat interested" "Respondent was very interested" "Respondent was very interested" "Respondent was very interested" ...
## $ F_INTPRIVACY : chr [1:3200] "There were other people around who could follow the interview" "There were other people around who could follow the interview" "There were no other people around who could follow the interview" "There were other people around who could follow the interview" ...
## $ E1_LITERACY : chr [1:3200] "Not asked in survey" "Not asked in survey" "Not asked in survey" "Not asked in survey" ...
## $ W_WEIGHT : chr [1:3200] "0.240456274" "1.053733478" "0.389967189" "0.819383714" ...
## $ S018 : chr [1:3200] "0.3125" "0.3125" "0.3125" "0.3125" ...
## $ PWGHT : chr [1:3200] "6.3753281875" "6.3753281875" "6.3753281875" "6.3753281875" ...
## $ S025 : chr [1:3200] "Indonesia (2018)" "Indonesia (2018)" "Indonesia (2018)" "Indonesia (2018)" ...
## $ Q1P : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
## $ Q2P : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
## $ Q3P : chr [1:3200] "Rather important" "Very important" "Very important" "Very important" ...
## $ Q4P : chr [1:3200] "Not at all important" "Not very important" "Not very important" "Not very important" ...
## $ Q5P : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
## $ Q6P : chr [1:3200] "Very important" "Very important" "Very important" "Very important" ...
## $ Q7P : chr [1:3200] "Important" "Important" "Important" "Important" ...
## $ Q8P : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Not mentioned" ...
## $ Q9P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q10P : chr [1:3200] "Important" "Important" "Important" "Important" ...
## $ Q11P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q12P : chr [1:3200] "Not mentioned" "Important" "Important" "Important" ...
## $ Q13P : chr [1:3200] "Important" "Not mentioned" "Not mentioned" "Important" ...
## $ Q14P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q15P : chr [1:3200] "Important" "Important" "Important" "Not mentioned" ...
## $ Q16P : chr [1:3200] "Not mentioned" "Important" "Not mentioned" "Important" ...
## $ Q17P : chr [1:3200] "Important" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q18P : chr [1:3200] "Important" "Not mentioned" "Important" "Important" ...
## $ Q19P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q20P : chr [1:3200] "Important" "Important" "Important" "Important" ...
## $ Q21P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q22P : chr [1:3200] "Not mentioned" "Important" "Important" "Important" ...
## $ Q23P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Important" ...
## $ Q24P : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Not mentioned" ...
## $ Q25P : chr [1:3200] "Not mentioned" "Not mentioned" "Important" "Important" ...
## $ Q26P : chr [1:3200] "Not mentioned" "Not mentioned" "Not mentioned" "Not mentioned" ...
## $ Q27P : chr [1:3200] "Agree" "Agree strongly" "Agree strongly" "Agree strongly" ...
## $ Q28P : chr [1:3200] "Strongly disagree" "Disagree" "Agree strongly" "Disagree" ...
## $ Q29P : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree strongly" ...
## $ Q30P : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
## $ Q31P : chr [1:3200] "Agree" "Disagree" "Agree" "Agree strongly" ...
## $ Q32P : chr [1:3200] "Disagree" "Disagree" "Agree strongly" "Agree strongly" ...
## $ Q33_3 : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
## $ Q33P : chr [1:3200] "Agree" "Disagree" "Agree strongly" "Agree strongly" ...
## $ Q34_3 : chr [1:3200] "Agree" "Disagree" "Agree" "Agree" ...
## $ Q34P : chr [1:3200] "Agree" "Disagree" "Agree strongly" "Agree strongly" ...
## $ Q35_3 : chr [1:3200] "Disagree" "Disagree" "Agree" "Agree" ...
## $ Q35P : chr [1:3200] "Disagree" "Disagree" "Agree strongly" "Agree" ...
## $ Q36P : chr [1:3200] "Disagree strongly" "Disagree" "Disagree strongly" "Disagree" ...
## $ Q37P : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
## $ Q38P : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
## $ Q39P : chr [1:3200] "Disagree" "Agree" "Agree strongly" "Agree strongly" ...
## $ Q40P : chr [1:3200] "Agree" "Agree" "Agree strongly" "Agree" ...
## $ Q41P : chr [1:3200] "Agree" "Disagree" "Agree" "Agree strongly" ...
## $ Q42 : chr [1:3200] "Our society must be gradually improved by reforms" "Our society must be gradually improved by reforms" "The entire way our society is organized must be radically changed by revolutionary action" "The entire way our society is organized must be radically changed by revolutionary action" ...
## $ Q43P : chr [1:3200] "Don’t mind" "Bad" "Bad" "Good" ...
## $ Q44P : chr [1:3200] "Good" "Good" "Bad" "Good" ...
## $ Q45P : chr [1:3200] "Good" "Don’t mind" "Bad" "Bad" ...
## $ Q46P : chr [1:3200] "Very happy" "Very happy" "Quite happy" "Very happy" ...
## $ Q47P : chr [1:3200] "Good" "Good" "Good" "Very good" ...
## $ Q48 : chr [1:3200] "A great deal" "A great deal" "8" "5" ...
## $ Q49 : chr [1:3200] "Completely satisfied" "Completely satisfied" "8" "Completely satisfied" ...
## $ Q50 : chr [1:3200] "Satisfied" "Satisfied" "8" "Satisfied" ...
## $ Q51P : chr [1:3200] "Never" "Never" "Never" "Never" ...
## $ Q52P : chr [1:3200] "Never" "Never" "Never" "Never" ...
## $ Q53P : chr [1:3200] "Rarely" "Never" "Never" "Never" ...
## $ Q54P : chr [1:3200] "Never" "Never" "Never" "Never" ...
## $ Q55P : chr [1:3200] "Never" "Never" "Never" "Never" ...
## $ Q56P : chr [1:3200] "Better off" "Or about the same" "Or about the same" "Better off" ...
## [list output truncated]
dim(datIND)
## [1] 3200 611
Pada data WVS 7 Indonesia ada sebanyak 3200 pengamatan dan 611 variabel
df <- subset(datIND, select = c("Q46P","Q49", "Q47P", "Q173P", "Q273", "Q260", "Q262","Q275" ))
sapply(df, function(x) any(is.na(x)))
## Q46P Q49 Q47P Q173P Q273 Q260 Q262 Q275
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
str(df)
## tibble [3,200 × 8] (S3: tbl_df/tbl/data.frame)
## $ Q46P : chr [1:3200] "Very happy" "Very happy" "Quite happy" "Very happy" ...
## $ Q49 : chr [1:3200] "Completely satisfied" "Completely satisfied" "8" "Completely satisfied" ...
## $ Q47P : chr [1:3200] "Good" "Good" "Good" "Very good" ...
## $ Q173P: chr [1:3200] "A religious person" "A religious person" "A religious person" "A religious person" ...
## $ Q273 : chr [1:3200] "Married" "Married" "Married" "Married" ...
## $ Q260 : chr [1:3200] "Male" "Male" "Male" "Male" ...
## $ Q262 : chr [1:3200] "22" "35" "48" "46" ...
## $ Q275 : chr [1:3200] "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Upper secondary education (ISCED 3)" "Lower secondary education (ISCED 2)" ...
lapply(df, table)
## $Q46P
##
## No answer Not at all happy Not very happy Quite happy
## 1 34 161 1590
## Very happy
## 1414
##
## $Q49
##
## 2
## 50
## 3
## 63
## 4
## 97
## 5
## 333
## 6
## 245
## 7
## 413
## 8
## 554
## 9
## 341
## Completely dissatisfied
## 117
## Completely satisfied
## 983
## Don't know
## 3
## Other missing; Multiple answers Mail (EVS)
## 1
##
## $Q47P
##
## Fair Good Poor Very good Very poor
## 720 1404 200 836 40
##
## $Q173P
##
## A religious person Don't know No answer
## 2924 27 14
## Not a religious person
## 235
##
## $Q273
##
## Divorced Living together as married
## 101 23
## Married Separated
## 2453 9
## Single Widowed
## 415 199
##
## $Q260
##
## Female Male
## 1754 1446
##
## $Q262
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
## 70 66 58 56 54 65 65 84 53 54 84 79 99 67 93 105 97 85 78 66
## 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 108 72 93 82 89 82 64 94 66 68 67 61 60 39 56 55 45 46 35 41
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 78
## 52 31 53 32 27 26 21 28 26 13 16 9 19 1 5 10 4 6 6 3
## 79 80 81 82 84 86 87
## 1 3 1 1 1 2 2
##
## $Q275
##
## Bachelor or equivalent (ISCED 6)
## 221
## Doctoral or equivalent (ISCED 8)
## 1
## Don't know
## 1
## Early childhood education (ISCED 0) / no education
## 311
## Lower secondary education (ISCED 2)
## 659
## Master or equivalent (ISCED 7)
## 17
## Primary education (ISCED 1)
## 842
## Short-cycle tertiary education (ISCED 5)
## 81
## Upper secondary education (ISCED 3)
## 1067
PREPROCESSING DATA
df$Happiness <- factor(df$Q46P, levels = c("Not at all happy","Not very happy","Quite happy","Very happy"),labels = c("Not at all happy","Not very happy","Quite happy","Very happy"))
sum(is.na(df$Happiness))
## [1] 1
levels(df$Happiness)
## [1] "Not at all happy" "Not very happy" "Quite happy" "Very happy"
df$Gender <- factor(df$Q260)
sum(is.na(df$Gender))
## [1] 0
levels(df$Gender)
## [1] "Female" "Male"
df$Age <- as.numeric(df$Q262)
sum(is.na(df$Age))
## [1] 0
df$Life <- as.numeric(factor(df$Q49,levels = c("Completely dissatisfied","2", "3","4","5","6","7","8","9","Completely satisfied"),labels = c("1","2", "3","4","5","6","7","8","9","10")))
sum(is.na(df$Life))
## [1] 4
df$Marital <- factor(df$Q273, levels = c("Single","Widowed","Separated","Divorced","Living together as married","Married"))
sum(is.na(df$Marital))
## [1] 0
levels(df$Marital)
## [1] "Single" "Widowed"
## [3] "Separated" "Divorced"
## [5] "Living together as married" "Married"
df$Health <- factor(df$Q47P, levels = c("Very poor", "Poor", "Fair","Good","Very good"))
sum(is.na(df$Health))
## [1] 0
levels(df$Health)
## [1] "Very poor" "Poor" "Fair" "Good" "Very good"
df$Religious <- factor(df$Q173P, levels = c("Don't know","Not a religious person","A religious person"))
sum(is.na(df$Religious))
## [1] 14
levels(df$Religious)
## [1] "Don't know" "Not a religious person" "A religious person"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- df %>% mutate(Education = case_when(Q275 %in% c("Early childhood education (ISCED 0) / no education","Primary education (ISCED 1)", "Lower secondary education (ISCED 2)") ~ "Low", Q275 %in% c("Upper secondary education (ISCED 3)") ~ "Middle", Q275 %in% c("Short-cycle tertiary education (ISCED 5)","Bachelor or equivalent (ISCED 6)", "Master or equivalent (ISCED 7)","Doctoral or equivalent (ISCED 8)") ~ "High"))
sum(is.na(df$Education))
## [1] 1
colnames(df)
## [1] "Q46P" "Q49" "Q47P" "Q173P" "Q273" "Q260"
## [7] "Q262" "Q275" "Happiness" "Gender" "Age" "Life"
## [13] "Marital" "Health" "Religious" "Education"
#newdataset
df2 <- subset(df,select = c("Happiness","Age","Gender","Marital","Education","Health","Religious","Life"))
sapply(df2, function(x) sum(is.na(x)))
## Happiness Age Gender Marital Education Health Religious Life
## 1 0 0 0 1 0 14 4
dim(df2)
## [1] 3200 8
df3 <- na.omit(df2)
dim(df3)
## [1] 3180 8
library(summarytools)
## Warning: package 'summarytools' was built under R version 4.4.3
dfSummary(df3)
## Data Frame Summary
## df3
## Dimensions: 3180 x 8
## Duplicates: 439
##
## ----------------------------------------------------------------------------------------------------------------
## No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
## ---- ------------- ------------------------------ -------------------- -------------------- ---------- ---------
## 1 Happiness 1. Not at all happy 33 ( 1.0%) 3180 0
## [factor] 2. Not very happy 160 ( 5.0%) I (100.0%) (0.0%)
## 3. Quite happy 1580 (49.7%) IIIIIIIII
## 4. Very happy 1407 (44.2%) IIIIIIII
##
## 2 Age Mean (sd) : 40 (13.5) 67 distinct values : . 3180 0
## [numeric] min < med < max: . : : : . (100.0%) (0.0%)
## 18 < 39 < 87 : : : : : .
## IQR (CV) : 19 (0.3) : : : : : : .
## : : : : : : : .
##
## 3 Gender 1. Female 1747 (54.9%) IIIIIIIIII 3180 0
## [factor] 2. Male 1433 (45.1%) IIIIIIIII (100.0%) (0.0%)
##
## 4 Marital 1. Single 415 (13.1%) II 3180 0
## [factor] 2. Widowed 198 ( 6.2%) I (100.0%) (0.0%)
## 3. Separated 9 ( 0.3%)
## 4. Divorced 100 ( 3.1%)
## 5. Living together as marrie 23 ( 0.7%)
## 6. Married 2435 (76.6%) IIIIIIIIIIIIIII
##
## 5 Education 1. High 319 (10.0%) II 3180 0
## [character] 2. Low 1802 (56.7%) IIIIIIIIIII (100.0%) (0.0%)
## 3. Middle 1059 (33.3%) IIIIII
##
## 6 Health 1. Very poor 40 ( 1.3%) 3180 0
## [factor] 2. Poor 198 ( 6.2%) I (100.0%) (0.0%)
## 3. Fair 716 (22.5%) IIII
## 4. Good 1395 (43.9%) IIIIIIII
## 5. Very good 831 (26.1%) IIIII
##
## 7 Religious 1. Don't know 27 ( 0.8%) 3180 0
## [factor] 2. Not a religious person 235 ( 7.4%) I (100.0%) (0.0%)
## 3. A religious person 2918 (91.8%) IIIIIIIIIIIIIIIIII
##
## 8 Life Mean (sd) : 7.6 (2.4) 1 : 117 ( 3.7%) 3180 0
## [numeric] min < med < max: 2 : 50 ( 1.6%) (100.0%) (0.0%)
## 1 < 8 < 10 3 : 63 ( 2.0%)
## IQR (CV) : 4 (0.3) 4 : 97 ( 3.1%)
## 5 : 331 (10.4%) II
## 6 : 244 ( 7.7%) I
## 7 : 410 (12.9%) II
## 8 : 553 (17.4%) III
## 9 : 340 (10.7%) II
## 10 : 975 (30.7%) IIIIII
## ----------------------------------------------------------------------------------------------------------------
# Kategorik
library(tidyverse)
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tibble::view() masks summarytools::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
ct_gender <- xtabs(~Gender+Happiness, data=df3)
ct_marital <- xtabs(~Marital+Happiness, data=df3)
ct_education <- xtabs(~Education+Happiness, data=df3)
ct_health <- xtabs(~Health+Happiness, data=df3)
ct_religious <- xtabs(~Religious+Happiness, data=df3)
# Buat list hasil frekuensi dan uji chi-square
freq_result <- list(
gender = list(
round(ct_gender / rep(colSums(ct_gender), each = 2) * 100, 2),
round(chisq.test(ct_gender)$statistic, 2),
chisq.test(ct_gender)$p.value
),
marital = list(
round(ct_marital / rep(colSums(ct_marital), each = 2) * 100, 2),
round(chisq.test(ct_marital)$statistic, 2),
chisq.test(ct_marital)$p.value
),
education = list(
round(ct_education / rep(colSums(ct_education), each = 2) * 100, 2),
round(chisq.test(ct_education)$statistic, 2),
chisq.test(ct_education)$p.value
),
health = list(
round(ct_health / rep(colSums(ct_health), each = 2) * 100, 2),
round(chisq.test(ct_health)$statistic, 2),
chisq.test(ct_health)$p.value
),
religious = list(
round(ct_religious / rep(colSums(ct_religious), each = 2) * 100, 2),
round(chisq.test(ct_religious)$statistic, 2),
chisq.test(ct_religious)$p.value
)
)
## Warning in chisq.test(ct_marital): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_marital): Chi-squared approximation may be incorrect
## Warning in ct_education/rep(colSums(ct_education), each = 2): longer object
## length is not a multiple of shorter object length
## Warning in chisq.test(ct_education): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_education): Chi-squared approximation may be incorrect
## Warning in ct_health/rep(colSums(ct_health), each = 2): longer object length is
## not a multiple of shorter object length
## Warning in chisq.test(ct_health): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_health): Chi-squared approximation may be incorrect
## Warning in ct_religious/rep(colSums(ct_religious), each = 2): longer object
## length is not a multiple of shorter object length
## Warning in chisq.test(ct_religious): Chi-squared approximation may be incorrect
## Warning in chisq.test(ct_religious): Chi-squared approximation may be incorrect
# Tampilkan hasil
freq_result
## $gender
## $gender[[1]]
## Happiness
## Gender Not at all happy Not very happy Quite happy Very happy
## Female 51.52 40.62 51.58 60.41
## Male 48.48 59.38 48.42 39.59
##
## $gender[[2]]
## X-squared
## 37.61
##
## $gender[[3]]
## [1] 3.410033e-08
##
##
## $marital
## $marital[[1]]
## Happiness
## Marital Not at all happy Not very happy Quite happy
## Single 9.09 1.71 13.54
## Widowed 24.24 0.71 6.52
## Separated 0.62 3.03 0.28
## Divorced 2.50 42.42 3.55
## Living together as married 0.00 0.00 15.15
## Married 1.08 69.38 3648.48
## Happiness
## Marital Very happy
## Single 108.75
## Widowed 48.12
## Separated 0.19
## Divorced 2.03
## Living together as married 1.28
## Married 78.39
##
## $marital[[2]]
## X-squared
## 73.4
##
## $marital[[3]]
## [1] 1.097899e-09
##
##
## $education
## $education[[1]]
## Happiness
## Education Not at all happy Not very happy Quite happy Very happy
## High 3.03 6.25 10.95 466.67
## Low 78.79 6.71 62.12 497.50
## Middle 3.75 2.78 1672.73 285.62
##
## $education[[2]]
## X-squared
## 16.58
##
## $education[[3]]
## [1] 0.01095217
##
##
## $health
## $health[[1]]
## Happiness
## Health Not at all happy Not very happy Quite happy Very happy
## Very poor 6.06 0.44 8.12 1.28
## Poor 27.27 2.27 55.62 206.06
## Fair 8.12 3.91 29.24 563.64
## Good 1.88 151.52 47.91 365.62
## Very good 0.38 48.48 18.41 343.75
##
## $health[[2]]
## X-squared
## 385.18
##
## $health[[3]]
## [1] 5.185637e-75
##
##
## $religious
## $religious[[1]]
## Happiness
## Religious Not at all happy Not very happy Quite happy Very happy
## Don't know 3.03 2.50 1.07 21.21
## Not a religious person 3.03 1.20 9.88 47.50
## A religious person 19.38 8.67 4321.21 827.50
##
## $religious[[2]]
## X-squared
## 28.09
##
## $religious[[3]]
## [1] 9.026728e-05
# Numerik
library(dplyr)
num.var <- df3 %>% select(where(is.numeric)) %>% group_by(df3$Happiness) %>% summarise_each(funs(mean, sd))%>% as.data.frame()
## Warning: `summarise_each()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
##
## # Simple named list: list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
##
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
num.var
## df3$Happiness Age_mean Life_mean Age_sd Life_sd
## 1 Not at all happy 45.90909 4.878788 15.42799 3.028589
## 2 Not very happy 42.89375 5.337500 12.52650 2.751529
## 3 Quite happy 41.16582 7.242405 13.77165 2.275814
## 4 Very happy 38.24876 8.215352 13.10362 2.256924
VISUALISASI
ggplot(df3, aes(x=Happiness,fill=Gender))+geom_bar(position = "dodge")+facet_grid(Life~Marital) +theme_bw()+theme(axis.text.x = element_text(angle = 45, hjust = 1))
MODEL REGRESI LOGISTIK ORDINAL
library(VGAM)
## Warning: package 'VGAM' was built under R version 4.4.3
## Loading required package: stats4
## Loading required package: splines
mod <- vglm(Happiness~Gender+Age+Marital+Education+Health+Religious+Life, family=cumulative(parallel = TRUE),data=df3)
## Warning in eval(slot(family, "initialize")): response should be ordinal---see
## ordered()
summary(mod)
##
## Call:
## vglm(formula = Happiness ~ Gender + Age + Marital + Education +
## Health + Religious + Life, family = cumulative(parallel = TRUE),
## data = df3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -2.713679 0.565949 -4.795 1.63e-06 ***
## (Intercept):2 -0.818472 0.544437 -1.503 0.132752
## (Intercept):3 2.574708 0.546324 4.713 2.44e-06 ***
## GenderMale 0.296301 0.077382 3.829 0.000129 ***
## Age 0.018654 0.003364 5.546 2.93e-08 ***
## MaritalWidowed -0.317277 0.212311 -1.494 0.135071
## MaritalSeparated 0.120748 0.704120 0.171 0.863840
## MaritalDivorced 0.293943 0.238397 1.233 0.217577
## MaritalLiving together as married -2.040387 0.559991 -3.644 0.000269 ***
## MaritalMarried -0.345076 0.125557 -2.748 0.005990 **
## EducationLow 0.125722 0.126816 0.991 0.321503
## EducationMiddle 0.202279 0.132476 1.527 0.126783
## HealthPoor 0.305423 0.352499 0.866 0.386242
## HealthFair 0.410286 0.330219 1.242 0.214064
## HealthGood -0.153111 0.326603 -0.469 0.639214
## HealthVery good -1.001095 0.331208 -3.023 0.002506 **
## ReligiousNot a religious person -0.669989 0.417634 -1.604 0.108659
## ReligiousA religious person -1.150799 0.396913 -2.899 0.003739 **
## Life -0.223535 0.016130 -13.858 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y<=1]), logitlink(P[Y<=2]),
## logitlink(P[Y<=3])
##
## Residual deviance: 5164.219 on 9521 degrees of freedom
##
## Log-likelihood: -2582.11 on 9521 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## GenderMale Age
## 1.3448752 1.0188293
## MaritalWidowed MaritalSeparated
## 0.7281287 1.1283404
## MaritalDivorced MaritalLiving together as married
## 1.3417072 0.1299784
## MaritalMarried EducationLow
## 0.7081669 1.1339667
## EducationMiddle HealthPoor
## 1.2241892 1.3571996
## HealthFair HealthGood
## 1.5072493 0.8580346
## HealthVery good ReligiousNot a religious person
## 0.3674768 0.5117141
## ReligiousA religious person Life
## 0.3163838 0.7996866
anova(mod)
## Analysis of Deviance Table (Type II tests)
##
## Model: 'cumulative', 'VGAMordinal', 'VGAMcategorical'
##
## Links: 'logitlink'
##
## Response: Happiness
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## Gender 1 14.679 9522 5178.9 0.0001275 ***
## Age 1 30.931 9522 5195.2 2.674e-08 ***
## Marital 5 28.981 9526 5193.2 2.339e-05 ***
## Education 2 2.508 9523 5166.7 0.2853593
## Health 4 183.955 9525 5348.2 < 2.2e-16 ***
## Religious 2 19.265 9523 5183.5 6.557e-05 ***
## Life 1 192.321 9522 5356.5 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hitung odds ratio dan confidence interval
se <- sqrt(diag(vcov(mod)))
odds_ratios <- data.frame(
Coeff = names(coef(mod)),
OR = exp(coef(mod)),
se = se,
CI_lower = exp(coef(mod) - 1.96 * se),
CI_upper = exp(coef(mod) + 1.96 * se)
)
# Tampilkan hasil tanpa kolom 'Coeff'
print(round(odds_ratios[,-1], 3))
## OR se CI_lower CI_upper
## (Intercept):1 0.066 0.566 0.022 0.201
## (Intercept):2 0.441 0.544 0.152 1.282
## (Intercept):3 13.127 0.546 4.499 38.302
## GenderMale 1.345 0.077 1.156 1.565
## Age 1.019 0.003 1.012 1.026
## MaritalWidowed 0.728 0.212 0.480 1.104
## MaritalSeparated 1.128 0.704 0.284 4.485
## MaritalDivorced 1.342 0.238 0.841 2.141
## MaritalLiving together as married 0.130 0.560 0.043 0.390
## MaritalMarried 0.708 0.126 0.554 0.906
## EducationLow 1.134 0.127 0.884 1.454
## EducationMiddle 1.224 0.132 0.944 1.587
## HealthPoor 1.357 0.352 0.680 2.708
## HealthFair 1.507 0.330 0.789 2.879
## HealthGood 0.858 0.327 0.452 1.627
## HealthVery good 0.367 0.331 0.192 0.703
## ReligiousNot a religious person 0.512 0.418 0.226 1.160
## ReligiousA religious person 0.316 0.397 0.145 0.689
## Life 0.800 0.016 0.775 0.825