# Load libraries
library(tidyverse)
library(plotly)
library(corrplot)
library(caret)
library(rpart)
library(rpart.plot)
library(pROC)
library(randomForest)
library(e1071)
library(nnet)
library(UBL)
# Load the dataset
# The path is relative, so the CSV is resolved against the working directory.
data <- read_csv("marriage_divorce_india_with_id.csv")

# Check the dataset: one line per column with its type and first values.
# Lines starting with `#>` are console output captured from an earlier run,
# kept as comments so the script remains valid R.
glimpse(data)
#> Rows: 1,200
#> Columns: 11
#> $ `Unique ID` <chr> "MD1", "MD2", "MD3", "MD4", "MD5", "MD6…
#> $ `Marriage Duration (Years)` <dbl> 39, 29, 15, 8, 21, 39, 19, 23, 11, 11, …
#> $ `Age at Marriage` <dbl> 29, 34, 34, 27, 34, 32, 29, 33, 25, 34,…
#> $ `Marriage Type` <chr> "Love", "Arranged", "Love", "Arranged",…
#> $ `Education Level` <chr> "Secondary", "No Education", "No Educat…
#> $ `Income Level (INR per month)` <dbl> 113464, 18682, 159455, 63160, 28666, 63…
#> $ `Caste/Religion` <chr> "Hindu", "Jain", "Muslim", "Jain", "Jai…
#> $ `Urban/Rural` <chr> "Rural", "Rural", "Urban", "Urban", "Ur…
#> $ `Family Involvement` <chr> "Moderate", "Moderate", "Moderate", "Hi…
#> $ Children <dbl> 2, 0, 4, 1, 1, 1, 0, 3, 2, 0, 1, 0, 0, …
#> $ `Divorce Status` <chr> "No", "Yes", "Yes", "Yes", "Yes", "No",…
# Count missing values per column: is.na() yields a logical matrix and
# colSums() totals the TRUE entries for each column.
# Lines starting with `#>` are console output captured from an earlier run;
# in that run every column reported 0 missing values.
colSums(is.na(data))
#> Unique ID Marriage Duration (Years)
#> 0 0
#> Age at Marriage Marriage Type
#> 0 0
#> Education Level Income Level (INR per month)
#> 0 0
#> Caste/Religion Urban/Rural
#> 0 0
#> Family Involvement Children
#> 0 0
#> Divorce Status
#> 0
# Per-column summary: quartiles and mean for numeric columns, length/class/mode
# for character columns.
# Lines starting with `#>` are console output captured from an earlier run
# (column alignment was lost when the output was pasted).
summary(data)
#> Unique ID Marriage Duration (Years) Age at Marriage
#> Length:1200 Min. : 1.00 Min. :18.00
#> Class :character 1st Qu.:10.00 1st Qu.:22.00
#> Mode :character Median :22.00 Median :26.00
#> Mean :20.55 Mean :26.05
#> 3rd Qu.:30.00 3rd Qu.:30.00
#> Max. :39.00 Max. :34.00
#> Marriage Type Education Level Income Level (INR per month)
#> Length:1200 Length:1200 Min. : 5287
#> Class :character Class :character 1st Qu.: 54522
#> Mode :character Mode :character Median :101889
#> Mean :102353
#> 3rd Qu.:150569
#> Max. :199999
#> Caste/Religion Urban/Rural Family Involvement Children
#> Length:1200 Length:1200 Length:1200 Min. :0.000
#> Class :character Class :character Class :character 1st Qu.:1.000
#> Mode :character Mode :character Mode :character Median :2.000
#> Mean :1.886
#> 3rd Qu.:3.000
#> Max. :4.000
#> Divorce Status
#> Length:1200
#> Class :character
#> Mode :character
# Check values in categorical variables
# Each call lists the distinct values of one character column, in order of
# first appearance. Lines starting with `#>` are console output captured from
# an earlier run, kept as comments so the script remains valid R.
unique(data$`Education Level`)
#> [1] "Secondary" "No Education" "Postgraduate" "Graduate" "Primary"

unique(data$`Family Involvement`)
#> [1] "Moderate" "High" "Low"

unique(data$`Caste/Religion`)
#> [1] "Hindu" "Jain" "Muslim" "Christian" "Other" "Sikh"

unique(data$`Urban/Rural`)
#> [1] "Rural" "Urban"