# file.choose()
# Load the two source tables. NOTE(review): both paths are absolute and
# specific to one user's machine; the script only runs where they exist.
bd1 <- read.csv(file = "C:\\Users\\ximen\\Downloads\\ClaimsData2018.csv")
bd2 <- read.csv(file = "C:\\Users\\ximen\\Downloads\\TransactionsSummary.csv")
# Full outer join on ClaimID: all = TRUE keeps rows from either table even
# when the other table has no matching ClaimID.
bd <- merge(x = bd1, y = bd2, by = "ClaimID", all = TRUE)
## Warning: package 'dplyr' was built under R version 4.3.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Add the total incurred cost per claim as a new last column:
# reserves + indemnity paid + other paid - recoveries.
bd$Total_Incurred_Cost_Claim <- with(
  bd,
  TotalReserves + IndemnityPaid + OtherPaid - TotalRecovery
)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 650919 Min. : -81.8 Min. : 0 Min. : 0.00
## 1st Qu.: 806228 1st Qu.: 20.1 1st Qu.: 0 1st Qu.: 0.00
## Median : 833851 Median : 223.0 Median : 0 Median : 0.00
## Mean : 8053898 Mean : 6504.3 Mean : 2423 Mean : 31.13
## 3rd Qu.: 7143280 3rd Qu.: 932.1 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203889 Max. :2985247.9 Max. :2069575 Max. :90357.52
##
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -1.2 Min. : -81.8 Length:59197 Length:59197
## 1st Qu.: 0.0 1st Qu.: 16.4 Class :character Class :character
## Median : 0.0 Median : 218.7 Mode :character Mode :character
## Mean : 2945.2 Mean : 3559.1
## 3rd Qu.: 0.0 3rd Qu.: 857.8
## Max. :492934.8 Max. :2700073.4
##
## IncidentDescription ReturnToWorkDate AverageWeeklyWage ClaimantOpenedDate
## Length:59197 Length:59197 Length:59197 Length:59197
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:59197 Length:59197 Length:59197
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied ClaimantAge_at_DOI Gender ClaimantType
## Min. :0.00000 Length:59197 Length:59197 Length:59197
## 1st Qu.:0.00000 Class :character Class :character Class :character
## Median :0.00000 Mode :character Mode :character Mode :character
## Mean :0.05688
## 3rd Qu.:0.00000
## Max. :1.00000
##
## InjuryNature BodyPartRegion BodyPart BillReviewALE
## Length:59197 Length:59197 Length:59197 Min. : -456.00
## Class :character Class :character Class :character 1st Qu.: 8.25
## Mode :character Mode :character Mode :character Median : 24.00
## Mean : 174.80
## 3rd Qu.: 64.00
## Max. :20730.77
## NA's :46628
## Hospital PhysicianOutpatient Rx
## Min. :-12570.4 Min. : -162.9 Min. : -160.7
## 1st Qu.: 203.1 1st Qu.: 106.8 1st Qu.: 23.4
## Median : 572.9 Median : 220.2 Median : 61.1
## Mean : 4580.8 Mean : 1700.4 Mean : 1357.1
## 3rd Qu.: 2213.5 3rd Qu.: 667.2 3rd Qu.: 176.5
## Max. :667973.0 Max. :1481468.5 Max. :380924.3
## NA's :49187 NA's :34369 NA's :49906
## Total_Incurred_Cost_Claim
## Min. : -2961
## 1st Qu.: 22
## Median : 226
## Mean : 8897
## 3rd Qu.: 976
## Max. :5054823
##
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 650919 Min. : -81.8 Min. : 0 Min. : 0.00
## 1st Qu.: 806228 1st Qu.: 20.1 1st Qu.: 0 1st Qu.: 0.00
## Median : 833851 Median : 223.0 Median : 0 Median : 0.00
## Mean : 8053898 Mean : 6504.3 Mean : 2423 Mean : 31.13
## 3rd Qu.: 7143280 3rd Qu.: 932.1 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203889 Max. :2985247.9 Max. :2069575 Max. :90357.52
##
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -1.2 Min. : -81.8 Length:59197 Length:59197
## 1st Qu.: 0.0 1st Qu.: 16.4 Class :character Class :character
## Median : 0.0 Median : 218.7 Mode :character Mode :character
## Mean : 2945.2 Mean : 3559.1
## 3rd Qu.: 0.0 3rd Qu.: 857.8
## Max. :492934.8 Max. :2700073.4
##
## IncidentDescription ReturnToWorkDate AverageWeeklyWage ClaimantOpenedDate
## Length:59197 Length:59197 Length:59197 Length:59197
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:59197 Length:59197 Length:59197
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied ClaimantAge_at_DOI Gender ClaimantType
## Min. :0.00000 Length:59197 Length:59197 Length:59197
## 1st Qu.:0.00000 Class :character Class :character Class :character
## Median :0.00000 Mode :character Mode :character Mode :character
## Mean :0.05688
## 3rd Qu.:0.00000
## Max. :1.00000
##
## InjuryNature BodyPartRegion BodyPart BillReviewALE
## Length:59197 Length:59197 Length:59197 Min. : -456.00
## Class :character Class :character Class :character 1st Qu.: 8.25
## Mode :character Mode :character Mode :character Median : 24.00
## Mean : 174.80
## 3rd Qu.: 64.00
## Max. :20730.77
## NA's :46628
## Hospital PhysicianOutpatient Rx
## Min. :-12570.4 Min. : -162.9 Min. : -160.7
## 1st Qu.: 203.1 1st Qu.: 106.8 1st Qu.: 23.4
## Median : 572.9 Median : 220.2 Median : 61.1
## Mean : 4580.8 Mean : 1700.4 Mean : 1357.1
## 3rd Qu.: 2213.5 3rd Qu.: 667.2 3rd Qu.: 176.5
## Max. :667973.0 Max. :1481468.5 Max. :380924.3
## NA's :49187 NA's :34369 NA's :49906
## Total_Incurred_Cost_Claim
## Min. : -2961
## 1st Qu.: 22
## Median : 226
## Mean : 8897
## 3rd Qu.: 976
## Max. :5054823
##
#count(bd_mujeres1, ClaimStatus, sort= TRUE)
#count(bd_mujeres1, IncidentDate, sort= TRUE)
#count(bd_mujeres1, IncidentDescription, sort= TRUE)
#count(bd_mujeres1, ReturnToWorkDate, sort= TRUE)
#count(bd_mujeres1, AverageWeeklyWage, sort= TRUE)
#count(bd_mujeres1, ClaimantOpenedDate, sort= TRUE)
#count(bd_mujeres1, ClaimantClosedDate, sort= TRUE)
#count(bd_mujeres1, EmployerNotificationDate, sort= TRUE)
#count(bd_mujeres1, ReceivedDate, sort= TRUE)
#count(bd_mujeres1, ClaimantAge_at_DOI, sort= TRUE)
#count(bd_mujeres1, Gender, sort= TRUE)
#count(bd_mujeres1, ClaimantType, sort= TRUE)
#count(bd_mujeres1, InjuryNature, sort= TRUE)
#count(bd_mujeres1, BodyPartRegion, sort= TRUE)
#count(bd_mujeres1, BodyPart, sort= TRUE)
Observaciones:
1. Tenemos muchos “#VALUE!” en la variable ReturnToWorkDate.
2. Hay muchos NULL en la variable AverageWeeklyWage.
3. Tenemos muchos “#VALUE!” en la variable EmployerNotificationDate.
4. Hay muchos NULL en la variable ClaimantAge_at_DOI.
# Extract the variables of interest into their own data frame.
Mujeres <- bd_mujeres1[, c("ClaimStatus", "ClaimantType", "BodyPartRegion", "InjuryNature")]
# The library() call was fused onto the end of the previous statement, which
# is a syntax error in R; it must be a statement of its own.
library(dplyr)
# Build the working table with only the columns needed for the models below.
bd_mujeresrl <- select(
  bd_mujereslimpia,
  ClaimID,
  ClaimStatus,
  BodyPartRegion,
  ClaimantAge_at_DOI,
  Gender,
  ClaimantType,
  ClaimantOpenedDate,
  ClaimantClosedDate,
  Total_Incurred_Cost_Claim
)
# View(bd_mujeresrl)
summary(bd_mujeresrl)
## ClaimID ClaimStatus BodyPartRegion ClaimantAge_at_DOI
## Min. : 650919 Length:59197 Length:59197 Length:59197
## 1st Qu.: 806228 Class :character Class :character Class :character
## Median : 833851 Mode :character Mode :character Mode :character
## Mean : 8053898
## 3rd Qu.: 7143280
## Max. :62203889
## Gender ClaimantType ClaimantOpenedDate ClaimantClosedDate
## Length:59197 Length:59197 Length:59197 Length:59197
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Total_Incurred_Cost_Claim
## Min. : -2961
## 1st Qu.: 22
## Median : 226
## Mean : 8897
## 3rd Qu.: 976
## Max. :5054823
# Processing time in days = closed date - opened date.
# Both date columns are parsed as month/day/two-digit-year ("%m/%d/%y");
# values that do not match that format become NA.
# NOTE(review): the summary recorded after this step reports 54104 NA values
# in the new column, so most rows have a missing or unparseable date; confirm
# the raw date format.
bd_mujeresrl$TiempoDeProcesamientoDias <- as.numeric(
  difftime(
    as.Date(bd_mujeresrl$ClaimantClosedDate, format = "%m/%d/%y"),
    as.Date(bd_mujeresrl$ClaimantOpenedDate, format = "%m/%d/%y"),
    units = "days"
  )
)
# The original date columns are no longer needed once the difference exists.
bd_mujeresrl$ClaimantOpenedDate <- NULL
bd_mujeresrl$ClaimantClosedDate <- NULL
# View(bd_mujeresAR)## ClaimID ClaimStatus BodyPartRegion ClaimantAge_at_DOI
## Min. : 650919 Length:59197 Length:59197 Length:59197
## 1st Qu.: 806228 Class :character Class :character Class :character
## Median : 833851 Mode :character Mode :character Mode :character
## Mean : 8053898
## 3rd Qu.: 7143280
## Max. :62203889
##
## Gender ClaimantType Total_Incurred_Cost_Claim
## Length:59197 Length:59197 Min. : -2961
## Class :character Class :character 1st Qu.: 22
## Mode :character Mode :character Median : 226
## Mean : 8897
## 3rd Qu.: 976
## Max. :5054823
##
## TiempoDeProcesamientoDias
## Min. :-333.0
## 1st Qu.: 0.0
## Median : 245.0
## Mean : 806.1
## 3rd Qu.: 920.0
## Max. :6912.0
## NA's :54104
# Recode the claim status letters as a factor: "C" -> "1", "O" -> "2",
# "R" -> "3". Any other value becomes NA.
bd_mujeresrl[["ClaimStatus"]] <- factor(
  bd_mujeresrl[["ClaimStatus"]],
  levels = c("C", "O", "R"),
  labels = c("1", "2", "3")
)
# The age column is read as text; non-numeric entries turn into NA here,
# which is what triggers the coercion warning recorded below.
bd_mujeresrl[["ClaimantAge_at_DOI"]] <- as.numeric(bd_mujeresrl[["ClaimantAge_at_DOI"]])
## Warning: NAs introducidos por coerción
## ClaimID ClaimStatus BodyPartRegion ClaimantAge_at_DOI
## Min. : 650919 1:56900 Length:59197 Min. :-8000.00
## 1st Qu.: 806228 2: 1786 Class :character 1st Qu.: 33.00
## Median : 833851 3: 511 Mode :character Median : 43.00
## Mean : 8053898 Mean : 39.75
## 3rd Qu.: 7143280 3rd Qu.: 52.00
## Max. :62203889 Max. : 89.00
## NA's :17097
## Gender ClaimantType Total_Incurred_Cost_Claim
## Min. :2 Min. :1.000 Min. : -2961
## 1st Qu.:2 1st Qu.:1.000 1st Qu.: 22
## Median :2 Median :1.000 Median : 226
## Mean :2 Mean :1.357 Mean : 8897
## 3rd Qu.:2 3rd Qu.:2.000 3rd Qu.: 976
## Max. :2 Max. :3.000 Max. :5054823
##
## TiempoDeProcesamientoDias
## Min. :-333.0
## 1st Qu.: 0.0
## Median : 245.0
## Mean : 806.1
## 3rd Qu.: 920.0
## Max. :6912.0
## NA's :54104
# Keep only rows whose age, total incurred cost and processing time are all
# non-negative. filter() also discards rows where a condition evaluates to NA,
# so claims with a missing age or a missing processing time are removed here
# as well (the recorded summaries go from 59197 rows to 4018).
bd_mujeresrl <- filter(
  bd_mujeresrl,
  ClaimantAge_at_DOI >= 0,
  Total_Incurred_Cost_Claim >= 0,
  TiempoDeProcesamientoDias >= 0
)
summary(bd_mujeresrl)
## ClaimID ClaimStatus BodyPartRegion ClaimantAge_at_DOI
## Min. : 650919 1:4015 Length:4018 Min. : 1.00
## 1st Qu.: 823404 2: 3 Class :character 1st Qu.:34.00
## Median : 5970814 3: 0 Mode :character Median :44.00
## Mean :15622363 Mean :43.14
## 3rd Qu.:30288888 3rd Qu.:52.00
## Max. :61592860 Max. :87.00
## Gender ClaimantType Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
## Min. :2 Min. :1.00 Min. : 0.0 Min. : 0.0
## 1st Qu.:2 1st Qu.:1.00 1st Qu.: 0.0 1st Qu.: 33.0
## Median :2 Median :1.00 Median : 171.3 Median : 301.0
## Mean :2 Mean :1.68 Mean : 4459.3 Mean : 717.1
## 3rd Qu.:2 3rd Qu.:2.00 3rd Qu.: 1125.3 3rd Qu.: 844.0
## Max. :2 Max. :3.00 Max. :388620.8 Max. :6912.0
# Linear model of total incurred cost on ClaimID, claimant age, body part
# region and processing time, fitted on the filtered table.
# NOTE(review): ClaimID looks like a record identifier rather than a
# meaningful predictor; the model fitted next in this file drops it - confirm
# this first fit is only exploratory.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimID + ClaimantAge_at_DOI + BodyPartRegion + TiempoDeProcesamientoDias,
data = bd_mujeresrl)
# Print coefficients, residual summary and fit statistics.
summary(regresion)
##
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimID + ClaimantAge_at_DOI +
## BodyPartRegion + TiempoDeProcesamientoDias, data = bd_mujeresrl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23086 -4840 -2286 -14 363016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.842e+03 1.676e+03 -2.889 0.00388 **
## ClaimID -5.826e-05 2.034e-05 -2.864 0.00421 **
## ClaimantAge_at_DOI 1.269e+02 2.770e+01 4.580 4.78e-06 ***
## BodyPartRegionLower Extremities 3.031e+03 1.231e+03 2.461 0.01389 *
## BodyPartRegionMultiple Body Parts 1.952e+03 1.403e+03 1.391 0.16417
## BodyPartRegionNeck 3.727e+03 1.893e+03 1.969 0.04903 *
## BodyPartRegionNon-Standard Code -8.554e+02 2.703e+03 -0.316 0.75166
## BodyPartRegionNot Available -3.580e+00 2.056e+04 0.000 0.99986
## BodyPartRegionTrunk 3.408e+03 1.336e+03 2.551 0.01076 *
## BodyPartRegionUpper Extremities 1.794e+03 1.170e+03 1.533 0.12547
## TiempoDeProcesamientoDias 3.543e+00 3.343e-01 10.599 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20530 on 4007 degrees of freedom
## Multiple R-squared: 0.04572, Adjusted R-squared: 0.04334
## F-statistic: 19.2 on 10 and 4007 DF, p-value: < 2.2e-16
# Refit the cost model without ClaimID: total incurred cost explained by
# claimant age, body part region and processing time. This overwrites the
# previous `regresion` object.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + BodyPartRegion + TiempoDeProcesamientoDias,
data = bd_mujeresrl)
# Print coefficients, residual summary and fit statistics.
summary(regresion)
##
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI +
## BodyPartRegion + TiempoDeProcesamientoDias, data = bd_mujeresrl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25854 -4494 -2357 -250 362038
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6264.9460 1602.2526 -3.910 9.38e-05 ***
## ClaimantAge_at_DOI 126.0115 27.7233 4.545 5.65e-06 ***
## BodyPartRegionLower Extremities 3348.0154 1227.5452 2.727 0.00641 **
## BodyPartRegionMultiple Body Parts 2561.3990 1387.9538 1.845 0.06505 .
## BodyPartRegionNeck 3727.3213 1894.4594 1.967 0.04920 *
## BodyPartRegionNon-Standard Code -1174.6409 2702.9742 -0.435 0.66390
## BodyPartRegionNot Available -665.6847 20574.4218 -0.032 0.97419
## BodyPartRegionTrunk 3846.9340 1328.2441 2.896 0.00380 **
## BodyPartRegionUpper Extremities 2010.1022 1168.9276 1.720 0.08558 .
## TiempoDeProcesamientoDias 3.9175 0.3079 12.724 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20550 on 4008 degrees of freedom
## Multiple R-squared: 0.04377, Adjusted R-squared: 0.04162
## F-statistic: 20.38 on 9 and 4008 DF, p-value: < 2.2e-16
# Predict the total incurred cost for one hypothetical claim. The age and
# processing-time values match the means printed in the summary of the
# filtered table; the body part region is fixed to "Lower Extremities".
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  TiempoDeProcesamientoDias = 717.1,
  BodyPartRegion = "Lower Extremities"
)
predict(regresion, newdata = datos)
## 1
## 5328.477
# Linear model of processing time (days) on claimant age, claimant type and
# total incurred cost. This reuses the name `regresion`, so the cost model
# fitted earlier is no longer available after this line.
# NOTE(review): ClaimantType enters as one numeric slope (a single
# coefficient in the recorded output), while the tree section of this file
# converts it with as.factor(); if the codes 1-3 are nominal categories,
# factor(ClaimantType) would be the appropriate term - confirm.
regresion <- lm(TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + ClaimantType + Total_Incurred_Cost_Claim,
data = bd_mujeresrl)
# Print coefficients, residual summary and fit statistics.
summary(regresion)
##
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI +
## ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresrl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2719.3 -569.3 -289.7 48.3 6332.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.716e+03 6.655e+01 25.789 <2e-16 ***
## ClaimantAge_at_DOI -1.229e+01 1.351e+00 -9.099 <2e-16 ***
## ClaimantType -3.075e+02 1.961e+01 -15.682 <2e-16 ***
## Total_Incurred_Cost_Claim 1.071e-02 7.620e-04 14.061 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1011 on 4014 degrees of freedom
## Multiple R-squared: 0.1133, Adjusted R-squared: 0.1126
## F-statistic: 171 on 3 and 4014 DF, p-value: < 2.2e-16
# Table used for clustering: processing time and total incurred cost only.
bd_mujerescl <- bd_mujeresrl[c("TiempoDeProcesamientoDias", "Total_Incurred_Cost_Claim")]
# Identify each row by its ClaimID through the row names instead of keeping
# ClaimID as a column.
rownames(bd_mujerescl) <- bd_mujeresrl$ClaimID
# View(bd_mujeresCL)
# Processing-time column: interquartile range (Q3 - Q1), used below to build
# the outlier fences.
IQR_TiempoDeProcesamientoDias <- diff(
  quantile(
    as.numeric(bd_mujerescl$TiempoDeProcesamientoDias),
    c(0.25, 0.75),
    names = FALSE
  )
)
print(IQR_TiempoDeProcesamientoDias)
## [1] 811
## TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 33.0 1st Qu.: 0.0
## Median : 301.0 Median : 171.3
## Mean : 717.1 Mean : 4459.3
## 3rd Qu.: 844.0 3rd Qu.: 1125.3
## Max. :6912.0 Max. :388620.8
## [1] -1183.5
# Upper outlier fence for processing time: Q3 + 1.5 * IQR.
# Q3 is computed from the data instead of being typed in by hand (it was
# hard-coded as 844, the value printed in the summary), so the fence stays
# correct if the input data changes.
LS_TiempoDeProcesamientoDias <- quantile(
  bd_mujerescl$TiempoDeProcesamientoDias,
  0.75,
  names = FALSE
) + 1.5 * IQR_TiempoDeProcesamientoDias
LS_TiempoDeProcesamientoDias
## [1] 2060.5
## LI_TiempoDeProcesamientoDias: -1183.5
## LS_TiempoDeProcesamientoDias: 2060.5
# Drop rows above the upper fence. The threshold used to be the hand-rounded
# constant 2061; comparing against the computed fence selects the same rows
# for whole-day values and needs no manual rounding.
bd_mujerescl <- bd_mujerescl[bd_mujerescl$TiempoDeProcesamientoDias <= LS_TiempoDeProcesamientoDias, ]
# Total incurred cost column: interquartile range (Q3 - Q1) of the rows that
# remain in bd_mujerescl at this point.
IQR_Total_Incurred_Cost_Claim <- diff(
  quantile(
    as.numeric(bd_mujerescl$Total_Incurred_Cost_Claim),
    c(0.25, 0.75),
    names = FALSE
  )
)
print(IQR_Total_Incurred_Cost_Claim)
## [1] 1155.71
## TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 4.0 1st Qu.: 0.0
## Median : 235.0 Median : 177.8
## Mean : 398.6 Mean : 3707.7
## 3rd Qu.: 548.0 3rd Qu.: 1155.7
## Max. :2058.0 Max. :246847.9
## [1] -1733.565
# Upper outlier fence for total incurred cost: Q3 + 1.5 * IQR.
# Q3 is taken from the current bd_mujerescl, i.e. the same rows the IQR was
# computed on. The previous hard-coded 1125.3 was the third quartile printed
# BEFORE the processing-time outliers were removed (the summary recorded
# after that filter shows 1155.7), so the fence was too low.
LS_Total_Incurred_Cost_Claim <- quantile(
  bd_mujerescl$Total_Incurred_Cost_Claim,
  0.75,
  names = FALSE
) + 1.5 * IQR_Total_Incurred_Cost_Claim
LS_Total_Incurred_Cost_Claim
# NOTE: the '##' outputs recorded around this step were produced with the old
# fence (1125.3 + 1.5 * IQR) and will change when the document is re-run.
## [1] 2858.865
## LI_Total_Incurred_Cost_Claim: -1733.565
## LS_Total_Incurred_Cost_Claim: 2858.865
# Drop rows above the computed fence (previously the hand-rounded 2859).
bd_mujerescl <- bd_mujerescl[bd_mujerescl$Total_Incurred_Cost_Claim <= LS_Total_Incurred_Cost_Claim, ]
summary(bd_mujerescl)
## TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0.00
## Median : 175.0 Median : 98.62
## Mean : 337.7 Mean : 372.37
## 3rd Qu.: 460.5 3rd Qu.: 428.27
## Max. :2058.0 Max. :2851.58
# 0. Standardise both variables (centre and scale each column) so that
#    neither dominates the Euclidean distances used by k-means.
bd_mujerescl <- as.data.frame(scale(bd_mujerescl))
# 1. Data set used for clustering.
bdmujeresCLUSTER <- bd_mujerescl
# 2. Number of clusters.
grupos <- 10
# 3. Run k-means.
#    - set.seed() makes the random initial centres, and therefore the
#      resulting segments, reproducible between runs (the seed value itself
#      is arbitrary).
#    - nstart tries several random initialisations and keeps the best one.
#    - iter.max is raised above the default of 10; this document records a
#      "did not converge in 10 iterations" warning.
set.seed(123)
segmentos <- kmeans(bdmujeresCLUSTER, centers = grupos, iter.max = 100, nstart = 25)
# 4. Attach the assigned cluster to every observation.
asignacion <- cbind(bdmujeresCLUSTER, cluster = segmentos$cluster)
# 5. Graficar asignaciones
# install.packages("ggplot2")
library(ggplot2)## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'factoextra' was built under R version 4.3.1
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the k-means assignment of the clustering data.
# TRUE is spelled out instead of T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.
fviz_cluster(
  segmentos,
  data = bdmujeresCLUSTER,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme()
)
## Warning: package 'data.table' was built under R version 4.3.1
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Warning: did not converge in 10 iterations
# Tree
# Extract the variables of interest for the tree model.
mujeresarbol <- bd_mujeresrl[c(
  "Total_Incurred_Cost_Claim",
  "ClaimStatus",
  "ClaimantAge_at_DOI",
  "Gender",
  "ClaimantType",
  "TiempoDeProcesamientoDias"
)]
# Count missing values across the whole table.
sum(is.na(mujeresarbol))
## [1] 0
# Set the column types expected by the tree: cost, age and processing time
# as numeric; claim status, gender and claimant type as factors.
mujeresarbol <- transform(
  mujeresarbol,
  Total_Incurred_Cost_Claim = as.numeric(Total_Incurred_Cost_Claim),
  ClaimStatus = as.factor(ClaimStatus),
  ClaimantAge_at_DOI = as.numeric(ClaimantAge_at_DOI),
  Gender = as.factor(Gender),
  ClaimantType = as.factor(ClaimantType),
  TiempoDeProcesamientoDias = as.numeric(TiempoDeProcesamientoDias)
)
# Inspect the resulting structure.
str(mujeresarbol)
## 'data.frame': 4018 obs. of 6 variables:
## $ Total_Incurred_Cost_Claim: num 43108 390.2 0 106.4 19.6 ...
## $ ClaimStatus : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ ClaimantAge_at_DOI : num 41 38 51 35 42 51 47 36 47 47 ...
## $ Gender : Factor w/ 1 level "2": 1 1 1 1 1 1 1 1 1 1 ...
## $ ClaimantType : Factor w/ 3 levels "1","2","3": 2 1 1 1 1 1 1 2 1 1 ...
## $ TiempoDeProcesamientoDias: num 848 2464 2856 2679 3256 ...
## Warning: package 'rpart' was built under R version 4.3.1
## Warning: package 'rpart.plot' was built under R version 4.3.1
## n= 4018
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 4018 4623308000 717.11470
## 2) ClaimantType=3 904 330852900 129.85840
## 4) Total_Incurred_Cost_Claim< 25 890 219013300 89.88764 *
## 5) Total_Incurred_Cost_Claim>=25 14 20024210 2670.85700 *
## 3) ClaimantType=1,2 3114 3890188000 887.59630
## 6) Total_Incurred_Cost_Claim>=51.975 2443 2068233000 787.16950
## 12) Total_Incurred_Cost_Claim< 208180.3 2430 1927711000 772.19140
## 24) Total_Incurred_Cost_Claim< 12823.46 2155 1650736000 718.96470 *
## 25) Total_Incurred_Cost_Claim>=12823.46 275 223025800 1189.29500 *
## 13) Total_Incurred_Cost_Claim>=208180.3 13 38075070 3586.92300 *
## 7) Total_Incurred_Cost_Claim< 51.975 671 1707609000 1253.23400
## 14) ClaimantAge_at_DOI>=48.5 196 247688700 726.56120 *
## 15) ClaimantAge_at_DOI< 48.5 475 1383120000 1470.55600 *