
#Llamar a las librerias
library(tidyverse)
library(readxl)
library(dplyr)
library(rpart)
library(rpart.plot)
library(ggplot2)
library(factoextra)
library(cluster)
library(data.table)
# Import the data
claims_data <- read_excel("ClaimsData2018.xlsx")
transactions_summary <- read.csv("TransactionsSummary2018.csv")
# Full outer join on ClaimID (all = TRUE): claims that appear in only one of
# the two sources are kept, with NA in the columns of the other source.
merged_df <- merge(
  claims_data,
  transactions_summary,
  by = "ClaimID",
  all = TRUE
)
#summary(merged_df)
#count(merged_df, ClaimStatus, sort=TRUE)
#count(merged_df, IncidentDescription, sort=TRUE)
#count(merged_df, Gender, sort=TRUE)
#count(merged_df, ClaimantType, sort=TRUE)
#count(merged_df, InjuryNature, sort=TRUE)
#count(merged_df, BodyPartRegion, sort=TRUE)
#count(merged_df, BodyPart, sort=TRUE)
#count(merged_df, IsDenied, sort=TRUE)
## Corregir el tipo de datos:
# Date columns: parsed with the month/day/four-digit-year format.
date_cols <- c(
  "IncidentDate", "ReturnToWorkDate", "ClaimantOpenedDate",
  "ClaimantClosedDate", "EmployerNotificationDate", "ReceivedDate"
)
merged_df[date_cols] <- lapply(
  merged_df[date_cols],
  as.Date,
  format = "%m/%d/%Y"
)

# Numeric columns. Each column is converted from itself: the earlier version
# assigned as.numeric(TotalReserves) to TotalPaid, which silently replaced the
# paid amounts with the reserves.
numeric_cols <- c(
  "AverageWeeklyWage", "ClaimantAge_at_DOI", "TotalPaid", "TotalRecovery",
  "TotalReserves", "IndemnityPaid", "OtherPaid"
)
merged_df[numeric_cols] <- lapply(merged_df[numeric_cols], as.numeric)

# IsDenied is kept as text rather than as a number.
merged_df$IsDenied <- as.character(merged_df$IsDenied)

summary(merged_df)
# NOTE(review): the summary pasted below was captured before the TotalPaid
# fix; in it TotalPaid shows exactly the same statistics as TotalReserves.
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 633915 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 810246 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.00
## Median : 856915 Median : 0 Median : 0 Median : 0.00
## Mean :12344572 Mean : 2233 Mean : 2233 Mean : 68.88
## 3rd Qu.:22716420 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62246496 Max. :2069575 Max. :2069575 Max. :130541.03
## NA's :52673 NA's :52673 NA's :52673
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -475 Min. : -7820 Length:186677 Min. :1947-02-24
## 1st Qu.: 0 1st Qu.: 58 Class :character 1st Qu.:1998-12-21
## Median : 0 Median : 230 Mode :character Median :2004-01-05
## Mean : 3061 Mean : 3685 Mean :2003-12-08
## 3rd Qu.: 0 3rd Qu.: 855 3rd Qu.:2009-02-02
## Max. :640732 Max. :4129915 Max. :2014-06-27
## NA's :52673 NA's :52673 NA's :52673
## IncidentDescription ReturnToWorkDate AverageWeeklyWage
## Length:186677 Min. :1976-10-29 Min. : 0.0
## Class :character 1st Qu.:2002-04-25 1st Qu.: 300.0
## Mode :character Median :2007-07-09 Median : 492.0
## Mean :2006-06-01 Mean : 587.3
## 3rd Qu.:2011-06-01 3rd Qu.: 660.4
## Max. :2015-05-07 Max. :2024000.0
## NA's :111310 NA's :137597
## ClaimantOpenedDate ClaimantClosedDate EmployerNotificationDate
## Min. :1947-02-24 Min. :1999-06-01 Min. :1972-09-10
## 1st Qu.:1999-02-09 1st Qu.:2005-03-31 1st Qu.:2000-03-13
## Median :2004-02-17 Median :2006-04-04 Median :2004-12-28
## Mean :2004-01-23 Mean :2007-05-24 Mean :2005-08-29
## 3rd Qu.:2009-04-09 3rd Qu.:2009-11-11 3rd Qu.:2009-11-03
## Max. :2014-06-30 Max. :2014-06-30 Max. :9999-07-21
## NA's :52673 NA's :57351 NA's :74961
## ReceivedDate IsDenied ClaimantAge_at_DOI Gender
## Min. :1947-02-24 Length:186677 Min. :-8000.00 Length:186677
## 1st Qu.:1999-02-09 Class :character 1st Qu.: 33.00 Class :character
## Median :2004-02-13 Mode :character Median : 42.00 Mode :character
## Mean :2004-07-19 Mean : 39.85
## 3rd Qu.:2009-02-27 3rd Qu.: 51.00
## Max. :9999-07-21 Max. : 94.00
## NA's :52673 NA's :97751
## ClaimantType InjuryNature BodyPartRegion BodyPart
## Length:186677 Length:186677 Length:186677 Length:186677
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BillReviewALE Hospital PhysicianOutpatient Rx
## Min. : -456.0 Min. : -12570.4 Min. : -4655.7 Min. : -469.5
## 1st Qu.: 16.0 1st Qu.: 193.9 1st Qu.: 107.6 1st Qu.: 23.3
## Median : 32.0 Median : 559.1 Median : 221.6 Median : 58.3
## Mean : 191.2 Mean : 4394.7 Mean : 1752.3 Mean : 1140.4
## 3rd Qu.: 80.0 3rd Qu.: 2253.4 3rd Qu.: 710.5 3rd Qu.: 174.5
## Max. :56475.3 Max. :2759604.0 Max. :1481468.5 Max. :631635.5
## NA's :139865 NA's :145262 NA's :84986 NA's :145752
## [1] 74961
## [1] 52673
## [1] 57351
## [1] 52673
## [1] 52673
## ClaimID TotalPaid TotalReserves
## 0 52673 52673
## TotalRecovery IndemnityPaid OtherPaid
## 52673 52673 52673
## ClaimStatus IncidentDate IncidentDescription
## 52673 52673 52673
## ReturnToWorkDate AverageWeeklyWage ClaimantOpenedDate
## 111310 137597 52673
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## 57351 74961 52673
## IsDenied ClaimantAge_at_DOI Gender
## 52673 97751 52673
## ClaimantType InjuryNature BodyPartRegion
## 52673 52673 52673
## BodyPart BillReviewALE Hospital
## 52673 139865 145262
## PhysicianOutpatient Rx
## 84986 145752
# Subset of merged_df with the cost column and the candidate predictors.
# NOTE(review): TotalIncurredCost and TimeProcesses are not created anywhere
# in the visible part of this script -- confirm where they are derived.
rldata <- merged_df %>%
  select(
    TotalIncurredCost, BodyPartRegion, BodyPart, ClaimantType, Gender,
    ClaimantAge_at_DOI, TimeProcesses, InjuryNature, IsDenied
  )
# How many NA values does each variable have?
# vapply() guarantees one integer per column, whatever the input looks like.
vapply(rldata, function(x) sum(is.na(x)), integer(1))
## TotalIncurredCost BodyPartRegion BodyPart ClaimantType
## 52673 52673 52673 52673
## Gender ClaimantAge_at_DOI TimeProcesses InjuryNature
## 52673 97751 57351 52673
## IsDenied
## 52673
# Single hypothetical claim used to try out the model.
data <- data.frame(
  BodyPartRegion = "Upper Extremities",
  BodyPart = "Hand",
  ClaimantType = "Medical Only",
  Gender = "Male",
  ClaimantAge_at_DOI = 42,
  TimeProcesses = 989.3,
  InjuryNature = "Strain",
  IsDenied = "0"
)
# NOTE(review): `regresion` is not defined in the visible part of this
# script; presumably a model fitted on rldata -- confirm it exists before
# this line runs.
predict(regresion, data)
## 1
## 1090.108
# New data sets: Gender paired with one explanatory column each.
cuerpo <- merged_df %>% select(Gender, BodyPartRegion)
denied <- merged_df %>% select(Gender, IsDenied)
# Prediction tree: Gender explained by every other column of `cuerpo`.
arbol <- rpart(formula = Gender ~ ., data = cuerpo)
rpart.plot(arbol)
# Clustering input: the two numeric variables, complete rows only, and only
# claims with a strictly positive incurred cost.
cluster <- merged_df %>%
  select(TotalIncurredCost, TimeProcesses) %>%
  na.omit() %>%
  filter(TotalIncurredCost > 0)
# Centre and scale both columns so they are comparable for k-means.
cluster <- as.data.frame(scale(cluster))
summary(cluster)
## TotalIncurredCost TimeProcesses
## Min. :-0.2087 Min. :-1.0576
## 1st Qu.:-0.2027 1st Qu.:-0.9005
## Median :-0.1951 Median :-0.2542
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1597 3rd Qu.: 0.6083
## Max. :71.4549 Max. :12.2470
# Outliers are the values outside these limits:
#   lower limit = Q1 - 1.5 * IQR
#   upper limit = Q3 + 1.5 * IQR
# Q1 / Q3: first and third quartile.
# The quartiles are computed from the data instead of being copied by hand
# from a printed summary, so the limits stay correct if the data change.
IQR_C <- IQR(cluster$TotalIncurredCost)
LI_C <- quantile(cluster$TotalIncurredCost, 0.25, names = FALSE) - 1.5 * IQR_C
LS_C <- quantile(cluster$TotalIncurredCost, 0.75, names = FALSE) + 1.5 * IQR_C
cluster <- cluster[
  cluster$TotalIncurredCost >= LI_C & cluster$TotalIncurredCost <= LS_C,
]
# Second pass on TimeProcesses, using the rows that survived the cost filter.
IQR_T <- IQR(cluster$TimeProcesses)
LI_T <- quantile(cluster$TimeProcesses, 0.25, names = FALSE) - 1.5 * IQR_T
LS_T <- quantile(cluster$TimeProcesses, 0.75, names = FALSE) + 1.5 * IQR_T
cluster <- cluster[
  cluster$TimeProcesses >= LI_T & cluster$TimeProcesses <= LS_T,
]
grupos <- 4
# NOTE(review): kmeans() picks random starting centres, so cluster labels can
# differ between runs unless a seed is set beforehand.
segmentos <- kmeans(cluster, grupos)
# Attach each row's cluster label to the scaled data.
asignación <- cbind(cluster, cluster = segmentos$cluster)
# Shiny is needed by the application defined below.
library(shiny)
# User interface: a header plus one tab per analysis.
# The select inputs are filled from the values present in merged_df.
# sort(unique(x)) is used because sort() drops NA by default; the full outer
# merge leaves NA in these columns and NA must not be offered as a choice.
ui <- fluidPage(
  # Application header
  headerPanel("Análisis de Datos de Reclamaciones"),
  # Main content
  mainPanel(
    tabsetPanel(
      tabPanel("Resumen", verbatimTextOutput("summary_output")),
      tabPanel("Gráficos", plotOutput("plots")),
      tabPanel(
        "Regresión Lineal",
        verbatimTextOutput("regression_output"),
        sidebarLayout(
          # Inputs describing the claim whose cost is to be predicted
          sidebarPanel(
            selectInput(
              "BodyPartRegion", "Región de la Parte del Cuerpo",
              choices = sort(unique(merged_df$BodyPartRegion))
            ),
            selectInput(
              "BodyPart", "Parte del Cuerpo",
              choices = sort(unique(merged_df$BodyPart))
            ),
            selectInput(
              "ClaimantType", "Tipo de Demandante",
              choices = sort(unique(merged_df$ClaimantType))
            ),
            selectInput(
              "Gender", "Género",
              choices = sort(unique(merged_df$Gender))
            ),
            sliderInput(
              "ClaimantAge_at_DOI", "Edad del Demandante en DOI",
              min = 0, max = 100, value = 0
            ),
            sliderInput(
              "TimeProcesses", "Tiempo de Procesos",
              min = 0, max = 1500, value = 0
            ),
            selectInput(
              "InjuryNature", "Naturaleza de la Lesión",
              choices = sort(unique(merged_df$InjuryNature))
            ),
            selectInput(
              "IsDenied", "Es Denegado",
              choices = sort(unique(merged_df$IsDenied))
            ),
            actionButton("predictButton", "Predecir"),
            br()
          ),
          # Result of the prediction
          mainPanel(
            h4("Total Incurred Cost Predicho:"),
            verbatimTextOutput("predictedCost")
          )
        )
      ),
      tabPanel("Predicción", verbatimTextOutput("prediction_output")),
      tabPanel("Árbol de Decisiones", plotOutput("tree_plot"))
    )
  )
)
# Server logic: fills every output declared in the UI.
# Reads merged_df, regresion and arbol from the enclosing environment.
server <- function(input, output) {
  # Summary of the merged data set
  output$summary_output <- renderPrint({
    summary(merged_df)
  })

  # Plots (placeholder: nothing is drawn yet)
  output$plots <- renderPlot({
    # Add custom plots here.
    # Example: ggplot(merged_df, aes(x = ClaimStatus)) + geom_bar()
  })

  # Linear regression tab.
  # The observer used to be created inside this renderPrint(), which
  # registered a new observer on every render and printed the observer
  # object itself.
  # NOTE(review): showing the model summary here is an assumption about the
  # intended content of this panel -- confirm.
  output$regression_output <- renderPrint({
    summary(regresion)
  })

  # Prediction for the claim described by the inputs; recomputed only when
  # the "Predecir" button is pressed.
  predicted_cost <- eventReactive(input$predictButton, {
    new_data <- data.frame(
      BodyPartRegion = input$BodyPartRegion,
      BodyPart = input$BodyPart,
      ClaimantType = input$ClaimantType,
      Gender = input$Gender,
      ClaimantAge_at_DOI = input$ClaimantAge_at_DOI,
      TimeProcesses = input$TimeProcesses,
      InjuryNature = input$InjuryNature,
      IsDenied = input$IsDenied
    )
    predict(regresion, new_data)
  })

  # Stays empty until the button has been pressed at least once.
  output$predictedCost <- renderText({
    paste("Total Incurred Cost estimado: $", round(predicted_cost(), 2))
  })

  # Prediction tab (placeholder)
  output$prediction_output <- renderPrint({
    # Run the prediction here.
    # Example: data_to_predict <- data.frame(BodyPartRegion = "Upper Extremities", BodyPart = "Hand", ClaimantType = "Medical Only", Gender = "Male", ClaimantAge_at_DOI = 42, TimeProcesses = 989.3, InjuryNature = "Strain", IsDenied = "0")
    # prediction <- predict(regresion, newdata = data_to_predict)
    # prediction
  })

  # Decision tree fitted earlier in the script
  output$tree_plot <- renderPlot({
    rpart.plot(arbol)
  })
}
# Build the Shiny application from the UI and server defined above.
shinyApp(ui = ui, server = server)