# Import the data set ----

# Tip: run file.choose() interactively to look up the path of the CSV file.
bd <- read.csv(
  file = "C:\\Users\\AVRIL\\Downloads\\seguros.csv"
)

# Understand the data set ----

# Summarise every column of bd.
summary(object = bd)
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  777632   Min.   :      0   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  800748   1st Qu.:     83   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  812128   Median :    271   Median :      0   Median :     0.00  
##  Mean   : 1864676   Mean   :  10404   Mean   :   3368   Mean   :    66.05  
##  3rd Qu.:  824726   3rd Qu.:   1122   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203364   Max.   :4527291   Max.   :1529053   Max.   :100000.00  
##                                                                            
##  IndemnityPaid      OtherPaid       TotalIncurredCost ClaimStatus       
##  Min.   :     0   Min.   :      0   Min.   : -10400   Length:31619      
##  1st Qu.:     0   1st Qu.:     80   1st Qu.:     80   Class :character  
##  Median :     0   Median :    265   Median :    266   Mode  :character  
##  Mean   :  4977   Mean   :   5427   Mean   :  13706                     
##  3rd Qu.:     0   3rd Qu.:   1023   3rd Qu.:   1098                     
##  Max.   :640732   Max.   :4129915   Max.   :4734750                     
##                                                                         
##  IncidentDate       IncidentDescription ReturnToWorkDate   ClaimantOpenedDate
##  Length:31619       Length:31619        Length:31619       Length:31619      
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:31619       Length:31619             Length:31619      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       Transaction_Time Procesing_Time     ClaimantAge_at_DOI
##  Min.   :0.00000   Min.   :    0    Min.   :    0.00   Min.   :14.0      
##  1st Qu.:0.00000   1st Qu.:  211    1st Qu.:    4.00   1st Qu.:33.0      
##  Median :0.00000   Median :  780    Median :   10.00   Median :42.0      
##  Mean   :0.04463   Mean   : 1004    Mean   :   62.99   Mean   :41.6      
##  3rd Qu.:0.00000   3rd Qu.: 1440    3rd Qu.:   24.00   3rd Qu.:50.0      
##  Max.   :1.00000   Max.   :16428    Max.   :11558.00   Max.   :94.0      
##                    NA's   :614                                           
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:31619       Length:31619       Length:31619       Length:31619      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart         AverageWeeklyWage1    ClaimID1        BillReviewALE    
##  Length:31619       Min.   : 100.0     Min.   :  777632   Min.   : -448.0  
##  Class :character   1st Qu.: 492.0     1st Qu.:  800748   1st Qu.:   16.0  
##  Mode  :character   Median : 492.0     Median :  812128   Median :   24.0  
##                     Mean   : 536.5     Mean   : 1864676   Mean   :  188.7  
##                     3rd Qu.: 492.0     3rd Qu.:  824726   3rd Qu.:   64.1  
##                     Max.   :8613.5     Max.   :62203364   Max.   :46055.3  
##                                                           NA's   :14912    
##     Hospital         PhysicianOutpatient       Rx          
##  Min.   : -12570.4   Min.   :   -549.5   Min.   :  -160.7  
##  1st Qu.:    210.5   1st Qu.:    105.8   1st Qu.:    22.9  
##  Median :    613.9   Median :    218.0   Median :    61.5  
##  Mean   :   5113.2   Mean   :   1813.2   Mean   :  1695.2  
##  3rd Qu.:   2349.1   3rd Qu.:    680.6   3rd Qu.:   189.0  
##  Max.   :2759604.0   Max.   :1219766.6   Max.   :631635.5  
##  NA's   :19655       NA's   :2329        NA's   :20730
#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#dplyr es una libreria de manipulacion de datos
#count(bd,ClaimStatus, sort=TRUE)
#count(bd,IncidentDate, sort=TRUE)
#count(bd,IncidentDescription, sort=TRUE)
#count(bd,ReturnToWorkDate, sort=TRUE)
#count(bd,ClaimantOpenedDate, sort=TRUE)
#count(bd,ClaimStatus, sort=TRUE)
#count(bd,ClaimantClosedDate, sort=TRUE)
#count(bd,EmployerNotificationDate, sort=TRUE)
#count(bd,ReceivedDate, sort=TRUE)
#count(bd,Gender, sort=TRUE)
#count(bd,ClaimantType, sort=TRUE)
#count(bd,InjuryNature, sort=TRUE)
#count(bd,BodyPartRegion, sort=TRUE)
#count(bd,BodyPart, sort=TRUE)

#Observaciones
#1. Cambiar el modelo predictivo
LS0tDQp0aXRsZTogIk1vZGVsbyBwcmVkaWN0aXZvIGRlIGNvc3RvcyBwb3Igc2VndXJvcyBkZSBsYSB1bml2ZXJzaWRhZCINCmRhdGU6ICIyMDIzLTAyLTIxIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCiMjaW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcw0KYGBge3J9DQojZmlsZS5jaG9vc2UoKQ0KYmQ8LXJlYWQuY3N2KCJDOlxcVXNlcnNcXEFWUklMXFxEb3dubG9hZHNcXHNlZ3Vyb3MuY3N2IikNCmBgYA0KI2VudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MNCmBgYHtyfQ0Kc3VtbWFyeShiZCkNCiNpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KI2RwbHlyIGVzIHVuYSBsaWJyZXJpYSBkZSBtYW5pcHVsYWNpb24gZGUgZGF0b3MNCiNjb3VudChiZCxDbGFpbVN0YXR1cywgc29ydD1UUlVFKQ0KI2NvdW50KGJkLEluY2lkZW50RGF0ZSwgc29ydD1UUlVFKQ0KI2NvdW50KGJkLEluY2lkZW50RGVzY3JpcHRpb24sIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxSZXR1cm5Ub3dvcmtEYXRlLCBzb3J0PVRSVUUpDQojY291bnQoYmQsQ2xhaW1hbnRPcGVuZWREYXRlLCBzb3J0PVRSVUUpDQojY291bnQoYmQsQ2xhaW1TdGF0dXMsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxDbGFpbUNsb3NlZERhdGUsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxFbXBsb3llck5vdGlmaWNhdGlvbkRhdGUsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxSZWNlaXZlZERhdGUsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxHZW5kZXIsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxDbGFpbWFudFR5cGUsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxJbmp1cnlOYXR1cmUsIHNvcnQ9VFJVRSkNCiNjb3VudChiZCxCb2R5UGFydFJlZ2lvbiwgc29ydD1UUlVFKQ0KI2NvdW50KGJkLEJvZHlQYXJ0LCBzb3J0PVRSVUUpDQoNCiNPYnNlcnZhY2lvbmVzDQojMS4gQ2FtYmlhciBlbCBtb2RlbG8gcHJlZGljdGl2bw0KYGBgDQoNCg==