# =============================================================================
# MODELACIÓN ESTOCÁSTICA DE PÉRDIDA AGREGADA EN SINIESTROS DE AVIACIÓN
# Fuente de datos: NTSB (National Transportation Safety Board) Aviation Accident Database
# Cómputo Científico
# Angel Albarrán · Jesus Bastida · Angel Díaz
# =============================================================================
# Carga de librerías necesarias para el análisis
library(tidyverse)      # Manipulación y visualización de datos
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)         # Lectura de archivos Excel
## Warning: package 'readxl' was built under R version 4.5.2
library(fitdistrplus)   # Ajuste de distribuciones de probabilidad
## Warning: package 'fitdistrplus' was built under R version 4.5.2
## Cargando paquete requerido: MASS
## Warning: package 'MASS' was built under R version 4.5.2
## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Cargando paquete requerido: survival
library(actuar)         # Distribuciones actuariales
## Warning: package 'actuar' was built under R version 4.5.2
## 
## Adjuntando el paquete: 'actuar'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
## 
## The following object is masked from 'package:grDevices':
## 
##     cm
library(MASS)           # Ajuste de Binomial Negativa
library(gridExtra)      # Organización de gráficos en grid
## 
## Adjuntando el paquete: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(scales)         # Escalas y formatos para gráficos
## 
## Adjuntando el paquete: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggplot2)        # Sistema avanzado de gráficos
library(patchwork)      # Combinación de gráficos ggplot2
## Warning: package 'patchwork' was built under R version 4.5.2
## 
## Adjuntando el paquete: 'patchwork'
## 
## The following object is masked from 'package:MASS':
## 
##     area
library(moments)        # Cálculo de asimetría y curtosis
## Warning: package 'moments' was built under R version 4.5.2
set.seed(2025)  # Fijar semilla para garantizar reproducibilidad de resultados
# =============================================================================
# 1. CARGA Y LIMPIEZA DE DATOS (BASE NTSB)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)\n")
##   PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)
cat("====================================================\n\n")
## ====================================================
# ---------- 1A. LECTURA DEL ARCHIVO NTSB ----------
ntsb_raw <- read_excel("NTSB.xlsx", sheet = "NTSB_Accidents")
## Warning: Expecting logical in H1618 / R1618C8: got 'AIR2507'
## Warning: Expecting logical in H1663 / R1663C8: got 'AIR2602'
## Warning: Expecting logical in H3429 / R3429C8: got 'AIR2504'
## Warning: Expecting logical in H5006 / R5006C8: got 'AIR2402'
## Warning: Expecting logical in H5076 / R5076C8: got 'AIR2401'
## Warning: Expecting logical in H5294 / R5294C8: got 'AIR2407'
## Warning: Expecting logical in H5616 / R5616C8: got 'AIR2301'
## Warning: Expecting logical in H6070 / R6070C8: got 'AIR2404'
## Warning: Expecting logical in H9772 / R9772C8: got 'AAR2101'
## Warning: Expecting logical in H9876 / R9876C8: got 'AIR2205'
## Warning: Expecting logical in H10121 / R10121C8: got 'AAR2105'
## Warning: Expecting logical in H10792 / R10792C8: got 'AAR2102'
## Warning: Expecting logical in H11017 / R11017C8: got 'AAR2104'
## Warning: Expecting logical in H11338 / R11338C8: got 'AAR2002'
## Warning: Expecting logical in H11409 / R11409C8: got 'AAR2001'
## Warning: Expecting logical in H12806 / R12806C8: got 'AAR1903'
## Warning: Expecting logical in H12937 / R12937C8: got 'AAR1904'
## Warning: Expecting logical in H14002 / R14002C8: got 'AIR1801'
## Warning: Expecting logical in H14303 / R14303C8: got 'AAR1902'
## Warning: Expecting logical in H14577 / R14577C8: got 'AAR1901'
## Warning: Expecting logical in H15023 / R15023C8: got 'AAR1801'
## Warning: Expecting logical in H15151 / R15151C8: got 'AAR1802'
## Warning: Expecting logical in H15517 / R15517C8: got 'AAR1703'
## Warning: Expecting logical in H16630 / R16630C8: got 'AAR1603'
## Warning: Expecting logical in H17310 / R17310C8: got 'AAR1701'
## Warning: Expecting logical in H17367 / R17367C8: got 'AAR1702'
## Warning: Expecting logical in H17862 / R17862C8: got 'AAR1602'
## Warning: Expecting logical in H18104 / R18104C8: got 'AAR1601'
## Warning: Expecting logical in H18204 / R18204C8: got 'AAR1502'
## Warning: Expecting logical in H19041 / R19041C8: got 'AAR1503'
## Warning: Expecting logical in H20130 / R20130C8: got 'AAR1402'
## Warning: Expecting logical in H20356 / R20356C8: got 'AAR1401'
## Warning: Expecting logical in H20723 / R20723C8: got 'AAR1501'
## Warning: Expecting logical in H20836 / R20836C8: got 'AAR1403'
## Warning: Expecting logical in H21124 / R21124C8: got 'AIR1401'
## Warning: Expecting logical in H22731 / R22731C8: got 'AAR1404'
## Warning: Expecting logical in H23043 / R23043C8: got 'AAR1301'
## Warning: Expecting logical in H23406 / R23406C8: got 'AAB1201'
## Warning: Expecting logical in H23534 / R23534C8: got 'AAR1302'
## Warning: Expecting logical in H24446 / R24446C8: got 'AAR1202'
## Warning: Expecting logical in H24835 / R24835C8: got 'AAR1201'
## Warning: Expecting logical in H25510 / R25510C8: got 'AAR1103'
## Warning: Expecting logical in H27254 / R27254C8: got 'AAR1005'
## Warning: Expecting logical in H27685 / R27685C8: got 'AAR1104'
## Warning: Expecting logical in H28096 / R28096C8: got 'AAR1105'
## Warning: Expecting logical in H28261 / R28261C8: got 'AAR1001'
## Warning: Expecting logical in H28318 / R28318C8: got 'AAR1102'
## Warning: Expecting logical in H28349 / R28349C8: got 'AAR1003'
## Warning: Expecting logical in H28447 / R28447C8: got 'AAR1004'
## Warning: Expecting logical in H28769 / R28769C8: got 'AAR0907'
## Warning: Expecting logical in H28805 / R28805C8: got 'AAR1002'
## Warning: Expecting logical in H29100 / R29100C8: got 'AAR1006'
## Warning: Expecting logical in H29154 / R29154C8: got 'AAR1101'
## Warning: Expecting logical in H29388 / R29388C8: got 'AAR0904'
## Warning: Expecting logical in H30047 / R30047C8: got 'AAR0905'
## Warning: Expecting logical in H30729 / R30729C8: got 'AAR0903'
## Warning: Expecting logical in H31147 / R31147C8: got 'AAR0902'
## Warning: Expecting logical in H31285 / R31285C8: got 'AAR0901'
## Warning: Expecting logical in H31532 / R31532C8: got 'AAR0906'
## Warning: Expecting logical in H31861 / R31861C8: got 'AAR0802'
## Warning: Expecting logical in H32100 / R32100C8: got 'AAR0801'
## Warning: Expecting logical in H32617 / R32617C8: got 'AAB0702'
## Warning: Expecting logical in H32838 / R32838C8: got 'AAR0705'
## Warning: Expecting logical in H33003 / R33003C8: got 'AAR0803'
## Warning: Expecting logical in H33955 / R33955C8: got 'AAR0707'
## Warning: Expecting logical in H34161 / R34161C8: got 'AAR0704'
## Warning: Expecting logical in H34204 / R34204C8: got 'AAR0706'
## Warning: Expecting logical in H34537 / R34537C8: got 'AAB0701'
## Warning: Expecting logical in H35915 / R35915C8: got 'AAR0702'
## Warning: Expecting logical in H35976 / R35976C8: got 'AAR0604'
## Warning: Expecting logical in H36227 / R36227C8: got 'AAB0603'
## Warning: Expecting logical in H36235 / R36235C8: got 'AAB0607'
## Warning: Expecting logical in H36247 / R36247C8: got 'AAB0606'
## Warning: Expecting logical in H36394 / R36394C8: got 'AAB0601'
## Warning: Expecting logical in H36396 / R36396C8: got 'AAB0605'
## Warning: Expecting logical in H36415 / R36415C8: got 'AAR0601'
## Warning: Expecting logical in H36456 / R36456C8: got 'AAR0701'
## Warning: Expecting logical in H36556 / R36556C8: got 'AAR0703'
## Warning: Expecting logical in H36835 / R36835C8: got 'AAR0603'
## Warning: Expecting logical in H37472 / R37472C8: got 'AAR0502'
## Warning: Expecting logical in H37702 / R37702C8: got 'AAR0602'
## Warning: Expecting logical in H38063 / R38063C8: got 'AAB0604'
## Warning: Expecting logical in H38086 / R38086C8: got 'AAR0501'
## Warning: Expecting logical in H38537 / R38537C8: got 'AAB0703'
## Warning: Expecting logical in H38991 / R38991C8: got 'AAR0403'
## Warning: Expecting logical in H40064 / R40064C8: got 'AAR0401'
## Warning: Expecting logical in H40356 / R40356C8: got 'AAR0303'
## Warning: Expecting logical in H40927 / R40927C8: got 'AAR0402'
## Warning: Expecting logical in H42280 / R42280C8: got 'AAR0404'
## Warning: Expecting logical in H43738 / R43738C8: got 'AAB0203'
## Warning: Expecting logical in H44018 / R44018C8: got 'AAR0301'
## Warning: Expecting logical in H44451 / R44451C8: got 'AAB0202'
## Warning: Expecting logical in H44492 / R44492C8: got 'AAB0401'
## Warning: Expecting logical in H45587 / R45587C8: got 'AAB0205'
## Warning: Expecting logical in H46043 / R46043C8: got 'AAB0204'
## Warning: Expecting logical in H46116 / R46116C8: got 'AAR0302'
## Warning: Expecting logical in H46199 / R46199C8: got 'AAR0201'
## Warning: Expecting logical in H46569 / R46569C8: got 'AAB0201'
## Warning: Expecting logical in H46599 / R46599C8: got 'AAB0001'
## Warning: Expecting logical in H47375 / R47375C8: got 'AAB0206'
## Warning: Expecting logical in H47703 / R47703C8: got 'AAR0102'
## Warning: Expecting logical in H50534 / R50534C8: got 'AAB0101'
## Warning: Expecting logical in H51480 / R51480C8: got 'AAR9802'
## Warning: Expecting logical in H51487 / R51487C8: got 'AAR0001'
## Warning: Expecting logical in H51540 / R51540C8: got 'AAR0002'
## Warning: Expecting logical in H52765 / R52765C8: got 'AAR9804'
## Warning: Expecting logical in H52830 / R52830C8: got 'AAR9705'
## Warning: Expecting logical in H52976 / R52976C8: got 'AAR9704'
## Warning: Expecting logical in H53111 / R53111C8: got 'AAR9703'
## Warning: Expecting logical in H53366 / R53366C8: got 'AAR9803'
## Warning: Expecting logical in H53762 / R53762C8: got 'AAR0003'
## Warning: Expecting logical in H53862 / R53862C8: got 'AAR9801'
## Warning: Expecting logical in H54287 / R54287C8: got 'AAR9706'
## Warning: Expecting logical in H54486 / R54486C8: got 'AAR9702'
## Warning: Expecting logical in H54755 / R54755C8: got 'AAR9701'
## Warning: Expecting logical in H54928 / R54928C8: got 'AAR9607'
## Warning: Expecting logical in H55004 / R55004C8: got 'AAR9604'
## Warning: Expecting logical in H55189 / R55189C8: got 'AAR9605'
## Warning: Expecting logical in H55711 / R55711C8: got 'AAR9606'
## Warning: Expecting logical in H56402 / R56402C8: got 'AAR9603'
## Warning: Expecting logical in H57039 / R57039C8: got 'AAR9506'
## Warning: Expecting logical in H57306 / R57306C8: got 'AAR9504'
## Warning: Expecting logical in H57393 / R57393C8: got 'AAR9505'
## Warning: Expecting logical in H57497 / R57497C8: got 'AAR9601'
## Warning: Expecting logical in H57808 / R57808C8: got 'AAR9901'
## Warning: Expecting logical in H58375 / R58375C8: got 'AAR9503'
## Warning: Expecting logical in H58494 / R58494C8: got 'AAR9502'
## Warning: Expecting logical in H58850 / R58850C8: got 'AAR9408'
## Warning: Expecting logical in H59189 / R59189C8: got 'AAR9501'
## Warning: Expecting logical in H59337 / R59337C8: got 'AAR9406'
## Warning: Expecting logical in H59438 / R59438C8: got 'AAR9407'
## Warning: Expecting logical in H59565 / R59565C8: got 'AAR9405'
## Warning: Expecting logical in H59732 / R59732C8: got 'AAR9403'
## Warning: Expecting logical in H60209 / R60209C8: got 'AAR9404'
## Warning: Expecting logical in H61129 / R61129C8: got 'AAR8902S'
## Warning: Expecting logical in H61199 / R61199C8: got 'AAR9308'
## Warning: Expecting logical in H61240 / R61240C8: got 'AAR9401'
## Warning: Expecting logical in H61299 / R61299C8: got 'AAR9307'
## Warning: Expecting logical in H61327 / R61327C8: got 'AAR9306'
## Warning: Expecting logical in H62345 / R62345C8: got 'AAR9305'
## Warning: Expecting logical in H62731 / R62731C8: got 'AAR9304'
## Warning: Expecting logical in H63185 / R63185C8: got 'AAR9303'
## Warning: Expecting logical in H63508 / R63508C8: got 'AAR9301'
## Warning: Expecting logical in H63676 / R63676C8: got 'AAR9302'
## Warning: Expecting logical in H64079 / R64079C8: got 'AAR9301S'
## Warning: Expecting logical in H64152 / R64152C8: got 'AAR9201S'
## Warning: Expecting logical in H64671 / R64671C8: got 'AAR9204'
## Warning: Expecting logical in H65992 / R65992C8: got 'AAR9203'
## Warning: Expecting logical in H66001 / R66001C8: got 'AAR9101S'
## Warning: Expecting logical in H66199 / R66199C8: got 'AAR0101'
## Warning: Expecting logical in H66261 / R66261C8: got 'AAR9109'
## Warning: Expecting logical in H66347 / R66347C8: got 'AAR9108'
## Warning: Expecting logical in H66596 / R66596C8: got 'AAR9105'
## Warning: Expecting logical in H68019 / R68019C8: got 'AAR9102'
## Warning: Expecting logical in H68845 / R68845C8: got 'AAR9104'
## Warning: Expecting logical in H68884 / R68884C8: got 'AAR9103'
## Warning: Expecting logical in H68998 / R68998C8: got 'AAR9106'
## Warning: Expecting logical in H69278 / R69278C8: got 'AAR9005'
## Warning: Expecting logical in H69510 / R69510C8: got 'AAR9101'
## Warning: Expecting logical in H69548 / R69548C8: got 'AAR9003'
## Warning: Expecting logical in H69646 / R69646C8: got 'AAR9004'
## Warning: Expecting logical in H70093 / R70093C8: got 'AAR9006'
## Warning: Expecting logical in H71063 / R71063C8: got 'AAR9002'
## Warning: Expecting logical in H71199 / R71199C8: got 'AAR9202'
## Warning: Expecting logical in H72263 / R72263C8: got 'AAR8904'
## Warning: Expecting logical in H73425 / R73425C8: got 'AAR8903'
## Warning: Expecting logical in H73508 / R73508C8: got 'AAR8902'
## Warning: Expecting logical in H73881 / R73881C8: got 'AAR8810'
## Warning: Expecting logical in H73978 / R73978C8: got 'HZM8802'
## Warning: Expecting logical in H74052 / R74052C8: got 'AAR8901'
## Warning: Expecting logical in H74195 / R74195C8: got 'AAR8903'
## Warning: Expecting logical in H74347 / R74347C8: got 'AAR8811'
## Warning: Expecting logical in H74388 / R74388C8: got 'AAR8809'
## Warning: Expecting logical in H75111 / R75111C8: got 'AAR8805'
## Warning: Expecting logical in H75926 / R75926C8: got 'AAR8806'
## Warning: Expecting logical in H76101 / R76101C8: got 'AAR8802'
## Warning: Expecting logical in H76390 / R76390C8: got 'AAR8709'
## Warning: Expecting logical in H76585 / R76585C8: got 'AAR8808'
## Warning: Expecting logical in H76842 / R76842C8: got 'AAR8801'
## Warning: Expecting logical in H76867 / R76867C8: got 'AAR8803'
## Warning: Expecting logical in H77198 / R77198C8: got 'AAR8704S'
## Warning: Expecting logical in H77247 / R77247C8: got 'AAR8706'
## Warning: Expecting logical in H77322 / R77322C8: got 'AAR8708'
## Warning: Expecting logical in H77461 / R77461C8: got 'AAR8704'
## Warning: Expecting logical in H77720 / R77720C8: got 'AAR8707'
## Warning: Expecting logical in H77995 / R77995C8: got 'AAR8804'
## Warning: Expecting logical in H78475 / R78475C8: got 'AAR8703'
## Warning: Expecting logical in H79325 / R79325C8: got 'AAR8702'
## Warning: Expecting logical in H79460 / R79460C8: got 'AAR8802'
## Warning: Expecting logical in H80027 / R80027C8: got 'AAR8705'
## Warning: Expecting logical in H80383 / R80383C8: got 'AAR8607'
## Warning: Expecting logical in H80545 / R80545C8: got 'AAR8701'
## Warning: Expecting logical in H80665 / R80665C8: got 'AAR8606'
## Warning: Expecting logical in H80924 / R80924C8: got 'AAR8605'
## Warning: Expecting logical in H82517 / R82517C8: got 'AAR8603'
## Warning: Expecting logical in H82990 / R82990C8: got 'AAR8604'
## Warning: Expecting logical in H83890 / R83890C8: got 'AAR8507'
## Warning: Expecting logical in H84167 / R84167C8: got 'AAR8508'
## Warning: Expecting logical in H84891 / R84891C8: got 'AAR8502'
## Warning: Expecting logical in H84947 / R84947C8: got 'AAR8501'
## Warning: Expecting logical in H85793 / R85793C8: got 'AAR8415'
## Warning: Expecting logical in H86137 / R86137C8: got 'AAR8412'
## Warning: Expecting logical in H86281 / R86281C8: got 'AAR8410'
## Warning: Expecting logical in H86479 / R86479C8: got 'AAR8413'
## Warning: Expecting logical in H86710 / R86710C8: got 'AAR8408'
## Warning: Expecting logical in H87385 / R87385C8: got 'AAR8406'
## Warning: Expecting logical in H87443 / R87443C8: got 'AAR8405'
## Warning: Expecting logical in H88452 / R88452C8: got 'AAR8602'
## Warning: Expecting logical in H88756 / R88756C8: got 'AAR8404'
## Warning: Expecting logical in H89090 / R89090C8: got 'AAR8411'
## Warning: Expecting logical in H89138 / R89138C8: got 'AAR8401'
## Warning: Expecting logical in H89434 / R89434C8: got 'AAR8403'
## Warning: Expecting logical in H89654 / R89654C8: got 'AAR8307'
## Warning: Expecting logical in H89660 / R89660C8: got 'AAR8308'
## Warning: Expecting logical in H89663 / R89663C8: got 'AAR8407'
## Warning: Expecting logical in H89711 / R89711C8: got 'AAR8305'
## Warning: Expecting logical in H90012 / R90012C8: got 'AAR8303'
## Warning: Expecting logical in H90035 / R90035C8: got 'AAR8304'
## Warning: Expecting logical in H91240 / R91240C8: got 'AAR8414'
## Warning: Expecting logical in H92080 / R92080C8: got 'AAR8216'
## Warning: Expecting logical in H92203 / R92203C8: got 'AAR8301'
## Warning: Expecting logical in H92906 / R92906C8: got 'AAR8207'
## Warning: Expecting logical in H92955 / R92955C8: got 'AAR8214'
## Warning: Expecting logical in H93245 / R93245C8: got 'AAR8211'
## Warning: Expecting logical in H93253 / R93253C8: got 'AAR8212'
## Warning: Expecting logical in H93284 / R93284C8: got 'AAR8213'
## Warning: Expecting logical in H94000 / R94000C8: got 'AAR8204'
## Warning: Expecting logical in H94074 / R94074C8: got 'AAR8206'
## Warning: Expecting logical in H94086 / R94086C8: got 'AAR8205'
## Warning: Expecting logical in H94116 / R94116C8: got 'AAR8201'
## Warning: Expecting logical in H95201 / R95201C8: got 'AAR8117'
## Warning: Expecting logical in H96181 / R96181C8: got 'AAR8118'
## Warning: Expecting logical in H96299 / R96299C8: got 'AAR8114'
## Warning: Expecting logical in H96894 / R96894C8: got 'AAR8110'
## Warning: Expecting logical in H96979 / R96979C8: got 'AAR8109'
## Warning: Expecting logical in H96984 / R96984C8: got 'AAR8111'
## Warning: Expecting logical in H97419 / R97419C8: got 'AAR8107'
## Warning: Expecting logical in H98798 / R98798C8: got 'AAR8101'
## Warning: Expecting logical in H98840 / R98840C8: got 'AAR8102'
## Warning: Expecting logical in H99281 / R99281C8: got 'AAR8014'
## Warning: Expecting logical in H99423 / R99423C8: got 'AAR8015'
## Warning: Expecting logical in H99851 / R99851C8: got 'AAR8012'
## Warning: Expecting logical in H99922 / R99922C8: got 'AAR8202'
## Warning: Expecting logical in H100337 / R100337C8: got 'AAR8104'
## Warning: Expecting logical in H100780 / R100780C8: got 'AAR8210'
## Warning: Expecting logical in H100822 / R100822C8: got 'AAR8106'
## Warning: Expecting logical in H101474 / R101474C8: got 'AAR8010'
## Warning: Expecting logical in H101788 / R101788C8: got 'AAR8008'
## Warning: Expecting logical in H102815 / R102815C8: got 'AAR8003'
## Warning: Expecting logical in H103348 / R103348C8: got 'AAR8001'
## Warning: Expecting logical in H103685 / R103685C8: got 'AAR7917'
## Warning: Expecting logical in H104139 / R104139C8: got 'AAR7914'
## Warning: Expecting logical in H104288 / R104288C8: got 'AAR8108'
## Warning: Expecting logical in H104543 / R104543C8: got 'AAR7913'
## Warning: Expecting logical in H104671 / R104671C8: got 'AAR7910'
## Warning: Expecting logical in H104800 / R104800C8: got 'AAR7911'
## Warning: Expecting logical in H105040 / R105040C8: got 'AAR8004'
## Warning: Expecting logical in H105211 / R105211C8: got 'AAR7907'
## Warning: Expecting logical in H105451 / R105451C8: got 'AAR7906'
## Warning: Expecting logical in H105454 / R105454C8: got 'AAR7918'
## Warning: Expecting logical in H106163 / R106163C8: got 'AAR7905'
## Warning: Expecting logical in H106520 / R106520C8: got 'AAR7909'
## Warning: Expecting logical in H107177 / R107177C8: got 'AAR7904'
## Warning: Expecting logical in H107438 / R107438C8: got 'AAR7902'
## Warning: Expecting logical in H108311 / R108311C8: got 'AAR7814'
## Warning: Expecting logical in H108451 / R108451C8: got 'AAR7813'
## Warning: Expecting logical in H109458 / R109458C8: got 'AAR7815'
## Warning: Expecting logical in H109888 / R109888C8: got 'AAR7808'
## Warning: Expecting logical in H109947 / R109947C8: got 'AAR7810'
## Warning: Expecting logical in H110446 / R110446C8: got 'AAR7806'
## Warning: Expecting logical in H110926 / R110926C8: got 'AAR7915'
## Warning: Expecting logical in H110953 / R110953C8: got 'AAR7805'
## Warning: Expecting logical in H112489 / R112489C8: got 'AAR7809'
## Warning: Expecting logical in H113261 / R113261C8: got 'AAR7803'
## Warning: Expecting logical in H115062 / R115062C8: got 'AAR7804'
## Warning: Expecting logical in H115221 / R115221C8: got 'AAR7705'
## Warning: Expecting logical in H115837 / R115837C8: got 'AAR7704'
## Warning: Expecting logical in H116503 / R116503C8: got 'AAR7802'
## Warning: Expecting logical in H117275 / R117275C8: got 'AAR7701'
## Warning: Expecting logical in H117545 / R117545C8: got 'AAR7620'
## Warning: Expecting logical in H117608 / R117608C8: got 'AAR7618'
## Warning: Expecting logical in H118183 / R118183C8: got 'AAR7617'
## Warning: Expecting logical in H118783 / R118783C8: got 'AAR7603'
## Warning: Expecting logical in H118896 / R118896C8: got 'AAR8306'
## Warning: Expecting logical in H119792 / R119792C8: got 'AAR7601'
## Warning: Expecting logical in H120158 / R120158C8: got 'AAR7614'
## Warning: Expecting logical in H120588 / R120588C8: got 'AAR7613'
## Warning: Expecting logical in H120871 / R120871C8: got 'AAR7608'
## Warning: Expecting logical in H121565 / R121565C8: got 'AAR7610'
## Warning: Expecting logical in H122026 / R122026C8: got 'AAR7515'
## Warning: Expecting logical in H122063 / R122063C8: got 'AAR7511'
## Warning: Expecting logical in H122810 / R122810C8: got 'AAR7605'
## Warning: Expecting logical in H122834 / R122834C8: got 'AAR7510'
## Warning: Expecting logical in H123183 / R123183C8: got 'AAR7513'
## Warning: Expecting logical in H123345 / R123345C8: got 'AAR7512'
## Warning: Expecting logical in H123697 / R123697C8: got 'AAR7506'
## Warning: Expecting logical in H124381 / R124381C8: got 'AAR8005'
## Warning: Expecting logical in H125093 / R125093C8: got 'AAR7409'
## Warning: Expecting logical in H125315 / R125315C8: got 'AAR7508'
## Warning: Expecting logical in H126783 / R126783C8: got 'AAR7501'
## Warning: Expecting logical in H127167 / R127167C8: got 'AAR7416'
## Warning: Expecting logical in H127321 / R127321C8: got 'AAR7410'
## Warning: Expecting logical in H127622 / R127622C8: got 'AAR7414'
## Warning: Expecting logical in H127633 / R127633C8: got 'AAR7411'
## Warning: Expecting logical in H127777 / R127777C8: got 'AAR7413'
## Warning: Expecting logical in H128010 / R128010C8: got 'AAR7502'
## Warning: Expecting logical in H128425 / R128425C8: got 'AAR7404'
## Warning: Expecting logical in H128683 / R128683C8: got 'AAR7406'
## Warning: Expecting logical in H128846 / R128846C8: got 'AAR7408'
## Warning: Expecting logical in H129436 / R129436C8: got 'AAR7405'
## Warning: Expecting logical in H130930 / R130930C8: got 'AAR7317'
## Warning: Expecting logical in H131387 / R131387C8: got 'AAR7312'
## Warning: Expecting logical in H131897 / R131897C8: got 'AAR7314'
## Warning: Expecting logical in H132753 / R132753C8: got 'AAR7306'
## Warning: Expecting logical in H133321 / R133321C8: got 'AAR7305'
## Warning: Expecting logical in H134342 / R134342C8: got 'AAR7302'
## Warning: Expecting logical in H134710 / R134710C8: got 'AAR7232'
## Warning: Expecting logical in H134946 / R134946C8: got 'AAR7229'
## Warning: Expecting logical in H135016 / R135016C8: got 'AAR7230'
## Warning: Expecting logical in H135655 / R135655C8: got 'AAR7308'
## Warning: Expecting logical in H135789 / R135789C8: got 'AAR7225'
## Warning: Expecting logical in H136129 / R136129C8: got 'AAR7224'
## Warning: Expecting logical in H136274 / R136274C8: got 'AAR7221'
## Warning: Expecting logical in H136402 / R136402C8: got 'AAR7222'
## Warning: Expecting logical in H136536 / R136536C8: got 'AAR7213'
## Warning: Expecting logical in H136971 / R136971C8: got 'AAR7215'
## Warning: Expecting logical in H137585 / R137585C8: got 'AAR7228'
## Warning: Expecting logical in H138874 / R138874C8: got 'AAR7204'
## Warning: Expecting logical in H139085 / R139085C8: got 'AAR7207'
## Warning: Expecting logical in H139161 / R139161C8: got 'AAR7226'
## Warning: Expecting logical in H139532 / R139532C8: got 'AAR7116'
## Warning: Expecting logical in H139638 / R139638C8: got 'AAR7219'
## Warning: Expecting logical in H140131 / R140131C8: got 'AAR7218'
## Warning: Expecting logical in H140620 / R140620C8: got 'AAR7114'
## Warning: Expecting logical in H141045 / R141045C8: got 'AAR7216'
## Warning: Expecting logical in H141494 / R141494C8: got 'AAR7212'
## Warning: Expecting logical in H141647 / R141647C8: got 'AAR7211'
## Warning: Expecting logical in H141761 / R141761C8: got 'AAR7214'
## Warning: Expecting logical in H142320 / R142320C8: got 'AAR7107'
## Warning: Expecting logical in H142840 / R142840C8: got 'AAR7026'
## Warning: Expecting logical in H143236 / R143236C8: got 'AAR7210'
## Warning: Expecting logical in H143374 / R143374C8: got 'AAR7209'
## Warning: Expecting logical in H143696 / R143696C8: got 'AAR7106'
## Warning: Expecting logical in H144239 / R144239C8: got 'AAR7025'
## Warning: Expecting logical in H144649 / R144649C8: got 'AAR7108'
## Warning: Expecting logical in H145584 / R145584C8: got 'AAR7101'
## Warning: Expecting logical in H145925 / R145925C8: got 'AAR7028'
## Warning: Expecting logical in H146203 / R146203C8: got 'AAR7019'
## Warning: Expecting logical in H146589 / R146589C8: got 'AAR7021'
## Warning: Expecting logical in H146645 / R146645C8: got 'AAR7022'
## Warning: Expecting logical in H146796 / R146796C8: got 'AAR7024'
## Warning: Expecting logical in H147259 / R147259C8: got 'AAR7015'
## Warning: Expecting logical in H147763 / R147763C8: got 'AAR7023'
## Warning: Expecting logical in H148412 / R148412C8: got 'AAR7018'
## Warning: Expecting logical in H148661 / R148661C8: got 'AAR7011'
## Warning: Expecting logical in H148672 / R148672C8: got 'AAR7017'
## Warning: Expecting logical in H149830 / R149830C8: got 'AAR7013'
## Warning: Expecting logical in H150149 / R150149C8: got 'AAR7003'
## Warning: Expecting logical in H150346 / R150346C8: got 'AAR7009'
## Warning: Expecting logical in H150855 / R150855C8: got 'AAR7006'
## Warning: Expecting logical in H151107 / R151107C8: got 'AAR7020'
## Warning: Expecting logical in H151109 / R151109C8: got 'AAR7027'
## Warning: Expecting logical in H151120 / R151120C8: got 'AAR6908'
## Warning: Expecting logical in H151791 / R151791C8: got 'AAR7007'
## Warning: Expecting logical in H152859 / R152859C8: got 'AAR6907'
## Warning: Expecting logical in H179305 / R179305C8: got 'SA091'
## Warning: Expecting logical in H179306 / R179306C8: got 'SA097'
## Warning: Expecting logical in H179307 / R179307C8: got 'SA096'
## Warning: Expecting logical in H179308 / R179308C8: got 'AIR2505'
## Warning: Expecting logical in H179309 / R179309C8: got 'AIR2506'
## Warning: Expecting logical in H179310 / R179310C8: got 'AIR2203'
## Warning: Expecting logical in H179311 / R179311C8: got 'AIR2209'
## Warning: Expecting logical in H179312 / R179312C8: got 'SA084'
## Warning: Expecting logical in H179313 / R179313C8: got 'AIR2202'
## Warning: Expecting logical in H179314 / R179314C8: got 'AIR2206'
## Warning: Expecting logical in H179315 / R179315C8: got 'AIR2201'
## Warning: Expecting logical in H179316 / R179316C8: got 'AIR2202'
## Warning: Expecting logical in H179317 / R179317C8: got 'AIR2204'
## Warning: Expecting logical in H179318 / R179318C8: got 'AIR2601'
## Warning: Expecting logical in H179319 / R179319C8: got 'AIR2207'
## Warning: Expecting logical in H179320 / R179320C8: got 'AIR2403'
## Warning: Expecting logical in H179321 / R179321C8: got 'SA086'
## Warning: Expecting logical in H179322 / R179322C8: got 'SA090'
## Warning: Expecting logical in H179323 / R179323C8: got 'SA088'
## Warning: Expecting logical in H179324 / R179324C8: got 'SA092'
cat("Dimensiones originales (NTSB):", nrow(ntsb_raw), "x", ncol(ntsb_raw), "\n") # Mostrar dimensiones originales del dataset
## Dimensiones originales (NTSB): 179323 x 37
cat("Variables:\n")
## Variables:
cat(paste(" -", names(ntsb_raw), collapse = "\n"), "\n\n") # Mostrar nombres de todas las variables
##  - NtsbNo
##  - EventType
##  - Mkey
##  - EventDate
##  - City
##  - State
##  - Country
##  - ReportNo
##  - N
##  - HasSafetyRec
##  - ReportType
##  - OriginalPublishDate
##  - HighestInjuryLevel
##  - FatalInjuryCount
##  - SeriousInjuryCount
##  - MinorInjuryCount
##  - ProbableCause
##  - EventID
##  - Latitude
##  - Longitude
##  - Make
##  - Model
##  - AirCraftCategory
##  - AirportID
##  - AirportName
##  - AmateurBuilt
##  - NumberOfEngines
##  - Scheduled
##  - PurposeOfFlight
##  - FAR
##  - AirCraftDamage
##  - WeatherCondition
##  - Operator
##  - ReportStatus
##  - RepGenFlag
##  - DocketUrl
##  - DocketPublishDate
# ---------- 1B. LIMPIEZA GENERAL ----------
ntsb_limpio <- ntsb_raw %>% # Aplicar transformaciones de limpieza y creación de variables
  # Descomposicion de la variable fecha: extraer fecha, año y mes
  mutate(
    EventDate = as.Date(substr(EventDate, 1, 10)),  # Convertir a fecha (primeros 10 caracteres)
    Anio      = as.integer(format(EventDate, "%Y")), # Extraer año como entero
    Mes       = as.integer(format(EventDate, "%m"))   # Extraer mes como entero
  ) %>%
  filter(EventType == "ACC") %>% # Solo accidentes confirmados (excluir incidentes)
  filter(!is.na(EventDate)) %>% # Eliminar registros sin fecha válida
  filter(Anio >= 2015, Anio <= 2024) %>% # Delimitación temporal: últimos 10 años completos (2015-2024)
  # Limpiar variable de daño: tomar la primera aeronave listada
  mutate(
    DanoPrimario = str_split_fixed(replace_na(AirCraftDamage, "Unknown"), ",", 2)[, 1],  # Extraer primer daño
    DanoPrimario = case_when(                                                             # Clasificar daño
      DanoPrimario %in% c("Destroyed", "Substantial") ~ DanoPrimario,
      DanoPrimario == "Minor"                          ~ "Minor",
      TRUE                                             ~ "Unknown/None"
    )
  ) %>%
  # Limpiar categoría de aeronave (tomar primera si hay múltiples)
  mutate(
    Categoria = str_split_fixed(replace_na(AirCraftCategory, "AIR"), ",", 2)[, 1],  # Extraer primera categoría
    Categoria = case_when(                                                          # Renombrar categorías
      Categoria == "AIR"  ~ "Ala Fija",
      Categoria == "HELI" ~ "Helicóptero",
      Categoria == "GLI"  ~ "Planeador",
      Categoria == "BALL" ~ "Globo",
      TRUE                ~ "Otro"
    )
  ) %>%
  # Condición meteorológica limpia (unificar VMC/VFR e IMC/IFR)
  mutate(
    Meteorologia = case_when(
      WeatherCondition %in% c("VMC", "VFR") ~ "VMC/VFR",   # Condiciones visuales
      WeatherCondition %in% c("IMC", "IFR") ~ "IMC/IFR",   # Condiciones instrumentales
      TRUE                                   ~ "Desconocido"
    )
  ) %>%
  # Variable de lesiones totales (proxy de magnitud humana del siniestro)
  mutate(
    LesionesTotal = replace_na(FatalInjuryCount, 0) +      # Sumar víctimas fatales
      replace_na(SeriousInjuryCount, 0) +                  # + lesiones graves
      replace_na(MinorInjuryCount, 0),                     # + lesiones leves
    TieneFatales  = FatalInjuryCount > 0                   # Indicador de fatalidad
  ) %>%
  # Clasificar tipo de operación según FAR Part
  mutate(
    TipoOperacion = case_when(
      FAR == 121  ~ "Comercial (Pt.121)",   # Aerolíneas comerciales
      FAR == 135  ~ "Taxi Aéreo (Pt.135)",  # Operaciones de taxi aéreo
      FAR == 91   ~ "Aviación General (Pt.91)", # Aviación privada/recreativa
      TRUE        ~ "Otro/Desconocido"
    )
  )
cat("Registros tras limpieza (2015-2024, solo ACC):", nrow(ntsb_limpio), "\n\n") # Mostrar número de registros después de la limpieza
## Registros tras limpieza (2015-2024, solo ACC): 14945
# Resumen de eliminaciones
cat("Desglose de filtrado:\n")
## Desglose de filtrado:
cat("  - Registros originales:              ", nrow(ntsb_raw), "\n")
##   - Registros originales:               179323
cat("  - No accidentes (INC/OCC) excluidos:", nrow(ntsb_raw) - nrow(filter(ntsb_raw, EventType == "ACC")), "\n")
##   - No accidentes (INC/OCC) excluidos: 5861
cat("  - Sin fecha excluidos:               ",
    nrow(filter(ntsb_raw, EventType == "ACC")) -
      nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))), "\n")
##   - Sin fecha excluidos:                0
cat("  - Fuera del período 2015-2024:       ",
    nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))) - nrow(ntsb_limpio), "\n")
##   - Fuera del período 2015-2024:        158517
cat("  - Registros finales para análisis:   ", nrow(ntsb_limpio), "\n\n")
##   - Registros finales para análisis:    14945
# =============================================================================
# 2. BASE DE FRECUENCIA
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 2: BASE DE FRECUENCIA\n")
##   PASO 2: BASE DE FRECUENCIA
cat("====================================================\n\n")
## ====================================================
# ---------- 2A. CONSTRUCCIÓN ----------
# Variables relevantes para frecuencia: conteo de eventos por período
# Conteo ANUAL (referencia y correlación)
freq_anual <- ntsb_limpio %>%
  group_by(Anio) %>%                                    # Agrupar por año
  summarise(
    N_Accidentes = n(),                                 # Total accidentes por año
    N_Fatales    = sum(TieneFatales, na.rm = TRUE),     # Accidentes con fatales
    Pct_Fatales  = round(100 * N_Fatales / N_Accidentes, 1), # Porcentaje fatal
    .groups = "drop"                                    # Desagrupar resultado
  )
# Conteo MENSUAL para ajuste estadístico (más observaciones = mejor ajuste)
freq_mensual <- ntsb_limpio %>%
  group_by(Anio, Mes) %>%                               # Agrupar por año y mes
  summarise(N_Accidentes = n(), .groups = "drop")       # Contar accidentes por mes
# Asegurar que todos los meses estén representados (incluso si N=0)
todos_meses <- expand.grid(Anio = 2015:2024, Mes = 1:12)  # Crear grid completo de meses
freq_mensual <- todos_meses %>%
  left_join(freq_mensual, by = c("Anio", "Mes")) %>%     # Unir con datos reales
  mutate(N_Accidentes = replace_na(N_Accidentes, 0))     # Reemplazar NA con 0
N_frecuencia <- freq_mensual$N_Accidentes  # Vector para ajuste estadístico
# Mostrar estadísticas descriptivas de frecuencia mensual
cat("Estadísticas de Frecuencia mensual:\n")
## Estadísticas de Frecuencia mensual:
cat(sprintf("  Media:                    %.2f accidentes/mes\n", mean(N_frecuencia)))
##   Media:                    124.54 accidentes/mes
cat(sprintf("  Varianza:                 %.2f\n", var(N_frecuencia)))
##   Varianza:                 1299.34
cat(sprintf("  Índice de dispersión:     %.3f  (Var/Media)\n", var(N_frecuencia)/mean(N_frecuencia)))
##   Índice de dispersión:     10.433  (Var/Media)
cat(sprintf("  → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson\n\n"))
##   → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson
# Distribución por categorías relevantes
cat("Distribución por tipo de operación:\n")
## Distribución por tipo de operación:
print(ntsb_limpio %>% count(TipoOperacion, sort = TRUE))
## # A tibble: 4 × 2
##   TipoOperacion                n
##   <chr>                    <int>
## 1 Aviación General (Pt.91) 10947
## 2 Otro/Desconocido          3318
## 3 Taxi Aéreo (Pt.135)        416
## 4 Comercial (Pt.121)         264
cat("\nDistribución por condición meteorológica:\n")
## 
## Distribución por condición meteorológica:
print(ntsb_limpio %>% count(Meteorologia, sort = TRUE))
## # A tibble: 3 × 2
##   Meteorologia     n
##   <chr>        <int>
## 1 VMC/VFR      12032
## 2 Desconocido   2373
## 3 IMC/IFR        540
cat("\nDistribución por categoría de aeronave:\n")
## 
## Distribución por categoría de aeronave:
print(ntsb_limpio %>% count(Categoria, sort = TRUE))
## # A tibble: 5 × 2
##   Categoria       n
##   <chr>       <int>
## 1 Ala Fija    12674
## 2 Helicóptero  1683
## 3 Otro          278
## 4 Planeador     205
## 5 Globo         105
# =============================================================================
# 3. BASE DE SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 3: BASE DE SEVERIDAD\n")
##   PASO 3: BASE DE SEVERIDAD
cat("====================================================\n\n")
## ====================================================
# ---------- 3A. CONSTRUCCIÓN ----------
# Proxy de severidad: lesiones totales ponderadas por tipo
# Ponderación actuarial: Fatal=10, Serio=3, Menor=1
# (refleja el costo relativo humano/económico de cada tipo de lesión)
sev_limpio <- ntsb_limpio %>%
  mutate(
    # Índice de severidad ponderado (proxy de costo humano)
    IndSev = 10 * replace_na(FatalInjuryCount, 0) +      # Peso 10 para fatales
      3 * replace_na(SeriousInjuryCount, 0) +            # Peso 3 para graves
      1 * replace_na(MinorInjuryCount, 0),               # Peso 1 para leves
    # Variable binaria de siniestro catastrófico (>=5 víctimas fatales)
    EsCatastrofico = FatalInjuryCount >= 5
  ) %>%
  # Para modelar severidad: solo eventos con alguna víctima (IndSev > 0)
  filter(IndSev > 0)
cat("─── Variables de la Base de Severidad ───\n")
## ─── Variables de la Base de Severidad ───
cat(" Unidad de análisis:  Accidente individual\n")
##  Unidad de análisis:  Accidente individual
cat(" Período:             2015-2024\n")
##  Período:             2015-2024
cat(" Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)\n")
##  Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)
cat(" Variable respuesta:  IndSev = 10·Fatales + 3·Serios + 1·Menores\n")
##  Variable respuesta:  IndSev = 10·Fatales + 3·Serios + 1·Menores
cat("                      (índice actuarial ponderado por gravedad)\n\n")
##                       (índice actuarial ponderado por gravedad)
cat("Registros válidos para severidad:", nrow(sev_limpio), "\n")
## Registros válidos para severidad: 7155
cat("  (excluidos sin víctimas:", nrow(ntsb_limpio) - nrow(sev_limpio), ")\n\n")
##   (excluidos sin víctimas: 7790 )
X_severidad <- sev_limpio$IndSev
cat("Estadísticas de Severidad (IndSev):\n")
## Estadísticas de Severidad (IndSev):
cat(sprintf("  Media:      %.2f\n", mean(X_severidad)))
##   Media:      13.32
cat(sprintf("  Mediana:    %.2f\n", median(X_severidad)))
##   Mediana:    6.00
cat(sprintf("  Desv.Est:   %.2f\n", sd(X_severidad)))
##   Desv.Est:   61.54
cat(sprintf("  Sesgo:      %.3f\n", moments::skewness(X_severidad)))
##   Sesgo:      24.862
cat(sprintf("  Curtosis:   %.3f\n", moments::kurtosis(X_severidad)))
##   Curtosis:   709.275
cat(sprintf("  Mínimo:     %.0f\n", min(X_severidad)))
##   Mínimo:     1
cat(sprintf("  Máximo:     %.0f\n", max(X_severidad)))
##   Máximo:     2240
cat(sprintf("  CV:         %.1f%%\n\n", 100 * sd(X_severidad) / mean(X_severidad)))
##   CV:         462.1%
cat("Percentiles clave:\n")
## Percentiles clave:
pcts <- quantile(X_severidad, c(0.50, 0.75, 0.90, 0.95, 0.99, 0.999))
for (nm in names(pcts)) cat(sprintf("  P%-5s = %.1f\n", nm, pcts[nm]))
##   P50%   = 6.0
##   P75%   = 13.0
##   P90%   = 23.0
##   P95%   = 40.0
##   P99%   = 70.0
##   P99.9% = 1100.4
cat("Severidad por tipo de daño a la aeronave:\n")
## Severidad por tipo de daño a la aeronave:
print(sev_limpio %>%
        group_by(DanoPrimario) %>%
        summarise(N = n(), Media_IndSev = round(mean(IndSev), 2),
                  Mediana = median(IndSev), .groups = "drop") %>%
        arrange(desc(Media_IndSev)))
## # A tibble: 4 × 4
##   DanoPrimario     N Media_IndSev Mediana
##   <chr>        <int>        <dbl>   <dbl>
## 1 Destroyed     1977        27.8       13
## 2 Unknown/None   392        13.6        3
## 3 Minor           69         7.42       3
## 4 Substantial   4717         7.32       3
# =============================================================================
# 4. AJUSTE DE DISTRIBUCIONES
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 4: AJUSTE DE DISTRIBUCIONES\n")
##   PASO 4: AJUSTE DE DISTRIBUCIONES
cat("====================================================\n\n")
## ====================================================
# ---------- 4A. FRECUENCIA ----------
cat("─── 4A. AJUSTE DE FRECUENCIA (mensual) ───\n\n")
## ─── 4A. AJUSTE DE FRECUENCIA (mensual) ───
# Ajustar distribución Poisson a los datos de frecuencia mensual
ajuste_pois <- fitdist(N_frecuencia, "pois")           # Estimación por máxima verosimilitud
lambda_est  <- ajuste_pois$estimate["lambda"]          # Extraer parámetro lambda estimado
# Ajustar distribución Binomial Negativa a los datos
ajuste_nb   <- fitdist(N_frecuencia, "nbinom")         # Estimación por máxima verosimilitud
size_est    <- ajuste_nb$estimate["size"]              # Extraer parámetro size (r)
mu_est      <- ajuste_nb$estimate["mu"]                # Extraer parámetro mu (media)
# Mostrar resultados del ajuste Poisson
cat("  Poisson:\n")
##   Poisson:
cat(sprintf("    λ estimado = %.4f accidentes/mes\n", lambda_est))
##     λ estimado = 124.5417 accidentes/mes
cat(sprintf("    AIC = %.2f  |  BIC = %.2f\n\n", ajuste_pois$aic, ajuste_pois$bic))
##     AIC = 2020.71  |  BIC = 2023.50
# Mostrar resultados del ajuste Binomial Negativa
cat("  Binomial Negativa:\n")
##   Binomial Negativa:
cat(sprintf("    r (size) = %.4f\n", size_est))
##     r (size) = 13.5486
cat(sprintf("    μ (mu)   = %.4f\n", mu_est))
##     μ (mu)   = 124.5334
cat(sprintf("    AIC = %.2f  |  BIC = %.2f\n\n", ajuste_nb$aic, ajuste_nb$bic))
##     AIC = 1195.71  |  BIC = 1201.28
# Prueba Chi-Cuadrada para evaluar bondad de ajuste de Poisson
val_unicos  <- sort(unique(N_frecuencia))              # Valores únicos observados
obs_chi     <- table(N_frecuencia)                     # Frecuencias observadas
exp_pois_v  <- length(N_frecuencia) * dpois(as.integer(names(obs_chi)), lambda_est)  # Esperadas
validos_chi <- exp_pois_v >= 5                         # Celdas con frecuencia esperada >=5
chi2_stat   <- sum((as.numeric(obs_chi[validos_chi]) - exp_pois_v[validos_chi])^2 /
                     exp_pois_v[validos_chi])          # Estadístico chi-cuadrado
gl_chi2     <- sum(validos_chi) - 1 - 1                # Grados de libertad
p_chi2_pois <- pchisq(chi2_stat, df = max(gl_chi2, 1), lower.tail = FALSE)  # Valor p
# Mostrar resultados de prueba chi-cuadrado
cat(sprintf("  Prueba χ² Poisson: χ²=%.3f, gl=%d, p=%.4f\n", chi2_stat, gl_chi2, p_chi2_pois))
##   Prueba χ² Poisson: χ²=0.000, gl=-2, p=1.0000
if (p_chi2_pois > 0.05) {
  cat("  → No se rechaza Poisson (p > 0.05)\n\n")
} else {
  cat("  → Se rechaza Poisson (p ≤ 0.05) → Binomial Negativa es preferida\n\n")
}
##   → No se rechaza Poisson (p > 0.05)
# Seleccionar mejor modelo de frecuencia según AIC
if (ajuste_nb$aic < ajuste_pois$aic) {
  modelo_freq <- "nbinom"
  cat("  ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)\n\n")
} else {
  modelo_freq <- "pois"
  cat("  ✔ MODELO SELECCIONADO (Frecuencia): Poisson (menor AIC)\n\n")
}
##   ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)
# ---------- 4B. SEVERIDAD ----------
cat("─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───\n\n")
## ─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───
# Candidatos de cola pesada (adecuados para pérdidas con asimetría positiva)
ajuste_lnorm  <- fitdist(X_severidad, "lnorm") # Ajustar Log-Normal
ajuste_gamma  <- fitdist(X_severidad, "gamma") # Ajustar Gamma
ajuste_weib   <- fitdist(X_severidad, "weibull") # Ajustar Weibull
# Ajustar Pareto (con manejo de errores por posible fallo en convergencia)
ajuste_pareto <- tryCatch(
  fitdist(X_severidad, "pareto",
          start = list(shape = 2, scale = min(X_severidad) * 0.99)),
  error = function(e) NULL
)
# Tabla comparativa de distribuciones
distribs <- list("Log-Normal" = ajuste_lnorm,
                 "Gamma"      = ajuste_gamma,
                 "Weibull"    = ajuste_weib)
if (!is.null(ajuste_pareto)) distribs[["Pareto"]] <- ajuste_pareto  # Añadir Pareto si existe
# Crear tabla con métricas de bondad de ajuste
tabla_sev <- map_dfr(names(distribs), function(nm) {  # Mapear sobre cada distribución
  d <- distribs[[nm]]                                 # Extraer objeto fitdist
  data.frame(Distribución = nm, AIC = d$aic, BIC = d$bic,  # Crear fila con métricas
             LogLik = as.numeric(logLik(d)))          # Log-verosimilitud
}) %>% arrange(AIC)                                   # Ordenar por AIC ascendente
# Mostrar tabla comparativa
cat("Comparación de distribuciones de Severidad:\n")
## Comparación de distribuciones de Severidad:
print(tabla_sev, row.names = FALSE)
##  Distribución      AIC      BIC    LogLik
##    Log-Normal 47802.59 47816.34 -23899.29
##        Pareto 48328.07 48341.82 -24162.03
##       Weibull 49570.79 49584.54 -24783.39
##         Gamma 50664.67 50678.42 -25330.34
# Seleccionar mejor modelo de severidad (menor AIC)
mejor_sev <- tabla_sev$Distribución[1]
cat(sprintf("  ✔ MODELO SELECCIONADO (Severidad): %s (menor AIC)\n\n", mejor_sev))
##   ✔ MODELO SELECCIONADO (Severidad): Log-Normal (menor AIC)
# Extraer parámetros del mejor modelo
ajuste_sev_best <- distribs[[mejor_sev]]
params_sev      <- ajuste_sev_best$estimate
cat("  Parámetros estimados:\n")
##   Parámetros estimados:
for (nm in names(params_sev)) cat(sprintf("    %s = %.5f\n", nm, params_sev[nm]))  # Mostrar parámetros
##     meanlog = 1.72442
##     sdlog = 1.21758
# Función de distribución acumulada (CDF) según el mejor modelo
cdf_fn <- switch(mejor_sev,
                 "Log-Normal" = function(x) plnorm(x, params_sev["meanlog"], params_sev["sdlog"]),
                 "Gamma"      = function(x) pgamma(x, params_sev["shape"], params_sev["rate"]),
                 "Weibull"    = function(x) pweibull(x, params_sev["shape"], params_sev["scale"]),
                 "Pareto"     = function(x) ppareto(x, params_sev["shape"], params_sev["scale"]),
)
# Prueba Kolmogorov-Smirnov para evaluar bondad de ajuste
ks_sev <- ks.test(X_severidad, cdf_fn)               # Comparar empírica vs teórica
## Warning in ks.test.default(X_severidad, cdf_fn): ties should not be present for
## the one-sample Kolmogorov-Smirnov test
cat(sprintf("\n  Prueba K-S (%s): D=%.4f, p=%.4f\n", mejor_sev, ks_sev$statistic, ks_sev$p.value))
## 
##   Prueba K-S (Log-Normal): D=0.1563, p=0.0000
cat(ifelse(ks_sev$p.value > 0.05,                    # Interpretación del resultado
           "  → No se rechaza el ajuste (p > 0.05)\n\n",
           "  → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible\n\n"))
##   → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible
# =============================================================================
# 5. CORRELACIÓN ENTRE FRECUENCIA Y SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD\n")
##   PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD
cat("====================================================\n\n")
## ====================================================
# Calcular severidad media y total por año
sev_anual <- sev_limpio %>%
  group_by(Anio) %>%                                  # Agrupar por año
  summarise(
    IndSev_Media = mean(IndSev),                      # Severidad media anual
    IndSev_Total = sum(IndSev),                       # Severidad total anual
    IndSev_Max   = max(IndSev),                       # Severidad máxima anual
    IndSev_P90   = quantile(IndSev, 0.90),            # Percentil 90 anual
    N_con_sev    = n(),                               # Número de eventos con severidad
    .groups = "drop"
  )
# Unir frecuencia anual con severidad anual
corr_df <- freq_anual %>%
  left_join(sev_anual, by = "Anio") %>%               # Combinar datasets por año
  filter(!is.na(IndSev_Media))                        # Excluir años sin severidad
# Calcular coeficientes de correlación
cor_pearson_med  <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media,  method = "pearson")  # Correlación lineal
cor_spearman_med <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media,  method = "spearman") # Correlación por rangos
cor_pearson_tot  <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total,  method = "pearson")  # Con totales
cor_spearman_tot <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total,  method = "spearman")
cor_pearson_p90  <- cor(corr_df$N_Accidentes, corr_df$IndSev_P90,    method = "pearson")  # Con percentil 90
# Tests de significancia estadística
test_med <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Media, method = "pearson")  # Prueba para media
test_tot <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Total, method = "pearson")  # Prueba para total
# Mostrar resultados de correlaciones
cat("¿Los años con más siniestros tienen siniestros más graves?\n\n")
## ¿Los años con más siniestros tienen siniestros más graves?
cat("  Correlación Freq. ↔ Severidad MEDIA por año:\n")
##   Correlación Freq. ↔ Severidad MEDIA por año:
cat(sprintf("    Pearson  r = %.4f  (p = %.4f)\n", cor_pearson_med, test_med$p.value))
##     Pearson  r = -0.1183  (p = 0.7447)
cat(sprintf("    Spearman ρ = %.4f\n", cor_spearman_med))
##     Spearman ρ = 0.0303
cat("\n  Correlación Freq. ↔ Severidad TOTAL por año:\n")
## 
##   Correlación Freq. ↔ Severidad TOTAL por año:
cat(sprintf("    Pearson  r = %.4f  (p = %.4f)\n", cor_pearson_tot, test_tot$p.value))
##     Pearson  r = 0.1732  (p = 0.6322)
cat(sprintf("    Spearman ρ = %.4f\n", cor_spearman_tot))
##     Spearman ρ = 0.1758
cat("\n  Correlación Freq. ↔ Percentil 90 de Severidad:\n")
## 
##   Correlación Freq. ↔ Percentil 90 de Severidad:
cat(sprintf("    Pearson  r = %.4f\n", cor_pearson_p90))
##     Pearson  r = 0.4391
# Función para interpretar la correlación
cat("\n  Interpretación:\n")
## 
##   Interpretación:
interpretar_cor <- function(r, p) {
  fuerza <- if (abs(r) > 0.7) "fuerte" else if (abs(r) > 0.4) "moderada" else "débil"
  dir    <- if (r > 0) "positiva" else "negativa"
  sig    <- if (p < 0.05) "(estadísticamente significativa)" else "(no significativa, p ≥ 0.05)"
  cat(sprintf("    Asociación %s %s %s\n", fuerza, dir, sig))
}
interpretar_cor(cor_pearson_med, test_med$p.value)
##     Asociación débil negativa (no significativa, p ≥ 0.05)
# Mostrar tabla resumen anual
cat("\n  Tabla resumen anual (Frecuencia vs Severidad):\n")
## 
##   Tabla resumen anual (Frecuencia vs Severidad):
print(corr_df %>% dplyr::select(Anio, N_Accidentes, IndSev_Media, IndSev_Total, IndSev_Max), n=20)
## # A tibble: 10 × 5
##     Anio N_Accidentes IndSev_Media IndSev_Total IndSev_Max
##    <int>        <int>        <dbl>        <dbl>      <dbl>
##  1  2015         1487         16.3        12652       2240
##  2  2016         1541         13.0         9737        660
##  3  2017         1512         11.2         8115        414
##  4  2018         1565         16.2        12214       1910
##  5  2019         1504         14.9        11190       1570
##  6  2020         1307         14.5         9059       1760
##  7  2021         1520         11.1         7833        620
##  8  2022         1542         11.9         8774       1320
##  9  2023         1482         10.0         6665        120
## 10  2024         1485         13.5         9049       1736
# =============================================================================
# 6. SIMULACIÓN MONTE CARLO
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 6: SIMULACIÓN MONTE CARLO\n")
##   PASO 6: SIMULACIÓN MONTE CARLO
cat("====================================================\n\n")
## ====================================================
n_sim  <- 100000  # Número de simulaciones para Monte Carlo
escala <- 12      # Factor para pasar de parámetros mensuales a anuales
# Funciones simuladoras (frecuencia)
simular_N <- function(n, modelo) {
  if (modelo == "nbinom") rnbinom(n, size = size_est, mu = mu_est * escala)  # Binomial Negativa anual
  else                    rpois(n, lambda = lambda_est * escala)              # Poisson anual
}
# Función simuladora de severidad dada distribución
simular_X <- function(n, dist_nombre, params) {
  switch(dist_nombre,
         "Log-Normal" = rlnorm(n, params["meanlog"], params["sdlog"]),
         "Gamma"      = rgamma(n, params["shape"], params["rate"]),
         "Weibull"    = rweibull(n, params["shape"], params["scale"]),
         "Pareto"     = rpareto(n, params["shape"], params["scale"]),
         stop("Distribución no reconocida")
  )
}
# ---------- Simulación principal (mejor modelo) ----------
S_simulado <- numeric(n_sim)  # Vector para almacenar pérdidas agregadas simuladas
for (i in seq_len(n_sim)) {   # Bucle sobre cada simulación
  N_i <- simular_N(1, modelo_freq)  # Simular número de siniestros anual
  if (N_i > 0) {                    # Si hay al menos un siniestro
    S_simulado[i] <- sum(simular_X(N_i, mejor_sev, params_sev))  # Sumar severidades
  }  # Si N_i = 0, S_simulado[i] permanece como 0
}
# Mostrar estadísticas de la pérdida agregada simulada
cat("Estadísticas de la Pérdida Agregada Anual S (IndSev units):\n")
## Estadísticas de la Pérdida Agregada Anual S (IndSev units):
cat(sprintf("  E[S] (prima pura):   %.2f\n", mean(S_simulado)))
##   E[S] (prima pura):   17601.16
cat(sprintf("  Desv. Est.:          %.2f\n", sd(S_simulado)))
##   Desv. Est.:          4876.76
cat(sprintf("  Mediana:             %.2f\n", median(S_simulado)))
##   Mediana:             17165.24
cat(sprintf("  Máximo simulado:     %.2f\n", max(S_simulado)))
##   Máximo simulado:     45473.95
cat(sprintf("  Sesgo:               %.3f\n", moments::skewness(S_simulado)))
##   Sesgo:               0.542
cat(sprintf("  Curtosis:            %.3f\n\n", moments::kurtosis(S_simulado)))
##   Curtosis:            3.425
# =============================================================================
# 7. MEDIDAS DE RIESGO: VaR y TVaR
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 7: VaR y TVaR — MODELO GANADOR\n")
##   PASO 7: VaR y TVaR — MODELO GANADOR
cat("====================================================\n\n")
## ====================================================
niveles <- c(0.90, 0.95, 0.99, 0.995, 0.999)
# Construir tabla con VaR y TVaR
tabla_riesgo <- data.frame(
  Nivel   = paste0(niveles * 100, "%"),                     # Etiquetas de nivel
  VaR     = quantile(S_simulado, probs = niveles),          # Value at Risk (cuantiles)
  TVaR    = sapply(niveles, function(p) {                   # Tail Value at Risk
    u <- quantile(S_simulado, p)                            # Umbral VaR
    mean(S_simulado[S_simulado > u])                        # Media condicional superior
  })
) %>% mutate(Carga_Cola = round(TVaR - VaR, 2))              # Diferencia TVaR - VaR
# Mostrar tabla de medidas de riesgo
cat("Tabla de Medidas de Riesgo (mejor modelo):\n")
## Tabla de Medidas de Riesgo (mejor modelo):
print(tabla_riesgo, row.names = FALSE)
##  Nivel      VaR     TVaR Carga_Cola
##    90% 24049.92 27079.31    3029.39
##    95% 26253.60 29097.81    2844.21
##    99% 30899.05 33327.50    2428.46
##  99.5% 32668.07 34998.21    2330.14
##  99.9% 36220.43 38463.93    2243.49
# Calcular medidas específicas para nivel 99.5%
VaR_995  <- quantile(S_simulado, 0.995)
TVaR_995 <- mean(S_simulado[S_simulado > VaR_995])
E_S      <- mean(S_simulado)
# Mostrar resumen de medidas de riesgo
cat(sprintf("\n  Prima de Riesgo Pura E[S]:   %.2f\n", E_S))
## 
##   Prima de Riesgo Pura E[S]:   17601.16
cat(sprintf("  VaR  al 99.5%%:               %.2f\n", VaR_995))
##   VaR  al 99.5%:               32668.07
cat(sprintf("  TVaR al 99.5%%:               %.2f\n\n", TVaR_995))
##   TVaR al 99.5%:               34998.21
# =============================================================================
# 8. ANÁLISIS DE SENSIBILIDAD DE DISTRIBUCIÓN DE SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD\n")
##   PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD
cat("====================================================\n\n")
## ====================================================
cat("¿Qué pasa si cambiamos la distribución de severidad?\n")
## ¿Qué pasa si cambiamos la distribución de severidad?
cat("¿Cómo afecta al VaR y TVaR?\n\n")
## ¿Cómo afecta al VaR y TVaR?
# Todas las distribuciones ajustadas
candidatos_sev <- names(distribs)  # Nombres de distribuciones candidatas
# Realizar simulaciones para cada distribución candidata
resultados_sens <- map_dfr(candidatos_sev, function(dist_nm) {
  p <- distribs[[dist_nm]]$estimate  # Parámetros de la distribución actual
  cat(sprintf("  Simulando con distribución: %s...\n", dist_nm))
  # Simular pérdida agregada con esta distribución
  S_alt <- numeric(n_sim)
  for (i in seq_len(n_sim)) {
    N_i <- simular_N(1, modelo_freq)       # Mismo modelo de frecuencia
    if (N_i > 0) {
      S_alt[i] <- sum(simular_X(N_i, dist_nm, p))  # Sumar con distribución alternativa
    }
  }
  # Calcular medidas de riesgo para esta simulación
  var90  <- quantile(S_alt, 0.90)
  var95  <- quantile(S_alt, 0.95)
  var99  <- quantile(S_alt, 0.99)
  var995 <- quantile(S_alt, 0.995)
  # Devolver dataframe con resultados
  data.frame(
    Distribución = dist_nm,
    Es_Ganador   = dist_nm == mejor_sev,   # Indicador si es el mejor modelo
    AIC          = distribs[[dist_nm]]$aic,
    E_S          = mean(S_alt),
    VaR_90       = var90,
    VaR_95       = var95,
    VaR_99       = var99,
    VaR_995      = var995,
    TVaR_995     = mean(S_alt[S_alt > var995]),
    Carga_Cola   = mean(S_alt[S_alt > var995]) - var995  # Diferencia TVaR - VaR
  )
})
##   Simulando con distribución: Log-Normal...
##   Simulando con distribución: Gamma...
##   Simulando con distribución: Weibull...
##   Simulando con distribución: Pareto...
# Mostrar tabla de sensibilidad completa
cat("\n  Tabla de sensibilidad completa:\n")
## 
##   Tabla de sensibilidad completa:
print(resultados_sens %>% dplyr::select(-Es_Ganador) %>%   # Excluir columna indicadora
        mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE)  # Redondear numéricos
##  Distribución      AIC      E_S   VaR_90   VaR_95   VaR_99  VaR_995 TVaR_995
##    Log-Normal 47802.59 17592.46 24066.36 26344.66 30871.61 32563.39 34983.93
##         Gamma 50664.67 19878.96 27078.97 29582.38 34694.77 36641.96 39416.56
##       Weibull 49570.79 18206.11 24866.54 27155.18 31762.79 33649.42 36095.92
##        Pareto 48328.07 17330.45 23666.71 25870.22 30405.53 32194.12 34461.42
##  Carga_Cola
##     2420.54
##     2774.60
##     2446.50
##     2267.30
# Calcular valores base (mejor modelo)
base_VaR  <- resultados_sens %>% filter(Es_Ganador) %>% pull(VaR_995)
base_TVaR <- resultados_sens %>% filter(Es_Ganador) %>% pull(TVaR_995)
# Mostrar variación relativa respecto al mejor modelo
cat("\n  Variación relativa al mejor modelo (", mejor_sev, "):\n", sep = "")
## 
##   Variación relativa al mejor modelo (Log-Normal):
resultados_sens %>%
  mutate(
    Delta_VaR_pct  = round(100 * (VaR_995  - base_VaR)  / base_VaR,  1),   # Cambio porcentual en VaR
    Delta_TVaR_pct = round(100 * (TVaR_995 - base_TVaR) / base_TVaR, 1)    # Cambio porcentual en TVaR
  ) %>%
  dplyr::select(Distribución, Delta_VaR_pct, Delta_TVaR_pct) %>%           # Seleccionar columnas
  { cat(sprintf("  %-12s  ΔVaR99.5%%: %+.1f%%   ΔTVaR99.5%%: %+.1f%%\n",
                .$Distribución, .$Delta_VaR_pct, .$Delta_TVaR_pct))
    invisible(.) }
##   Log-Normal    ΔVaR99.5%: +0.0%   ΔTVaR99.5%: +0.0%
##    Gamma         ΔVaR99.5%: +12.5%   ΔTVaR99.5%: +12.7%
##    Weibull       ΔVaR99.5%: +3.3%   ΔTVaR99.5%: +3.2%
##    Pareto        ΔVaR99.5%: -1.1%   ΔTVaR99.5%: -1.5%
cat("\n  Interpretación:\n")
## 
##   Interpretación:
max_var_var <- max(abs(resultados_sens$VaR_995  - base_VaR)  / base_VaR)
max_var_tva <- max(abs(resultados_sens$TVaR_995 - base_TVaR) / base_TVaR)
cat(sprintf("    La elección de distribución puede mover el VaR99.5%% hasta %.1f%%\n", 100 * max_var_var))
##     La elección de distribución puede mover el VaR99.5% hasta 12.5%
cat(sprintf("    y el TVaR99.5%% hasta %.1f%% respecto al modelo base.\n", 100 * max_var_tva))
##     y el TVaR99.5% hasta 12.7% respecto al modelo base.
cat("    Esto ilustra el riesgo de modelo (model risk) en actuaría.\n\n")
##     Esto ilustra el riesgo de modelo (model risk) en actuaría.
# =============================================================================
# 9. INDICE DE CARGA DE RIESGO (Loading Ratio)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)\n")
##   PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)
cat("====================================================\n\n")
## ====================================================
# Calcular índices de carga y cocientes
resultados_sens <- resultados_sens %>%
  mutate(
    Loading_Ratio = round(TVaR_995 / E_S, 3),           # TVaR / E[S] (carga de cola)
    Cociente_VaR_TVaR = round(VaR_995 / TVaR_995, 3)    # VaR / TVaR (1 = cola liviana, <1 = cola pesada)
  )
cat("  Loading Ratio = TVaR99.5% / E[S]  (mayor = cola más pesada)\n")
##   Loading Ratio = TVaR99.5% / E[S]  (mayor = cola más pesada)
cat("  Cociente VaR/TVaR                 (menor = mayor riesgo de cola)\n\n")
##   Cociente VaR/TVaR                 (menor = mayor riesgo de cola)
# Mostrar tabla con índices de carga
print(resultados_sens %>% dplyr::select(Distribución, E_S, TVaR_995, Loading_Ratio, Cociente_VaR_TVaR) %>%
        mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE)
##  Distribución      E_S TVaR_995 Loading_Ratio Cociente_VaR_TVaR
##    Log-Normal 17592.46 34983.93          1.99              0.93
##         Gamma 19878.96 39416.56          1.98              0.93
##       Weibull 18206.11 36095.92          1.98              0.93
##        Pareto 17330.45 34461.42          1.99              0.93
# =============================================================================
# 10. ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)\n")
##   PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)
cat("====================================================\n\n")
## ====================================================
cat("─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───\n\n")
## ─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───
cat("  ¿Qué porcentaje de accidentes concentra el 80% del daño total?\n\n")
##   ¿Qué porcentaje de accidentes concentra el 80% del daño total?
# Ordenar severidades de mayor a menor
sev_ord <- sort(X_severidad, decreasing = TRUE)        # Vector ordenado descendente
sev_cum <- cumsum(sev_ord) / sum(sev_ord)              # Proporción acumulada de daño
n_20pct  <- ceiling(0.20 * length(sev_ord))            # Número que representa 20% de accidentes
pct_dano <- sev_cum[n_20pct]                           # Daño acumulado en ese 20%
n_para80 <- which(sev_cum >= 0.80)[1]                  # Accidentes necesarios para 80% daño
pct_acc  <- round(100 * n_para80 / length(sev_ord), 1) # Porcentaje de accidentes para 80% daño
# Mostrar resultados del análisis Pareto
cat(sprintf("  El 20%% más grave de los accidentes concentra el %.1f%% del daño total\n", 100 * pct_dano))
##   El 20% más grave de los accidentes concentra el 66.2% del daño total
cat(sprintf("  El %.1f%% de los accidentes concentra el 80%% del daño total\n", pct_acc))
##   El 34.4% de los accidentes concentra el 80% del daño total
cat(sprintf("  → El principio 80/20 (Pareto) %s verificado en estos datos\n\n",
            ifelse(pct_acc < 25, "SÍ es", "NO es completamente")))
##   → El principio 80/20 (Pareto) NO es completamente verificado en estos datos
# =============================================================================
# 11. PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA
# =============================================================================
cat("====================================================\n")
## ====================================================
cat("  PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA\n")
##   PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA
cat("====================================================\n\n")
## ====================================================
cat("─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───\n\n")
## ─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───
# Calcular estadísticas de severidad agrupadas por condición meteorológica
perfil_meteo <- sev_limpio %>%
  group_by(Meteorologia) %>%                           # Agrupar por clima
  summarise(
    N            = n(),                                # Número de accidentes
    Media_Sev    = round(mean(IndSev), 2),             # Severidad media
    P90_Sev      = round(quantile(IndSev, 0.90), 2),   # Percentil 90
    P99_Sev      = round(quantile(IndSev, 0.99), 2),   # Percentil 99
    Pct_Fatal    = round(100 * mean(FatalInjuryCount > 0), 1),  # Porcentaje con fatales
    .groups = "drop"
  ) %>% arrange(desc(Media_Sev))                       # Ordenar por media descendente
# Mostrar perfil meteorológico
cat("  Perfil por condición meteorológica:\n")
##   Perfil por condición meteorológica:
print(perfil_meteo, row.names = FALSE)
## # A tibble: 3 × 6
##   Meteorologia     N Media_Sev P90_Sev P99_Sev Pct_Fatal
##   <chr>        <int>     <dbl>   <dbl>   <dbl>     <dbl>
## 1 Desconocido   1494      31.3      40   416.       76.4
## 2 IMC/IFR        422      18.1      40    87.9      74.9
## 3 VMC/VFR       5239       7.8      20    50        35.2
# =============================================================================
# 12. GRAFICAS
# =============================================================================
# Paleta de colores
COL_AZUL    <- "#2C5F8A"
COL_ROJO    <- "#C0392B"
COL_VERDE   <- "#1A7A4A"
COL_MORADO  <- "#7D3C98"
COL_NARANJA <- "#D35400"
COL_GRIS    <- "#566573"
# Tema base para todas las gráficas (consistencia visual)
tema_base <- theme_minimal(base_size = 12) +            # Tema minimalista base
  theme(
    plot.title    = element_text(face = "bold", size = 13, hjust = 0.5),    # Título centrado negrita
    plot.subtitle = element_text(hjust = 0.5, color = "grey40"),            # Subtítulo centrado gris
    axis.title    = element_text(face = "bold"),        # Títulos de ejes negrita
    legend.position = "bottom",                         # Leyenda al fondo
    panel.grid.minor = element_blank()                  # Eliminar líneas de grid menores
  )
# ------ FIG 1: Evolución anual de accidentes ------
p1 <- ggplot(freq_anual, aes(x = Anio, y = N_Accidentes)) +
  geom_col(fill = COL_AZUL, alpha = 0.85, color = "white") +
  geom_text(aes(label = N_Accidentes), vjust = -0.4,
            size = 3.5, fontface = "bold", color = "grey30") +
  scale_x_continuous(breaks = 2015:2024) +
  labs(title = "Accidentes de Aviación por Año — NTSB",
       subtitle = "Período 2015–2024 | Solo accidentes confirmados (ACC)",
       x = "Año", y = "N° de accidentes") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 2: Distribución de frecuencia mensual + ajustes ------
df_freq_plot <- data.frame(n = N_frecuencia)
x_grid <- seq(min(N_frecuencia), max(N_frecuencia))

p2 <- ggplot(df_freq_plot, aes(x = n)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 5, fill = COL_AZUL, color = "white", alpha = 0.7) +
  stat_function(fun = function(x) dpois(round(x), lambda = lambda_est),
                color = COL_ROJO, linewidth = 1.2, linetype = "dashed",
                n = 200) +
  stat_function(fun = function(x) dnbinom(round(x), size = size_est, mu = mu_est),
                color = COL_VERDE, linewidth = 1.2, n = 200) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = paste0("Poisson (rojo)  λ=", round(lambda_est,1),
                          "\nBin.Neg (verde) μ=", round(mu_est,1)),
           size = 3.2, color = "grey30") +
  labs(title = "Frecuencia Mensual de Accidentes — Ajuste de Distribuciones",
       subtitle = "NTSB 2015–2024",
       x = "Accidentes por mes", y = "Densidad") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 3: Histograma + CDF de Severidad ------
df_sev_plot <- data.frame(x = X_severidad)
x_seq <- seq(min(X_severidad), quantile(X_severidad, 0.995), length.out = 300)

p3 <- ggplot(df_sev_plot, aes(x = x)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 60, fill = COL_NARANJA, color = "white", alpha = 0.7) +
  stat_function(fun = cdf_fn %>% {function(x) { h <- 0.001; (.(x+h) - .(x-h))/(2*h) }},
                color = COL_MORADO, linewidth = 1.3, n = 300) +
  coord_cartesian(xlim = c(0, quantile(X_severidad, 0.99))) +
  labs(title = paste("Ajuste de Severidad —", mejor_sev),
       subtitle = "Índice actuarial ponderado (10·Fatal + 3·Serio + 1·Menor)",
       x = "Índice de Severidad (IndSev)", y = "Densidad") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 4: Q-Q de Severidad ------
q_emp <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
q_teo <- switch(mejor_sev,
                "Log-Normal" = qlnorm(ppoints(min(length(X_severidad), 1000)),
                                      params_sev["meanlog"], params_sev["sdlog"]),
                "Gamma"      = qgamma(ppoints(min(length(X_severidad), 1000)),
                                      params_sev["shape"], params_sev["rate"]),
                "Weibull"    = qweibull(ppoints(min(length(X_severidad), 1000)),
                                        params_sev["shape"], params_sev["scale"]),
                "Pareto"     = qpareto(ppoints(min(length(X_severidad), 1000)),
                                       params_sev["shape"], params_sev["scale"]),
                {
                  q_emp_alt <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
                  q_emp_alt  # fallback
                }
)
df_qq <- data.frame(teorico = sort(q_teo), empirico = sort(q_emp))
p4 <- ggplot(df_qq, aes(x = teorico, y = empirico)) +
  geom_point(color = COL_AZUL, alpha = 0.5, size = 0.9) +
  geom_abline(slope = 1, intercept = 0, color = COL_ROJO,
              linewidth = 1.2, linetype = "dashed") +
  labs(title = paste("Q-Q Plot de Severidad —", mejor_sev),
       subtitle = "Puntos sobre la línea roja = buen ajuste",
       x = "Cuantiles teóricos", y = "Cuantiles empíricos") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 5: Pérdida Agregada S con VaR y TVaR ------
df_S <- data.frame(S = S_simulado)
p5 <- ggplot(df_S, aes(x = S)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 80, fill = COL_AZUL, color = "white", alpha = 0.7) +
  geom_density(color = "#2C3E50", linewidth = 1) +
  geom_vline(xintercept = E_S,      color = COL_VERDE,  linewidth = 1.2, linetype = "dashed") +
  geom_vline(xintercept = VaR_995,  color = COL_ROJO,   linewidth = 1.4) +
  geom_vline(xintercept = TVaR_995, color = COL_MORADO, linewidth = 1.4, linetype = "dotdash") +
  annotate("text", x = E_S,      y = Inf, vjust=2, hjust=-0.1,
           label = sprintf("E[S]=%.0f", E_S), color=COL_VERDE, size=3.2, fontface="bold") +
  annotate("text", x = VaR_995,  y = Inf, vjust=2, hjust=-0.1,
           label = sprintf("VaR99.5%%=%.0f", VaR_995), color=COL_ROJO, size=3.2, fontface="bold") +
  annotate("text", x = TVaR_995, y = Inf, vjust=4, hjust=-0.1,
           label = sprintf("TVaR=%.0f", TVaR_995), color=COL_MORADO, size=3.2, fontface="bold") +
  labs(title = "Distribución de Pérdida Agregada Anual S",
       subtitle = sprintf("Monte Carlo — %s simulaciones | Modelo: %s + %s",
                          format(n_sim, big.mark=","), modelo_freq, mejor_sev),
       x = "Pérdida Agregada Anual (IndSev)", y = "Densidad") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 6: Curva de Excedencia ------
s_ord    <- sort(S_simulado, decreasing = TRUE)
prob_exc <- seq_along(s_ord) / length(s_ord)
df_exc   <- data.frame(S = s_ord, prob = prob_exc)

p6 <- ggplot(df_exc, aes(x = S, y = prob)) +
  geom_line(color = COL_AZUL, linewidth = 1) +
  geom_hline(yintercept = 0.005, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
  geom_vline(xintercept = VaR_995, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
  annotate("point", x = VaR_995, y = 0.005, color = COL_ROJO, size = 3.5) +
  annotate("text", x = VaR_995, y = 0.005,
           label = sprintf("VaR 99.5%%\n= %.0f", VaR_995),
           vjust = -0.5, hjust = -0.1, color = COL_ROJO, size = 3.2, fontface = "bold") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 0.5)) +
  labs(title = "Curva de Excedencia — P(S > x)",
       subtitle = "Probabilidad de superar cada nivel de pérdida agregada anual",
       x = "Pérdida Agregada (IndSev)", y = "Probabilidad de Excedencia") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 7: Correlación Frecuencia–Severidad anual ------
p7 <- ggplot(corr_df, aes(x = N_Accidentes, y = IndSev_Media, label = Anio)) +
  geom_point(color = COL_AZUL, size = 4, alpha = 0.8) +
  geom_text(vjust = -0.7, size = 3.2, color = "grey30", fontface = "bold") +
  geom_smooth(method = "lm", se = TRUE, color = COL_ROJO,
              fill = COL_ROJO, alpha = 0.1, linetype = "dashed") +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = sprintf("r = %.3f\np = %.3f", cor_pearson_med, test_med$p.value),
           size = 4, color = COL_GRIS, fontface = "bold") +
  labs(title = "Correlación: Frecuencia vs Severidad Media Anual",
       subtitle = "¿Años con más accidentes = accidentes más graves?",
       x = "N° accidentes anuales", y = "Severidad media (IndSev)") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 8: Análisis de Sensibilidad — VaR y TVaR por distribución ------
res_long <- resultados_sens %>%
  dplyr::select(Distribución, VaR_995, TVaR_995) %>%
  pivot_longer(cols = c(VaR_995, TVaR_995), names_to = "Medida", values_to = "Valor") %>%
  mutate(Medida = recode(Medida, "VaR_995" = "VaR 99.5%", "TVaR_995" = "TVaR 99.5%"))
p8 <- ggplot(res_long, aes(x = reorder(Distribución, Valor), y = Valor, fill = Medida)) +
  geom_col(position = "dodge", alpha = 0.85, color = "white") +
  coord_flip() +
  scale_fill_manual(values = c("VaR 99.5%" = COL_ROJO, "TVaR 99.5%" = COL_MORADO)) +
  labs(title = "Sensibilidad de VaR y TVaR según Distribución de Severidad",
       subtitle = "Mismo modelo de frecuencia — distintas hipótesis de severidad",
       x = NULL, y = "Valor (IndSev)", fill = "Medida de Riesgo") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 9: Perfil de riesgo meteorológico ------
p9 <- sev_limpio %>%
  filter(Meteorologia != "Desconocido") %>%
  ggplot(aes(x = Meteorologia, y = IndSev, fill = Meteorologia)) +
  geom_violin(trim = TRUE, alpha = 0.6, show.legend = FALSE) +
  geom_boxplot(width = 0.2, alpha = 0.8, outlier.size = 0.6,
               outlier.alpha = 0.3, show.legend = FALSE) +
  scale_y_log10(labels = comma_format()) +
  scale_fill_manual(values = c("VMC/VFR" = COL_VERDE, "IMC/IFR" = COL_ROJO)) +
  labs(title = "Severidad por Condición Meteorológica",
       subtitle = "VMC/VFR = condiciones visuales; IMC/IFR = condiciones por instrumentos",
       x = NULL, y = "IndSev (escala log)") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 10: Loading Ratio (carga de cola) por distribución ------
p10 <- resultados_sens %>%
  ggplot(aes(x = reorder(Distribución, Loading_Ratio), y = Loading_Ratio)) +
  geom_col(fill = COL_NARANJA, alpha = 0.85, color = "white") +
  geom_text(aes(label = round(Loading_Ratio, 2)),
            hjust = -0.2, size = 3.5, fontface = "bold") +
  coord_flip() +
  geom_hline(yintercept = 1, linetype = "dashed", color = COL_GRIS) +
  labs(title = "Índice de Carga de Cola (Loading Ratio)",
       subtitle = "TVaR99.5% / E[S] — cuántas veces la prima pura exige la cola",
       x = NULL, y = "Loading Ratio") +
  tema_base + expand_limits(y = max(resultados_sens$Loading_Ratio) * 1.15)
print(p1,  width=8,  height=5,  dpi=150)

# Dashboard principal (panel de 6 gráficas clave)
dashboard <- (p1 | p2) / (p5 | p6) / (p7 | p8)
print(dashboard, width=16, height=15, dpi=150)
## Warning: Removed 50000 rows containing missing values or values outside the scale range
## (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Panel de diagnóstico de ajuste
diagnostico <- (p3 | p4) / (p9 | p10)
print(diagnostico, width=16, height=10, dpi=150)