Proyecto

# =============================================================================
# MODELACIÓN ESTOCÁSTICA DE PÉRDIDA AGREGADA EN SINIESTROS DE AVIACIÓN
# Fuente de datos: NTSB (National Transportation Safety Board) Aviation Accident Database
# Cómputo Científico
# Angel Albarrán · Jesus Bastida · Angel Díaz
# =============================================================================
# Carga de librerías necesarias para el análisis
library(tidyverse)      # Manipulación y visualización de datos

## Warning: package 'tidyverse' was built under R version 4.5.2

## Warning: package 'ggplot2' was built under R version 4.5.2

## Warning: package 'readr' was built under R version 4.5.2

## Warning: package 'forcats' was built under R version 4.5.2

## Warning: package 'lubridate' was built under R version 4.5.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)         # Lectura de archivos Excel

## Warning: package 'readxl' was built under R version 4.5.2

library(fitdistrplus)   # Ajuste de distribuciones de probabilidad

## Warning: package 'fitdistrplus' was built under R version 4.5.2

## Cargando paquete requerido: MASS

## Warning: package 'MASS' was built under R version 4.5.2

## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Cargando paquete requerido: survival

library(actuar)         # Distribuciones actuariales

## Warning: package 'actuar' was built under R version 4.5.2

## 
## Adjuntando el paquete: 'actuar'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
## 
## The following object is masked from 'package:grDevices':
## 
##     cm

library(MASS)           # Ajuste de Binomial Negativa
library(gridExtra)      # Organización de gráficos en grid

## 
## Adjuntando el paquete: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library(scales)         # Escalas y formatos para gráficos

## 
## Adjuntando el paquete: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(ggplot2)        # Sistema avanzado de gráficos
library(patchwork)      # Combinación de gráficos ggplot2

## Warning: package 'patchwork' was built under R version 4.5.2

## 
## Adjuntando el paquete: 'patchwork'
## 
## The following object is masked from 'package:MASS':
## 
##     area

library(moments)        # Cálculo de asimetría y curtosis

## Warning: package 'moments' was built under R version 4.5.2

set.seed(2025)  # Fijar semilla para garantizar reproducibilidad de resultados
# =============================================================================
# 1. CARGA Y LIMPIEZA DE DATOS (BASE NTSB)
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)\n")

##   PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)

cat("====================================================\n\n")

## ====================================================

# ---------- 1A. LECTURA DEL ARCHIVO NTSB ----------
ntsb_raw <- read_excel("NTSB.xlsx", sheet = "NTSB_Accidents")

## Warning: Expecting logical in H1618 / R1618C8: got 'AIR2507'

## Warning: Expecting logical in H1663 / R1663C8: got 'AIR2602'

## Warning: Expecting logical in H3429 / R3429C8: got 'AIR2504'

## Warning: Expecting logical in H5006 / R5006C8: got 'AIR2402'

## Warning: Expecting logical in H5076 / R5076C8: got 'AIR2401'

## Warning: Expecting logical in H5294 / R5294C8: got 'AIR2407'

## Warning: Expecting logical in H5616 / R5616C8: got 'AIR2301'

## Warning: Expecting logical in H6070 / R6070C8: got 'AIR2404'

## Warning: Expecting logical in H9772 / R9772C8: got 'AAR2101'

## Warning: Expecting logical in H9876 / R9876C8: got 'AIR2205'

## Warning: Expecting logical in H10121 / R10121C8: got 'AAR2105'

## Warning: Expecting logical in H10792 / R10792C8: got 'AAR2102'

## Warning: Expecting logical in H11017 / R11017C8: got 'AAR2104'

## Warning: Expecting logical in H11338 / R11338C8: got 'AAR2002'

## Warning: Expecting logical in H11409 / R11409C8: got 'AAR2001'

## Warning: Expecting logical in H12806 / R12806C8: got 'AAR1903'

## Warning: Expecting logical in H12937 / R12937C8: got 'AAR1904'

## Warning: Expecting logical in H14002 / R14002C8: got 'AIR1801'

## Warning: Expecting logical in H14303 / R14303C8: got 'AAR1902'

## Warning: Expecting logical in H14577 / R14577C8: got 'AAR1901'

## Warning: Expecting logical in H15023 / R15023C8: got 'AAR1801'

## Warning: Expecting logical in H15151 / R15151C8: got 'AAR1802'

## Warning: Expecting logical in H15517 / R15517C8: got 'AAR1703'

## Warning: Expecting logical in H16630 / R16630C8: got 'AAR1603'

## Warning: Expecting logical in H17310 / R17310C8: got 'AAR1701'

## Warning: Expecting logical in H17367 / R17367C8: got 'AAR1702'

## Warning: Expecting logical in H17862 / R17862C8: got 'AAR1602'

## Warning: Expecting logical in H18104 / R18104C8: got 'AAR1601'

## Warning: Expecting logical in H18204 / R18204C8: got 'AAR1502'

## Warning: Expecting logical in H19041 / R19041C8: got 'AAR1503'

## Warning: Expecting logical in H20130 / R20130C8: got 'AAR1402'

## Warning: Expecting logical in H20356 / R20356C8: got 'AAR1401'

## Warning: Expecting logical in H20723 / R20723C8: got 'AAR1501'

## Warning: Expecting logical in H20836 / R20836C8: got 'AAR1403'

## Warning: Expecting logical in H21124 / R21124C8: got 'AIR1401'

## Warning: Expecting logical in H22731 / R22731C8: got 'AAR1404'

## Warning: Expecting logical in H23043 / R23043C8: got 'AAR1301'

## Warning: Expecting logical in H23406 / R23406C8: got 'AAB1201'

## Warning: Expecting logical in H23534 / R23534C8: got 'AAR1302'

## Warning: Expecting logical in H24446 / R24446C8: got 'AAR1202'

## Warning: Expecting logical in H24835 / R24835C8: got 'AAR1201'

## Warning: Expecting logical in H25510 / R25510C8: got 'AAR1103'

## Warning: Expecting logical in H27254 / R27254C8: got 'AAR1005'

## Warning: Expecting logical in H27685 / R27685C8: got 'AAR1104'

## Warning: Expecting logical in H28096 / R28096C8: got 'AAR1105'

## Warning: Expecting logical in H28261 / R28261C8: got 'AAR1001'

## Warning: Expecting logical in H28318 / R28318C8: got 'AAR1102'

## Warning: Expecting logical in H28349 / R28349C8: got 'AAR1003'

## Warning: Expecting logical in H28447 / R28447C8: got 'AAR1004'

## Warning: Expecting logical in H28769 / R28769C8: got 'AAR0907'

## Warning: Expecting logical in H28805 / R28805C8: got 'AAR1002'

## Warning: Expecting logical in H29100 / R29100C8: got 'AAR1006'

## Warning: Expecting logical in H29154 / R29154C8: got 'AAR1101'

## Warning: Expecting logical in H29388 / R29388C8: got 'AAR0904'

## Warning: Expecting logical in H30047 / R30047C8: got 'AAR0905'

## Warning: Expecting logical in H30729 / R30729C8: got 'AAR0903'

## Warning: Expecting logical in H31147 / R31147C8: got 'AAR0902'

## Warning: Expecting logical in H31285 / R31285C8: got 'AAR0901'

## Warning: Expecting logical in H31532 / R31532C8: got 'AAR0906'

## Warning: Expecting logical in H31861 / R31861C8: got 'AAR0802'

## Warning: Expecting logical in H32100 / R32100C8: got 'AAR0801'

## Warning: Expecting logical in H32617 / R32617C8: got 'AAB0702'

## Warning: Expecting logical in H32838 / R32838C8: got 'AAR0705'

## Warning: Expecting logical in H33003 / R33003C8: got 'AAR0803'

## Warning: Expecting logical in H33955 / R33955C8: got 'AAR0707'

## Warning: Expecting logical in H34161 / R34161C8: got 'AAR0704'

## Warning: Expecting logical in H34204 / R34204C8: got 'AAR0706'

## Warning: Expecting logical in H34537 / R34537C8: got 'AAB0701'

## Warning: Expecting logical in H35915 / R35915C8: got 'AAR0702'

## Warning: Expecting logical in H35976 / R35976C8: got 'AAR0604'

## Warning: Expecting logical in H36227 / R36227C8: got 'AAB0603'

## Warning: Expecting logical in H36235 / R36235C8: got 'AAB0607'

## Warning: Expecting logical in H36247 / R36247C8: got 'AAB0606'

## Warning: Expecting logical in H36394 / R36394C8: got 'AAB0601'

## Warning: Expecting logical in H36396 / R36396C8: got 'AAB0605'

## Warning: Expecting logical in H36415 / R36415C8: got 'AAR0601'

## Warning: Expecting logical in H36456 / R36456C8: got 'AAR0701'

## Warning: Expecting logical in H36556 / R36556C8: got 'AAR0703'

## Warning: Expecting logical in H36835 / R36835C8: got 'AAR0603'

## Warning: Expecting logical in H37472 / R37472C8: got 'AAR0502'

## Warning: Expecting logical in H37702 / R37702C8: got 'AAR0602'

## Warning: Expecting logical in H38063 / R38063C8: got 'AAB0604'

## Warning: Expecting logical in H38086 / R38086C8: got 'AAR0501'

## Warning: Expecting logical in H38537 / R38537C8: got 'AAB0703'

## Warning: Expecting logical in H38991 / R38991C8: got 'AAR0403'

## Warning: Expecting logical in H40064 / R40064C8: got 'AAR0401'

## Warning: Expecting logical in H40356 / R40356C8: got 'AAR0303'

## Warning: Expecting logical in H40927 / R40927C8: got 'AAR0402'

## Warning: Expecting logical in H42280 / R42280C8: got 'AAR0404'

## Warning: Expecting logical in H43738 / R43738C8: got 'AAB0203'

## Warning: Expecting logical in H44018 / R44018C8: got 'AAR0301'

## Warning: Expecting logical in H44451 / R44451C8: got 'AAB0202'

## Warning: Expecting logical in H44492 / R44492C8: got 'AAB0401'

## Warning: Expecting logical in H45587 / R45587C8: got 'AAB0205'

## Warning: Expecting logical in H46043 / R46043C8: got 'AAB0204'

## Warning: Expecting logical in H46116 / R46116C8: got 'AAR0302'

## Warning: Expecting logical in H46199 / R46199C8: got 'AAR0201'

## Warning: Expecting logical in H46569 / R46569C8: got 'AAB0201'

## Warning: Expecting logical in H46599 / R46599C8: got 'AAB0001'

## Warning: Expecting logical in H47375 / R47375C8: got 'AAB0206'

## Warning: Expecting logical in H47703 / R47703C8: got 'AAR0102'

## Warning: Expecting logical in H50534 / R50534C8: got 'AAB0101'

## Warning: Expecting logical in H51480 / R51480C8: got 'AAR9802'

## Warning: Expecting logical in H51487 / R51487C8: got 'AAR0001'

## Warning: Expecting logical in H51540 / R51540C8: got 'AAR0002'

## Warning: Expecting logical in H52765 / R52765C8: got 'AAR9804'

## Warning: Expecting logical in H52830 / R52830C8: got 'AAR9705'

## Warning: Expecting logical in H52976 / R52976C8: got 'AAR9704'

## Warning: Expecting logical in H53111 / R53111C8: got 'AAR9703'

## Warning: Expecting logical in H53366 / R53366C8: got 'AAR9803'

## Warning: Expecting logical in H53762 / R53762C8: got 'AAR0003'

## Warning: Expecting logical in H53862 / R53862C8: got 'AAR9801'

## Warning: Expecting logical in H54287 / R54287C8: got 'AAR9706'

## Warning: Expecting logical in H54486 / R54486C8: got 'AAR9702'

## Warning: Expecting logical in H54755 / R54755C8: got 'AAR9701'

## Warning: Expecting logical in H54928 / R54928C8: got 'AAR9607'

## Warning: Expecting logical in H55004 / R55004C8: got 'AAR9604'

## Warning: Expecting logical in H55189 / R55189C8: got 'AAR9605'

## Warning: Expecting logical in H55711 / R55711C8: got 'AAR9606'

## Warning: Expecting logical in H56402 / R56402C8: got 'AAR9603'

## Warning: Expecting logical in H57039 / R57039C8: got 'AAR9506'

## Warning: Expecting logical in H57306 / R57306C8: got 'AAR9504'

## Warning: Expecting logical in H57393 / R57393C8: got 'AAR9505'

## Warning: Expecting logical in H57497 / R57497C8: got 'AAR9601'

## Warning: Expecting logical in H57808 / R57808C8: got 'AAR9901'

## Warning: Expecting logical in H58375 / R58375C8: got 'AAR9503'

## Warning: Expecting logical in H58494 / R58494C8: got 'AAR9502'

## Warning: Expecting logical in H58850 / R58850C8: got 'AAR9408'

## Warning: Expecting logical in H59189 / R59189C8: got 'AAR9501'

## Warning: Expecting logical in H59337 / R59337C8: got 'AAR9406'

## Warning: Expecting logical in H59438 / R59438C8: got 'AAR9407'

## Warning: Expecting logical in H59565 / R59565C8: got 'AAR9405'

## Warning: Expecting logical in H59732 / R59732C8: got 'AAR9403'

## Warning: Expecting logical in H60209 / R60209C8: got 'AAR9404'

## Warning: Expecting logical in H61129 / R61129C8: got 'AAR8902S'

## Warning: Expecting logical in H61199 / R61199C8: got 'AAR9308'

## Warning: Expecting logical in H61240 / R61240C8: got 'AAR9401'

## Warning: Expecting logical in H61299 / R61299C8: got 'AAR9307'

## Warning: Expecting logical in H61327 / R61327C8: got 'AAR9306'

## Warning: Expecting logical in H62345 / R62345C8: got 'AAR9305'

## Warning: Expecting logical in H62731 / R62731C8: got 'AAR9304'

## Warning: Expecting logical in H63185 / R63185C8: got 'AAR9303'

## Warning: Expecting logical in H63508 / R63508C8: got 'AAR9301'

## Warning: Expecting logical in H63676 / R63676C8: got 'AAR9302'

## Warning: Expecting logical in H64079 / R64079C8: got 'AAR9301S'

## Warning: Expecting logical in H64152 / R64152C8: got 'AAR9201S'

## Warning: Expecting logical in H64671 / R64671C8: got 'AAR9204'

## Warning: Expecting logical in H65992 / R65992C8: got 'AAR9203'

## Warning: Expecting logical in H66001 / R66001C8: got 'AAR9101S'

## Warning: Expecting logical in H66199 / R66199C8: got 'AAR0101'

## Warning: Expecting logical in H66261 / R66261C8: got 'AAR9109'

## Warning: Expecting logical in H66347 / R66347C8: got 'AAR9108'

## Warning: Expecting logical in H66596 / R66596C8: got 'AAR9105'

## Warning: Expecting logical in H68019 / R68019C8: got 'AAR9102'

## Warning: Expecting logical in H68845 / R68845C8: got 'AAR9104'

## Warning: Expecting logical in H68884 / R68884C8: got 'AAR9103'

## Warning: Expecting logical in H68998 / R68998C8: got 'AAR9106'

## Warning: Expecting logical in H69278 / R69278C8: got 'AAR9005'

## Warning: Expecting logical in H69510 / R69510C8: got 'AAR9101'

## Warning: Expecting logical in H69548 / R69548C8: got 'AAR9003'

## Warning: Expecting logical in H69646 / R69646C8: got 'AAR9004'

## Warning: Expecting logical in H70093 / R70093C8: got 'AAR9006'

## Warning: Expecting logical in H71063 / R71063C8: got 'AAR9002'

## Warning: Expecting logical in H71199 / R71199C8: got 'AAR9202'

## Warning: Expecting logical in H72263 / R72263C8: got 'AAR8904'

## Warning: Expecting logical in H73425 / R73425C8: got 'AAR8903'

## Warning: Expecting logical in H73508 / R73508C8: got 'AAR8902'

## Warning: Expecting logical in H73881 / R73881C8: got 'AAR8810'

## Warning: Expecting logical in H73978 / R73978C8: got 'HZM8802'

## Warning: Expecting logical in H74052 / R74052C8: got 'AAR8901'

## Warning: Expecting logical in H74195 / R74195C8: got 'AAR8903'

## Warning: Expecting logical in H74347 / R74347C8: got 'AAR8811'

## Warning: Expecting logical in H74388 / R74388C8: got 'AAR8809'

## Warning: Expecting logical in H75111 / R75111C8: got 'AAR8805'

## Warning: Expecting logical in H75926 / R75926C8: got 'AAR8806'

## Warning: Expecting logical in H76101 / R76101C8: got 'AAR8802'

## Warning: Expecting logical in H76390 / R76390C8: got 'AAR8709'

## Warning: Expecting logical in H76585 / R76585C8: got 'AAR8808'

## Warning: Expecting logical in H76842 / R76842C8: got 'AAR8801'

## Warning: Expecting logical in H76867 / R76867C8: got 'AAR8803'

## Warning: Expecting logical in H77198 / R77198C8: got 'AAR8704S'

## Warning: Expecting logical in H77247 / R77247C8: got 'AAR8706'

## Warning: Expecting logical in H77322 / R77322C8: got 'AAR8708'

## Warning: Expecting logical in H77461 / R77461C8: got 'AAR8704'

## Warning: Expecting logical in H77720 / R77720C8: got 'AAR8707'

## Warning: Expecting logical in H77995 / R77995C8: got 'AAR8804'

## Warning: Expecting logical in H78475 / R78475C8: got 'AAR8703'

## Warning: Expecting logical in H79325 / R79325C8: got 'AAR8702'

## Warning: Expecting logical in H79460 / R79460C8: got 'AAR8802'

## Warning: Expecting logical in H80027 / R80027C8: got 'AAR8705'

## Warning: Expecting logical in H80383 / R80383C8: got 'AAR8607'

## Warning: Expecting logical in H80545 / R80545C8: got 'AAR8701'

## Warning: Expecting logical in H80665 / R80665C8: got 'AAR8606'

## Warning: Expecting logical in H80924 / R80924C8: got 'AAR8605'

## Warning: Expecting logical in H82517 / R82517C8: got 'AAR8603'

## Warning: Expecting logical in H82990 / R82990C8: got 'AAR8604'

## Warning: Expecting logical in H83890 / R83890C8: got 'AAR8507'

## Warning: Expecting logical in H84167 / R84167C8: got 'AAR8508'

## Warning: Expecting logical in H84891 / R84891C8: got 'AAR8502'

## Warning: Expecting logical in H84947 / R84947C8: got 'AAR8501'

## Warning: Expecting logical in H85793 / R85793C8: got 'AAR8415'

## Warning: Expecting logical in H86137 / R86137C8: got 'AAR8412'

## Warning: Expecting logical in H86281 / R86281C8: got 'AAR8410'

## Warning: Expecting logical in H86479 / R86479C8: got 'AAR8413'

## Warning: Expecting logical in H86710 / R86710C8: got 'AAR8408'

## Warning: Expecting logical in H87385 / R87385C8: got 'AAR8406'

## Warning: Expecting logical in H87443 / R87443C8: got 'AAR8405'

## Warning: Expecting logical in H88452 / R88452C8: got 'AAR8602'

## Warning: Expecting logical in H88756 / R88756C8: got 'AAR8404'

## Warning: Expecting logical in H89090 / R89090C8: got 'AAR8411'

## Warning: Expecting logical in H89138 / R89138C8: got 'AAR8401'

## Warning: Expecting logical in H89434 / R89434C8: got 'AAR8403'

## Warning: Expecting logical in H89654 / R89654C8: got 'AAR8307'

## Warning: Expecting logical in H89660 / R89660C8: got 'AAR8308'

## Warning: Expecting logical in H89663 / R89663C8: got 'AAR8407'

## Warning: Expecting logical in H89711 / R89711C8: got 'AAR8305'

## Warning: Expecting logical in H90012 / R90012C8: got 'AAR8303'

## Warning: Expecting logical in H90035 / R90035C8: got 'AAR8304'

## Warning: Expecting logical in H91240 / R91240C8: got 'AAR8414'

## Warning: Expecting logical in H92080 / R92080C8: got 'AAR8216'

## Warning: Expecting logical in H92203 / R92203C8: got 'AAR8301'

## Warning: Expecting logical in H92906 / R92906C8: got 'AAR8207'

## Warning: Expecting logical in H92955 / R92955C8: got 'AAR8214'

## Warning: Expecting logical in H93245 / R93245C8: got 'AAR8211'

## Warning: Expecting logical in H93253 / R93253C8: got 'AAR8212'

## Warning: Expecting logical in H93284 / R93284C8: got 'AAR8213'

## Warning: Expecting logical in H94000 / R94000C8: got 'AAR8204'

## Warning: Expecting logical in H94074 / R94074C8: got 'AAR8206'

## Warning: Expecting logical in H94086 / R94086C8: got 'AAR8205'

## Warning: Expecting logical in H94116 / R94116C8: got 'AAR8201'

## Warning: Expecting logical in H95201 / R95201C8: got 'AAR8117'

## Warning: Expecting logical in H96181 / R96181C8: got 'AAR8118'

## Warning: Expecting logical in H96299 / R96299C8: got 'AAR8114'

## Warning: Expecting logical in H96894 / R96894C8: got 'AAR8110'

## Warning: Expecting logical in H96979 / R96979C8: got 'AAR8109'

## Warning: Expecting logical in H96984 / R96984C8: got 'AAR8111'

## Warning: Expecting logical in H97419 / R97419C8: got 'AAR8107'

## Warning: Expecting logical in H98798 / R98798C8: got 'AAR8101'

## Warning: Expecting logical in H98840 / R98840C8: got 'AAR8102'

## Warning: Expecting logical in H99281 / R99281C8: got 'AAR8014'

## Warning: Expecting logical in H99423 / R99423C8: got 'AAR8015'

## Warning: Expecting logical in H99851 / R99851C8: got 'AAR8012'

## Warning: Expecting logical in H99922 / R99922C8: got 'AAR8202'

## Warning: Expecting logical in H100337 / R100337C8: got 'AAR8104'

## Warning: Expecting logical in H100780 / R100780C8: got 'AAR8210'

## Warning: Expecting logical in H100822 / R100822C8: got 'AAR8106'

## Warning: Expecting logical in H101474 / R101474C8: got 'AAR8010'

## Warning: Expecting logical in H101788 / R101788C8: got 'AAR8008'

## Warning: Expecting logical in H102815 / R102815C8: got 'AAR8003'

## Warning: Expecting logical in H103348 / R103348C8: got 'AAR8001'

## Warning: Expecting logical in H103685 / R103685C8: got 'AAR7917'

## Warning: Expecting logical in H104139 / R104139C8: got 'AAR7914'

## Warning: Expecting logical in H104288 / R104288C8: got 'AAR8108'

## Warning: Expecting logical in H104543 / R104543C8: got 'AAR7913'

## Warning: Expecting logical in H104671 / R104671C8: got 'AAR7910'

## Warning: Expecting logical in H104800 / R104800C8: got 'AAR7911'

## Warning: Expecting logical in H105040 / R105040C8: got 'AAR8004'

## Warning: Expecting logical in H105211 / R105211C8: got 'AAR7907'

## Warning: Expecting logical in H105451 / R105451C8: got 'AAR7906'

## Warning: Expecting logical in H105454 / R105454C8: got 'AAR7918'

## Warning: Expecting logical in H106163 / R106163C8: got 'AAR7905'

## Warning: Expecting logical in H106520 / R106520C8: got 'AAR7909'

## Warning: Expecting logical in H107177 / R107177C8: got 'AAR7904'

## Warning: Expecting logical in H107438 / R107438C8: got 'AAR7902'

## Warning: Expecting logical in H108311 / R108311C8: got 'AAR7814'

## Warning: Expecting logical in H108451 / R108451C8: got 'AAR7813'

## Warning: Expecting logical in H109458 / R109458C8: got 'AAR7815'

## Warning: Expecting logical in H109888 / R109888C8: got 'AAR7808'

## Warning: Expecting logical in H109947 / R109947C8: got 'AAR7810'

## Warning: Expecting logical in H110446 / R110446C8: got 'AAR7806'

## Warning: Expecting logical in H110926 / R110926C8: got 'AAR7915'

## Warning: Expecting logical in H110953 / R110953C8: got 'AAR7805'

## Warning: Expecting logical in H112489 / R112489C8: got 'AAR7809'

## Warning: Expecting logical in H113261 / R113261C8: got 'AAR7803'

## Warning: Expecting logical in H115062 / R115062C8: got 'AAR7804'

## Warning: Expecting logical in H115221 / R115221C8: got 'AAR7705'

## Warning: Expecting logical in H115837 / R115837C8: got 'AAR7704'

## Warning: Expecting logical in H116503 / R116503C8: got 'AAR7802'

## Warning: Expecting logical in H117275 / R117275C8: got 'AAR7701'

## Warning: Expecting logical in H117545 / R117545C8: got 'AAR7620'

## Warning: Expecting logical in H117608 / R117608C8: got 'AAR7618'

## Warning: Expecting logical in H118183 / R118183C8: got 'AAR7617'

## Warning: Expecting logical in H118783 / R118783C8: got 'AAR7603'

## Warning: Expecting logical in H118896 / R118896C8: got 'AAR8306'

## Warning: Expecting logical in H119792 / R119792C8: got 'AAR7601'

## Warning: Expecting logical in H120158 / R120158C8: got 'AAR7614'

## Warning: Expecting logical in H120588 / R120588C8: got 'AAR7613'

## Warning: Expecting logical in H120871 / R120871C8: got 'AAR7608'

## Warning: Expecting logical in H121565 / R121565C8: got 'AAR7610'

## Warning: Expecting logical in H122026 / R122026C8: got 'AAR7515'

## Warning: Expecting logical in H122063 / R122063C8: got 'AAR7511'

## Warning: Expecting logical in H122810 / R122810C8: got 'AAR7605'

## Warning: Expecting logical in H122834 / R122834C8: got 'AAR7510'

## Warning: Expecting logical in H123183 / R123183C8: got 'AAR7513'

## Warning: Expecting logical in H123345 / R123345C8: got 'AAR7512'

## Warning: Expecting logical in H123697 / R123697C8: got 'AAR7506'

## Warning: Expecting logical in H124381 / R124381C8: got 'AAR8005'

## Warning: Expecting logical in H125093 / R125093C8: got 'AAR7409'

## Warning: Expecting logical in H125315 / R125315C8: got 'AAR7508'

## Warning: Expecting logical in H126783 / R126783C8: got 'AAR7501'

## Warning: Expecting logical in H127167 / R127167C8: got 'AAR7416'

## Warning: Expecting logical in H127321 / R127321C8: got 'AAR7410'

## Warning: Expecting logical in H127622 / R127622C8: got 'AAR7414'

## Warning: Expecting logical in H127633 / R127633C8: got 'AAR7411'

## Warning: Expecting logical in H127777 / R127777C8: got 'AAR7413'

## Warning: Expecting logical in H128010 / R128010C8: got 'AAR7502'

## Warning: Expecting logical in H128425 / R128425C8: got 'AAR7404'

## Warning: Expecting logical in H128683 / R128683C8: got 'AAR7406'

## Warning: Expecting logical in H128846 / R128846C8: got 'AAR7408'

## Warning: Expecting logical in H129436 / R129436C8: got 'AAR7405'

## Warning: Expecting logical in H130930 / R130930C8: got 'AAR7317'

## Warning: Expecting logical in H131387 / R131387C8: got 'AAR7312'

## Warning: Expecting logical in H131897 / R131897C8: got 'AAR7314'

## Warning: Expecting logical in H132753 / R132753C8: got 'AAR7306'

## Warning: Expecting logical in H133321 / R133321C8: got 'AAR7305'

## Warning: Expecting logical in H134342 / R134342C8: got 'AAR7302'

## Warning: Expecting logical in H134710 / R134710C8: got 'AAR7232'

## Warning: Expecting logical in H134946 / R134946C8: got 'AAR7229'

## Warning: Expecting logical in H135016 / R135016C8: got 'AAR7230'

## Warning: Expecting logical in H135655 / R135655C8: got 'AAR7308'

## Warning: Expecting logical in H135789 / R135789C8: got 'AAR7225'

## Warning: Expecting logical in H136129 / R136129C8: got 'AAR7224'

## Warning: Expecting logical in H136274 / R136274C8: got 'AAR7221'

## Warning: Expecting logical in H136402 / R136402C8: got 'AAR7222'

## Warning: Expecting logical in H136536 / R136536C8: got 'AAR7213'

## Warning: Expecting logical in H136971 / R136971C8: got 'AAR7215'

## Warning: Expecting logical in H137585 / R137585C8: got 'AAR7228'

## Warning: Expecting logical in H138874 / R138874C8: got 'AAR7204'

## Warning: Expecting logical in H139085 / R139085C8: got 'AAR7207'

## Warning: Expecting logical in H139161 / R139161C8: got 'AAR7226'

## Warning: Expecting logical in H139532 / R139532C8: got 'AAR7116'

## Warning: Expecting logical in H139638 / R139638C8: got 'AAR7219'

## Warning: Expecting logical in H140131 / R140131C8: got 'AAR7218'

## Warning: Expecting logical in H140620 / R140620C8: got 'AAR7114'

## Warning: Expecting logical in H141045 / R141045C8: got 'AAR7216'

## Warning: Expecting logical in H141494 / R141494C8: got 'AAR7212'

## Warning: Expecting logical in H141647 / R141647C8: got 'AAR7211'

## Warning: Expecting logical in H141761 / R141761C8: got 'AAR7214'

## Warning: Expecting logical in H142320 / R142320C8: got 'AAR7107'

## Warning: Expecting logical in H142840 / R142840C8: got 'AAR7026'

## Warning: Expecting logical in H143236 / R143236C8: got 'AAR7210'

## Warning: Expecting logical in H143374 / R143374C8: got 'AAR7209'

## Warning: Expecting logical in H143696 / R143696C8: got 'AAR7106'

## Warning: Expecting logical in H144239 / R144239C8: got 'AAR7025'

## Warning: Expecting logical in H144649 / R144649C8: got 'AAR7108'

## Warning: Expecting logical in H145584 / R145584C8: got 'AAR7101'

## Warning: Expecting logical in H145925 / R145925C8: got 'AAR7028'

## Warning: Expecting logical in H146203 / R146203C8: got 'AAR7019'

## Warning: Expecting logical in H146589 / R146589C8: got 'AAR7021'

## Warning: Expecting logical in H146645 / R146645C8: got 'AAR7022'

## Warning: Expecting logical in H146796 / R146796C8: got 'AAR7024'

## Warning: Expecting logical in H147259 / R147259C8: got 'AAR7015'

## Warning: Expecting logical in H147763 / R147763C8: got 'AAR7023'

## Warning: Expecting logical in H148412 / R148412C8: got 'AAR7018'

## Warning: Expecting logical in H148661 / R148661C8: got 'AAR7011'

## Warning: Expecting logical in H148672 / R148672C8: got 'AAR7017'

## Warning: Expecting logical in H149830 / R149830C8: got 'AAR7013'

## Warning: Expecting logical in H150149 / R150149C8: got 'AAR7003'

## Warning: Expecting logical in H150346 / R150346C8: got 'AAR7009'

## Warning: Expecting logical in H150855 / R150855C8: got 'AAR7006'

## Warning: Expecting logical in H151107 / R151107C8: got 'AAR7020'

## Warning: Expecting logical in H151109 / R151109C8: got 'AAR7027'

## Warning: Expecting logical in H151120 / R151120C8: got 'AAR6908'

## Warning: Expecting logical in H151791 / R151791C8: got 'AAR7007'

## Warning: Expecting logical in H152859 / R152859C8: got 'AAR6907'

## Warning: Expecting logical in H179305 / R179305C8: got 'SA091'

## Warning: Expecting logical in H179306 / R179306C8: got 'SA097'

## Warning: Expecting logical in H179307 / R179307C8: got 'SA096'

## Warning: Expecting logical in H179308 / R179308C8: got 'AIR2505'

## Warning: Expecting logical in H179309 / R179309C8: got 'AIR2506'

## Warning: Expecting logical in H179310 / R179310C8: got 'AIR2203'

## Warning: Expecting logical in H179311 / R179311C8: got 'AIR2209'

## Warning: Expecting logical in H179312 / R179312C8: got 'SA084'

## Warning: Expecting logical in H179313 / R179313C8: got 'AIR2202'

## Warning: Expecting logical in H179314 / R179314C8: got 'AIR2206'

## Warning: Expecting logical in H179315 / R179315C8: got 'AIR2201'

## Warning: Expecting logical in H179316 / R179316C8: got 'AIR2202'

## Warning: Expecting logical in H179317 / R179317C8: got 'AIR2204'

## Warning: Expecting logical in H179318 / R179318C8: got 'AIR2601'

## Warning: Expecting logical in H179319 / R179319C8: got 'AIR2207'

## Warning: Expecting logical in H179320 / R179320C8: got 'AIR2403'

## Warning: Expecting logical in H179321 / R179321C8: got 'SA086'

## Warning: Expecting logical in H179322 / R179322C8: got 'SA090'

## Warning: Expecting logical in H179323 / R179323C8: got 'SA088'

## Warning: Expecting logical in H179324 / R179324C8: got 'SA092'

cat("Dimensiones originales (NTSB):", nrow(ntsb_raw), "x", ncol(ntsb_raw), "\n") # Mostrar dimensiones originales del dataset

## Dimensiones originales (NTSB): 179323 x 37

cat("Variables:\n")

## Variables:

cat(paste(" -", names(ntsb_raw), collapse = "\n"), "\n\n") # Mostrar nombres de todas las variables

##  - NtsbNo
##  - EventType
##  - Mkey
##  - EventDate
##  - City
##  - State
##  - Country
##  - ReportNo
##  - N
##  - HasSafetyRec
##  - ReportType
##  - OriginalPublishDate
##  - HighestInjuryLevel
##  - FatalInjuryCount
##  - SeriousInjuryCount
##  - MinorInjuryCount
##  - ProbableCause
##  - EventID
##  - Latitude
##  - Longitude
##  - Make
##  - Model
##  - AirCraftCategory
##  - AirportID
##  - AirportName
##  - AmateurBuilt
##  - NumberOfEngines
##  - Scheduled
##  - PurposeOfFlight
##  - FAR
##  - AirCraftDamage
##  - WeatherCondition
##  - Operator
##  - ReportStatus
##  - RepGenFlag
##  - DocketUrl
##  - DocketPublishDate

# ---------- 1B. LIMPIEZA GENERAL ----------
ntsb_limpio <- ntsb_raw %>% # Aplicar transformaciones de limpieza y creación de variables
  # Descomposicion de la variable fecha: extraer fecha, año y mes
  mutate(
    EventDate = as.Date(substr(EventDate, 1, 10)),  # Convertir a fecha (primeros 10 caracteres)
    Anio      = as.integer(format(EventDate, "%Y")), # Extraer año como entero
    Mes       = as.integer(format(EventDate, "%m"))   # Extraer mes como entero
  ) %>%
  filter(EventType == "ACC") %>% # Solo accidentes confirmados (excluir incidentes)
  filter(!is.na(EventDate)) %>% # Eliminar registros sin fecha válida
  filter(Anio >= 2015, Anio <= 2024) %>% # Delimitación temporal: últimos 10 años completos (2015-2024)
  # Limpiar variable de daño: tomar la primera aeronave listada
  mutate(
    DanoPrimario = str_split_fixed(replace_na(AirCraftDamage, "Unknown"), ",", 2)[, 1],  # Extraer primer daño
    DanoPrimario = case_when(                                                             # Clasificar daño
      DanoPrimario %in% c("Destroyed", "Substantial") ~ DanoPrimario,
      DanoPrimario == "Minor"                          ~ "Minor",
      TRUE                                             ~ "Unknown/None"
    )
  ) %>%
  # Limpiar categoría de aeronave (tomar primera si hay múltiples)
  mutate(
    Categoria = str_split_fixed(replace_na(AirCraftCategory, "AIR"), ",", 2)[, 1],  # Extraer primera categoría
    Categoria = case_when(                                                          # Renombrar categorías
      Categoria == "AIR"  ~ "Ala Fija",
      Categoria == "HELI" ~ "Helicóptero",
      Categoria == "GLI"  ~ "Planeador",
      Categoria == "BALL" ~ "Globo",
      TRUE                ~ "Otro"
    )
  ) %>%
  # Condición meteorológica limpia (unificar VMC/VFR e IMC/IFR)
  mutate(
    Meteorologia = case_when(
      WeatherCondition %in% c("VMC", "VFR") ~ "VMC/VFR",   # Condiciones visuales
      WeatherCondition %in% c("IMC", "IFR") ~ "IMC/IFR",   # Condiciones instrumentales
      TRUE                                   ~ "Desconocido"
    )
  ) %>%
  # Variable de lesiones totales (proxy de magnitud humana del siniestro)
  mutate(
    LesionesTotal = replace_na(FatalInjuryCount, 0) +      # Sumar víctimas fatales
      replace_na(SeriousInjuryCount, 0) +                  # + lesiones graves
      replace_na(MinorInjuryCount, 0),                     # + lesiones leves
    TieneFatales  = FatalInjuryCount > 0                   # Indicador de fatalidad
  ) %>%
  # Clasificar tipo de operación según FAR Part
  mutate(
    TipoOperacion = case_when(
      FAR == 121  ~ "Comercial (Pt.121)",   # Aerolíneas comerciales
      FAR == 135  ~ "Taxi Aéreo (Pt.135)",  # Operaciones de taxi aéreo
      FAR == 91   ~ "Aviación General (Pt.91)", # Aviación privada/recreativa
      TRUE        ~ "Otro/Desconocido"
    )
  )
cat("Registros tras limpieza (2015-2024, solo ACC):", nrow(ntsb_limpio), "\n\n") # Mostrar número de registros después de la limpieza

## Registros tras limpieza (2015-2024, solo ACC): 14945

# Resumen de eliminaciones
cat("Desglose de filtrado:\n")

## Desglose de filtrado:

cat("  - Registros originales:              ", nrow(ntsb_raw), "\n")

##   - Registros originales:               179323

cat("  - No accidentes (INC/OCC) excluidos:", nrow(ntsb_raw) - nrow(filter(ntsb_raw, EventType == "ACC")), "\n")

##   - No accidentes (INC/OCC) excluidos: 5861

cat("  - Sin fecha excluidos:               ",
    nrow(filter(ntsb_raw, EventType == "ACC")) -
      nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))), "\n")

##   - Sin fecha excluidos:                0

cat("  - Fuera del período 2015-2024:       ",
    nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))) - nrow(ntsb_limpio), "\n")

##   - Fuera del período 2015-2024:        158517

cat("  - Registros finales para análisis:   ", nrow(ntsb_limpio), "\n\n")

##   - Registros finales para análisis:    14945

# =============================================================================
# 2. BASE DE FRECUENCIA
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 2: BASE DE FRECUENCIA\n")

##   PASO 2: BASE DE FRECUENCIA

cat("====================================================\n\n")

## ====================================================

# ---------- 2A. CONSTRUCCIÓN ----------
# Variables relevantes para frecuencia: conteo de eventos por período
# Conteo ANUAL (referencia y correlación)
freq_anual <- ntsb_limpio %>%
  group_by(Anio) %>%                                    # Agrupar por año
  summarise(
    N_Accidentes = n(),                                 # Total accidentes por año
    N_Fatales    = sum(TieneFatales, na.rm = TRUE),     # Accidentes con fatales
    Pct_Fatales  = round(100 * N_Fatales / N_Accidentes, 1), # Porcentaje fatal
    .groups = "drop"                                    # Desagrupar resultado
  )
# Conteo MENSUAL para ajuste estadístico (más observaciones = mejor ajuste)
freq_mensual <- ntsb_limpio %>%
  group_by(Anio, Mes) %>%                               # Agrupar por año y mes
  summarise(N_Accidentes = n(), .groups = "drop")       # Contar accidentes por mes
# Asegurar que todos los meses estén representados (incluso si N=0)
todos_meses <- expand.grid(Anio = 2015:2024, Mes = 1:12)  # Crear grid completo de meses
freq_mensual <- todos_meses %>%
  left_join(freq_mensual, by = c("Anio", "Mes")) %>%     # Unir con datos reales
  mutate(N_Accidentes = replace_na(N_Accidentes, 0))     # Reemplazar NA con 0
N_frecuencia <- freq_mensual$N_Accidentes  # Vector para ajuste estadístico
# Mostrar estadísticas descriptivas de frecuencia mensual
cat("Estadísticas de Frecuencia mensual:\n")

## Estadísticas de Frecuencia mensual:

cat(sprintf("  Media:                    %.2f accidentes/mes\n", mean(N_frecuencia)))

##   Media:                    124.54 accidentes/mes

cat(sprintf("  Varianza:                 %.2f\n", var(N_frecuencia)))

##   Varianza:                 1299.34

cat(sprintf("  Índice de dispersión:     %.3f  (Var/Media)\n", var(N_frecuencia)/mean(N_frecuencia)))

##   Índice de dispersión:     10.433  (Var/Media)

cat(sprintf("  → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson\n\n"))

##   → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson

# Distribución por categorías relevantes
cat("Distribución por tipo de operación:\n")

## Distribución por tipo de operación:

print(ntsb_limpio %>% count(TipoOperacion, sort = TRUE))

## # A tibble: 4 × 2
##   TipoOperacion                n
##   <chr>                    <int>
## 1 Aviación General (Pt.91) 10947
## 2 Otro/Desconocido          3318
## 3 Taxi Aéreo (Pt.135)        416
## 4 Comercial (Pt.121)         264

cat("\nDistribución por condición meteorológica:\n")

## 
## Distribución por condición meteorológica:

print(ntsb_limpio %>% count(Meteorologia, sort = TRUE))

## # A tibble: 3 × 2
##   Meteorologia     n
##   <chr>        <int>
## 1 VMC/VFR      12032
## 2 Desconocido   2373
## 3 IMC/IFR        540

cat("\nDistribución por categoría de aeronave:\n")

## 
## Distribución por categoría de aeronave:

print(ntsb_limpio %>% count(Categoria, sort = TRUE))

## # A tibble: 5 × 2
##   Categoria       n
##   <chr>       <int>
## 1 Ala Fija    12674
## 2 Helicóptero  1683
## 3 Otro          278
## 4 Planeador     205
## 5 Globo         105

# =============================================================================
# 3. BASE DE SEVERIDAD
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 3: BASE DE SEVERIDAD\n")

##   PASO 3: BASE DE SEVERIDAD

cat("====================================================\n\n")

## ====================================================

# ---------- 3A. CONSTRUCCIÓN ----------
# Proxy de severidad: lesiones totales ponderadas por tipo
# Ponderación actuarial: Fatal=10, Serio=3, Menor=1
# (refleja el costo relativo humano/económico de cada tipo de lesión)
sev_limpio <- ntsb_limpio %>%
  mutate(
    # Índice de severidad ponderado (proxy de costo humano)
    IndSev = 10 * replace_na(FatalInjuryCount, 0) +      # Peso 10 para fatales
      3 * replace_na(SeriousInjuryCount, 0) +            # Peso 3 para graves
      1 * replace_na(MinorInjuryCount, 0),               # Peso 1 para leves
    # Variable binaria de siniestro catastrófico (>=5 víctimas fatales)
    EsCatastrofico = FatalInjuryCount >= 5
  ) %>%
  # Para modelar severidad: solo eventos con alguna víctima (IndSev > 0)
  filter(IndSev > 0)
cat("─── Variables de la Base de Severidad ───\n")

## ─── Variables de la Base de Severidad ───

cat(" Unidad de análisis:  Accidente individual\n")

##  Unidad de análisis:  Accidente individual

cat(" Período:             2015-2024\n")

##  Período:             2015-2024

cat(" Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)\n")

##  Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)

cat(" Variable respuesta:  IndSev = 10·Fatales + 3·Serios + 1·Menores\n")

##  Variable respuesta:  IndSev = 10·Fatales + 3·Serios + 1·Menores

cat("                      (índice actuarial ponderado por gravedad)\n\n")

##                       (índice actuarial ponderado por gravedad)

cat("Registros válidos para severidad:", nrow(sev_limpio), "\n")

## Registros válidos para severidad: 7155

cat("  (excluidos sin víctimas:", nrow(ntsb_limpio) - nrow(sev_limpio), ")\n\n")

##   (excluidos sin víctimas: 7790 )

X_severidad <- sev_limpio$IndSev
cat("Estadísticas de Severidad (IndSev):\n")

## Estadísticas de Severidad (IndSev):

cat(sprintf("  Media:      %.2f\n", mean(X_severidad)))

##   Media:      13.32

cat(sprintf("  Mediana:    %.2f\n", median(X_severidad)))

##   Mediana:    6.00

cat(sprintf("  Desv.Est:   %.2f\n", sd(X_severidad)))

##   Desv.Est:   61.54

cat(sprintf("  Sesgo:      %.3f\n", moments::skewness(X_severidad)))

##   Sesgo:      24.862

cat(sprintf("  Curtosis:   %.3f\n", moments::kurtosis(X_severidad)))

##   Curtosis:   709.275

cat(sprintf("  Mínimo:     %.0f\n", min(X_severidad)))

##   Mínimo:     1

cat(sprintf("  Máximo:     %.0f\n", max(X_severidad)))

##   Máximo:     2240

cat(sprintf("  CV:         %.1f%%\n\n", 100 * sd(X_severidad) / mean(X_severidad)))

##   CV:         462.1%

cat("Percentiles clave:\n")

## Percentiles clave:

pcts <- quantile(X_severidad, c(0.50, 0.75, 0.90, 0.95, 0.99, 0.999))
for (nm in names(pcts)) cat(sprintf("  P%-5s = %.1f\n", nm, pcts[nm]))

##   P50%   = 6.0
##   P75%   = 13.0
##   P90%   = 23.0
##   P95%   = 40.0
##   P99%   = 70.0
##   P99.9% = 1100.4

cat("Severidad por tipo de daño a la aeronave:\n")

## Severidad por tipo de daño a la aeronave:

print(sev_limpio %>%
        group_by(DanoPrimario) %>%
        summarise(N = n(), Media_IndSev = round(mean(IndSev), 2),
                  Mediana = median(IndSev), .groups = "drop") %>%
        arrange(desc(Media_IndSev)))

## # A tibble: 4 × 4
##   DanoPrimario     N Media_IndSev Mediana
##   <chr>        <int>        <dbl>   <dbl>
## 1 Destroyed     1977        27.8       13
## 2 Unknown/None   392        13.6        3
## 3 Minor           69         7.42       3
## 4 Substantial   4717         7.32       3

# =============================================================================
# 4. AJUSTE DE DISTRIBUCIONES
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 4: AJUSTE DE DISTRIBUCIONES\n")

##   PASO 4: AJUSTE DE DISTRIBUCIONES

cat("====================================================\n\n")

## ====================================================

# ---------- 4A. FRECUENCIA ----------
cat("─── 4A. AJUSTE DE FRECUENCIA (mensual) ───\n\n")

## ─── 4A. AJUSTE DE FRECUENCIA (mensual) ───

# Ajustar distribución Poisson a los datos de frecuencia mensual
ajuste_pois <- fitdist(N_frecuencia, "pois")           # Estimación por máxima verosimilitud
lambda_est  <- ajuste_pois$estimate["lambda"]          # Extraer parámetro lambda estimado
# Ajustar distribución Binomial Negativa a los datos
ajuste_nb   <- fitdist(N_frecuencia, "nbinom")         # Estimación por máxima verosimilitud
size_est    <- ajuste_nb$estimate["size"]              # Extraer parámetro size (r)
mu_est      <- ajuste_nb$estimate["mu"]                # Extraer parámetro mu (media)
# Mostrar resultados del ajuste Poisson
cat("  Poisson:\n")

##   Poisson:

cat(sprintf("    λ estimado = %.4f accidentes/mes\n", lambda_est))

##     λ estimado = 124.5417 accidentes/mes

cat(sprintf("    AIC = %.2f  |  BIC = %.2f\n\n", ajuste_pois$aic, ajuste_pois$bic))

##     AIC = 2020.71  |  BIC = 2023.50

# Mostrar resultados del ajuste Binomial Negativa
cat("  Binomial Negativa:\n")

##   Binomial Negativa:

cat(sprintf("    r (size) = %.4f\n", size_est))

##     r (size) = 13.5486

cat(sprintf("    μ (mu)   = %.4f\n", mu_est))

##     μ (mu)   = 124.5334

cat(sprintf("    AIC = %.2f  |  BIC = %.2f\n\n", ajuste_nb$aic, ajuste_nb$bic))

##     AIC = 1195.71  |  BIC = 1201.28

# Prueba Chi-Cuadrada para evaluar bondad de ajuste de Poisson
val_unicos  <- sort(unique(N_frecuencia))              # Valores únicos observados
obs_chi     <- table(N_frecuencia)                     # Frecuencias observadas
exp_pois_v  <- length(N_frecuencia) * dpois(as.integer(names(obs_chi)), lambda_est)  # Esperadas
validos_chi <- exp_pois_v >= 5                         # Celdas con frecuencia esperada >=5
chi2_stat   <- sum((as.numeric(obs_chi[validos_chi]) - exp_pois_v[validos_chi])^2 /
                     exp_pois_v[validos_chi])          # Estadístico chi-cuadrado
gl_chi2     <- sum(validos_chi) - 1 - 1                # Grados de libertad
p_chi2_pois <- pchisq(chi2_stat, df = max(gl_chi2, 1), lower.tail = FALSE)  # Valor p
# Mostrar resultados de prueba chi-cuadrado
cat(sprintf("  Prueba χ² Poisson: χ²=%.3f, gl=%d, p=%.4f\n", chi2_stat, gl_chi2, p_chi2_pois))

##   Prueba χ² Poisson: χ²=0.000, gl=-2, p=1.0000

if (p_chi2_pois > 0.05) {
  cat("  → No se rechaza Poisson (p > 0.05)\n\n")
} else {
  cat("  → Se rechaza Poisson (p ≤ 0.05) → Binomial Negativa es preferida\n\n")
}

##   → No se rechaza Poisson (p > 0.05)

# Seleccionar mejor modelo de frecuencia según AIC
if (ajuste_nb$aic < ajuste_pois$aic) {
  modelo_freq <- "nbinom"
  cat("  ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)\n\n")
} else {
  modelo_freq <- "pois"
  cat("  ✔ MODELO SELECCIONADO (Frecuencia): Poisson (menor AIC)\n\n")
}

##   ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)

# ---------- 4B. SEVERIDAD ----------
cat("─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───\n\n")

## ─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───

# Candidatos de cola pesada (adecuados para pérdidas con asimetría positiva)
ajuste_lnorm  <- fitdist(X_severidad, "lnorm") # Ajustar Log-Normal
ajuste_gamma  <- fitdist(X_severidad, "gamma") # Ajustar Gamma
ajuste_weib   <- fitdist(X_severidad, "weibull") # Ajustar Weibull
# Ajustar Pareto (con manejo de errores por posible fallo en convergencia)
ajuste_pareto <- tryCatch(
  fitdist(X_severidad, "pareto",
          start = list(shape = 2, scale = min(X_severidad) * 0.99)),
  error = function(e) NULL
)
# Tabla comparativa de distribuciones
distribs <- list("Log-Normal" = ajuste_lnorm,
                 "Gamma"      = ajuste_gamma,
                 "Weibull"    = ajuste_weib)
if (!is.null(ajuste_pareto)) distribs[["Pareto"]] <- ajuste_pareto  # Añadir Pareto si existe
# Crear tabla con métricas de bondad de ajuste
tabla_sev <- map_dfr(names(distribs), function(nm) {  # Mapear sobre cada distribución
  d <- distribs[[nm]]                                 # Extraer objeto fitdist
  data.frame(Distribución = nm, AIC = d$aic, BIC = d$bic,  # Crear fila con métricas
             LogLik = as.numeric(logLik(d)))          # Log-verosimilitud
}) %>% arrange(AIC)                                   # Ordenar por AIC ascendente
# Mostrar tabla comparativa
cat("Comparación de distribuciones de Severidad:\n")

## Comparación de distribuciones de Severidad:

print(tabla_sev, row.names = FALSE)

##  Distribución      AIC      BIC    LogLik
##    Log-Normal 47802.59 47816.34 -23899.29
##        Pareto 48328.07 48341.82 -24162.03
##       Weibull 49570.79 49584.54 -24783.39
##         Gamma 50664.67 50678.42 -25330.34

# Seleccionar mejor modelo de severidad (menor AIC)
mejor_sev <- tabla_sev$Distribución[1]
cat(sprintf("  ✔ MODELO SELECCIONADO (Severidad): %s (menor AIC)\n\n", mejor_sev))

##   ✔ MODELO SELECCIONADO (Severidad): Log-Normal (menor AIC)

# Extraer parámetros del mejor modelo
ajuste_sev_best <- distribs[[mejor_sev]]
params_sev      <- ajuste_sev_best$estimate
cat("  Parámetros estimados:\n")

##   Parámetros estimados:

for (nm in names(params_sev)) cat(sprintf("    %s = %.5f\n", nm, params_sev[nm]))  # Mostrar parámetros

##     meanlog = 1.72442
##     sdlog = 1.21758

# Función de distribución acumulada (CDF) según el mejor modelo
cdf_fn <- switch(mejor_sev,
                 "Log-Normal" = function(x) plnorm(x, params_sev["meanlog"], params_sev["sdlog"]),
                 "Gamma"      = function(x) pgamma(x, params_sev["shape"], params_sev["rate"]),
                 "Weibull"    = function(x) pweibull(x, params_sev["shape"], params_sev["scale"]),
                 "Pareto"     = function(x) ppareto(x, params_sev["shape"], params_sev["scale"]),
)
# Prueba Kolmogorov-Smirnov para evaluar bondad de ajuste
ks_sev <- ks.test(X_severidad, cdf_fn)               # Comparar empírica vs teórica

## Warning in ks.test.default(X_severidad, cdf_fn): ties should not be present for
## the one-sample Kolmogorov-Smirnov test

cat(sprintf("\n  Prueba K-S (%s): D=%.4f, p=%.4f\n", mejor_sev, ks_sev$statistic, ks_sev$p.value))

## 
##   Prueba K-S (Log-Normal): D=0.1563, p=0.0000

cat(ifelse(ks_sev$p.value > 0.05,                    # Interpretación del resultado
           "  → No se rechaza el ajuste (p > 0.05)\n\n",
           "  → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible\n\n"))

##   → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible

# =============================================================================
# 5. CORRELACIÓN ENTRE FRECUENCIA Y SEVERIDAD
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD\n")

##   PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD

cat("====================================================\n\n")

## ====================================================

# Calcular severidad media y total por año
sev_anual <- sev_limpio %>%
  group_by(Anio) %>%                                  # Agrupar por año
  summarise(
    IndSev_Media = mean(IndSev),                      # Severidad media anual
    IndSev_Total = sum(IndSev),                       # Severidad total anual
    IndSev_Max   = max(IndSev),                       # Severidad máxima anual
    IndSev_P90   = quantile(IndSev, 0.90),            # Percentil 90 anual
    N_con_sev    = n(),                               # Número de eventos con severidad
    .groups = "drop"
  )
# Unir frecuencia anual con severidad anual
corr_df <- freq_anual %>%
  left_join(sev_anual, by = "Anio") %>%               # Combinar datasets por año
  filter(!is.na(IndSev_Media))                        # Excluir años sin severidad
# Calcular coeficientes de correlación
cor_pearson_med  <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media,  method = "pearson")  # Correlación lineal
cor_spearman_med <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media,  method = "spearman") # Correlación por rangos
cor_pearson_tot  <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total,  method = "pearson")  # Con totales
cor_spearman_tot <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total,  method = "spearman")
cor_pearson_p90  <- cor(corr_df$N_Accidentes, corr_df$IndSev_P90,    method = "pearson")  # Con percentil 90
# Tests de significancia estadística
test_med <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Media, method = "pearson")  # Prueba para media
test_tot <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Total, method = "pearson")  # Prueba para total
# Mostrar resultados de correlaciones
cat("¿Los años con más siniestros tienen siniestros más graves?\n\n")

## ¿Los años con más siniestros tienen siniestros más graves?

cat("  Correlación Freq. ↔ Severidad MEDIA por año:\n")

##   Correlación Freq. ↔ Severidad MEDIA por año:

cat(sprintf("    Pearson  r = %.4f  (p = %.4f)\n", cor_pearson_med, test_med$p.value))

##     Pearson  r = -0.1183  (p = 0.7447)

cat(sprintf("    Spearman ρ = %.4f\n", cor_spearman_med))

##     Spearman ρ = 0.0303

cat("\n  Correlación Freq. ↔ Severidad TOTAL por año:\n")

## 
##   Correlación Freq. ↔ Severidad TOTAL por año:

cat(sprintf("    Pearson  r = %.4f  (p = %.4f)\n", cor_pearson_tot, test_tot$p.value))

##     Pearson  r = 0.1732  (p = 0.6322)

cat(sprintf("    Spearman ρ = %.4f\n", cor_spearman_tot))

##     Spearman ρ = 0.1758

cat("\n  Correlación Freq. ↔ Percentil 90 de Severidad:\n")

## 
##   Correlación Freq. ↔ Percentil 90 de Severidad:

cat(sprintf("    Pearson  r = %.4f\n", cor_pearson_p90))

##     Pearson  r = 0.4391

# Función para interpretar la correlación
cat("\n  Interpretación:\n")

## 
##   Interpretación:

interpretar_cor <- function(r, p) {
  fuerza <- if (abs(r) > 0.7) "fuerte" else if (abs(r) > 0.4) "moderada" else "débil"
  dir    <- if (r > 0) "positiva" else "negativa"
  sig    <- if (p < 0.05) "(estadísticamente significativa)" else "(no significativa, p ≥ 0.05)"
  cat(sprintf("    Asociación %s %s %s\n", fuerza, dir, sig))
}
interpretar_cor(cor_pearson_med, test_med$p.value)

##     Asociación débil negativa (no significativa, p ≥ 0.05)

# Mostrar tabla resumen anual
cat("\n  Tabla resumen anual (Frecuencia vs Severidad):\n")

## 
##   Tabla resumen anual (Frecuencia vs Severidad):

print(corr_df %>% dplyr::select(Anio, N_Accidentes, IndSev_Media, IndSev_Total, IndSev_Max), n=20)

## # A tibble: 10 × 5
##     Anio N_Accidentes IndSev_Media IndSev_Total IndSev_Max
##    <int>        <int>        <dbl>        <dbl>      <dbl>
##  1  2015         1487         16.3        12652       2240
##  2  2016         1541         13.0         9737        660
##  3  2017         1512         11.2         8115        414
##  4  2018         1565         16.2        12214       1910
##  5  2019         1504         14.9        11190       1570
##  6  2020         1307         14.5         9059       1760
##  7  2021         1520         11.1         7833        620
##  8  2022         1542         11.9         8774       1320
##  9  2023         1482         10.0         6665        120
## 10  2024         1485         13.5         9049       1736

# =============================================================================
# 6. SIMULACIÓN MONTE CARLO
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 6: SIMULACIÓN MONTE CARLO\n")

##   PASO 6: SIMULACIÓN MONTE CARLO

cat("====================================================\n\n")

## ====================================================

n_sim  <- 100000  # Número de simulaciones para Monte Carlo
escala <- 12      # Factor para pasar de parámetros mensuales a anuales
# Funciones simuladoras (frecuencia)
simular_N <- function(n, modelo) {
  if (modelo == "nbinom") rnbinom(n, size = size_est, mu = mu_est * escala)  # Binomial Negativa anual
  else                    rpois(n, lambda = lambda_est * escala)              # Poisson anual
}
# Función simuladora de severidad dada distribución
simular_X <- function(n, dist_nombre, params) {
  switch(dist_nombre,
         "Log-Normal" = rlnorm(n, params["meanlog"], params["sdlog"]),
         "Gamma"      = rgamma(n, params["shape"], params["rate"]),
         "Weibull"    = rweibull(n, params["shape"], params["scale"]),
         "Pareto"     = rpareto(n, params["shape"], params["scale"]),
         stop("Distribución no reconocida")
  )
}
# ---------- Simulación principal (mejor modelo) ----------
S_simulado <- numeric(n_sim)  # Vector para almacenar pérdidas agregadas simuladas
for (i in seq_len(n_sim)) {   # Bucle sobre cada simulación
  N_i <- simular_N(1, modelo_freq)  # Simular número de siniestros anual
  if (N_i > 0) {                    # Si hay al menos un siniestro
    S_simulado[i] <- sum(simular_X(N_i, mejor_sev, params_sev))  # Sumar severidades
  }  # Si N_i = 0, S_simulado[i] permanece como 0
}
# Mostrar estadísticas de la pérdida agregada simulada
cat("Estadísticas de la Pérdida Agregada Anual S (IndSev units):\n")

## Estadísticas de la Pérdida Agregada Anual S (IndSev units):

cat(sprintf("  E[S] (prima pura):   %.2f\n", mean(S_simulado)))

##   E[S] (prima pura):   17601.16

cat(sprintf("  Desv. Est.:          %.2f\n", sd(S_simulado)))

##   Desv. Est.:          4876.76

cat(sprintf("  Mediana:             %.2f\n", median(S_simulado)))

##   Mediana:             17165.24

cat(sprintf("  Máximo simulado:     %.2f\n", max(S_simulado)))

##   Máximo simulado:     45473.95

cat(sprintf("  Sesgo:               %.3f\n", moments::skewness(S_simulado)))

##   Sesgo:               0.542

cat(sprintf("  Curtosis:            %.3f\n\n", moments::kurtosis(S_simulado)))

##   Curtosis:            3.425

# =============================================================================
# 7. MEDIDAS DE RIESGO: VaR y TVaR
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 7: VaR y TVaR — MODELO GANADOR\n")

##   PASO 7: VaR y TVaR — MODELO GANADOR

cat("====================================================\n\n")

## ====================================================

niveles <- c(0.90, 0.95, 0.99, 0.995, 0.999)
# Construir tabla con VaR y TVaR
tabla_riesgo <- data.frame(
  Nivel   = paste0(niveles * 100, "%"),                     # Etiquetas de nivel
  VaR     = quantile(S_simulado, probs = niveles),          # Value at Risk (cuantiles)
  TVaR    = sapply(niveles, function(p) {                   # Tail Value at Risk
    u <- quantile(S_simulado, p)                            # Umbral VaR
    mean(S_simulado[S_simulado > u])                        # Media condicional superior
  })
) %>% mutate(Carga_Cola = round(TVaR - VaR, 2))              # Diferencia TVaR - VaR
# Mostrar tabla de medidas de riesgo
cat("Tabla de Medidas de Riesgo (mejor modelo):\n")

## Tabla de Medidas de Riesgo (mejor modelo):

print(tabla_riesgo, row.names = FALSE)

##  Nivel      VaR     TVaR Carga_Cola
##    90% 24049.92 27079.31    3029.39
##    95% 26253.60 29097.81    2844.21
##    99% 30899.05 33327.50    2428.46
##  99.5% 32668.07 34998.21    2330.14
##  99.9% 36220.43 38463.93    2243.49

# Calcular medidas específicas para nivel 99.5%
VaR_995  <- quantile(S_simulado, 0.995)
TVaR_995 <- mean(S_simulado[S_simulado > VaR_995])
E_S      <- mean(S_simulado)
# Mostrar resumen de medidas de riesgo
cat(sprintf("\n  Prima de Riesgo Pura E[S]:   %.2f\n", E_S))

## 
##   Prima de Riesgo Pura E[S]:   17601.16

cat(sprintf("  VaR  al 99.5%%:               %.2f\n", VaR_995))

##   VaR  al 99.5%:               32668.07

cat(sprintf("  TVaR al 99.5%%:               %.2f\n\n", TVaR_995))

##   TVaR al 99.5%:               34998.21

# =============================================================================
# 8. ANÁLISIS DE SENSIBILIDAD DE DISTRIBUCIÓN DE SEVERIDAD
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD\n")

##   PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD

cat("====================================================\n\n")

## ====================================================

cat("¿Qué pasa si cambiamos la distribución de severidad?\n")

## ¿Qué pasa si cambiamos la distribución de severidad?

cat("¿Cómo afecta al VaR y TVaR?\n\n")

## ¿Cómo afecta al VaR y TVaR?

# Todas las distribuciones ajustadas
candidatos_sev <- names(distribs)  # Nombres de distribuciones candidatas
# Realizar simulaciones para cada distribución candidata
resultados_sens <- map_dfr(candidatos_sev, function(dist_nm) {
  p <- distribs[[dist_nm]]$estimate  # Parámetros de la distribución actual
  cat(sprintf("  Simulando con distribución: %s...\n", dist_nm))
  # Simular pérdida agregada con esta distribución
  S_alt <- numeric(n_sim)
  for (i in seq_len(n_sim)) {
    N_i <- simular_N(1, modelo_freq)       # Mismo modelo de frecuencia
    if (N_i > 0) {
      S_alt[i] <- sum(simular_X(N_i, dist_nm, p))  # Sumar con distribución alternativa
    }
  }
  # Calcular medidas de riesgo para esta simulación
  var90  <- quantile(S_alt, 0.90)
  var95  <- quantile(S_alt, 0.95)
  var99  <- quantile(S_alt, 0.99)
  var995 <- quantile(S_alt, 0.995)
  # Devolver dataframe con resultados
  data.frame(
    Distribución = dist_nm,
    Es_Ganador   = dist_nm == mejor_sev,   # Indicador si es el mejor modelo
    AIC          = distribs[[dist_nm]]$aic,
    E_S          = mean(S_alt),
    VaR_90       = var90,
    VaR_95       = var95,
    VaR_99       = var99,
    VaR_995      = var995,
    TVaR_995     = mean(S_alt[S_alt > var995]),
    Carga_Cola   = mean(S_alt[S_alt > var995]) - var995  # Diferencia TVaR - VaR
  )
})

##   Simulando con distribución: Log-Normal...
##   Simulando con distribución: Gamma...
##   Simulando con distribución: Weibull...
##   Simulando con distribución: Pareto...

# Mostrar tabla de sensibilidad completa
cat("\n  Tabla de sensibilidad completa:\n")

## 
##   Tabla de sensibilidad completa:

print(resultados_sens %>% dplyr::select(-Es_Ganador) %>%   # Excluir columna indicadora
        mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE)  # Redondear numéricos

##  Distribución      AIC      E_S   VaR_90   VaR_95   VaR_99  VaR_995 TVaR_995
##    Log-Normal 47802.59 17592.46 24066.36 26344.66 30871.61 32563.39 34983.93
##         Gamma 50664.67 19878.96 27078.97 29582.38 34694.77 36641.96 39416.56
##       Weibull 49570.79 18206.11 24866.54 27155.18 31762.79 33649.42 36095.92
##        Pareto 48328.07 17330.45 23666.71 25870.22 30405.53 32194.12 34461.42
##  Carga_Cola
##     2420.54
##     2774.60
##     2446.50
##     2267.30

# Calcular valores base (mejor modelo)
base_VaR  <- resultados_sens %>% filter(Es_Ganador) %>% pull(VaR_995)
base_TVaR <- resultados_sens %>% filter(Es_Ganador) %>% pull(TVaR_995)
# Mostrar variación relativa respecto al mejor modelo
cat("\n  Variación relativa al mejor modelo (", mejor_sev, "):\n", sep = "")

## 
##   Variación relativa al mejor modelo (Log-Normal):

resultados_sens %>%
  mutate(
    Delta_VaR_pct  = round(100 * (VaR_995  - base_VaR)  / base_VaR,  1),   # Cambio porcentual en VaR
    Delta_TVaR_pct = round(100 * (TVaR_995 - base_TVaR) / base_TVaR, 1)    # Cambio porcentual en TVaR
  ) %>%
  dplyr::select(Distribución, Delta_VaR_pct, Delta_TVaR_pct) %>%           # Seleccionar columnas
  { cat(sprintf("  %-12s  ΔVaR99.5%%: %+.1f%%   ΔTVaR99.5%%: %+.1f%%\n",
                .$Distribución, .$Delta_VaR_pct, .$Delta_TVaR_pct))
    invisible(.) }

##   Log-Normal    ΔVaR99.5%: +0.0%   ΔTVaR99.5%: +0.0%
##    Gamma         ΔVaR99.5%: +12.5%   ΔTVaR99.5%: +12.7%
##    Weibull       ΔVaR99.5%: +3.3%   ΔTVaR99.5%: +3.2%
##    Pareto        ΔVaR99.5%: -1.1%   ΔTVaR99.5%: -1.5%

cat("\n  Interpretación:\n")

## 
##   Interpretación:

max_var_var <- max(abs(resultados_sens$VaR_995  - base_VaR)  / base_VaR)
max_var_tva <- max(abs(resultados_sens$TVaR_995 - base_TVaR) / base_TVaR)
cat(sprintf("    La elección de distribución puede mover el VaR99.5%% hasta %.1f%%\n", 100 * max_var_var))

##     La elección de distribución puede mover el VaR99.5% hasta 12.5%

cat(sprintf("    y el TVaR99.5%% hasta %.1f%% respecto al modelo base.\n", 100 * max_var_tva))

##     y el TVaR99.5% hasta 12.7% respecto al modelo base.

cat("    Esto ilustra el riesgo de modelo (model risk) en actuaría.\n\n")

##     Esto ilustra el riesgo de modelo (model risk) en actuaría.

# =============================================================================
# 9. INDICE DE CARGA DE RIESGO (Loading Ratio)
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)\n")

##   PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)

cat("====================================================\n\n")

## ====================================================

# Calcular índices de carga y cocientes
resultados_sens <- resultados_sens %>%
  mutate(
    Loading_Ratio = round(TVaR_995 / E_S, 3),           # TVaR / E[S] (carga de cola)
    Cociente_VaR_TVaR = round(VaR_995 / TVaR_995, 3)    # VaR / TVaR (1 = cola liviana, <1 = cola pesada)
  )
cat("  Loading Ratio = TVaR99.5% / E[S]  (mayor = cola más pesada)\n")

##   Loading Ratio = TVaR99.5% / E[S]  (mayor = cola más pesada)

cat("  Cociente VaR/TVaR                 (menor = mayor riesgo de cola)\n\n")

##   Cociente VaR/TVaR                 (menor = mayor riesgo de cola)

# Mostrar tabla con índices de carga
print(resultados_sens %>% dplyr::select(Distribución, E_S, TVaR_995, Loading_Ratio, Cociente_VaR_TVaR) %>%
        mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE)

##  Distribución      E_S TVaR_995 Loading_Ratio Cociente_VaR_TVaR
##    Log-Normal 17592.46 34983.93          1.99              0.93
##         Gamma 19878.96 39416.56          1.98              0.93
##       Weibull 18206.11 36095.92          1.98              0.93
##        Pareto 17330.45 34461.42          1.99              0.93

# =============================================================================
# 10. ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)\n")

##   PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)

cat("====================================================\n\n")

## ====================================================

cat("─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───\n\n")

## ─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───

cat("  ¿Qué porcentaje de accidentes concentra el 80% del daño total?\n\n")

##   ¿Qué porcentaje de accidentes concentra el 80% del daño total?

# Ordenar severidades de mayor a menor
sev_ord <- sort(X_severidad, decreasing = TRUE)        # Vector ordenado descendente
sev_cum <- cumsum(sev_ord) / sum(sev_ord)              # Proporción acumulada de daño
n_20pct  <- ceiling(0.20 * length(sev_ord))            # Número que representa 20% de accidentes
pct_dano <- sev_cum[n_20pct]                           # Daño acumulado en ese 20%
n_para80 <- which(sev_cum >= 0.80)[1]                  # Accidentes necesarios para 80% daño
pct_acc  <- round(100 * n_para80 / length(sev_ord), 1) # Porcentaje de accidentes para 80% daño
# Mostrar resultados del análisis Pareto
cat(sprintf("  El 20%% más grave de los accidentes concentra el %.1f%% del daño total\n", 100 * pct_dano))

##   El 20% más grave de los accidentes concentra el 66.2% del daño total

cat(sprintf("  El %.1f%% de los accidentes concentra el 80%% del daño total\n", pct_acc))

##   El 34.4% de los accidentes concentra el 80% del daño total

cat(sprintf("  → El principio 80/20 (Pareto) %s verificado en estos datos\n\n",
            ifelse(pct_acc < 25, "SÍ es", "NO es completamente")))

##   → El principio 80/20 (Pareto) NO es completamente verificado en estos datos

# =============================================================================
# 11. PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA
# =============================================================================
cat("====================================================\n")

## ====================================================

cat("  PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA\n")

##   PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA

cat("====================================================\n\n")

## ====================================================

cat("─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───\n\n")

## ─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───

# Calcular estadísticas de severidad agrupadas por condición meteorológica
perfil_meteo <- sev_limpio %>%
  group_by(Meteorologia) %>%                           # Agrupar por clima
  summarise(
    N            = n(),                                # Número de accidentes
    Media_Sev    = round(mean(IndSev), 2),             # Severidad media
    P90_Sev      = round(quantile(IndSev, 0.90), 2),   # Percentil 90
    P99_Sev      = round(quantile(IndSev, 0.99), 2),   # Percentil 99
    Pct_Fatal    = round(100 * mean(FatalInjuryCount > 0), 1),  # Porcentaje con fatales
    .groups = "drop"
  ) %>% arrange(desc(Media_Sev))                       # Ordenar por media descendente
# Mostrar perfil meteorológico
cat("  Perfil por condición meteorológica:\n")

##   Perfil por condición meteorológica:

print(perfil_meteo, row.names = FALSE)

## # A tibble: 3 × 6
##   Meteorologia     N Media_Sev P90_Sev P99_Sev Pct_Fatal
##   <chr>        <int>     <dbl>   <dbl>   <dbl>     <dbl>
## 1 Desconocido   1494      31.3      40   416.       76.4
## 2 IMC/IFR        422      18.1      40    87.9      74.9
## 3 VMC/VFR       5239       7.8      20    50        35.2

# =============================================================================
# 12. GRAFICAS
# =============================================================================
# Paleta de colores
COL_AZUL    <- "#2C5F8A"
COL_ROJO    <- "#C0392B"
COL_VERDE   <- "#1A7A4A"
COL_MORADO  <- "#7D3C98"
COL_NARANJA <- "#D35400"
COL_GRIS    <- "#566573"
# Tema base para todas las gráficas (consistencia visual)
tema_base <- theme_minimal(base_size = 12) +            # Tema minimalista base
  theme(
    plot.title    = element_text(face = "bold", size = 13, hjust = 0.5),    # Título centrado negrita
    plot.subtitle = element_text(hjust = 0.5, color = "grey40"),            # Subtítulo centrado gris
    axis.title    = element_text(face = "bold"),        # Títulos de ejes negrita
    legend.position = "bottom",                         # Leyenda al fondo
    panel.grid.minor = element_blank()                  # Eliminar líneas de grid menores
  )
# ------ FIG 1: Evolución anual de accidentes ------
p1 <- ggplot(freq_anual, aes(x = Anio, y = N_Accidentes)) +
  geom_col(fill = COL_AZUL, alpha = 0.85, color = "white") +
  geom_text(aes(label = N_Accidentes), vjust = -0.4,
            size = 3.5, fontface = "bold", color = "grey30") +
  scale_x_continuous(breaks = 2015:2024) +
  labs(title = "Accidentes de Aviación por Año — NTSB",
       subtitle = "Período 2015–2024 | Solo accidentes confirmados (ACC)",
       x = "Año", y = "N° de accidentes") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 2: Distribución de frecuencia mensual + ajustes ------
df_freq_plot <- data.frame(n = N_frecuencia)
x_grid <- seq(min(N_frecuencia), max(N_frecuencia))

p2 <- ggplot(df_freq_plot, aes(x = n)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 5, fill = COL_AZUL, color = "white", alpha = 0.7) +
  stat_function(fun = function(x) dpois(round(x), lambda = lambda_est),
                color = COL_ROJO, linewidth = 1.2, linetype = "dashed",
                n = 200) +
  stat_function(fun = function(x) dnbinom(round(x), size = size_est, mu = mu_est),
                color = COL_VERDE, linewidth = 1.2, n = 200) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = paste0("Poisson (rojo)  λ=", round(lambda_est,1),
                          "\nBin.Neg (verde) μ=", round(mu_est,1)),
           size = 3.2, color = "grey30") +
  labs(title = "Frecuencia Mensual de Accidentes — Ajuste de Distribuciones",
       subtitle = "NTSB 2015–2024",
       x = "Accidentes por mes", y = "Densidad") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 3: Histograma + CDF de Severidad ------
df_sev_plot <- data.frame(x = X_severidad)
x_seq <- seq(min(X_severidad), quantile(X_severidad, 0.995), length.out = 300)

p3 <- ggplot(df_sev_plot, aes(x = x)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 60, fill = COL_NARANJA, color = "white", alpha = 0.7) +
  stat_function(fun = cdf_fn %>% {function(x) { h <- 0.001; (.(x+h) - .(x-h))/(2*h) }},
                color = COL_MORADO, linewidth = 1.3, n = 300) +
  coord_cartesian(xlim = c(0, quantile(X_severidad, 0.99))) +
  labs(title = paste("Ajuste de Severidad —", mejor_sev),
       subtitle = "Índice actuarial ponderado (10·Fatal + 3·Serio + 1·Menor)",
       x = "Índice de Severidad (IndSev)", y = "Densidad") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 4: Q-Q de Severidad ------
q_emp <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
q_teo <- switch(mejor_sev,
                "Log-Normal" = qlnorm(ppoints(min(length(X_severidad), 1000)),
                                      params_sev["meanlog"], params_sev["sdlog"]),
                "Gamma"      = qgamma(ppoints(min(length(X_severidad), 1000)),
                                      params_sev["shape"], params_sev["rate"]),
                "Weibull"    = qweibull(ppoints(min(length(X_severidad), 1000)),
                                        params_sev["shape"], params_sev["scale"]),
                "Pareto"     = qpareto(ppoints(min(length(X_severidad), 1000)),
                                       params_sev["shape"], params_sev["scale"]),
                {
                  q_emp_alt <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
                  q_emp_alt  # fallback
                }
)
df_qq <- data.frame(teorico = sort(q_teo), empirico = sort(q_emp))
p4 <- ggplot(df_qq, aes(x = teorico, y = empirico)) +
  geom_point(color = COL_AZUL, alpha = 0.5, size = 0.9) +
  geom_abline(slope = 1, intercept = 0, color = COL_ROJO,
              linewidth = 1.2, linetype = "dashed") +
  labs(title = paste("Q-Q Plot de Severidad —", mejor_sev),
       subtitle = "Puntos sobre la línea roja = buen ajuste",
       x = "Cuantiles teóricos", y = "Cuantiles empíricos") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 5: Pérdida Agregada S con VaR y TVaR ------
df_S <- data.frame(S = S_simulado)
p5 <- ggplot(df_S, aes(x = S)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 80, fill = COL_AZUL, color = "white", alpha = 0.7) +
  geom_density(color = "#2C3E50", linewidth = 1) +
  geom_vline(xintercept = E_S,      color = COL_VERDE,  linewidth = 1.2, linetype = "dashed") +
  geom_vline(xintercept = VaR_995,  color = COL_ROJO,   linewidth = 1.4) +
  geom_vline(xintercept = TVaR_995, color = COL_MORADO, linewidth = 1.4, linetype = "dotdash") +
  annotate("text", x = E_S,      y = Inf, vjust=2, hjust=-0.1,
           label = sprintf("E[S]=%.0f", E_S), color=COL_VERDE, size=3.2, fontface="bold") +
  annotate("text", x = VaR_995,  y = Inf, vjust=2, hjust=-0.1,
           label = sprintf("VaR99.5%%=%.0f", VaR_995), color=COL_ROJO, size=3.2, fontface="bold") +
  annotate("text", x = TVaR_995, y = Inf, vjust=4, hjust=-0.1,
           label = sprintf("TVaR=%.0f", TVaR_995), color=COL_MORADO, size=3.2, fontface="bold") +
  labs(title = "Distribución de Pérdida Agregada Anual S",
       subtitle = sprintf("Monte Carlo — %s simulaciones | Modelo: %s + %s",
                          format(n_sim, big.mark=","), modelo_freq, mejor_sev),
       x = "Pérdida Agregada Anual (IndSev)", y = "Densidad") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 6: Curva de Excedencia ------
s_ord    <- sort(S_simulado, decreasing = TRUE)
prob_exc <- seq_along(s_ord) / length(s_ord)
df_exc   <- data.frame(S = s_ord, prob = prob_exc)

p6 <- ggplot(df_exc, aes(x = S, y = prob)) +
  geom_line(color = COL_AZUL, linewidth = 1) +
  geom_hline(yintercept = 0.005, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
  geom_vline(xintercept = VaR_995, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
  annotate("point", x = VaR_995, y = 0.005, color = COL_ROJO, size = 3.5) +
  annotate("text", x = VaR_995, y = 0.005,
           label = sprintf("VaR 99.5%%\n= %.0f", VaR_995),
           vjust = -0.5, hjust = -0.1, color = COL_ROJO, size = 3.2, fontface = "bold") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 0.5)) +
  labs(title = "Curva de Excedencia — P(S > x)",
       subtitle = "Probabilidad de superar cada nivel de pérdida agregada anual",
       x = "Pérdida Agregada (IndSev)", y = "Probabilidad de Excedencia") +
  tema_base
print(p1,  width=8,  height=5,  dpi=150)

# ------ FIG 7: Correlación Frecuencia–Severidad anual ------
p7 <- ggplot(corr_df, aes(x = N_Accidentes, y = IndSev_Media, label = Anio)) +
  geom_point(color = COL_AZUL, size = 4, alpha = 0.8) +
  geom_text(vjust = -0.7, size = 3.2, color = "grey30", fontface = "bold") +
  geom_smooth(method = "lm", se = TRUE, color = COL_ROJO,
              fill = COL_ROJO, alpha = 0.1, linetype = "dashed") +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = sprintf("r = %.3f\np = %.3f", cor_pearson_med, test_med$p.value),
           size = 4, color = COL_GRIS, fontface = "bold") +
  labs(title = "Correlación: Frecuencia vs Severidad Media Anual",
       subtitle = "¿Años con más accidentes = accidentes más graves?",
       x = "N° accidentes anuales", y = "Severidad media (IndSev)") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 8: Análisis de Sensibilidad — VaR y TVaR por distribución ------
res_long <- resultados_sens %>%
  dplyr::select(Distribución, VaR_995, TVaR_995) %>%
  pivot_longer(cols = c(VaR_995, TVaR_995), names_to = "Medida", values_to = "Valor") %>%
  mutate(Medida = recode(Medida, "VaR_995" = "VaR 99.5%", "TVaR_995" = "TVaR 99.5%"))
p8 <- ggplot(res_long, aes(x = reorder(Distribución, Valor), y = Valor, fill = Medida)) +
  geom_col(position = "dodge", alpha = 0.85, color = "white") +
  coord_flip() +
  scale_fill_manual(values = c("VaR 99.5%" = COL_ROJO, "TVaR 99.5%" = COL_MORADO)) +
  labs(title = "Sensibilidad de VaR y TVaR según Distribución de Severidad",
       subtitle = "Mismo modelo de frecuencia — distintas hipótesis de severidad",
       x = NULL, y = "Valor (IndSev)", fill = "Medida de Riesgo") +
  tema_base
print(p1,  width=9,  height=5,  dpi=150)

# ------ FIG 9: Perfil de riesgo meteorológico ------
p9 <- sev_limpio %>%
  filter(Meteorologia != "Desconocido") %>%
  ggplot(aes(x = Meteorologia, y = IndSev, fill = Meteorologia)) +
  geom_violin(trim = TRUE, alpha = 0.6, show.legend = FALSE) +
  geom_boxplot(width = 0.2, alpha = 0.8, outlier.size = 0.6,
               outlier.alpha = 0.3, show.legend = FALSE) +
  scale_y_log10(labels = comma_format()) +
  scale_fill_manual(values = c("VMC/VFR" = COL_VERDE, "IMC/IFR" = COL_ROJO)) +
  labs(title = "Severidad por Condición Meteorológica",
       subtitle = "VMC/VFR = condiciones visuales; IMC/IFR = condiciones por instrumentos",
       x = NULL, y = "IndSev (escala log)") +
  tema_base
print(p1,  width=7,  height=6,  dpi=150)

# ------ FIG 10: Loading Ratio (carga de cola) por distribución ------
p10 <- resultados_sens %>%
  ggplot(aes(x = reorder(Distribución, Loading_Ratio), y = Loading_Ratio)) +
  geom_col(fill = COL_NARANJA, alpha = 0.85, color = "white") +
  geom_text(aes(label = round(Loading_Ratio, 2)),
            hjust = -0.2, size = 3.5, fontface = "bold") +
  coord_flip() +
  geom_hline(yintercept = 1, linetype = "dashed", color = COL_GRIS) +
  labs(title = "Índice de Carga de Cola (Loading Ratio)",
       subtitle = "TVaR99.5% / E[S] — cuántas veces la prima pura exige la cola",
       x = NULL, y = "Loading Ratio") +
  tema_base + expand_limits(y = max(resultados_sens$Loading_Ratio) * 1.15)
print(p1,  width=8,  height=5,  dpi=150)

# Dashboard principal (panel de 6 gráficas clave)
dashboard <- (p1 | p2) / (p5 | p6) / (p7 | p8)
print(dashboard, width=16, height=15, dpi=150)

## Warning: Removed 50000 rows containing missing values or values outside the scale range
## (`geom_line()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Panel de diagnóstico de ajuste
diagnostico <- (p3 | p4) / (p9 | p10)
print(diagnostico, width=16, height=10, dpi=150)

Proyecto

Ángel Albarrán - Jesús Bastida - Ángel Diaz

2026-04-07