# =============================================================================
# MODELACIÓN ESTOCÁSTICA DE PÉRDIDA AGREGADA EN SINIESTROS DE AVIACIÓN
# Fuente de datos: NTSB (National Transportation Safety Board) Aviation Accident Database
# Cómputo Científico
# Angel Albarrán · Jesus Bastida · Angel Díaz
# =============================================================================
# Carga de librerías necesarias para el análisis
library(tidyverse) # Manipulación y visualización de datos
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # Lectura de archivos Excel
## Warning: package 'readxl' was built under R version 4.5.2
library(fitdistrplus) # Ajuste de distribuciones de probabilidad
## Warning: package 'fitdistrplus' was built under R version 4.5.2
## Cargando paquete requerido: MASS
## Warning: package 'MASS' was built under R version 4.5.2
##
## Adjuntando el paquete: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Cargando paquete requerido: survival
library(actuar) # Distribuciones actuariales
## Warning: package 'actuar' was built under R version 4.5.2
##
## Adjuntando el paquete: 'actuar'
##
## The following objects are masked from 'package:stats':
##
## sd, var
##
## The following object is masked from 'package:grDevices':
##
## cm
library(MASS) # Ajuste de Binomial Negativa
library(gridExtra) # Organización de gráficos en grid
##
## Adjuntando el paquete: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(scales) # Escalas y formatos para gráficos
##
## Adjuntando el paquete: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggplot2) # Sistema avanzado de gráficos
library(patchwork) # Combinación de gráficos ggplot2
## Warning: package 'patchwork' was built under R version 4.5.2
##
## Adjuntando el paquete: 'patchwork'
##
## The following object is masked from 'package:MASS':
##
## area
library(moments) # Cálculo de asimetría y curtosis
## Warning: package 'moments' was built under R version 4.5.2
set.seed(2025) # Fijar semilla para garantizar reproducibilidad de resultados
# =============================================================================
# 1. CARGA Y LIMPIEZA DE DATOS (BASE NTSB)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)\n")
## PASO 1: CARGA Y LIMPIEZA DE DATOS (NTSB)
cat("====================================================\n\n")
## ====================================================
# ---------- 1A. LECTURA DEL ARCHIVO NTSB ----------
ntsb_raw <- read_excel("NTSB.xlsx", sheet = "NTSB_Accidents")
## Warning: Expecting logical in H1618 / R1618C8: got 'AIR2507'
## Warning: Expecting logical in H1663 / R1663C8: got 'AIR2602'
## Warning: Expecting logical in H3429 / R3429C8: got 'AIR2504'
## Warning: Expecting logical in H5006 / R5006C8: got 'AIR2402'
## Warning: Expecting logical in H5076 / R5076C8: got 'AIR2401'
## Warning: Expecting logical in H5294 / R5294C8: got 'AIR2407'
## Warning: Expecting logical in H5616 / R5616C8: got 'AIR2301'
## Warning: Expecting logical in H6070 / R6070C8: got 'AIR2404'
## Warning: Expecting logical in H9772 / R9772C8: got 'AAR2101'
## Warning: Expecting logical in H9876 / R9876C8: got 'AIR2205'
## Warning: Expecting logical in H10121 / R10121C8: got 'AAR2105'
## Warning: Expecting logical in H10792 / R10792C8: got 'AAR2102'
## Warning: Expecting logical in H11017 / R11017C8: got 'AAR2104'
## Warning: Expecting logical in H11338 / R11338C8: got 'AAR2002'
## Warning: Expecting logical in H11409 / R11409C8: got 'AAR2001'
## Warning: Expecting logical in H12806 / R12806C8: got 'AAR1903'
## Warning: Expecting logical in H12937 / R12937C8: got 'AAR1904'
## Warning: Expecting logical in H14002 / R14002C8: got 'AIR1801'
## Warning: Expecting logical in H14303 / R14303C8: got 'AAR1902'
## Warning: Expecting logical in H14577 / R14577C8: got 'AAR1901'
## Warning: Expecting logical in H15023 / R15023C8: got 'AAR1801'
## Warning: Expecting logical in H15151 / R15151C8: got 'AAR1802'
## Warning: Expecting logical in H15517 / R15517C8: got 'AAR1703'
## Warning: Expecting logical in H16630 / R16630C8: got 'AAR1603'
## Warning: Expecting logical in H17310 / R17310C8: got 'AAR1701'
## Warning: Expecting logical in H17367 / R17367C8: got 'AAR1702'
## Warning: Expecting logical in H17862 / R17862C8: got 'AAR1602'
## Warning: Expecting logical in H18104 / R18104C8: got 'AAR1601'
## Warning: Expecting logical in H18204 / R18204C8: got 'AAR1502'
## Warning: Expecting logical in H19041 / R19041C8: got 'AAR1503'
## Warning: Expecting logical in H20130 / R20130C8: got 'AAR1402'
## Warning: Expecting logical in H20356 / R20356C8: got 'AAR1401'
## Warning: Expecting logical in H20723 / R20723C8: got 'AAR1501'
## Warning: Expecting logical in H20836 / R20836C8: got 'AAR1403'
## Warning: Expecting logical in H21124 / R21124C8: got 'AIR1401'
## Warning: Expecting logical in H22731 / R22731C8: got 'AAR1404'
## Warning: Expecting logical in H23043 / R23043C8: got 'AAR1301'
## Warning: Expecting logical in H23406 / R23406C8: got 'AAB1201'
## Warning: Expecting logical in H23534 / R23534C8: got 'AAR1302'
## Warning: Expecting logical in H24446 / R24446C8: got 'AAR1202'
## Warning: Expecting logical in H24835 / R24835C8: got 'AAR1201'
## Warning: Expecting logical in H25510 / R25510C8: got 'AAR1103'
## Warning: Expecting logical in H27254 / R27254C8: got 'AAR1005'
## Warning: Expecting logical in H27685 / R27685C8: got 'AAR1104'
## Warning: Expecting logical in H28096 / R28096C8: got 'AAR1105'
## Warning: Expecting logical in H28261 / R28261C8: got 'AAR1001'
## Warning: Expecting logical in H28318 / R28318C8: got 'AAR1102'
## Warning: Expecting logical in H28349 / R28349C8: got 'AAR1003'
## Warning: Expecting logical in H28447 / R28447C8: got 'AAR1004'
## Warning: Expecting logical in H28769 / R28769C8: got 'AAR0907'
## Warning: Expecting logical in H28805 / R28805C8: got 'AAR1002'
## Warning: Expecting logical in H29100 / R29100C8: got 'AAR1006'
## Warning: Expecting logical in H29154 / R29154C8: got 'AAR1101'
## Warning: Expecting logical in H29388 / R29388C8: got 'AAR0904'
## Warning: Expecting logical in H30047 / R30047C8: got 'AAR0905'
## Warning: Expecting logical in H30729 / R30729C8: got 'AAR0903'
## Warning: Expecting logical in H31147 / R31147C8: got 'AAR0902'
## Warning: Expecting logical in H31285 / R31285C8: got 'AAR0901'
## Warning: Expecting logical in H31532 / R31532C8: got 'AAR0906'
## Warning: Expecting logical in H31861 / R31861C8: got 'AAR0802'
## Warning: Expecting logical in H32100 / R32100C8: got 'AAR0801'
## Warning: Expecting logical in H32617 / R32617C8: got 'AAB0702'
## Warning: Expecting logical in H32838 / R32838C8: got 'AAR0705'
## Warning: Expecting logical in H33003 / R33003C8: got 'AAR0803'
## Warning: Expecting logical in H33955 / R33955C8: got 'AAR0707'
## Warning: Expecting logical in H34161 / R34161C8: got 'AAR0704'
## Warning: Expecting logical in H34204 / R34204C8: got 'AAR0706'
## Warning: Expecting logical in H34537 / R34537C8: got 'AAB0701'
## Warning: Expecting logical in H35915 / R35915C8: got 'AAR0702'
## Warning: Expecting logical in H35976 / R35976C8: got 'AAR0604'
## Warning: Expecting logical in H36227 / R36227C8: got 'AAB0603'
## Warning: Expecting logical in H36235 / R36235C8: got 'AAB0607'
## Warning: Expecting logical in H36247 / R36247C8: got 'AAB0606'
## Warning: Expecting logical in H36394 / R36394C8: got 'AAB0601'
## Warning: Expecting logical in H36396 / R36396C8: got 'AAB0605'
## Warning: Expecting logical in H36415 / R36415C8: got 'AAR0601'
## Warning: Expecting logical in H36456 / R36456C8: got 'AAR0701'
## Warning: Expecting logical in H36556 / R36556C8: got 'AAR0703'
## Warning: Expecting logical in H36835 / R36835C8: got 'AAR0603'
## Warning: Expecting logical in H37472 / R37472C8: got 'AAR0502'
## Warning: Expecting logical in H37702 / R37702C8: got 'AAR0602'
## Warning: Expecting logical in H38063 / R38063C8: got 'AAB0604'
## Warning: Expecting logical in H38086 / R38086C8: got 'AAR0501'
## Warning: Expecting logical in H38537 / R38537C8: got 'AAB0703'
## Warning: Expecting logical in H38991 / R38991C8: got 'AAR0403'
## Warning: Expecting logical in H40064 / R40064C8: got 'AAR0401'
## Warning: Expecting logical in H40356 / R40356C8: got 'AAR0303'
## Warning: Expecting logical in H40927 / R40927C8: got 'AAR0402'
## Warning: Expecting logical in H42280 / R42280C8: got 'AAR0404'
## Warning: Expecting logical in H43738 / R43738C8: got 'AAB0203'
## Warning: Expecting logical in H44018 / R44018C8: got 'AAR0301'
## Warning: Expecting logical in H44451 / R44451C8: got 'AAB0202'
## Warning: Expecting logical in H44492 / R44492C8: got 'AAB0401'
## Warning: Expecting logical in H45587 / R45587C8: got 'AAB0205'
## Warning: Expecting logical in H46043 / R46043C8: got 'AAB0204'
## Warning: Expecting logical in H46116 / R46116C8: got 'AAR0302'
## Warning: Expecting logical in H46199 / R46199C8: got 'AAR0201'
## Warning: Expecting logical in H46569 / R46569C8: got 'AAB0201'
## Warning: Expecting logical in H46599 / R46599C8: got 'AAB0001'
## Warning: Expecting logical in H47375 / R47375C8: got 'AAB0206'
## Warning: Expecting logical in H47703 / R47703C8: got 'AAR0102'
## Warning: Expecting logical in H50534 / R50534C8: got 'AAB0101'
## Warning: Expecting logical in H51480 / R51480C8: got 'AAR9802'
## Warning: Expecting logical in H51487 / R51487C8: got 'AAR0001'
## Warning: Expecting logical in H51540 / R51540C8: got 'AAR0002'
## Warning: Expecting logical in H52765 / R52765C8: got 'AAR9804'
## Warning: Expecting logical in H52830 / R52830C8: got 'AAR9705'
## Warning: Expecting logical in H52976 / R52976C8: got 'AAR9704'
## Warning: Expecting logical in H53111 / R53111C8: got 'AAR9703'
## Warning: Expecting logical in H53366 / R53366C8: got 'AAR9803'
## Warning: Expecting logical in H53762 / R53762C8: got 'AAR0003'
## Warning: Expecting logical in H53862 / R53862C8: got 'AAR9801'
## Warning: Expecting logical in H54287 / R54287C8: got 'AAR9706'
## Warning: Expecting logical in H54486 / R54486C8: got 'AAR9702'
## Warning: Expecting logical in H54755 / R54755C8: got 'AAR9701'
## Warning: Expecting logical in H54928 / R54928C8: got 'AAR9607'
## Warning: Expecting logical in H55004 / R55004C8: got 'AAR9604'
## Warning: Expecting logical in H55189 / R55189C8: got 'AAR9605'
## Warning: Expecting logical in H55711 / R55711C8: got 'AAR9606'
## Warning: Expecting logical in H56402 / R56402C8: got 'AAR9603'
## Warning: Expecting logical in H57039 / R57039C8: got 'AAR9506'
## Warning: Expecting logical in H57306 / R57306C8: got 'AAR9504'
## Warning: Expecting logical in H57393 / R57393C8: got 'AAR9505'
## Warning: Expecting logical in H57497 / R57497C8: got 'AAR9601'
## Warning: Expecting logical in H57808 / R57808C8: got 'AAR9901'
## Warning: Expecting logical in H58375 / R58375C8: got 'AAR9503'
## Warning: Expecting logical in H58494 / R58494C8: got 'AAR9502'
## Warning: Expecting logical in H58850 / R58850C8: got 'AAR9408'
## Warning: Expecting logical in H59189 / R59189C8: got 'AAR9501'
## Warning: Expecting logical in H59337 / R59337C8: got 'AAR9406'
## Warning: Expecting logical in H59438 / R59438C8: got 'AAR9407'
## Warning: Expecting logical in H59565 / R59565C8: got 'AAR9405'
## Warning: Expecting logical in H59732 / R59732C8: got 'AAR9403'
## Warning: Expecting logical in H60209 / R60209C8: got 'AAR9404'
## Warning: Expecting logical in H61129 / R61129C8: got 'AAR8902S'
## Warning: Expecting logical in H61199 / R61199C8: got 'AAR9308'
## Warning: Expecting logical in H61240 / R61240C8: got 'AAR9401'
## Warning: Expecting logical in H61299 / R61299C8: got 'AAR9307'
## Warning: Expecting logical in H61327 / R61327C8: got 'AAR9306'
## Warning: Expecting logical in H62345 / R62345C8: got 'AAR9305'
## Warning: Expecting logical in H62731 / R62731C8: got 'AAR9304'
## Warning: Expecting logical in H63185 / R63185C8: got 'AAR9303'
## Warning: Expecting logical in H63508 / R63508C8: got 'AAR9301'
## Warning: Expecting logical in H63676 / R63676C8: got 'AAR9302'
## Warning: Expecting logical in H64079 / R64079C8: got 'AAR9301S'
## Warning: Expecting logical in H64152 / R64152C8: got 'AAR9201S'
## Warning: Expecting logical in H64671 / R64671C8: got 'AAR9204'
## Warning: Expecting logical in H65992 / R65992C8: got 'AAR9203'
## Warning: Expecting logical in H66001 / R66001C8: got 'AAR9101S'
## Warning: Expecting logical in H66199 / R66199C8: got 'AAR0101'
## Warning: Expecting logical in H66261 / R66261C8: got 'AAR9109'
## Warning: Expecting logical in H66347 / R66347C8: got 'AAR9108'
## Warning: Expecting logical in H66596 / R66596C8: got 'AAR9105'
## Warning: Expecting logical in H68019 / R68019C8: got 'AAR9102'
## Warning: Expecting logical in H68845 / R68845C8: got 'AAR9104'
## Warning: Expecting logical in H68884 / R68884C8: got 'AAR9103'
## Warning: Expecting logical in H68998 / R68998C8: got 'AAR9106'
## Warning: Expecting logical in H69278 / R69278C8: got 'AAR9005'
## Warning: Expecting logical in H69510 / R69510C8: got 'AAR9101'
## Warning: Expecting logical in H69548 / R69548C8: got 'AAR9003'
## Warning: Expecting logical in H69646 / R69646C8: got 'AAR9004'
## Warning: Expecting logical in H70093 / R70093C8: got 'AAR9006'
## Warning: Expecting logical in H71063 / R71063C8: got 'AAR9002'
## Warning: Expecting logical in H71199 / R71199C8: got 'AAR9202'
## Warning: Expecting logical in H72263 / R72263C8: got 'AAR8904'
## Warning: Expecting logical in H73425 / R73425C8: got 'AAR8903'
## Warning: Expecting logical in H73508 / R73508C8: got 'AAR8902'
## Warning: Expecting logical in H73881 / R73881C8: got 'AAR8810'
## Warning: Expecting logical in H73978 / R73978C8: got 'HZM8802'
## Warning: Expecting logical in H74052 / R74052C8: got 'AAR8901'
## Warning: Expecting logical in H74195 / R74195C8: got 'AAR8903'
## Warning: Expecting logical in H74347 / R74347C8: got 'AAR8811'
## Warning: Expecting logical in H74388 / R74388C8: got 'AAR8809'
## Warning: Expecting logical in H75111 / R75111C8: got 'AAR8805'
## Warning: Expecting logical in H75926 / R75926C8: got 'AAR8806'
## Warning: Expecting logical in H76101 / R76101C8: got 'AAR8802'
## Warning: Expecting logical in H76390 / R76390C8: got 'AAR8709'
## Warning: Expecting logical in H76585 / R76585C8: got 'AAR8808'
## Warning: Expecting logical in H76842 / R76842C8: got 'AAR8801'
## Warning: Expecting logical in H76867 / R76867C8: got 'AAR8803'
## Warning: Expecting logical in H77198 / R77198C8: got 'AAR8704S'
## Warning: Expecting logical in H77247 / R77247C8: got 'AAR8706'
## Warning: Expecting logical in H77322 / R77322C8: got 'AAR8708'
## Warning: Expecting logical in H77461 / R77461C8: got 'AAR8704'
## Warning: Expecting logical in H77720 / R77720C8: got 'AAR8707'
## Warning: Expecting logical in H77995 / R77995C8: got 'AAR8804'
## Warning: Expecting logical in H78475 / R78475C8: got 'AAR8703'
## Warning: Expecting logical in H79325 / R79325C8: got 'AAR8702'
## Warning: Expecting logical in H79460 / R79460C8: got 'AAR8802'
## Warning: Expecting logical in H80027 / R80027C8: got 'AAR8705'
## Warning: Expecting logical in H80383 / R80383C8: got 'AAR8607'
## Warning: Expecting logical in H80545 / R80545C8: got 'AAR8701'
## Warning: Expecting logical in H80665 / R80665C8: got 'AAR8606'
## Warning: Expecting logical in H80924 / R80924C8: got 'AAR8605'
## Warning: Expecting logical in H82517 / R82517C8: got 'AAR8603'
## Warning: Expecting logical in H82990 / R82990C8: got 'AAR8604'
## Warning: Expecting logical in H83890 / R83890C8: got 'AAR8507'
## Warning: Expecting logical in H84167 / R84167C8: got 'AAR8508'
## Warning: Expecting logical in H84891 / R84891C8: got 'AAR8502'
## Warning: Expecting logical in H84947 / R84947C8: got 'AAR8501'
## Warning: Expecting logical in H85793 / R85793C8: got 'AAR8415'
## Warning: Expecting logical in H86137 / R86137C8: got 'AAR8412'
## Warning: Expecting logical in H86281 / R86281C8: got 'AAR8410'
## Warning: Expecting logical in H86479 / R86479C8: got 'AAR8413'
## Warning: Expecting logical in H86710 / R86710C8: got 'AAR8408'
## Warning: Expecting logical in H87385 / R87385C8: got 'AAR8406'
## Warning: Expecting logical in H87443 / R87443C8: got 'AAR8405'
## Warning: Expecting logical in H88452 / R88452C8: got 'AAR8602'
## Warning: Expecting logical in H88756 / R88756C8: got 'AAR8404'
## Warning: Expecting logical in H89090 / R89090C8: got 'AAR8411'
## Warning: Expecting logical in H89138 / R89138C8: got 'AAR8401'
## Warning: Expecting logical in H89434 / R89434C8: got 'AAR8403'
## Warning: Expecting logical in H89654 / R89654C8: got 'AAR8307'
## Warning: Expecting logical in H89660 / R89660C8: got 'AAR8308'
## Warning: Expecting logical in H89663 / R89663C8: got 'AAR8407'
## Warning: Expecting logical in H89711 / R89711C8: got 'AAR8305'
## Warning: Expecting logical in H90012 / R90012C8: got 'AAR8303'
## Warning: Expecting logical in H90035 / R90035C8: got 'AAR8304'
## Warning: Expecting logical in H91240 / R91240C8: got 'AAR8414'
## Warning: Expecting logical in H92080 / R92080C8: got 'AAR8216'
## Warning: Expecting logical in H92203 / R92203C8: got 'AAR8301'
## Warning: Expecting logical in H92906 / R92906C8: got 'AAR8207'
## Warning: Expecting logical in H92955 / R92955C8: got 'AAR8214'
## Warning: Expecting logical in H93245 / R93245C8: got 'AAR8211'
## Warning: Expecting logical in H93253 / R93253C8: got 'AAR8212'
## Warning: Expecting logical in H93284 / R93284C8: got 'AAR8213'
## Warning: Expecting logical in H94000 / R94000C8: got 'AAR8204'
## Warning: Expecting logical in H94074 / R94074C8: got 'AAR8206'
## Warning: Expecting logical in H94086 / R94086C8: got 'AAR8205'
## Warning: Expecting logical in H94116 / R94116C8: got 'AAR8201'
## Warning: Expecting logical in H95201 / R95201C8: got 'AAR8117'
## Warning: Expecting logical in H96181 / R96181C8: got 'AAR8118'
## Warning: Expecting logical in H96299 / R96299C8: got 'AAR8114'
## Warning: Expecting logical in H96894 / R96894C8: got 'AAR8110'
## Warning: Expecting logical in H96979 / R96979C8: got 'AAR8109'
## Warning: Expecting logical in H96984 / R96984C8: got 'AAR8111'
## Warning: Expecting logical in H97419 / R97419C8: got 'AAR8107'
## Warning: Expecting logical in H98798 / R98798C8: got 'AAR8101'
## Warning: Expecting logical in H98840 / R98840C8: got 'AAR8102'
## Warning: Expecting logical in H99281 / R99281C8: got 'AAR8014'
## Warning: Expecting logical in H99423 / R99423C8: got 'AAR8015'
## Warning: Expecting logical in H99851 / R99851C8: got 'AAR8012'
## Warning: Expecting logical in H99922 / R99922C8: got 'AAR8202'
## Warning: Expecting logical in H100337 / R100337C8: got 'AAR8104'
## Warning: Expecting logical in H100780 / R100780C8: got 'AAR8210'
## Warning: Expecting logical in H100822 / R100822C8: got 'AAR8106'
## Warning: Expecting logical in H101474 / R101474C8: got 'AAR8010'
## Warning: Expecting logical in H101788 / R101788C8: got 'AAR8008'
## Warning: Expecting logical in H102815 / R102815C8: got 'AAR8003'
## Warning: Expecting logical in H103348 / R103348C8: got 'AAR8001'
## Warning: Expecting logical in H103685 / R103685C8: got 'AAR7917'
## Warning: Expecting logical in H104139 / R104139C8: got 'AAR7914'
## Warning: Expecting logical in H104288 / R104288C8: got 'AAR8108'
## Warning: Expecting logical in H104543 / R104543C8: got 'AAR7913'
## Warning: Expecting logical in H104671 / R104671C8: got 'AAR7910'
## Warning: Expecting logical in H104800 / R104800C8: got 'AAR7911'
## Warning: Expecting logical in H105040 / R105040C8: got 'AAR8004'
## Warning: Expecting logical in H105211 / R105211C8: got 'AAR7907'
## Warning: Expecting logical in H105451 / R105451C8: got 'AAR7906'
## Warning: Expecting logical in H105454 / R105454C8: got 'AAR7918'
## Warning: Expecting logical in H106163 / R106163C8: got 'AAR7905'
## Warning: Expecting logical in H106520 / R106520C8: got 'AAR7909'
## Warning: Expecting logical in H107177 / R107177C8: got 'AAR7904'
## Warning: Expecting logical in H107438 / R107438C8: got 'AAR7902'
## Warning: Expecting logical in H108311 / R108311C8: got 'AAR7814'
## Warning: Expecting logical in H108451 / R108451C8: got 'AAR7813'
## Warning: Expecting logical in H109458 / R109458C8: got 'AAR7815'
## Warning: Expecting logical in H109888 / R109888C8: got 'AAR7808'
## Warning: Expecting logical in H109947 / R109947C8: got 'AAR7810'
## Warning: Expecting logical in H110446 / R110446C8: got 'AAR7806'
## Warning: Expecting logical in H110926 / R110926C8: got 'AAR7915'
## Warning: Expecting logical in H110953 / R110953C8: got 'AAR7805'
## Warning: Expecting logical in H112489 / R112489C8: got 'AAR7809'
## Warning: Expecting logical in H113261 / R113261C8: got 'AAR7803'
## Warning: Expecting logical in H115062 / R115062C8: got 'AAR7804'
## Warning: Expecting logical in H115221 / R115221C8: got 'AAR7705'
## Warning: Expecting logical in H115837 / R115837C8: got 'AAR7704'
## Warning: Expecting logical in H116503 / R116503C8: got 'AAR7802'
## Warning: Expecting logical in H117275 / R117275C8: got 'AAR7701'
## Warning: Expecting logical in H117545 / R117545C8: got 'AAR7620'
## Warning: Expecting logical in H117608 / R117608C8: got 'AAR7618'
## Warning: Expecting logical in H118183 / R118183C8: got 'AAR7617'
## Warning: Expecting logical in H118783 / R118783C8: got 'AAR7603'
## Warning: Expecting logical in H118896 / R118896C8: got 'AAR8306'
## Warning: Expecting logical in H119792 / R119792C8: got 'AAR7601'
## Warning: Expecting logical in H120158 / R120158C8: got 'AAR7614'
## Warning: Expecting logical in H120588 / R120588C8: got 'AAR7613'
## Warning: Expecting logical in H120871 / R120871C8: got 'AAR7608'
## Warning: Expecting logical in H121565 / R121565C8: got 'AAR7610'
## Warning: Expecting logical in H122026 / R122026C8: got 'AAR7515'
## Warning: Expecting logical in H122063 / R122063C8: got 'AAR7511'
## Warning: Expecting logical in H122810 / R122810C8: got 'AAR7605'
## Warning: Expecting logical in H122834 / R122834C8: got 'AAR7510'
## Warning: Expecting logical in H123183 / R123183C8: got 'AAR7513'
## Warning: Expecting logical in H123345 / R123345C8: got 'AAR7512'
## Warning: Expecting logical in H123697 / R123697C8: got 'AAR7506'
## Warning: Expecting logical in H124381 / R124381C8: got 'AAR8005'
## Warning: Expecting logical in H125093 / R125093C8: got 'AAR7409'
## Warning: Expecting logical in H125315 / R125315C8: got 'AAR7508'
## Warning: Expecting logical in H126783 / R126783C8: got 'AAR7501'
## Warning: Expecting logical in H127167 / R127167C8: got 'AAR7416'
## Warning: Expecting logical in H127321 / R127321C8: got 'AAR7410'
## Warning: Expecting logical in H127622 / R127622C8: got 'AAR7414'
## Warning: Expecting logical in H127633 / R127633C8: got 'AAR7411'
## Warning: Expecting logical in H127777 / R127777C8: got 'AAR7413'
## Warning: Expecting logical in H128010 / R128010C8: got 'AAR7502'
## Warning: Expecting logical in H128425 / R128425C8: got 'AAR7404'
## Warning: Expecting logical in H128683 / R128683C8: got 'AAR7406'
## Warning: Expecting logical in H128846 / R128846C8: got 'AAR7408'
## Warning: Expecting logical in H129436 / R129436C8: got 'AAR7405'
## Warning: Expecting logical in H130930 / R130930C8: got 'AAR7317'
## Warning: Expecting logical in H131387 / R131387C8: got 'AAR7312'
## Warning: Expecting logical in H131897 / R131897C8: got 'AAR7314'
## Warning: Expecting logical in H132753 / R132753C8: got 'AAR7306'
## Warning: Expecting logical in H133321 / R133321C8: got 'AAR7305'
## Warning: Expecting logical in H134342 / R134342C8: got 'AAR7302'
## Warning: Expecting logical in H134710 / R134710C8: got 'AAR7232'
## Warning: Expecting logical in H134946 / R134946C8: got 'AAR7229'
## Warning: Expecting logical in H135016 / R135016C8: got 'AAR7230'
## Warning: Expecting logical in H135655 / R135655C8: got 'AAR7308'
## Warning: Expecting logical in H135789 / R135789C8: got 'AAR7225'
## Warning: Expecting logical in H136129 / R136129C8: got 'AAR7224'
## Warning: Expecting logical in H136274 / R136274C8: got 'AAR7221'
## Warning: Expecting logical in H136402 / R136402C8: got 'AAR7222'
## Warning: Expecting logical in H136536 / R136536C8: got 'AAR7213'
## Warning: Expecting logical in H136971 / R136971C8: got 'AAR7215'
## Warning: Expecting logical in H137585 / R137585C8: got 'AAR7228'
## Warning: Expecting logical in H138874 / R138874C8: got 'AAR7204'
## Warning: Expecting logical in H139085 / R139085C8: got 'AAR7207'
## Warning: Expecting logical in H139161 / R139161C8: got 'AAR7226'
## Warning: Expecting logical in H139532 / R139532C8: got 'AAR7116'
## Warning: Expecting logical in H139638 / R139638C8: got 'AAR7219'
## Warning: Expecting logical in H140131 / R140131C8: got 'AAR7218'
## Warning: Expecting logical in H140620 / R140620C8: got 'AAR7114'
## Warning: Expecting logical in H141045 / R141045C8: got 'AAR7216'
## Warning: Expecting logical in H141494 / R141494C8: got 'AAR7212'
## Warning: Expecting logical in H141647 / R141647C8: got 'AAR7211'
## Warning: Expecting logical in H141761 / R141761C8: got 'AAR7214'
## Warning: Expecting logical in H142320 / R142320C8: got 'AAR7107'
## Warning: Expecting logical in H142840 / R142840C8: got 'AAR7026'
## Warning: Expecting logical in H143236 / R143236C8: got 'AAR7210'
## Warning: Expecting logical in H143374 / R143374C8: got 'AAR7209'
## Warning: Expecting logical in H143696 / R143696C8: got 'AAR7106'
## Warning: Expecting logical in H144239 / R144239C8: got 'AAR7025'
## Warning: Expecting logical in H144649 / R144649C8: got 'AAR7108'
## Warning: Expecting logical in H145584 / R145584C8: got 'AAR7101'
## Warning: Expecting logical in H145925 / R145925C8: got 'AAR7028'
## Warning: Expecting logical in H146203 / R146203C8: got 'AAR7019'
## Warning: Expecting logical in H146589 / R146589C8: got 'AAR7021'
## Warning: Expecting logical in H146645 / R146645C8: got 'AAR7022'
## Warning: Expecting logical in H146796 / R146796C8: got 'AAR7024'
## Warning: Expecting logical in H147259 / R147259C8: got 'AAR7015'
## Warning: Expecting logical in H147763 / R147763C8: got 'AAR7023'
## Warning: Expecting logical in H148412 / R148412C8: got 'AAR7018'
## Warning: Expecting logical in H148661 / R148661C8: got 'AAR7011'
## Warning: Expecting logical in H148672 / R148672C8: got 'AAR7017'
## Warning: Expecting logical in H149830 / R149830C8: got 'AAR7013'
## Warning: Expecting logical in H150149 / R150149C8: got 'AAR7003'
## Warning: Expecting logical in H150346 / R150346C8: got 'AAR7009'
## Warning: Expecting logical in H150855 / R150855C8: got 'AAR7006'
## Warning: Expecting logical in H151107 / R151107C8: got 'AAR7020'
## Warning: Expecting logical in H151109 / R151109C8: got 'AAR7027'
## Warning: Expecting logical in H151120 / R151120C8: got 'AAR6908'
## Warning: Expecting logical in H151791 / R151791C8: got 'AAR7007'
## Warning: Expecting logical in H152859 / R152859C8: got 'AAR6907'
## Warning: Expecting logical in H179305 / R179305C8: got 'SA091'
## Warning: Expecting logical in H179306 / R179306C8: got 'SA097'
## Warning: Expecting logical in H179307 / R179307C8: got 'SA096'
## Warning: Expecting logical in H179308 / R179308C8: got 'AIR2505'
## Warning: Expecting logical in H179309 / R179309C8: got 'AIR2506'
## Warning: Expecting logical in H179310 / R179310C8: got 'AIR2203'
## Warning: Expecting logical in H179311 / R179311C8: got 'AIR2209'
## Warning: Expecting logical in H179312 / R179312C8: got 'SA084'
## Warning: Expecting logical in H179313 / R179313C8: got 'AIR2202'
## Warning: Expecting logical in H179314 / R179314C8: got 'AIR2206'
## Warning: Expecting logical in H179315 / R179315C8: got 'AIR2201'
## Warning: Expecting logical in H179316 / R179316C8: got 'AIR2202'
## Warning: Expecting logical in H179317 / R179317C8: got 'AIR2204'
## Warning: Expecting logical in H179318 / R179318C8: got 'AIR2601'
## Warning: Expecting logical in H179319 / R179319C8: got 'AIR2207'
## Warning: Expecting logical in H179320 / R179320C8: got 'AIR2403'
## Warning: Expecting logical in H179321 / R179321C8: got 'SA086'
## Warning: Expecting logical in H179322 / R179322C8: got 'SA090'
## Warning: Expecting logical in H179323 / R179323C8: got 'SA088'
## Warning: Expecting logical in H179324 / R179324C8: got 'SA092'
cat("Dimensiones originales (NTSB):", nrow(ntsb_raw), "x", ncol(ntsb_raw), "\n") # Mostrar dimensiones originales del dataset
## Dimensiones originales (NTSB): 179323 x 37
cat("Variables:\n")
## Variables:
cat(paste(" -", names(ntsb_raw), collapse = "\n"), "\n\n") # Mostrar nombres de todas las variables
## - NtsbNo
## - EventType
## - Mkey
## - EventDate
## - City
## - State
## - Country
## - ReportNo
## - N
## - HasSafetyRec
## - ReportType
## - OriginalPublishDate
## - HighestInjuryLevel
## - FatalInjuryCount
## - SeriousInjuryCount
## - MinorInjuryCount
## - ProbableCause
## - EventID
## - Latitude
## - Longitude
## - Make
## - Model
## - AirCraftCategory
## - AirportID
## - AirportName
## - AmateurBuilt
## - NumberOfEngines
## - Scheduled
## - PurposeOfFlight
## - FAR
## - AirCraftDamage
## - WeatherCondition
## - Operator
## - ReportStatus
## - RepGenFlag
## - DocketUrl
## - DocketPublishDate
# ---------- 1B. LIMPIEZA GENERAL ----------
ntsb_limpio <- ntsb_raw %>% # Aplicar transformaciones de limpieza y creación de variables
# Descomposicion de la variable fecha: extraer fecha, año y mes
mutate(
EventDate = as.Date(substr(EventDate, 1, 10)), # Convertir a fecha (primeros 10 caracteres)
Anio = as.integer(format(EventDate, "%Y")), # Extraer año como entero
Mes = as.integer(format(EventDate, "%m")) # Extraer mes como entero
) %>%
filter(EventType == "ACC") %>% # Solo accidentes confirmados (excluir incidentes)
filter(!is.na(EventDate)) %>% # Eliminar registros sin fecha válida
filter(Anio >= 2015, Anio <= 2024) %>% # Delimitación temporal: últimos 10 años completos (2015-2024)
# Limpiar variable de daño: tomar la primera aeronave listada
mutate(
DanoPrimario = str_split_fixed(replace_na(AirCraftDamage, "Unknown"), ",", 2)[, 1], # Extraer primer daño
DanoPrimario = case_when( # Clasificar daño
DanoPrimario %in% c("Destroyed", "Substantial") ~ DanoPrimario,
DanoPrimario == "Minor" ~ "Minor",
TRUE ~ "Unknown/None"
)
) %>%
# Limpiar categoría de aeronave (tomar primera si hay múltiples)
mutate(
Categoria = str_split_fixed(replace_na(AirCraftCategory, "AIR"), ",", 2)[, 1], # Extraer primera categoría
Categoria = case_when( # Renombrar categorías
Categoria == "AIR" ~ "Ala Fija",
Categoria == "HELI" ~ "Helicóptero",
Categoria == "GLI" ~ "Planeador",
Categoria == "BALL" ~ "Globo",
TRUE ~ "Otro"
)
) %>%
# Condición meteorológica limpia (unificar VMC/VFR e IMC/IFR)
mutate(
Meteorologia = case_when(
WeatherCondition %in% c("VMC", "VFR") ~ "VMC/VFR", # Condiciones visuales
WeatherCondition %in% c("IMC", "IFR") ~ "IMC/IFR", # Condiciones instrumentales
TRUE ~ "Desconocido"
)
) %>%
# Variable de lesiones totales (proxy de magnitud humana del siniestro)
mutate(
LesionesTotal = replace_na(FatalInjuryCount, 0) + # Sumar víctimas fatales
replace_na(SeriousInjuryCount, 0) + # + lesiones graves
replace_na(MinorInjuryCount, 0), # + lesiones leves
TieneFatales = FatalInjuryCount > 0 # Indicador de fatalidad
) %>%
# Clasificar tipo de operación según FAR Part
mutate(
TipoOperacion = case_when(
FAR == 121 ~ "Comercial (Pt.121)", # Aerolíneas comerciales
FAR == 135 ~ "Taxi Aéreo (Pt.135)", # Operaciones de taxi aéreo
FAR == 91 ~ "Aviación General (Pt.91)", # Aviación privada/recreativa
TRUE ~ "Otro/Desconocido"
)
)
cat("Registros tras limpieza (2015-2024, solo ACC):", nrow(ntsb_limpio), "\n\n") # Mostrar número de registros después de la limpieza
## Registros tras limpieza (2015-2024, solo ACC): 14945
# Resumen de eliminaciones
cat("Desglose de filtrado:\n")
## Desglose de filtrado:
cat(" - Registros originales: ", nrow(ntsb_raw), "\n")
## - Registros originales: 179323
cat(" - No accidentes (INC/OCC) excluidos:", nrow(ntsb_raw) - nrow(filter(ntsb_raw, EventType == "ACC")), "\n")
## - No accidentes (INC/OCC) excluidos: 5861
cat(" - Sin fecha excluidos: ",
nrow(filter(ntsb_raw, EventType == "ACC")) -
nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))), "\n")
## - Sin fecha excluidos: 0
cat(" - Fuera del período 2015-2024: ",
nrow(filter(ntsb_raw, EventType == "ACC", !is.na(as.Date(substr(EventDate,1,10))))) - nrow(ntsb_limpio), "\n")
## - Fuera del período 2015-2024: 158517
cat(" - Registros finales para análisis: ", nrow(ntsb_limpio), "\n\n")
## - Registros finales para análisis: 14945
# =============================================================================
# 2. BASE DE FRECUENCIA
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 2: BASE DE FRECUENCIA\n")
## PASO 2: BASE DE FRECUENCIA
cat("====================================================\n\n")
## ====================================================
# ---------- 2A. CONSTRUCCIÓN ----------
# Variables relevantes para frecuencia: conteo de eventos por período
# Conteo ANUAL (referencia y correlación)
freq_anual <- ntsb_limpio %>%
group_by(Anio) %>% # Agrupar por año
summarise(
N_Accidentes = n(), # Total accidentes por año
N_Fatales = sum(TieneFatales, na.rm = TRUE), # Accidentes con fatales
Pct_Fatales = round(100 * N_Fatales / N_Accidentes, 1), # Porcentaje fatal
.groups = "drop" # Desagrupar resultado
)
# Conteo MENSUAL para ajuste estadístico (más observaciones = mejor ajuste)
freq_mensual <- ntsb_limpio %>%
group_by(Anio, Mes) %>% # Agrupar por año y mes
summarise(N_Accidentes = n(), .groups = "drop") # Contar accidentes por mes
# Asegurar que todos los meses estén representados (incluso si N=0)
todos_meses <- expand.grid(Anio = 2015:2024, Mes = 1:12) # Crear grid completo de meses
freq_mensual <- todos_meses %>%
left_join(freq_mensual, by = c("Anio", "Mes")) %>% # Unir con datos reales
mutate(N_Accidentes = replace_na(N_Accidentes, 0)) # Reemplazar NA con 0
N_frecuencia <- freq_mensual$N_Accidentes # Vector para ajuste estadístico
# Mostrar estadísticas descriptivas de frecuencia mensual
cat("Estadísticas de Frecuencia mensual:\n")
## Estadísticas de Frecuencia mensual:
cat(sprintf(" Media: %.2f accidentes/mes\n", mean(N_frecuencia)))
## Media: 124.54 accidentes/mes
cat(sprintf(" Varianza: %.2f\n", var(N_frecuencia)))
## Varianza: 1299.34
cat(sprintf(" Índice de dispersión: %.3f (Var/Media)\n", var(N_frecuencia)/mean(N_frecuencia)))
## Índice de dispersión: 10.433 (Var/Media)
cat(sprintf(" → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson\n\n"))
## → Si > 1: sobredispersión → Binomial Negativa puede superar a Poisson
# Distribución por categorías relevantes
cat("Distribución por tipo de operación:\n")
## Distribución por tipo de operación:
print(ntsb_limpio %>% count(TipoOperacion, sort = TRUE))
## # A tibble: 4 × 2
## TipoOperacion n
## <chr> <int>
## 1 Aviación General (Pt.91) 10947
## 2 Otro/Desconocido 3318
## 3 Taxi Aéreo (Pt.135) 416
## 4 Comercial (Pt.121) 264
cat("\nDistribución por condición meteorológica:\n")
##
## Distribución por condición meteorológica:
print(ntsb_limpio %>% count(Meteorologia, sort = TRUE))
## # A tibble: 3 × 2
## Meteorologia n
## <chr> <int>
## 1 VMC/VFR 12032
## 2 Desconocido 2373
## 3 IMC/IFR 540
cat("\nDistribución por categoría de aeronave:\n")
##
## Distribución por categoría de aeronave:
print(ntsb_limpio %>% count(Categoria, sort = TRUE))
## # A tibble: 5 × 2
## Categoria n
## <chr> <int>
## 1 Ala Fija 12674
## 2 Helicóptero 1683
## 3 Otro 278
## 4 Planeador 205
## 5 Globo 105
# =============================================================================
# 3. BASE DE SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 3: BASE DE SEVERIDAD\n")
## PASO 3: BASE DE SEVERIDAD
cat("====================================================\n\n")
## ====================================================
# ---------- 3A. CONSTRUCCIÓN ----------
# Proxy de severidad: lesiones totales ponderadas por tipo
# Ponderación actuarial: Fatal=10, Serio=3, Menor=1
# (refleja el costo relativo humano/económico de cada tipo de lesión)
sev_limpio <- ntsb_limpio %>%
mutate(
# Índice de severidad ponderado (proxy de costo humano)
IndSev = 10 * replace_na(FatalInjuryCount, 0) + # Peso 10 para fatales
3 * replace_na(SeriousInjuryCount, 0) + # Peso 3 para graves
1 * replace_na(MinorInjuryCount, 0), # Peso 1 para leves
# Variable binaria de siniestro catastrófico (>=5 víctimas fatales)
EsCatastrofico = FatalInjuryCount >= 5
) %>%
# Para modelar severidad: solo eventos con alguna víctima (IndSev > 0)
filter(IndSev > 0)
cat("─── Variables de la Base de Severidad ───\n")
## ─── Variables de la Base de Severidad ───
cat(" Unidad de análisis: Accidente individual\n")
## Unidad de análisis: Accidente individual
cat(" Período: 2015-2024\n")
## Período: 2015-2024
cat(" Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)\n")
## Criterio de inclusión: Solo accidentes con víctimas (IndSev > 0)
cat(" Variable respuesta: IndSev = 10·Fatales + 3·Serios + 1·Menores\n")
## Variable respuesta: IndSev = 10·Fatales + 3·Serios + 1·Menores
cat(" (índice actuarial ponderado por gravedad)\n\n")
## (índice actuarial ponderado por gravedad)
cat("Registros válidos para severidad:", nrow(sev_limpio), "\n")
## Registros válidos para severidad: 7155
cat(" (excluidos sin víctimas:", nrow(ntsb_limpio) - nrow(sev_limpio), ")\n\n")
## (excluidos sin víctimas: 7790 )
X_severidad <- sev_limpio$IndSev
cat("Estadísticas de Severidad (IndSev):\n")
## Estadísticas de Severidad (IndSev):
cat(sprintf(" Media: %.2f\n", mean(X_severidad)))
## Media: 13.32
cat(sprintf(" Mediana: %.2f\n", median(X_severidad)))
## Mediana: 6.00
cat(sprintf(" Desv.Est: %.2f\n", sd(X_severidad)))
## Desv.Est: 61.54
cat(sprintf(" Sesgo: %.3f\n", moments::skewness(X_severidad)))
## Sesgo: 24.862
cat(sprintf(" Curtosis: %.3f\n", moments::kurtosis(X_severidad)))
## Curtosis: 709.275
cat(sprintf(" Mínimo: %.0f\n", min(X_severidad)))
## Mínimo: 1
cat(sprintf(" Máximo: %.0f\n", max(X_severidad)))
## Máximo: 2240
cat(sprintf(" CV: %.1f%%\n\n", 100 * sd(X_severidad) / mean(X_severidad)))
## CV: 462.1%
cat("Percentiles clave:\n")
## Percentiles clave:
pcts <- quantile(X_severidad, c(0.50, 0.75, 0.90, 0.95, 0.99, 0.999))
for (nm in names(pcts)) cat(sprintf(" P%-5s = %.1f\n", nm, pcts[nm]))
## P50% = 6.0
## P75% = 13.0
## P90% = 23.0
## P95% = 40.0
## P99% = 70.0
## P99.9% = 1100.4
cat("Severidad por tipo de daño a la aeronave:\n")
## Severidad por tipo de daño a la aeronave:
print(sev_limpio %>%
group_by(DanoPrimario) %>%
summarise(N = n(), Media_IndSev = round(mean(IndSev), 2),
Mediana = median(IndSev), .groups = "drop") %>%
arrange(desc(Media_IndSev)))
## # A tibble: 4 × 4
## DanoPrimario N Media_IndSev Mediana
## <chr> <int> <dbl> <dbl>
## 1 Destroyed 1977 27.8 13
## 2 Unknown/None 392 13.6 3
## 3 Minor 69 7.42 3
## 4 Substantial 4717 7.32 3
# =============================================================================
# 4. AJUSTE DE DISTRIBUCIONES
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 4: AJUSTE DE DISTRIBUCIONES\n")
## PASO 4: AJUSTE DE DISTRIBUCIONES
cat("====================================================\n\n")
## ====================================================
# ---------- 4A. FRECUENCIA ----------
cat("─── 4A. AJUSTE DE FRECUENCIA (mensual) ───\n\n")
## ─── 4A. AJUSTE DE FRECUENCIA (mensual) ───
# Ajustar distribución Poisson a los datos de frecuencia mensual
ajuste_pois <- fitdist(N_frecuencia, "pois") # Estimación por máxima verosimilitud
lambda_est <- ajuste_pois$estimate["lambda"] # Extraer parámetro lambda estimado
# Ajustar distribución Binomial Negativa a los datos
ajuste_nb <- fitdist(N_frecuencia, "nbinom") # Estimación por máxima verosimilitud
size_est <- ajuste_nb$estimate["size"] # Extraer parámetro size (r)
mu_est <- ajuste_nb$estimate["mu"] # Extraer parámetro mu (media)
# Mostrar resultados del ajuste Poisson
cat(" Poisson:\n")
## Poisson:
cat(sprintf(" λ estimado = %.4f accidentes/mes\n", lambda_est))
## λ estimado = 124.5417 accidentes/mes
cat(sprintf(" AIC = %.2f | BIC = %.2f\n\n", ajuste_pois$aic, ajuste_pois$bic))
## AIC = 2020.71 | BIC = 2023.50
# Mostrar resultados del ajuste Binomial Negativa
cat(" Binomial Negativa:\n")
## Binomial Negativa:
cat(sprintf(" r (size) = %.4f\n", size_est))
## r (size) = 13.5486
cat(sprintf(" μ (mu) = %.4f\n", mu_est))
## μ (mu) = 124.5334
cat(sprintf(" AIC = %.2f | BIC = %.2f\n\n", ajuste_nb$aic, ajuste_nb$bic))
## AIC = 1195.71 | BIC = 1201.28
# Prueba Chi-Cuadrada para evaluar bondad de ajuste de Poisson
val_unicos <- sort(unique(N_frecuencia)) # Valores únicos observados
obs_chi <- table(N_frecuencia) # Frecuencias observadas
exp_pois_v <- length(N_frecuencia) * dpois(as.integer(names(obs_chi)), lambda_est) # Esperadas
validos_chi <- exp_pois_v >= 5 # Celdas con frecuencia esperada >=5
chi2_stat <- sum((as.numeric(obs_chi[validos_chi]) - exp_pois_v[validos_chi])^2 /
exp_pois_v[validos_chi]) # Estadístico chi-cuadrado
gl_chi2 <- sum(validos_chi) - 1 - 1 # Grados de libertad
p_chi2_pois <- pchisq(chi2_stat, df = max(gl_chi2, 1), lower.tail = FALSE) # Valor p
# Mostrar resultados de prueba chi-cuadrado
cat(sprintf(" Prueba χ² Poisson: χ²=%.3f, gl=%d, p=%.4f\n", chi2_stat, gl_chi2, p_chi2_pois))
## Prueba χ² Poisson: χ²=0.000, gl=-2, p=1.0000
if (p_chi2_pois > 0.05) {
cat(" → No se rechaza Poisson (p > 0.05)\n\n")
} else {
cat(" → Se rechaza Poisson (p ≤ 0.05) → Binomial Negativa es preferida\n\n")
}
## → No se rechaza Poisson (p > 0.05)
# Seleccionar mejor modelo de frecuencia según AIC
if (ajuste_nb$aic < ajuste_pois$aic) {
modelo_freq <- "nbinom"
cat(" ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)\n\n")
} else {
modelo_freq <- "pois"
cat(" ✔ MODELO SELECCIONADO (Frecuencia): Poisson (menor AIC)\n\n")
}
## ✔ MODELO SELECCIONADO (Frecuencia): Binomial Negativa (menor AIC)
# ---------- 4B. SEVERIDAD ----------
cat("─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───\n\n")
## ─── 4B. AJUSTE DE SEVERIDAD (IndSev) ───
# Candidatos de cola pesada (adecuados para pérdidas con asimetría positiva)
ajuste_lnorm <- fitdist(X_severidad, "lnorm") # Ajustar Log-Normal
ajuste_gamma <- fitdist(X_severidad, "gamma") # Ajustar Gamma
ajuste_weib <- fitdist(X_severidad, "weibull") # Ajustar Weibull
# Ajustar Pareto (con manejo de errores por posible fallo en convergencia)
ajuste_pareto <- tryCatch(
fitdist(X_severidad, "pareto",
start = list(shape = 2, scale = min(X_severidad) * 0.99)),
error = function(e) NULL
)
# Tabla comparativa de distribuciones
distribs <- list("Log-Normal" = ajuste_lnorm,
"Gamma" = ajuste_gamma,
"Weibull" = ajuste_weib)
if (!is.null(ajuste_pareto)) distribs[["Pareto"]] <- ajuste_pareto # Añadir Pareto si existe
# Crear tabla con métricas de bondad de ajuste
tabla_sev <- map_dfr(names(distribs), function(nm) { # Mapear sobre cada distribución
d <- distribs[[nm]] # Extraer objeto fitdist
data.frame(Distribución = nm, AIC = d$aic, BIC = d$bic, # Crear fila con métricas
LogLik = as.numeric(logLik(d))) # Log-verosimilitud
}) %>% arrange(AIC) # Ordenar por AIC ascendente
# Mostrar tabla comparativa
cat("Comparación de distribuciones de Severidad:\n")
## Comparación de distribuciones de Severidad:
print(tabla_sev, row.names = FALSE)
## Distribución AIC BIC LogLik
## Log-Normal 47802.59 47816.34 -23899.29
## Pareto 48328.07 48341.82 -24162.03
## Weibull 49570.79 49584.54 -24783.39
## Gamma 50664.67 50678.42 -25330.34
# Seleccionar mejor modelo de severidad (menor AIC)
mejor_sev <- tabla_sev$Distribución[1]
cat(sprintf(" ✔ MODELO SELECCIONADO (Severidad): %s (menor AIC)\n\n", mejor_sev))
## ✔ MODELO SELECCIONADO (Severidad): Log-Normal (menor AIC)
# Extraer parámetros del mejor modelo
ajuste_sev_best <- distribs[[mejor_sev]]
params_sev <- ajuste_sev_best$estimate
cat(" Parámetros estimados:\n")
## Parámetros estimados:
for (nm in names(params_sev)) cat(sprintf(" %s = %.5f\n", nm, params_sev[nm])) # Mostrar parámetros
## meanlog = 1.72442
## sdlog = 1.21758
# Función de distribución acumulada (CDF) según el mejor modelo
cdf_fn <- switch(mejor_sev,
"Log-Normal" = function(x) plnorm(x, params_sev["meanlog"], params_sev["sdlog"]),
"Gamma" = function(x) pgamma(x, params_sev["shape"], params_sev["rate"]),
"Weibull" = function(x) pweibull(x, params_sev["shape"], params_sev["scale"]),
"Pareto" = function(x) ppareto(x, params_sev["shape"], params_sev["scale"]),
)
# Prueba Kolmogorov-Smirnov para evaluar bondad de ajuste
ks_sev <- ks.test(X_severidad, cdf_fn) # Comparar empírica vs teórica
## Warning in ks.test.default(X_severidad, cdf_fn): ties should not be present for
## the one-sample Kolmogorov-Smirnov test
cat(sprintf("\n Prueba K-S (%s): D=%.4f, p=%.4f\n", mejor_sev, ks_sev$statistic, ks_sev$p.value))
##
## Prueba K-S (Log-Normal): D=0.1563, p=0.0000
cat(ifelse(ks_sev$p.value > 0.05, # Interpretación del resultado
" → No se rechaza el ajuste (p > 0.05)\n\n",
" → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible\n\n"))
## → Ajuste rechazado estadísticamente; se procede con el mejor AIC disponible
# =============================================================================
# 5. CORRELACIÓN ENTRE FRECUENCIA Y SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD\n")
## PASO 5: ANÁLISIS DE CORRELACIÓN FRECUENCIA–SEVERIDAD
cat("====================================================\n\n")
## ====================================================
# Calcular severidad media y total por año
sev_anual <- sev_limpio %>%
group_by(Anio) %>% # Agrupar por año
summarise(
IndSev_Media = mean(IndSev), # Severidad media anual
IndSev_Total = sum(IndSev), # Severidad total anual
IndSev_Max = max(IndSev), # Severidad máxima anual
IndSev_P90 = quantile(IndSev, 0.90), # Percentil 90 anual
N_con_sev = n(), # Número de eventos con severidad
.groups = "drop"
)
# Unir frecuencia anual con severidad anual
corr_df <- freq_anual %>%
left_join(sev_anual, by = "Anio") %>% # Combinar datasets por año
filter(!is.na(IndSev_Media)) # Excluir años sin severidad
# Calcular coeficientes de correlación
cor_pearson_med <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media, method = "pearson") # Correlación lineal
cor_spearman_med <- cor(corr_df$N_Accidentes, corr_df$IndSev_Media, method = "spearman") # Correlación por rangos
cor_pearson_tot <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total, method = "pearson") # Con totales
cor_spearman_tot <- cor(corr_df$N_Accidentes, corr_df$IndSev_Total, method = "spearman")
cor_pearson_p90 <- cor(corr_df$N_Accidentes, corr_df$IndSev_P90, method = "pearson") # Con percentil 90
# Tests de significancia estadística
test_med <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Media, method = "pearson") # Prueba para media
test_tot <- cor.test(corr_df$N_Accidentes, corr_df$IndSev_Total, method = "pearson") # Prueba para total
# Mostrar resultados de correlaciones
cat("¿Los años con más siniestros tienen siniestros más graves?\n\n")
## ¿Los años con más siniestros tienen siniestros más graves?
cat(" Correlación Freq. ↔ Severidad MEDIA por año:\n")
## Correlación Freq. ↔ Severidad MEDIA por año:
cat(sprintf(" Pearson r = %.4f (p = %.4f)\n", cor_pearson_med, test_med$p.value))
## Pearson r = -0.1183 (p = 0.7447)
cat(sprintf(" Spearman ρ = %.4f\n", cor_spearman_med))
## Spearman ρ = 0.0303
cat("\n Correlación Freq. ↔ Severidad TOTAL por año:\n")
##
## Correlación Freq. ↔ Severidad TOTAL por año:
cat(sprintf(" Pearson r = %.4f (p = %.4f)\n", cor_pearson_tot, test_tot$p.value))
## Pearson r = 0.1732 (p = 0.6322)
cat(sprintf(" Spearman ρ = %.4f\n", cor_spearman_tot))
## Spearman ρ = 0.1758
cat("\n Correlación Freq. ↔ Percentil 90 de Severidad:\n")
##
## Correlación Freq. ↔ Percentil 90 de Severidad:
cat(sprintf(" Pearson r = %.4f\n", cor_pearson_p90))
## Pearson r = 0.4391
# Función para interpretar la correlación
cat("\n Interpretación:\n")
##
## Interpretación:
interpretar_cor <- function(r, p) {
fuerza <- if (abs(r) > 0.7) "fuerte" else if (abs(r) > 0.4) "moderada" else "débil"
dir <- if (r > 0) "positiva" else "negativa"
sig <- if (p < 0.05) "(estadísticamente significativa)" else "(no significativa, p ≥ 0.05)"
cat(sprintf(" Asociación %s %s %s\n", fuerza, dir, sig))
}
interpretar_cor(cor_pearson_med, test_med$p.value)
## Asociación débil negativa (no significativa, p ≥ 0.05)
# Mostrar tabla resumen anual
cat("\n Tabla resumen anual (Frecuencia vs Severidad):\n")
##
## Tabla resumen anual (Frecuencia vs Severidad):
print(corr_df %>% dplyr::select(Anio, N_Accidentes, IndSev_Media, IndSev_Total, IndSev_Max), n=20)
## # A tibble: 10 × 5
## Anio N_Accidentes IndSev_Media IndSev_Total IndSev_Max
## <int> <int> <dbl> <dbl> <dbl>
## 1 2015 1487 16.3 12652 2240
## 2 2016 1541 13.0 9737 660
## 3 2017 1512 11.2 8115 414
## 4 2018 1565 16.2 12214 1910
## 5 2019 1504 14.9 11190 1570
## 6 2020 1307 14.5 9059 1760
## 7 2021 1520 11.1 7833 620
## 8 2022 1542 11.9 8774 1320
## 9 2023 1482 10.0 6665 120
## 10 2024 1485 13.5 9049 1736
# =============================================================================
# 6. SIMULACIÓN MONTE CARLO
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 6: SIMULACIÓN MONTE CARLO\n")
## PASO 6: SIMULACIÓN MONTE CARLO
cat("====================================================\n\n")
## ====================================================
n_sim <- 100000 # Número de simulaciones para Monte Carlo
escala <- 12 # Factor para pasar de parámetros mensuales a anuales
# Funciones simuladoras (frecuencia)
simular_N <- function(n, modelo) {
if (modelo == "nbinom") rnbinom(n, size = size_est, mu = mu_est * escala) # Binomial Negativa anual
else rpois(n, lambda = lambda_est * escala) # Poisson anual
}
# Función simuladora de severidad dada distribución
simular_X <- function(n, dist_nombre, params) {
switch(dist_nombre,
"Log-Normal" = rlnorm(n, params["meanlog"], params["sdlog"]),
"Gamma" = rgamma(n, params["shape"], params["rate"]),
"Weibull" = rweibull(n, params["shape"], params["scale"]),
"Pareto" = rpareto(n, params["shape"], params["scale"]),
stop("Distribución no reconocida")
)
}
# ---------- Simulación principal (mejor modelo) ----------
S_simulado <- numeric(n_sim) # Vector para almacenar pérdidas agregadas simuladas
for (i in seq_len(n_sim)) { # Bucle sobre cada simulación
N_i <- simular_N(1, modelo_freq) # Simular número de siniestros anual
if (N_i > 0) { # Si hay al menos un siniestro
S_simulado[i] <- sum(simular_X(N_i, mejor_sev, params_sev)) # Sumar severidades
} # Si N_i = 0, S_simulado[i] permanece como 0
}
# Mostrar estadísticas de la pérdida agregada simulada
cat("Estadísticas de la Pérdida Agregada Anual S (IndSev units):\n")
## Estadísticas de la Pérdida Agregada Anual S (IndSev units):
cat(sprintf(" E[S] (prima pura): %.2f\n", mean(S_simulado)))
## E[S] (prima pura): 17601.16
cat(sprintf(" Desv. Est.: %.2f\n", sd(S_simulado)))
## Desv. Est.: 4876.76
cat(sprintf(" Mediana: %.2f\n", median(S_simulado)))
## Mediana: 17165.24
cat(sprintf(" Máximo simulado: %.2f\n", max(S_simulado)))
## Máximo simulado: 45473.95
cat(sprintf(" Sesgo: %.3f\n", moments::skewness(S_simulado)))
## Sesgo: 0.542
cat(sprintf(" Curtosis: %.3f\n\n", moments::kurtosis(S_simulado)))
## Curtosis: 3.425
# =============================================================================
# 7. MEDIDAS DE RIESGO: VaR y TVaR
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 7: VaR y TVaR — MODELO GANADOR\n")
## PASO 7: VaR y TVaR — MODELO GANADOR
cat("====================================================\n\n")
## ====================================================
niveles <- c(0.90, 0.95, 0.99, 0.995, 0.999)
# Construir tabla con VaR y TVaR
tabla_riesgo <- data.frame(
Nivel = paste0(niveles * 100, "%"), # Etiquetas de nivel
VaR = quantile(S_simulado, probs = niveles), # Value at Risk (cuantiles)
TVaR = sapply(niveles, function(p) { # Tail Value at Risk
u <- quantile(S_simulado, p) # Umbral VaR
mean(S_simulado[S_simulado > u]) # Media condicional superior
})
) %>% mutate(Carga_Cola = round(TVaR - VaR, 2)) # Diferencia TVaR - VaR
# Mostrar tabla de medidas de riesgo
cat("Tabla de Medidas de Riesgo (mejor modelo):\n")
## Tabla de Medidas de Riesgo (mejor modelo):
print(tabla_riesgo, row.names = FALSE)
## Nivel VaR TVaR Carga_Cola
## 90% 24049.92 27079.31 3029.39
## 95% 26253.60 29097.81 2844.21
## 99% 30899.05 33327.50 2428.46
## 99.5% 32668.07 34998.21 2330.14
## 99.9% 36220.43 38463.93 2243.49
# Calcular medidas específicas para nivel 99.5%
VaR_995 <- quantile(S_simulado, 0.995)
TVaR_995 <- mean(S_simulado[S_simulado > VaR_995])
E_S <- mean(S_simulado)
# Mostrar resumen de medidas de riesgo
cat(sprintf("\n Prima de Riesgo Pura E[S]: %.2f\n", E_S))
##
## Prima de Riesgo Pura E[S]: 17601.16
cat(sprintf(" VaR al 99.5%%: %.2f\n", VaR_995))
## VaR al 99.5%: 32668.07
cat(sprintf(" TVaR al 99.5%%: %.2f\n\n", TVaR_995))
## TVaR al 99.5%: 34998.21
# =============================================================================
# 8. ANÁLISIS DE SENSIBILIDAD DE DISTRIBUCIÓN DE SEVERIDAD
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD\n")
## PASO 8: ANÁLISIS DE SENSIBILIDAD — DISTRIBUCIÓN DE SEVERIDAD
cat("====================================================\n\n")
## ====================================================
cat("¿Qué pasa si cambiamos la distribución de severidad?\n")
## ¿Qué pasa si cambiamos la distribución de severidad?
cat("¿Cómo afecta al VaR y TVaR?\n\n")
## ¿Cómo afecta al VaR y TVaR?
# Todas las distribuciones ajustadas
candidatos_sev <- names(distribs) # Nombres de distribuciones candidatas
# Realizar simulaciones para cada distribución candidata
resultados_sens <- map_dfr(candidatos_sev, function(dist_nm) {
p <- distribs[[dist_nm]]$estimate # Parámetros de la distribución actual
cat(sprintf(" Simulando con distribución: %s...\n", dist_nm))
# Simular pérdida agregada con esta distribución
S_alt <- numeric(n_sim)
for (i in seq_len(n_sim)) {
N_i <- simular_N(1, modelo_freq) # Mismo modelo de frecuencia
if (N_i > 0) {
S_alt[i] <- sum(simular_X(N_i, dist_nm, p)) # Sumar con distribución alternativa
}
}
# Calcular medidas de riesgo para esta simulación
var90 <- quantile(S_alt, 0.90)
var95 <- quantile(S_alt, 0.95)
var99 <- quantile(S_alt, 0.99)
var995 <- quantile(S_alt, 0.995)
# Devolver dataframe con resultados
data.frame(
Distribución = dist_nm,
Es_Ganador = dist_nm == mejor_sev, # Indicador si es el mejor modelo
AIC = distribs[[dist_nm]]$aic,
E_S = mean(S_alt),
VaR_90 = var90,
VaR_95 = var95,
VaR_99 = var99,
VaR_995 = var995,
TVaR_995 = mean(S_alt[S_alt > var995]),
Carga_Cola = mean(S_alt[S_alt > var995]) - var995 # Diferencia TVaR - VaR
)
})
## Simulando con distribución: Log-Normal...
## Simulando con distribución: Gamma...
## Simulando con distribución: Weibull...
## Simulando con distribución: Pareto...
# Mostrar tabla de sensibilidad completa
cat("\n Tabla de sensibilidad completa:\n")
##
## Tabla de sensibilidad completa:
print(resultados_sens %>% dplyr::select(-Es_Ganador) %>% # Excluir columna indicadora
mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE) # Redondear numéricos
## Distribución AIC E_S VaR_90 VaR_95 VaR_99 VaR_995 TVaR_995
## Log-Normal 47802.59 17592.46 24066.36 26344.66 30871.61 32563.39 34983.93
## Gamma 50664.67 19878.96 27078.97 29582.38 34694.77 36641.96 39416.56
## Weibull 49570.79 18206.11 24866.54 27155.18 31762.79 33649.42 36095.92
## Pareto 48328.07 17330.45 23666.71 25870.22 30405.53 32194.12 34461.42
## Carga_Cola
## 2420.54
## 2774.60
## 2446.50
## 2267.30
# Calcular valores base (mejor modelo)
base_VaR <- resultados_sens %>% filter(Es_Ganador) %>% pull(VaR_995)
base_TVaR <- resultados_sens %>% filter(Es_Ganador) %>% pull(TVaR_995)
# Mostrar variación relativa respecto al mejor modelo
cat("\n Variación relativa al mejor modelo (", mejor_sev, "):\n", sep = "")
##
## Variación relativa al mejor modelo (Log-Normal):
resultados_sens %>%
mutate(
Delta_VaR_pct = round(100 * (VaR_995 - base_VaR) / base_VaR, 1), # Cambio porcentual en VaR
Delta_TVaR_pct = round(100 * (TVaR_995 - base_TVaR) / base_TVaR, 1) # Cambio porcentual en TVaR
) %>%
dplyr::select(Distribución, Delta_VaR_pct, Delta_TVaR_pct) %>% # Seleccionar columnas
{ cat(sprintf(" %-12s ΔVaR99.5%%: %+.1f%% ΔTVaR99.5%%: %+.1f%%\n",
.$Distribución, .$Delta_VaR_pct, .$Delta_TVaR_pct))
invisible(.) }
## Log-Normal ΔVaR99.5%: +0.0% ΔTVaR99.5%: +0.0%
## Gamma ΔVaR99.5%: +12.5% ΔTVaR99.5%: +12.7%
## Weibull ΔVaR99.5%: +3.3% ΔTVaR99.5%: +3.2%
## Pareto ΔVaR99.5%: -1.1% ΔTVaR99.5%: -1.5%
cat("\n Interpretación:\n")
##
## Interpretación:
max_var_var <- max(abs(resultados_sens$VaR_995 - base_VaR) / base_VaR)
max_var_tva <- max(abs(resultados_sens$TVaR_995 - base_TVaR) / base_TVaR)
cat(sprintf(" La elección de distribución puede mover el VaR99.5%% hasta %.1f%%\n", 100 * max_var_var))
## La elección de distribución puede mover el VaR99.5% hasta 12.5%
cat(sprintf(" y el TVaR99.5%% hasta %.1f%% respecto al modelo base.\n", 100 * max_var_tva))
## y el TVaR99.5% hasta 12.7% respecto al modelo base.
cat(" Esto ilustra el riesgo de modelo (model risk) en actuaría.\n\n")
## Esto ilustra el riesgo de modelo (model risk) en actuaría.
# =============================================================================
# 9. INDICE DE CARGA DE RIESGO (Loading Ratio)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)\n")
## PASO 9: INDICE DE CARGA DE RIESGO (Loading Ratio)
cat("====================================================\n\n")
## ====================================================
# Calcular índices de carga y cocientes
resultados_sens <- resultados_sens %>%
mutate(
Loading_Ratio = round(TVaR_995 / E_S, 3), # TVaR / E[S] (carga de cola)
Cociente_VaR_TVaR = round(VaR_995 / TVaR_995, 3) # VaR / TVaR (1 = cola liviana, <1 = cola pesada)
)
cat(" Loading Ratio = TVaR99.5% / E[S] (mayor = cola más pesada)\n")
## Loading Ratio = TVaR99.5% / E[S] (mayor = cola más pesada)
cat(" Cociente VaR/TVaR (menor = mayor riesgo de cola)\n\n")
## Cociente VaR/TVaR (menor = mayor riesgo de cola)
# Mostrar tabla con índices de carga
print(resultados_sens %>% dplyr::select(Distribución, E_S, TVaR_995, Loading_Ratio, Cociente_VaR_TVaR) %>%
mutate(across(where(is.numeric), ~round(., 2))), row.names = FALSE)
## Distribución E_S TVaR_995 Loading_Ratio Cociente_VaR_TVaR
## Log-Normal 17592.46 34983.93 1.99 0.93
## Gamma 19878.96 39416.56 1.98 0.93
## Weibull 18206.11 36095.92 1.98 0.93
## Pareto 17330.45 34461.42 1.99 0.93
# =============================================================================
# 10. ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)\n")
## PASO 10: ANÁLISIS DE EVENTOS EXTREMOS (Pareto 80/20)
cat("====================================================\n\n")
## ====================================================
cat("─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───\n\n")
## ─── 9B. PRINCIPIO DE PARETO EN LOS SINIESTROS ───
cat(" ¿Qué porcentaje de accidentes concentra el 80% del daño total?\n\n")
## ¿Qué porcentaje de accidentes concentra el 80% del daño total?
# Ordenar severidades de mayor a menor
sev_ord <- sort(X_severidad, decreasing = TRUE) # Vector ordenado descendente
sev_cum <- cumsum(sev_ord) / sum(sev_ord) # Proporción acumulada de daño
n_20pct <- ceiling(0.20 * length(sev_ord)) # Número que representa 20% de accidentes
pct_dano <- sev_cum[n_20pct] # Daño acumulado en ese 20%
n_para80 <- which(sev_cum >= 0.80)[1] # Accidentes necesarios para 80% daño
pct_acc <- round(100 * n_para80 / length(sev_ord), 1) # Porcentaje de accidentes para 80% daño
# Mostrar resultados del análisis Pareto
cat(sprintf(" El 20%% más grave de los accidentes concentra el %.1f%% del daño total\n", 100 * pct_dano))
## El 20% más grave de los accidentes concentra el 66.2% del daño total
cat(sprintf(" El %.1f%% de los accidentes concentra el 80%% del daño total\n", pct_acc))
## El 34.4% de los accidentes concentra el 80% del daño total
cat(sprintf(" → El principio 80/20 (Pareto) %s verificado en estos datos\n\n",
ifelse(pct_acc < 25, "SÍ es", "NO es completamente")))
## → El principio 80/20 (Pareto) NO es completamente verificado en estos datos
# =============================================================================
# 11. PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA
# =============================================================================
cat("====================================================\n")
## ====================================================
cat(" PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA\n")
## PASO 11: PERFIL DE RIESGO POR CONDICIÓN METEOROLÓGICA
cat("====================================================\n\n")
## ====================================================
cat("─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───\n\n")
## ─── 9C. PERFIL DE RIESGO: METEOROLOGÍA vs SEVERIDAD ───
# Calcular estadísticas de severidad agrupadas por condición meteorológica
perfil_meteo <- sev_limpio %>%
group_by(Meteorologia) %>% # Agrupar por clima
summarise(
N = n(), # Número de accidentes
Media_Sev = round(mean(IndSev), 2), # Severidad media
P90_Sev = round(quantile(IndSev, 0.90), 2), # Percentil 90
P99_Sev = round(quantile(IndSev, 0.99), 2), # Percentil 99
Pct_Fatal = round(100 * mean(FatalInjuryCount > 0), 1), # Porcentaje con fatales
.groups = "drop"
) %>% arrange(desc(Media_Sev)) # Ordenar por media descendente
# Mostrar perfil meteorológico
cat(" Perfil por condición meteorológica:\n")
## Perfil por condición meteorológica:
print(perfil_meteo, row.names = FALSE)
## # A tibble: 3 × 6
## Meteorologia N Media_Sev P90_Sev P99_Sev Pct_Fatal
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Desconocido 1494 31.3 40 416. 76.4
## 2 IMC/IFR 422 18.1 40 87.9 74.9
## 3 VMC/VFR 5239 7.8 20 50 35.2
# =============================================================================
# 12. GRAFICAS
# =============================================================================
# Paleta de colores
COL_AZUL <- "#2C5F8A"
COL_ROJO <- "#C0392B"
COL_VERDE <- "#1A7A4A"
COL_MORADO <- "#7D3C98"
COL_NARANJA <- "#D35400"
COL_GRIS <- "#566573"
# Tema base para todas las gráficas (consistencia visual)
tema_base <- theme_minimal(base_size = 12) + # Tema minimalista base
theme(
plot.title = element_text(face = "bold", size = 13, hjust = 0.5), # Título centrado negrita
plot.subtitle = element_text(hjust = 0.5, color = "grey40"), # Subtítulo centrado gris
axis.title = element_text(face = "bold"), # Títulos de ejes negrita
legend.position = "bottom", # Leyenda al fondo
panel.grid.minor = element_blank() # Eliminar líneas de grid menores
)
# ------ FIG 1: Evolución anual de accidentes ------
p1 <- ggplot(freq_anual, aes(x = Anio, y = N_Accidentes)) +
geom_col(fill = COL_AZUL, alpha = 0.85, color = "white") +
geom_text(aes(label = N_Accidentes), vjust = -0.4,
size = 3.5, fontface = "bold", color = "grey30") +
scale_x_continuous(breaks = 2015:2024) +
labs(title = "Accidentes de Aviación por Año — NTSB",
subtitle = "Período 2015–2024 | Solo accidentes confirmados (ACC)",
x = "Año", y = "N° de accidentes") +
tema_base
print(p1, width=9, height=5, dpi=150)

# ------ FIG 2: Distribución de frecuencia mensual + ajustes ------
df_freq_plot <- data.frame(n = N_frecuencia)
x_grid <- seq(min(N_frecuencia), max(N_frecuencia))
p2 <- ggplot(df_freq_plot, aes(x = n)) +
geom_histogram(aes(y = after_stat(density)),
binwidth = 5, fill = COL_AZUL, color = "white", alpha = 0.7) +
stat_function(fun = function(x) dpois(round(x), lambda = lambda_est),
color = COL_ROJO, linewidth = 1.2, linetype = "dashed",
n = 200) +
stat_function(fun = function(x) dnbinom(round(x), size = size_est, mu = mu_est),
color = COL_VERDE, linewidth = 1.2, n = 200) +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
label = paste0("Poisson (rojo) λ=", round(lambda_est,1),
"\nBin.Neg (verde) μ=", round(mu_est,1)),
size = 3.2, color = "grey30") +
labs(title = "Frecuencia Mensual de Accidentes — Ajuste de Distribuciones",
subtitle = "NTSB 2015–2024",
x = "Accidentes por mes", y = "Densidad") +
tema_base
print(p1, width=8, height=5, dpi=150)

# ------ FIG 3: Histograma + CDF de Severidad ------
df_sev_plot <- data.frame(x = X_severidad)
x_seq <- seq(min(X_severidad), quantile(X_severidad, 0.995), length.out = 300)
p3 <- ggplot(df_sev_plot, aes(x = x)) +
geom_histogram(aes(y = after_stat(density)),
bins = 60, fill = COL_NARANJA, color = "white", alpha = 0.7) +
stat_function(fun = cdf_fn %>% {function(x) { h <- 0.001; (.(x+h) - .(x-h))/(2*h) }},
color = COL_MORADO, linewidth = 1.3, n = 300) +
coord_cartesian(xlim = c(0, quantile(X_severidad, 0.99))) +
labs(title = paste("Ajuste de Severidad —", mejor_sev),
subtitle = "Índice actuarial ponderado (10·Fatal + 3·Serio + 1·Menor)",
x = "Índice de Severidad (IndSev)", y = "Densidad") +
tema_base
print(p1, width=8, height=5, dpi=150)

# ------ FIG 4: Q-Q de Severidad ------
q_emp <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
q_teo <- switch(mejor_sev,
"Log-Normal" = qlnorm(ppoints(min(length(X_severidad), 1000)),
params_sev["meanlog"], params_sev["sdlog"]),
"Gamma" = qgamma(ppoints(min(length(X_severidad), 1000)),
params_sev["shape"], params_sev["rate"]),
"Weibull" = qweibull(ppoints(min(length(X_severidad), 1000)),
params_sev["shape"], params_sev["scale"]),
"Pareto" = qpareto(ppoints(min(length(X_severidad), 1000)),
params_sev["shape"], params_sev["scale"]),
{
q_emp_alt <- quantile(X_severidad, ppoints(min(length(X_severidad), 1000)))
q_emp_alt # fallback
}
)
df_qq <- data.frame(teorico = sort(q_teo), empirico = sort(q_emp))
p4 <- ggplot(df_qq, aes(x = teorico, y = empirico)) +
geom_point(color = COL_AZUL, alpha = 0.5, size = 0.9) +
geom_abline(slope = 1, intercept = 0, color = COL_ROJO,
linewidth = 1.2, linetype = "dashed") +
labs(title = paste("Q-Q Plot de Severidad —", mejor_sev),
subtitle = "Puntos sobre la línea roja = buen ajuste",
x = "Cuantiles teóricos", y = "Cuantiles empíricos") +
tema_base
print(p1, width=7, height=6, dpi=150)

# ------ FIG 5: Pérdida Agregada S con VaR y TVaR ------
df_S <- data.frame(S = S_simulado)
p5 <- ggplot(df_S, aes(x = S)) +
geom_histogram(aes(y = after_stat(density)),
bins = 80, fill = COL_AZUL, color = "white", alpha = 0.7) +
geom_density(color = "#2C3E50", linewidth = 1) +
geom_vline(xintercept = E_S, color = COL_VERDE, linewidth = 1.2, linetype = "dashed") +
geom_vline(xintercept = VaR_995, color = COL_ROJO, linewidth = 1.4) +
geom_vline(xintercept = TVaR_995, color = COL_MORADO, linewidth = 1.4, linetype = "dotdash") +
annotate("text", x = E_S, y = Inf, vjust=2, hjust=-0.1,
label = sprintf("E[S]=%.0f", E_S), color=COL_VERDE, size=3.2, fontface="bold") +
annotate("text", x = VaR_995, y = Inf, vjust=2, hjust=-0.1,
label = sprintf("VaR99.5%%=%.0f", VaR_995), color=COL_ROJO, size=3.2, fontface="bold") +
annotate("text", x = TVaR_995, y = Inf, vjust=4, hjust=-0.1,
label = sprintf("TVaR=%.0f", TVaR_995), color=COL_MORADO, size=3.2, fontface="bold") +
labs(title = "Distribución de Pérdida Agregada Anual S",
subtitle = sprintf("Monte Carlo — %s simulaciones | Modelo: %s + %s",
format(n_sim, big.mark=","), modelo_freq, mejor_sev),
x = "Pérdida Agregada Anual (IndSev)", y = "Densidad") +
tema_base
print(p1, width=9, height=5, dpi=150)

# ------ FIG 6: Curva de Excedencia ------
s_ord <- sort(S_simulado, decreasing = TRUE)
prob_exc <- seq_along(s_ord) / length(s_ord)
df_exc <- data.frame(S = s_ord, prob = prob_exc)
p6 <- ggplot(df_exc, aes(x = S, y = prob)) +
geom_line(color = COL_AZUL, linewidth = 1) +
geom_hline(yintercept = 0.005, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
geom_vline(xintercept = VaR_995, color = COL_ROJO, linewidth = 0.9, linetype = "dashed") +
annotate("point", x = VaR_995, y = 0.005, color = COL_ROJO, size = 3.5) +
annotate("text", x = VaR_995, y = 0.005,
label = sprintf("VaR 99.5%%\n= %.0f", VaR_995),
vjust = -0.5, hjust = -0.1, color = COL_ROJO, size = 3.2, fontface = "bold") +
scale_y_continuous(labels = percent_format(), limits = c(0, 0.5)) +
labs(title = "Curva de Excedencia — P(S > x)",
subtitle = "Probabilidad de superar cada nivel de pérdida agregada anual",
x = "Pérdida Agregada (IndSev)", y = "Probabilidad de Excedencia") +
tema_base
print(p1, width=8, height=5, dpi=150)

# ------ FIG 7: Correlación Frecuencia–Severidad anual ------
p7 <- ggplot(corr_df, aes(x = N_Accidentes, y = IndSev_Media, label = Anio)) +
geom_point(color = COL_AZUL, size = 4, alpha = 0.8) +
geom_text(vjust = -0.7, size = 3.2, color = "grey30", fontface = "bold") +
geom_smooth(method = "lm", se = TRUE, color = COL_ROJO,
fill = COL_ROJO, alpha = 0.1, linetype = "dashed") +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
label = sprintf("r = %.3f\np = %.3f", cor_pearson_med, test_med$p.value),
size = 4, color = COL_GRIS, fontface = "bold") +
labs(title = "Correlación: Frecuencia vs Severidad Media Anual",
subtitle = "¿Años con más accidentes = accidentes más graves?",
x = "N° accidentes anuales", y = "Severidad media (IndSev)") +
tema_base
print(p1, width=7, height=6, dpi=150)

# ------ FIG 8: Análisis de Sensibilidad — VaR y TVaR por distribución ------
res_long <- resultados_sens %>%
dplyr::select(Distribución, VaR_995, TVaR_995) %>%
pivot_longer(cols = c(VaR_995, TVaR_995), names_to = "Medida", values_to = "Valor") %>%
mutate(Medida = recode(Medida, "VaR_995" = "VaR 99.5%", "TVaR_995" = "TVaR 99.5%"))
p8 <- ggplot(res_long, aes(x = reorder(Distribución, Valor), y = Valor, fill = Medida)) +
geom_col(position = "dodge", alpha = 0.85, color = "white") +
coord_flip() +
scale_fill_manual(values = c("VaR 99.5%" = COL_ROJO, "TVaR 99.5%" = COL_MORADO)) +
labs(title = "Sensibilidad de VaR y TVaR según Distribución de Severidad",
subtitle = "Mismo modelo de frecuencia — distintas hipótesis de severidad",
x = NULL, y = "Valor (IndSev)", fill = "Medida de Riesgo") +
tema_base
print(p1, width=9, height=5, dpi=150)

# ------ FIG 9: Perfil de riesgo meteorológico ------
p9 <- sev_limpio %>%
filter(Meteorologia != "Desconocido") %>%
ggplot(aes(x = Meteorologia, y = IndSev, fill = Meteorologia)) +
geom_violin(trim = TRUE, alpha = 0.6, show.legend = FALSE) +
geom_boxplot(width = 0.2, alpha = 0.8, outlier.size = 0.6,
outlier.alpha = 0.3, show.legend = FALSE) +
scale_y_log10(labels = comma_format()) +
scale_fill_manual(values = c("VMC/VFR" = COL_VERDE, "IMC/IFR" = COL_ROJO)) +
labs(title = "Severidad por Condición Meteorológica",
subtitle = "VMC/VFR = condiciones visuales; IMC/IFR = condiciones por instrumentos",
x = NULL, y = "IndSev (escala log)") +
tema_base
print(p1, width=7, height=6, dpi=150)

# ------ FIG 10: Loading Ratio (carga de cola) por distribución ------
p10 <- resultados_sens %>%
ggplot(aes(x = reorder(Distribución, Loading_Ratio), y = Loading_Ratio)) +
geom_col(fill = COL_NARANJA, alpha = 0.85, color = "white") +
geom_text(aes(label = round(Loading_Ratio, 2)),
hjust = -0.2, size = 3.5, fontface = "bold") +
coord_flip() +
geom_hline(yintercept = 1, linetype = "dashed", color = COL_GRIS) +
labs(title = "Índice de Carga de Cola (Loading Ratio)",
subtitle = "TVaR99.5% / E[S] — cuántas veces la prima pura exige la cola",
x = NULL, y = "Loading Ratio") +
tema_base + expand_limits(y = max(resultados_sens$Loading_Ratio) * 1.15)
print(p1, width=8, height=5, dpi=150)

# Dashboard principal (panel de 6 gráficas clave)
dashboard <- (p1 | p2) / (p5 | p6) / (p7 | p8)
print(dashboard, width=16, height=15, dpi=150)
## Warning: Removed 50000 rows containing missing values or values outside the scale range
## (`geom_line()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

# Panel de diagnóstico de ajuste
diagnostico <- (p3 | p4) / (p9 | p10)
print(diagnostico, width=16, height=10, dpi=150)
