# ==================================================================================
# REPORTE FINAL R PUBS: PROPERTY DAMAGE COSTS (USD)
# ==================================================================================

# 1. LIBRERÍAS
library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(scales)

# 2. DATA
# Load the workbook and extract the variable under study as a clean numeric
# vector.
database <- read_excel("database.xlsx")
## Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
## Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
# NOTE(review): the warnings above suggest header text repeated inside the
# sheet (rows 2189 and 2215, third column) -- confirm against the workbook.
nombre_var <- "Property Damage Costs"

# Fail early with a clear message: a missing column would otherwise yield an
# empty vector here and only surface later as an unrelated seq()/cut() error.
if (!nombre_var %in% names(database)) {
  stop(
    "Column '", nombre_var, "' was not found in database.xlsx",
    call. = FALSE
  )
}

# Coerce to numeric (values that cannot be parsed become NA) and drop the NAs.
datos_poblacion <- as.numeric(database[[nombre_var]])
datos_poblacion <- datos_poblacion[!is.na(datos_poblacion)]

if (length(datos_poblacion) == 0) {
  stop(
    "Column '", nombre_var, "' contains no numeric values",
    call. = FALSE
  )
}

# VISUAL ZOOM
# Keep only the observations up to ten times the median; the tables and plots
# below are built on this subset (datos_v), not on the full vector.
limite_visual <- median(datos_poblacion) * 10

# The class breaks built later run from 0 to limite_visual, so a non-positive
# limit would produce duplicated breaks.
if (!is.finite(limite_visual) || limite_visual <= 0) {
  stop(
    "The visual limit (10 x median) must be a positive finite number",
    call. = FALSE
  )
}

datos_v <- datos_poblacion[datos_poblacion <= limite_visual]
n_zoom <- length(datos_v)

# 3. FREQUENCY TABLE
# Ten equal-width classes spanning 0 to limite_visual (11 break points).
cortes <- seq(0, limite_visual, length.out = 11)

# One "lower - upper" label per class, formatted with thousands separators.
etiquetas <- paste0(
  comma(cortes[-length(cortes)]),
  " - ",
  comma(cortes[-1])
)

# Classes are closed on the left (right = FALSE); include.lowest = TRUE also
# closes the last class on the right so a value equal to the top break is kept.
tabla_frecuencia <- data.frame(
  Intervalo = cut(
    datos_v,
    breaks = cortes,
    include.lowest = TRUE,
    right = FALSE,
    labels = etiquetas
  )
) %>%
  # .drop = FALSE keeps classes with no observations as rows with ni = 0.
  # Without it an empty class vanishes from the table, which then no longer
  # has one row per class and cannot be paired with the class boundaries.
  count(Intervalo, name = "ni", .drop = FALSE) %>%
  mutate(
    # Relative frequency, rounded to three decimals.
    hi = round(ni / n_zoom, 3),
    # Cumulative count up to and including this class.
    Ni_asc = cumsum(ni),
    # Count of observations in this class and every class above it.
    Ni_desc = n_zoom - Ni_asc + ni
  )

print(tabla_frecuencia)
##          Intervalo   ni    hi Ni_asc Ni_desc
## 1        0 - 3,000 1367 0.633   1367    2160
## 2    3,000 - 6,000  247 0.114   1614     793
## 3    6,000 - 9,000  116 0.054   1730     546
## 4   9,000 - 12,000  118 0.055   1848     430
## 5  12,000 - 15,000   56 0.026   1904     312
## 6  15,000 - 18,000   70 0.032   1974     256
## 7  18,000 - 21,000   74 0.034   2048     186
## 8  21,000 - 24,000   16 0.007   2064     112
## 9  24,000 - 27,000   51 0.024   2115      96
## 10 27,000 - 30,000   45 0.021   2160      45
# 4. HISTOGRAM
# Bar chart of the absolute frequency (ni) of each class in tabla_frecuencia.
p1 <- ggplot(data = tabla_frecuencia, mapping = aes(x = Intervalo, y = ni)) +
  geom_col(colour = "white", fill = "#546E7A") +
  ggtitle("Histograma de Costos por Daños a la Propiedad") +
  xlab("Rango de Costo (USD)") +
  ylab("Frecuencia Absoluta") +
  theme_minimal() +
  # Tilt the class labels so the ranges do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

print(p1)

# 5. OGIVES ("LESS THAN" AND "GREATER THAN")
# Each curve pairs one cumulative count with one class boundary, so the table
# must have exactly one row per class. data.frame() would otherwise fail or,
# when the lengths happen to divide evenly, silently recycle the counts.
if (nrow(tabla_frecuencia) != length(cortes) - 1) {
  stop(
    "tabla_frecuencia must have one row per class (",
    length(cortes) - 1, " expected, ", nrow(tabla_frecuencia), " found)",
    call. = FALSE
  )
}

# Upper class limits: Ni_asc counts the observations below each of them.
puntos_x <- cortes[-1]

ojiva <- rbind(
  data.frame(
    Limite = puntos_x,
    Frecuencia = tabla_frecuencia$Ni_asc,
    Tipo = "Menor que"
  ),
  data.frame(
    # Ni_desc counts the observations at or above the LOWER limit of each
    # class, so the "greater than" curve is anchored on the lower limits.
    Limite = cortes[-length(cortes)],
    Frecuencia = tabla_frecuencia$Ni_desc,
    Tipo = "Mayor que"
  )
)

p2 <- ggplot(ojiva, aes(x = Limite, y = Frecuencia, color = Tipo)) +
  # linewidth replaces the size argument for lines, deprecated in ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal() +
  labs(
    title = "Ojivas de Frecuencia Acumulada",
    x = "Costo (USD)",
    y = "Frecuencia Acumulada"
  )
print(p2)

# 6. BOXPLOT
# Horizontal boxplot of the zoomed subset (datos_v); outliers are drawn in
# semi-transparent red.
p3 <- ggplot(data = data.frame(v = datos_v), mapping = aes(x = "", y = v)) +
  geom_boxplot(
    outlier.alpha = 0.7,
    outlier.colour = "#D32F2F",
    colour = "black",
    fill = "#4E342E"
  ) +
  # Flip the axes so the cost scale runs along the horizontal direction.
  coord_flip() +
  scale_y_continuous(labels = label_dollar()) +
  ggtitle("Boxplot de Costos por Daños a la Propiedad") +
  xlab("") +
  ylab("Dólares (USD)") +
  theme_minimal()

print(p3)

# ==================================================================================
# FIN
# ==================================================================================