Análisis Estadístico de Other.Costs

# Global knitr chunk options: echo the code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Strongly prefer fixed over scientific notation when printing numbers.
# (This option was previously set twice; once is enough.)
options(scipen = 999)

# 1. Data loading
#
# The CSV is read through its full path instead of calling setwd() first:
# changing the working directory is a global side effect that outlives this
# step, and knitr restores the directory after each chunk anyway.
# NOTE(review): the directory is still machine-specific; consider keeping the
# data file next to the report and using a relative path.

ruta_datos <- file.path("C:/Users/ronal/OneDrive/Desktop", "database (1).csv")

datos <- read.csv(
  ruta_datos,
  header = TRUE,
  sep = ",",
  dec = "."
)

#2. Cargar librerías

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# 3. Extraction and cleaning of the Other.Costs variable
#
# Keep only the observed (non-NA), strictly positive values.

Other.Costs <- datos$Other.Costs
Other.Costs <- Other.Costs[!is.na(Other.Costs) & Other.Costs > 0]

# 4. Range and number of classes (Sturges' rule)

xmax <- max(Other.Costs)
xmin <- min(Other.Costs)

# K: number of classes, R: range of the data, A: class width.
K <- floor(3.3 * log10(length(Other.Costs)) + 1)
R <- xmax - xmin
A <- R / K

# 5. Class limits and class marks
#
# Li / Ls are the lower / upper limits of each class (2 decimals);
# MC is the class midpoint, rounded to an integer.

Li <- round(seq(xmin, xmax - A, by = A), digits = 2)
Ls <- round(seq(xmin + A, xmax, by = A), digits = 2)
MC <- round((Ls + Li) / 2)

# Check that one midpoint was produced per class.
K == length(MC)

## [1] TRUE

# 6. Frequency distribution table
# 6.1 Absolute frequency
#
# Classes 1..K-1 are half-open, [Li, Ls); the last class is closed,
# [Li, xmax], so that the maximum is counted.
#
# Fix: the loop header used to be `for (i in 1)`, which only visited the
# first class and left classes 2..K-1 at zero (the earlier render reported
# 307 counted observations out of 314). It now iterates over every class
# except the last, which is handled separately below.

ni <- numeric(K)

for (i in seq_len(K - 1)) {
  ni[i] <- sum(Other.Costs >= Li[i] & Other.Costs < Ls[i])
}

ni[K] <- sum(Other.Costs >= Li[K] & Other.Costs <= xmax)

# Sanity check: the class counts should add up to the number of observations.
sum(ni)

length(Other.Costs)

sum(ni) == length(Other.Costs)

# 6.2 Relative and cumulative frequencies
#
# hi is the relative frequency in percent; *_asc accumulate from the first
# class upwards, *_desc from the last class downwards.

hi <- ni / sum(ni) * 100

Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))

Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))

# 6.3 Final frequency table

TDF <- data.frame(
  Li = Li,
  Ls = Ls,
  MC = MC,
  ni = ni,
  hi_porc = round(hi, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc_porc = round(Hi_asc, 2),
  Hi_desc_porc = round(Hi_desc, 2)
)

TDF

# NOTE: the table printed by the earlier render (306 observations in class 1,
# zero in classes 2-8, 1 in class 9) was produced by the loop bug described
# above and is no longer valid; re-knit the report to regenerate it.

# 7. Histogram
#
# The breaks define exactly K classes spanning [min, max], matching the
# frequency table. Building them with `by = A` up to max + A produced an
# additional class above the maximum. `right = FALSE` makes the classes
# left-closed, [a, b), and hist()'s default include.lowest = TRUE then closes
# the last class so the maximum is counted, as in the table.

h <- hist(
  Other.Costs,
  main = "Gráfica No.1: Distribución de Otros Costos",
  breaks = seq(min(Other.Costs), max(Other.Costs), length.out = K + 1),
  right = FALSE,
  xlab = "Otros Costos",
  ylab = "Frecuencia",
  col = "pink2",
  xaxt = "n"
)

# Draw the x axis by hand so the tick labels are not in scientific notation.
axis(1,
  at = pretty(h$breaks),
  labels = format(pretty(h$breaks), scientific = FALSE)
)

# 8. Ascending and descending ogives
#
# Ascending ogive: cumulative count reached at each upper limit, anchored at
# zero on the lowest lower limit. Descending ogive: cumulative count remaining
# from each lower limit, anchored at zero on the highest upper limit.
# (The anchors used to be min(Ls) and max(Li), which placed a zero on top of a
# non-zero point and drew a vertical segment instead of spanning the class.)

x_asc <- c(min(Li), Ls)
y_asc <- c(0, Ni_asc)

x_desc <- c(Li, max(Ls))
y_desc <- c(Ni_desc, 0)

# Common axis ranges so both curves fit in the same plot.
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))

plot(x_asc, y_asc,
  type = "o", col = "skyblue",
  main = "Gráfica No.2: Ojivas Ascendente y Descendente de Otros Costos",
  xlab = "Otros Costos",
  ylab = "Frecuencia acumulada",
  xlim = x_range, ylim = y_range,
  xaxt = "n", yaxt = "n"
)

# Axes are drawn by hand so the x labels are not in scientific notation.
axis(1,
  at = pretty(x_range),
  labels = format(pretty(x_range), scientific = FALSE)
)
axis(2, at = pretty(y_range))

lines(x_desc, y_desc, type = "o", col = "pink4")

legend("topright",
  legend = c("Ascendente", "Descendente"),
  col = c("skyblue", "pink4"),
  lty = 1, pch = 1, cex = 0.8
)

# 9. Box plots
#
# Horizontal box plot on the original scale; the x axis is suppressed and
# redrawn below with fixed-notation tick labels.

boxplot(
  Other.Costs,
  main = "Gráfica No.3: Otros Costos",
  xlab = "Costos",
  col = "blue",
  horizontal = TRUE,
  xaxt = "n"
)

marcas_eje <- pretty(Other.Costs)
axis(side = 1, at = marcas_eje, labels = format(marcas_eje, scientific = FALSE))

# 9.1 Box plot on a log10 scale

boxplot(
  log10(Other.Costs),
  main = "Gráfica No.4 (log10): Otros Costos",
  xlab = "log10(Otros Costos)",
  col = "skyblue",
  horizontal = TRUE
)

# 10. Statistical indicators

library(moments)

# Central tendency
mediana <- median(Other.Costs)
media <- mean(Other.Costs)

# Dispersion; cv is the coefficient of variation in percent.
varianza <- var(Other.Costs)
desv <- sd(Other.Costs)
cv <- 100 * (desv / media)

# Shape (skewness and kurtosis as computed by the moments package)
curt <- kurtosis(Other.Costs)
asim <- skewness(Other.Costs)

# Mode by interval (modal class): the first bin with the highest count.
# NOTE(review): these bins come from hist()'s default breaks, not from the
# Sturges classes used in the frequency table - confirm this is intended.

hist_data <- hist(Other.Costs, plot = FALSE)

indice_modal <- which.max(hist_data$counts)

limites_modal <- round(hist_data$breaks[indice_modal + 0:1], 2)

moda_intervalo <- paste0("[", paste(limites_modal, collapse = " - "), "]")

# 11. Final indicator table
#
# The numeric indicators are rounded to 2 decimals and then combined with the
# modal interval, which turns the whole column into text; format() pads the
# entries to a common width.

valores_redondeados <- round(
  c(mediana, media, desv, varianza, cv, asim, curt),
  2
)

indicadores_OtrosCostos <- data.frame(
  Indicador = c(
    "Moda (intervalo)",
    "Mediana",
    "Media",
    "Desviación Estándar",
    "Varianza",
    "Coef. de Variación (%)",
    "Asimetría",
    "Curtosis"
  ),
  Valor = format(c(moda_intervalo, valores_redondeados), scientific = FALSE),
  stringsAsFactors = FALSE
)

print(indicadores_OtrosCostos, row.names = FALSE)

##               Indicador            Valor
##        Moda (intervalo) [0 - 2000000]   
##                 Mediana 9914            
##                   Media 304061.89       
##     Desviación Estándar 1822030.32      
##                Varianza 3319794475085.87
##  Coef. de Variación (%) 599.23          
##                Curtosis 111.76

Análisis Estadístico de Other.Costs

Ronal Calderón