CARGA DE DATOS Y LIBRERÍAS

knitr::opts_chunk$set(
    echo = TRUE,
    message = FALSE,
    warning = FALSE,
    fig.align = "center"
)

datos <- read.csv("C:/Users/USER/Documents/PROYECTO ESTADISTICA/CMDB_Data.csv", 
                  header = TRUE, 
                  sep = ";", 
                  dec = ".", 
                  fileEncoding = "latin1")

str(datos)
## 'data.frame':    1366 obs. of  103 variables:
##  $ ï..LAB_ID            : chr  "C355417" "C360759" "C360762" "C360763" ...
##  $ PREVIOUS_LAB_ID1     : chr  "" "" "" "" ...
##  $ PREVIOUS_LAB_ID2     : chr  "" "" "" "" ...
##  $ PREVIOUS_LAB_ID3     : chr  "" "" "" "" ...
##  $ FIELD_ID             : chr  "RM0001" "RM0027" "RM0030" "RM0031" ...
##  $ JOB_ID               : chr  "MRP11968" "MRP12307" "MRP12307" "MRP12307" ...
##  $ PREVIOUS_JOB_ID1     : chr  "" "" "" "" ...
##  $ PREVIOUS_JOB_ID2     : chr  "" "" "" "" ...
##  $ PREVIOUS_JOB_ID3     : chr  "" "" "" "" ...
##  $ SUBMITTER            : chr  "Rare Metals Task" "Rare Metals Task" "Rare Metals Task" "Rare Metals Task" ...
##  $ PROJECT_NAME         : chr  "Critical and Rare Metals" "Critical and Rare Metals" "Critical and Rare Metals" "Critical and Rare Metals" ...
##  $ DATE_SUBMITTED       : chr  "30/6/2011" "31/8/2011" "31/8/2011" "31/8/2011" ...
##  $ COLLECTION           : chr  "Mackay-Keck Ore Deposits Collection" "Mackay-Stanford Ore Deposits Collection" "Mackay-Stanford Ore Deposits Collection" "Mackay-Stanford Ore Deposits Collection" ...
##  $ COLLECTION_ID        : chr  "PHNC08_39_1183" "OD21441" "OD22811" "OD25716" ...
##  $ CONTINENT            : chr  "North America" "South America" "South America" "Africa" ...
##  $ COUNTRY              : chr  "United States" "Chile" "Chile" "South Africa" ...
##  $ STATE_PROVINCE       : chr  "Nevada" "Antofagasta" "Tarapacá" "Transvaal" ...
##  $ COUNTY               : chr  "Lyon" "El Loa" "El Tamarugal" "" ...
##  $ DISTRICT_NAME        : chr  "Yerington" "Chuquicamata" "Collahuasi/Quebrada Blanca" "" ...
##  $ DEPOSIT_NAME         : chr  "Pumpkin Hollow" "" "" "" ...
##  $ MINE_NAME            : chr  "Pumpkin Hollow" "Chuquicamata mine" "Collahuasi district" "" ...
##  $ DISTRICT_NAME_COLLECT: chr  "Yerington" "" "" "" ...
##  $ DEPOSIT_NAME_COLLECT : chr  "" "" "" "" ...
##  $ MINE_NAME_COLLECT    : chr  "Pumpkin Hollow" "Chuquicamata" "Poduosa mine" "Messina Mines Ltd." ...
##  $ LOCATE_DESC          : chr  "" "" "Level 25" "" ...
##  $ LATITUDE             : chr  "38,94021" "-22,2871" "-21,0309" "-24,7" ...
##  $ LONGITUDE            : chr  "-119,05178" "-68,8991" "-68,74951" "29,3" ...
##  $ DATUM                : chr  "WGS84" "WGS84" "WGS84" "" ...
##  $ LATITUDE_COLLECT     : chr  "38,92492" "22,28944" "" "" ...
##  $ LONGITUDE_COLLECT    : chr  "-119,1071" "-68,90111" "" "" ...
##  $ DATUM_COLLECT        : chr  "" "WGS84" "" "" ...
##  $ COORDINATES_QUAL     : chr  "100 m" "Exact" "" "" ...
##  $ COORDINATES_SOURCE   : chr  "1) iTouchMap.com, approx, A. Orkild-Norton; 2) Mineral Resource Deposit Database Deposit ID 10174173, ore body, M. Granitto" "1) Mindat.org, approx, A. Orkild-Norton; 2) Open-File Report 2017-1079 ID 549, mine, M. Granitto" "1) No coordinates; 2) Mineral Resource Deposit Database Deposit ID 10057511, district, M. Granitto" "1) No coordinates; 2) Google Earth Pro, approx ctr of former province of Transvaal, M. Granitto" ...
##  $ PRIMARY_CLASS        : chr  "rock" "rock" "rock" "rock" ...
##  $ SYSTEM_TYPE          : chr  "IOA-IOCG" "Porphyry Cu-Mo-Au" "Porphyry Cu-Mo-Au" "IOA-IOCG" ...
##  $ DEPOSIT_TYPE         : chr  "IOCG" "Supergene Cu" "Porphyry Cu" "IOCG" ...
##  $ SAMPLE_DESC          : chr  "Nearly solid chalcopyrite mixed with small light brown irregular inclusions of unknown mineralogy; clouds of ma"| __truncated__ "Chalcocite-bronchatite-antlerite(?); highly microfractured igneous rock with green copper sulfates coating microfractures" "Bornite-chalcopyrite; mostly massive chalcopyrite with numerous inclusions of micro-chalcopyrite and widely sca"| __truncated__ "Massive chalcopyrite, IOCG in shear zone; mostly massive fine grain cuprite with widely distributed malachite t"| __truncated__ ...
##  $ Al_pct_AES_ST        : chr  "0,33" "6,65" "0,46" "0,7" ...
##  $ Ca_pct_AES_ST        : chr  "1,1" "0,4" "-0,1" "0,3" ...
##  $ Fe_pct_AES_ST        : chr  "42,4" "0,25" "6,98" "27,8" ...
##  $ K_pct_AES_ST         : chr  "-0,1" "6,1" "0,2" "-0,1" ...
##  $ Mg_pct_AES_ST        : chr  "0,57" "0,1" "0,01" "0,33" ...
##  $ Mn_pct_AES_ST        : chr  "0,02" "-0,01" "-0,01" "-0,01" ...
##  $ P_pct_AES_ST         : chr  "-0,01" "0,01" "0,05" "0,01" ...
##  $ S_pct_AES_ST         : chr  "" "" "" "" ...
##  $ Si_pct_AES_ST        : chr  "" "" "" "" ...
##  $ Ti_pct_AES_ST        : chr  "0,01" "0,11" "-0,01" "-0,01" ...
##  $ F_pct_ISE_Fuse       : chr  "" "" "" "" ...
##  $ Ag_ppm_MS_ST         : chr  "58" "6" "468" "16" ...
##  $ As_ppm_MS_ST         : chr  "-30" "-30" "90" "-30" ...
##  $ Au_ppm               : chr  "" "" "" "" ...
##  $ Au_AM                : chr  "" "" "" "" ...
##  $ B_ppm_AES_ST         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Ba_ppm_AES_ST        : chr  "-0,5" "924" "121" "174" ...
##  $ Be_ppm_AES_ST        : int  -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 ...
##  $ Bi_ppm_MS_ST         : chr  "1,5" "3,6" "190" "0,4" ...
##  $ Cd_ppm_MS_ST         : chr  "3,6" "-0,2" "0,9" "-0,2" ...
##  $ Ce_ppm_MS_ST         : chr  "0,4" "8,8" "16,3" "3,5" ...
##  $ Co_ppm_MS_ST         : chr  "209" "-0,5" "1,3" "44,8" ...
##  $ Cr_ppm_AES_ST        : int  -10 -10 -10 30 20 20 60 40 20 10 ...
##  $ Cs_ppm_MS_ST         : chr  "0,5" "1,4" "0,2" "-0,1" ...
##  $ Cu_ppm_AES_ST        : chr  "50000,11111" "23300" "50000,11111" "50000,11111" ...
##  $ Dy_ppm_MS_ST         : chr  "-0,05" "0,32" "1,38" "0,37" ...
##  $ Er_ppm_MS_ST         : chr  "-0,05" "0,22" "0,77" "0,23" ...
##  $ Eu_ppm_MS_ST         : chr  "-0,05" "0,14" "0,17" "0,1" ...
##  $ Ga_ppm_MS_ST         : chr  "5" "15" "6" "3" ...
##  $ Gd_ppm_MS_ST         : chr  "-0,05" "0,45" "1,5" "0,39" ...
##  $ Ge_ppm_MS_ST         : int  -1 5 -1 -1 3 8 8 1 2 2 ...
##  $ Hf_ppm_MS_ST         : int  -1 4 -1 -1 5 13 12 2 3 6 ...
##  $ Ho_ppm_MS_ST         : chr  "-0,05" "0,07" "0,25" "0,07" ...
##  $ In_ppm_MS_ST         : chr  "6,4" "-0,2" "3,7" "0,2" ...
##  $ La_ppm_MS_ST         : chr  "0,2" "4,6" "7,2" "1,7" ...
##  $ Li_ppm_AES_ST        : int  -10 -10 -10 -10 30 20 20 20 -10 20 ...
##  $ Lu_ppm_MS_ST         : chr  "-0,05" "-0,05" "0,08" "-0,05" ...
##  $ Mo_ppm_MS_ST         : chr  "-2" "60" "3" "2" ...
##  $ Nb_ppm_MS_ST         : chr  "-1" "4" "-1" "-1" ...
##  $ Nd_ppm_MS_ST         : chr  "0,2" "3,8" "9,1" "1,7" ...
##  $ Ni_ppm_AES_ST        : chr  "144" "6" "-5" "48" ...
##  $ Pb_ppm_MS_ST         : chr  "23" "16" "188" "39" ...
##  $ Pd_ppm_FA_MS         : chr  "" "" "" "" ...
##  $ Pr_ppm_MS_ST         : chr  "-0,05" "1,09" "2,21" "0,46" ...
##  $ Pt_ppm_FA_MS         : chr  "" "" "" "" ...
##  $ Rb_ppm_MS_ST         : chr  "1,2" "148" "7,1" "0,7" ...
##  $ Re_ppm_MS_HF         : chr  "" "" "" "" ...
##  $ Sb_ppm_MS_ST         : chr  "1,2" "2,4" "2,9" "0,3" ...
##  $ Sc_ppm_AES_ST        : int  -5 -5 -5 -5 11 6 15 10 5 6 ...
##  $ Se_ppm_MS_ST         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Sm_ppm_MS_ST         : chr  "-0,1" "0,6" "1,6" "0,4" ...
##  $ Sn_ppm_MS_ST         : chr  "2" "3" "106" "-1" ...
##  $ Sr_ppm_AES_ST        : chr  "26,6" "114" "22,5" "38,4" ...
##  $ Ta_ppm_MS_ST         : chr  "-0,5" "-0,5" "-0,5" "-0,5" ...
##  $ Tb_ppm_MS_ST         : chr  "-0,05" "0,07" "0,23" "-0,05" ...
##  $ Te_ppm_MS_ST         : chr  "" "" "" "" ...
##  $ Th_ppm_MS_ST         : chr  "0,2" "9,7" "2,6" "0,2" ...
##  $ Tl_ppm_MS_ST         : chr  "-0,5" "0,5" "-0,5" "-0,5" ...
##  $ Tm_ppm_MS_ST         : chr  "-0,05" "-0,05" "0,08" "-0,05" ...
##  $ U_ppm_MS_ST          : chr  "0,3" "1,75" "0,63" "34,8" ...
##  $ V_ppm_AES_ST         : int  51 24 -5 493 68 20 40 159 39 61 ...
##  $ W_ppm_MS_ST          : chr  "-1" "28" "22" "11" ...
##   [list output truncated]

Se cargaron correctamente los datos para el análisis de distribución geográfica.

ANÁLISIS POR PAIS (PARES DE VALORES)

TDFCOUNTRY <- table(datos$COUNTRY) 

tabla_agrupada <- as.data.frame(TDFCOUNTRY)
colnames(tabla_agrupada) <- c("COUNTRY", "ni")

tabla_agrupada$hi <- tabla_agrupada$ni / sum(tabla_agrupada$ni)
tabla_agrupada$hi_porc <- round(tabla_agrupada$hi * 100, 2)

Total <- data.frame(COUNTRY = "TOTAL", 
                    ni = sum(tabla_agrupada$ni), 
                    hi = sum(tabla_agrupada$hi), 
                    hi_porc = sum(tabla_agrupada$hi) * 100)

tabla_final <- rbind(tabla_agrupada, Total)

knitr::kable(tabla_final, caption = "Tabla 1: Frecuencias de la variable COUNTRY")
Tabla 1: Frecuencias de la variable COUNTRY
COUNTRY ni hi hi_porc
1 0.0007321 0.07
Argentina 4 0.0029283 0.29
Australia 39 0.0285505 2.86
Brazil 17 0.0124451 1.24
Burma 1 0.0007321 0.07
Canada 59 0.0431918 4.32
Chile 53 0.0387994 3.88
China 3 0.0021962 0.22
Cuba 2 0.0014641 0.15
Democratic Republic of Congo 4 0.0029283 0.29
Finland 4 0.0029283 0.29
Germany 2 0.0014641 0.15
Honduras 1 0.0007321 0.07
India 1 0.0007321 0.07
Indonesia 4 0.0029283 0.29
Japan 18 0.0131772 1.32
Mauritania 4 0.0029283 0.29
Mexico 30 0.0219619 2.20
Namibia 3 0.0021962 0.22
Norway 2 0.0014641 0.15
Papua New Guinea 1 0.0007321 0.07
Peru 19 0.0139092 1.39
Philippines 3 0.0021962 0.22
Poland 4 0.0029283 0.29
Portugal 4 0.0029283 0.29
Russia 1 0.0007321 0.07
South Africa 10 0.0073206 0.73
Sweden 38 0.0278184 2.78
United States 1021 0.7474378 74.74
Vietnam 6 0.0043924 0.44
Zambia 7 0.0051245 0.51
TOTAL 1366 1.0000000 100.00

CLASIFICACIÓN POR REGIONES

norte_america <- c("United States", "Canada", "Mexico") 
latino_america <- c("Brazil", "Chile", "Peru", "Argentina", "Colombia", "Ecuador")
africa <- c("South Africa", "Democratic Republic of the Congo", "Ghana", "Mali")
asia <- c("China", "India", "Indonesia", "Kazakhstan")
europa <- c("Russia", "Sweden", "Poland", "Germany")
oceania <- c("Australia", "Papua New Guinea", "New Zealand", "New Caledonia")

datos$REGION <- "Otra / No Clasificado" 

datos$REGION[datos$COUNTRY %in% norte_america] <- "Norte America"
datos$REGION[datos$COUNTRY %in% latino_america] <- "Latino America"
datos$REGION[datos$COUNTRY %in% africa] <- "Africa"
datos$REGION[datos$COUNTRY %in% asia] <- "Asia"
datos$REGION[datos$COUNTRY %in% europa] <- "Europa"
datos$REGION[datos$COUNTRY %in% oceania] <- "Oceania"

TDFREGION <- table(datos$REGION) 

tabla_regiones <- as.data.frame(TDFREGION)
colnames(tabla_regiones) <- c("REGION", "ni")

tabla_regiones$hi <- tabla_regiones$ni / sum(tabla_regiones$ni)
tabla_regiones$hi_porc <- round(tabla_regiones$hi * 100, 2)

Total_Region <- data.frame(REGION = "TOTAL", 
                           ni = sum(tabla_regiones$ni), 
                           hi = sum(tabla_regiones$hi), 
                           hi_porc = sum(tabla_regiones$hi) * 100)

tabla_final_regiones <- rbind(tabla_regiones, Total_Region)

knitr::kable(tabla_final_regiones, caption = "Tabla 2: Frecuencias Agrupadas por Región Geográfica")
Tabla 2: Frecuencias Agrupadas por Región Geográfica
REGION ni hi hi_porc
Africa 10 0.0073206 0.73
Asia 8 0.0058565 0.59
Europa 45 0.0329429 3.29
Latino America 93 0.0680820 6.81
Norte America 1110 0.8125915 81.26
Oceania 40 0.0292826 2.93
Otra / No Clasificado 60 0.0439239 4.39
TOTAL 1366 1.0000000 100.00

VISUALIZACIÓN DE COMPARATIVAS BÁSICAS

barplot(TDFREGION, 
        main = "Gráfica 1: Muestras de Elementos Críticos por Región (LOCAL)", 
        ylab = "Cantidad de depósitos por región", 
        xlab = "Región", 
        col = "pink", 
        las = 1,          
        cex.names = 0.7,  
        ylim = c(0, 1400))

barplot(TDFREGION, 
        main = "Gráfica 2: Muestras de Elementos Críticos por Región (GLOBAL)",
        ylab = "Cantidad de muestras por región",
        xlab = "Región",
        col = "skyblue",
        las = 1,
        cex.names = 0.7,
        ylim = c(0, 1200))

ANÁLISIS DE FRECUENCIA ABSOLUTA

freq_abs <- TDFREGION

bar_centers <- barplot(freq_abs, 
                       main = "Gráfica 3: Distribución de Muestras por Región",
                       ylab = "Cantidad",
                       xlab = "Región",
                       col = "orange",
                       las = 1,
                       cex.names = 0.75,
                       ylim = c(0, max(freq_abs) * 1.2))

text(x = bar_centers,      
     y = freq_abs,         
     labels = freq_abs,    
     pos = 3,              
     cex = 0.8,
     col = "black")

ANÁLISIS DE FRECUENCIA RELATIVA (PORCENTAJES)

freq_rel <- prop.table(TDFREGION)
freq_rel_porc <- round(freq_rel * 100, 2)

bar_centers_rel <- barplot(freq_rel_porc, 
                           main = "Gráfica 4: Distribución Porcentual Total por Región", 
                           ylab = "Porcentaje (%)", 
                           xlab = "Región", 
                           col = "orchid", 
                           las = 1, 
                           cex.names = 0.7,
                           ylim = c(0, 100),       
                           yaxt = "n",             
                           yaxs = "i")             

axis(2, at = seq(0, 100, by = 20), las = 1, cex.axis = 0.8)

text(x = bar_centers_rel, 
     y = freq_rel_porc, 
     labels = paste0(freq_rel_porc, "%"), 
     pos = 3, 
     cex = 0.8, 
     col = "black")

COMPOSICIÓN GLOBAL (GRÁFICA CIRCULAR)

Grupo_reg <- TDFREGION
hi_reg_porc <- round(prop.table(Grupo_reg) * 100, 1)
colores <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494")
etiquetas_leyenda <- paste0(names(Grupo_reg), " (", hi_reg_porc, "%)")

par(mar = c(2, 2, 3, 12), xpd = TRUE)

pie(hi_reg_porc, 
    labels = NA, 
    col = colores, 
    main = "Gráfica 5: Distribución Porcentual de Muestras por Región", 
    radius = 0.9) 

x0 <- 1.0   
x1 <- 2.4   
y1 <- 0.9   
y0 <- -0.3  

rect(xleft = x0, ybottom = y0, xright = x1, ytop = y1, col = "white", border = "black")

text(x = (x0 + x1)/2, y = y1 - 0.15, labels = "Regiones", font = 2, cex = 1.0)

legend(x = (x0 + x1)/2, 
       y = y1 - 0.25,      
       legend = etiquetas_leyenda, 
       fill = colores, 
       cex = 0.8,          
       bty = "n",          
       xjust = 0.5,        
       y.intersp = 1.4)