Datos de Deslazamientos de Centroamérica

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
colnames(df)[2] <- "Date"
colnames(df)[4] <- "America"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[10] <- "Distance"
library(ggplot2)
library(dplyr)
ggplot(data=df, aes(x=America, y=Distance)) + geom_bar(stat="identity", width=0.5)
## Warning: Removed 1 rows containing missing values (position_stack).

ggplot(data=df, aes(x=America, y=Distance)) + geom_bar(stat="identity") + coord_flip()
## Warning: Removed 1 rows containing missing values (position_stack).

Republica Dominicana

library(readr)
library(knitr)
df_DO <- subset (df, Country == "Dominican Republic")
knitr::kable(head(df_DO))
id Date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
15 124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
52 333 10/29/07 NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
58 343 11/1/07 NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
64 388 12/11/07 NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
132 724 8/17/08 NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
138 746 8/26/08 NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
df_DO %>% 
  select(Country, State, City, Distance, Date)
##                 Country             State                       City Distance
## 15   Dominican Republic Distrito Nacional                 San Carlos  1.70298
## 52   Dominican Republic    San Cristóbal             Bajos de Haina  1.72138
## 58   Dominican Republic           La Vega           Río Verde Abajo  3.72637
## 64   Dominican Republic          Santiago Santiago de los Caballeros  1.10868
## 132  Dominican Republic        Hato Mayor           Sabana de La Mar  0.75284
## 138  Dominican Republic Distrito Nacional                La Agustina  5.71058
## 178  Dominican Republic          Santiago              Pedro García  4.86398
## 211  Dominican Republic      Puerto Plata                   Altamira  0.88500
## 212  Dominican Republic          Santiago                   Tamboril  4.31327
## 750  Dominican Republic          Santiago     San José de Las Matas  2.72462
## 774  Dominican Republic Distrito Nacional              Santo Domingo  0.55721
## 833  Dominican Republic           La Vega                  Constanza  0.52969
## 923  Dominican Republic      Puerto Plata               Puerto Plata  1.19636
## 1394 Dominican Republic     Santo Domingo         Santo Domingo Este  3.98059
## 1395 Dominican Republic      Puerto Plata                   Luperón  1.54885
##          Date
## 15    7/13/07
## 52   10/29/07
## 58    11/1/07
## 64   12/11/07
## 132   8/17/08
## 138   8/26/08
## 178   2/12/09
## 211   9/20/09
## 212   9/20/09
## 750    6/3/11
## 774    7/6/11
## 833  11/18/11
## 923   12/5/12
## 1394   8/3/14
## 1395  11/7/14

Distrito Nacional:

Deslizamientos de las ciudades de Distrito Nacional

library(readr)
library(knitr)
df_DN <- subset (df, State == "Distrito Nacional")
df_DN %>% 
  select(Country, State, City, Distance, Date) 
##                Country             State          City Distance    Date
## 15  Dominican Republic Distrito Nacional    San Carlos  1.70298 7/13/07
## 138 Dominican Republic Distrito Nacional   La Agustina  5.71058 8/26/08
## 774 Dominican Republic Distrito Nacional Santo Domingo  0.55721  7/6/11
head(df_DN)
##       id    Date  time America            Country country_code
## 15   124 7/13/07 Night    <NA> Dominican Republic           DO
## 138  746 8/26/08          <NA> Dominican Republic           DO
## 774 3736  7/6/11          <NA> Dominican Republic           DO
##                 State population          City Distance location_description
## 15  Distrito Nacional      13456    San Carlos  1.70298                     
## 138 Distrito Nacional      10457   La Agustina  5.71058                     
## 774 Distrito Nacional    2201941 Santo Domingo  0.55721                     
##     latitude longitude                    geolocation hazard_type
## 15   18.4757  -69.9140 (18.4757, -69.914000000000001)   Landslide
## 138  18.5500  -69.9200                (18.55, -69.92)   Landslide
## 774  18.5000  -69.9833               (18.5, -69.9833)   Landslide
##     landslide_type landslide_size          trigger       storm_name injuries
## 15       Landslide          Small          Unknown                        NA
## 138       Mudslide         Medium Tropical cyclone Hurricane Gustav       NA
## 774      Landslide         Medium         Downpour                        NA
##     fatalities     source_name
## 15          NA Dominican Today
## 138          8                
## 774          1                
##                                                                                                               source_link
## 15                                                                http://www.dominicantoday.com/app/article.aspx?id=24682
## 138                     http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
## 774 http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52
ggplot(data=df_DN, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular

ggplot(df_DN,aes(x="Distrito Nacional",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*1)),
              position=position_stack(vjust=0.5),color="white",size=6)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto

library(qcc)
## Warning: package 'qcc' was built under R version 4.1.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
Distance <- df_DN$Distance
names(Distance) <- df_DN$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                
## Pareto chart analysis for Distance
##                  Frequency  Cum.Freq. Percentage Cum.Percent.
##   La Agustina     5.710580   5.710580  71.644019    71.644019
##   San Carlos      1.702980   7.413560  21.365314    93.009333
##   Santo Domingo   0.557210   7.970770   6.990667   100.000000

Diagrama de tallo y hojas

stem(df_DN$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 
##   4 | 7
head(df_DN)
##       id    Date  time America            Country country_code
## 15   124 7/13/07 Night    <NA> Dominican Republic           DO
## 138  746 8/26/08          <NA> Dominican Republic           DO
## 774 3736  7/6/11          <NA> Dominican Republic           DO
##                 State population          City Distance location_description
## 15  Distrito Nacional      13456    San Carlos  1.70298                     
## 138 Distrito Nacional      10457   La Agustina  5.71058                     
## 774 Distrito Nacional    2201941 Santo Domingo  0.55721                     
##     latitude longitude                    geolocation hazard_type
## 15   18.4757  -69.9140 (18.4757, -69.914000000000001)   Landslide
## 138  18.5500  -69.9200                (18.55, -69.92)   Landslide
## 774  18.5000  -69.9833               (18.5, -69.9833)   Landslide
##     landslide_type landslide_size          trigger       storm_name injuries
## 15       Landslide          Small          Unknown                        NA
## 138       Mudslide         Medium Tropical cyclone Hurricane Gustav       NA
## 774      Landslide         Medium         Downpour                        NA
##     fatalities     source_name
## 15          NA Dominican Today
## 138          8                
## 774          1                
##                                                                                                               source_link
## 15                                                                http://www.dominicantoday.com/app/article.aspx?id=24682
## 138                     http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
## 774 http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52
knitr::kable(head(df_DN))
id Date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
15 124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
138 746 8/26/08 NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
774 3736 7/6/11 NA Dominican Republic DO Distrito Nacional 2201941 Santo Domingo 0.55721 18.5000 -69.9833 (18.5, -69.9833) Landslide Landslide Medium Downpour NA 1 http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52
stem(df_DN$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 
##   4 | 7
stem(df_DN$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 6
##   1 | 7
##   2 | 
##   3 | 
##   4 | 
##   5 | 7

Gráfico de series temporales

library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
data_serie<- ts(df_DN$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar
## 2007 1.70298 5.71058 0.55721
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Años", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia

library(questionr)
## Warning: package 'questionr' was built under R version 4.1.1
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.55721 1 33.3 33.3 33.3 33.3
1.70298 1 33.3 33.3 66.7 66.7
5.71058 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.55721 1
1.70298 1
5.71058 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

##Tabla de frecuencias agrupada

n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.55721 2.55721 4.55721 6.55721
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.55721 1 0.3333333 1
1.70298 1 0.3333333 2
5.71058 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ Distance: Factor w/ 3 levels "0.55721","1.70298",..: 1 2 3
##  $ Freq    : int  1 1 1
##  $ Rel_Freq: num  0.333 0.333 0.333
##  $ Cum_Freq: int  1 2 3
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.55721 1
1.70298 1
5.71058 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos

Personas Afectadas por Deslizamiento

summary(df_DN$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5572  1.1301  1.7030  2.6569  3.7068  5.7106
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
stat.desc(df_DN)
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id Date time America Country country_code State
## nbr.val      3.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          1.240000e+02   NA   NA      NA      NA           NA    NA
## max          3.736000e+03   NA   NA      NA      NA           NA    NA
## range        3.612000e+03   NA   NA      NA      NA           NA    NA
## sum          4.606000e+03   NA   NA      NA      NA           NA    NA
## median       7.460000e+02   NA   NA      NA      NA           NA    NA
## mean         1.535333e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.114887e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 4.796973e+03   NA   NA      NA      NA           NA    NA
## var          3.728921e+06   NA   NA      NA      NA           NA    NA
## std.dev      1.931042e+03   NA   NA      NA      NA           NA    NA
## coef.var     1.257734e+00   NA   NA      NA      NA           NA    NA
##                population City Distance location_description     latitude
## nbr.val      3.000000e+00   NA 3.000000                   NA  3.000000000
## nbr.null     0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na       0.000000e+00   NA 0.000000                   NA  0.000000000
## min          1.045700e+04   NA 0.557210                   NA 18.475700000
## max          2.201941e+06   NA 5.710580                   NA 18.550000000
## range        2.191484e+06   NA 5.153370                   NA  0.074300000
## sum          2.225854e+06   NA 7.970770                   NA 55.525700000
## median       1.345600e+04   NA 1.702980                   NA 18.500000000
## mean         7.419513e+05   NA 2.656923                   NA 18.508566667
## SE.mean      7.299953e+05   NA 1.562243                   NA  0.021872078
## CI.mean.0.95 3.140916e+06   NA 6.721790                   NA  0.094107954
## var          1.598680e+12   NA 7.321812                   NA  0.001435163
## std.dev      1.264389e+06   NA 2.705885                   NA  0.037883550
## coef.var     1.704140e+00   NA 1.018428                   NA  0.002046812
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.998330e+01          NA          NA             NA
## max          -6.991400e+01          NA          NA             NA
## range         6.930000e-02          NA          NA             NA
## sum          -2.098173e+02          NA          NA             NA
## median       -6.992000e+01          NA          NA             NA
## mean         -6.993910e+01          NA          NA             NA
## SE.mean       2.216777e-02          NA          NA             NA
## CI.mean.0.95  9.538021e-02          NA          NA             NA
## var           1.474230e-03          NA          NA             NA
## std.dev       3.839570e-02          NA          NA             NA
## coef.var     -5.489877e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   0.000000          NA
## nbr.na                   NA      NA         NA        3   1.000000          NA
## min                      NA      NA         NA      Inf   1.000000          NA
## max                      NA      NA         NA     -Inf   8.000000          NA
## range                    NA      NA         NA     -Inf   7.000000          NA
## sum                      NA      NA         NA        0   9.000000          NA
## median                   NA      NA         NA       NA   4.500000          NA
## mean                     NA      NA         NA      NaN   4.500000          NA
## SE.mean                  NA      NA         NA       NA   3.500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  44.471717          NA
## var                      NA      NA         NA       NA  24.500000          NA
## std.dev                  NA      NA         NA       NA   4.949747          NA
## coef.var                 NA      NA         NA       NA   1.099944          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA

Caja y extensión

boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.3     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter()  masks stats::filter()
## x pastecs::first() masks dplyr::first()
## x dplyr::lag()     masks stats::lag()
## x pastecs::last()  masks dplyr::last()
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.1.1
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.1
## Loading required package: viridisLite
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

```