Introducción

Gráfico para Panamá

library(readr)
library(knitr)
# Download the catalog CSV from GitHub and parse it with readr; the parsed
# table is stored in `df`.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Give columns 7 and 9 short names, keep only the rows recorded for Panama,
# and preview the first rows as a table.
names(df)[c(7, 9)] <- c("state", "city")
df_PA <- df[which(df$country_name == "Panama"), ]
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
750 8/26/08 NA NA Panama PA Panamá 1287 Cerro Azul 1.84596 NA 9.1559 -79.4295 (9.1559000000000008, -79.429500000000004) Landslide Landslide Medium Downpour NA NA NA NA http://news.xinhuanet.com/english/2008-08/27/content_9721298.htm
905 11/25/08 NA NA Panama PA Chiriquí 6123 Boquete 9.83700 NA 8.7870 -82.3440 (8.7870000000000008, -82.343999999999994) Landslide Complex Medium Downpour NA NA 8 NA http://www.iht.com/articles/ap/2008/11/26/news/LT-Panama-Flooding.php
2794 12/8/10 NA NA Panama PA Colón 1310 El Giral 1.80330 NA 9.2332 -79.6961 (9.2332000000000001, -79.696100000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.portworld.com/news/i98731/Panama_Canal_disrupted_by_floods
2795 12/9/10 Morning NA Panama PA Colón 1274 Portobelo 0.09491 Deforested slope 9.5493 -79.6505 (9.5493000000000006, -79.650499999999994) Landslide Mudslide Medium Downpour NA NA 8 Fox News Latino http://latino.foxnews.com/latino/news/2010/12/09/heavy-rains-kill-panama-force-canal-close/
3167 3/2/11 Night NA Panama PA Los Santos 1283 La Palma 13.40535 NA 7.5955 -80.3800 (7.5955000000000004, -80.38) Landslide Landslide Medium Downpour NA NA 0 NA http://www.newsroompanama.com/panama/2434-off-season-rain-creating-crop-worries-and-landslides.html
3168 3/2/11 Night NA Panama PA Bocas del Toro 8114 Almirante 8.41491 NA 9.3224 -82.4732 (9.3224, -82.473200000000006) Landslide Landslide Medium Downpour NA NA 0 NA http://www.newsroompanama.com/panama/2434-off-season-rain-creating-crop-worries-and-landslides.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of `distance`, one fill colour per state.
ggplot(df_PA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of `distance`, one fill colour per state.
ggplot(df_PA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single stacked bar of `distance` drawn in polar coordinates,
# one fill colour per state.
ggplot(df_PA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Sort by state (descending), then add two columns:
#   prop - each row's share of the total distance, in percent
#   ypos - midpoint of the row's slice on the cumulative percentage scale,
#          used to place the labels of the pie chart
df_PA <- df_PA %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if the package is missing; require() would only
# return FALSE and let later code break with a less obvious error.
library(scales)
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
# Pie chart of each row's share of the total distance, labelled with its
# percentage at the slice midpoint (`ypos`).
ggplot(df_PA, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = scales::percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (it only triggers an "Unknown
  # palette" warning); "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Put the observations in chronological order before building the series:
# df_PA has just been sorted by state, not by date. Dates are stored as text
# such as "8/26/08" (month/day/two-digit year).
# NOTE(review): ts() still treats the events as evenly spaced monthly values
# starting in 2008, which the irregular event dates do not guarantee.
event_order <- order(as.Date(df_PA$date, format = "%m/%d/%y"))
data <- ts(df_PA$distance[event_order], frequency = 12, start = 2008)
knitr::kable(head(data))
x
1.84596
2.81479
1.41526
4.18074
2.24069
0.48725
# Line plot of the series with a title and axis labels, black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets the title of a
# colour legend; it does not colour the line.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Named vector of distances (each value is named after the state of its row),
# drawn as a Pareto chart with cumulative-percentage ticks every 10 points.
# NOTE(review): a state appears once per event, so its name repeats on the
# axis; confirm whether per-state totals were intended.
distance <- setNames(df_PA$distance, df_PA$state)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por estados",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                 
## Pareto chart analysis for distance
##                     Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Los Santos      13.40535000  13.40535000  11.36719394  11.36719394
##   Chiriquí         9.83700000  23.24235000   8.34137764  19.70857158
##   Bocas del Toro   8.41491000  31.65726000   7.13550291  26.84407449
##   Chiriquí         6.74338000  38.40064000   5.71811316  32.56218765
##   Bocas del Toro   6.65185000  45.05249000   5.64049943  38.20268708
##   Bocas del Toro   5.97977000  51.03226000   5.07060280  43.27328989
##   Bocas del Toro   5.13913000  56.17139000   4.35777413  47.63106401
##   Colón            4.74914000  60.92053000   4.02707840  51.65814241
##   Bocas del Toro   4.45630000  65.37683000   3.77876194  55.43690435
##   Panamá           4.18074000  69.55757000   3.54509822  58.98200257
##   Bocas del Toro   3.92621000  73.48378000   3.32926708  62.31126965
##   Chiriquí         3.66775000  77.15153000   3.11010347  65.42137312
##   Panamá           3.54386000  80.69539000   3.00504977  68.42642289
##   Chiriquí         3.33873000  84.03412000   2.83110784  71.25753073
##   Panamá           3.30848000  87.34260000   2.80545706  74.06298779
##   Panamá           2.81479000  90.15739000   2.38682793  76.44981572
##   Colón            2.67409000  92.83148000   2.26752003  78.71733575
##   Panamá           2.59449000  95.42597000   2.20002245  80.91735821
##   Panamá           2.57852000  98.00449000   2.18648054  83.10383875
##   Chiriquí         2.44254000 100.44703000   2.07117501  85.17501375
##   Colón            2.28589000 102.73292000   1.93834215  87.11335590
##   Panamá           2.24069000 104.97361000   1.90001438  89.01337029
##   Panamá           1.84596000 106.81957000   1.56529933  90.57866961
##   Panamá           1.80341000 108.62298000   1.52921865  92.10788826
##   Colón            1.80330000 110.42628000   1.52912537  93.63701364
##   Panamá           1.41526000 111.84154000   1.20008317  94.83709680
##   Chiriquí         0.90169000 112.74323000   0.76459661  95.60169341
##   Colón            0.76740000 113.51063000   0.65072412  96.25241753
##   Colón            0.74760000 114.25823000   0.63393453  96.88635206
##   Chiriquí         0.64491000 114.90314000   0.54685756  97.43320962
##   Colón            0.63948000 115.54262000   0.54225314  97.97546277
##   Panamá           0.48725000 116.02987000   0.41316827  98.38863103
##   Chiriquí         0.37999000 116.40986000   0.32221613  98.71084717
##   Chiriquí         0.35187000 116.76173000   0.29837151  99.00921868
##   Bocas del Toro   0.29869000 117.06042000   0.25327702  99.26249570
##   Coclé            0.21605000 117.27647000   0.18320165  99.44569735
##   Colón            0.20365000 117.48012000   0.17268695  99.61838430
##   Colón            0.18619000 117.66631000   0.15788158  99.77626588
##   Colón            0.16894000 117.83525000   0.14325428  99.91952016
##   Colón            0.09491000 117.93016000   0.08047984 100.00000000
# Stem-and-leaf display of the distance column.
stem(df_PA$"distance")
## 
##   The decimal point is at the |
## 
##    0 | 122223445667894888
##    2 | 234667833579
##    4 | 2571
##    6 | 077
##    8 | 48
##   10 | 
##   12 | 4
# Print the first rows of df_PA, which by now also carries `prop` and `ypos`.
head(df_PA)
## # A tibble: 6 x 25
##      id date    time      continent_code country_name country_code state  population
##   <dbl> <chr>   <chr>     <chr>          <chr>        <chr>        <chr>       <dbl>
## 1   750 8/26/08 <NA>      <NA>           Panama       PA           Panamá       1287
## 2  5562 9/29/13 <NA>      <NA>           Panama       PA           Panamá      69102
## 3  6701 9/14/14 Morning   <NA>           Panama       PA           Panamá      19782
## 4  6703 6/17/14 <NA>      <NA>           Panama       PA           Panamá     321501
## 5  6705 6/24/14 <NA>      <NA>           Panama       PA           Panamá       9169
## 6  7453 6/5/15  Afternoon <NA>           Panama       PA           Panamá          0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview of the first rows, rendered as a table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
750 8/26/08 NA NA Panama PA Panamá 1287 Cerro Azul 1.84596 NA 9.1559 -79.4295 (9.1559000000000008, -79.429500000000004) Landslide Landslide Medium Downpour NA NA NA NA http://news.xinhuanet.com/english/2008-08/27/content_9721298.htm 1.5652993 0.7826497
5562 9/29/13 NA NA Panama PA Panamá 69102 Las Cumbres 2.81479 NA 9.0655 -79.5516 (9.0655000000000001, -79.551599999999993) Landslide Landslide Medium Downpour NA NA NA www.newsroompanama.com http://www.newsroompanama.com/panama/6467--nearly-800-affeccted-by-floods-in-panama-city-region.html 2.3868279 2.7587133
6701 9/14/14 Morning NA Panama PA Panamá 19782 Alcaldedíaz 1.41526 Urban area 9.1139 -79.5626 (9.1138999999999992, -79.562600000000003) Landslide Other Small Rain NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/colapsa-pared-vivienda-alcade-diaz/23804690/foto/50372 1.2000832 4.5521688
6703 6/17/14 NA NA Panama PA Panamá 321501 San Miguelito 4.18074 Unknown 9.0329 -79.5380 (9.0328999999999997, -79.537999999999997) Landslide Mudslide Medium Unknown NA 0 0 PanamaAmerica http://www.panamaamerica.com.pa/nacion/reportan-deslizamiento-en-autopista-panama-colon# 3.5450982 6.9247595
6705 6/24/14 NA NA Panama PA Panamá 9169 Ancón 2.24069 Below road 8.9763 -79.5391 (8.9763000000000002, -79.539100000000005) Landslide Landslide Small Unknown NA 0 0 Ministerio de Obras Públicas de Panamá http://www.mop.gob.pa/poste-electrico-de-la-avenida-frangipany-cae-sobre-camion-del-mop/ 1.9000144 9.6473158
7453 6/5/15 Afternoon NA Panama PA Panamá 0 Las Margaritas 0.48725 Above road 9.1805 -79.0896 (9.1805000000000003, -79.089600000000004) Landslide Landslide Medium Downpour NA 0 0 La Estrella de Panama http://laestrella.com.pa/panama/nacional/deslizamiento-tierra-bayano-inundaciones-colon/23870997/foto/136658 0.4131683 10.8039072
# Stem-and-leaf display of the distance column (default scale).
stem(df_PA$"distance")
## 
##   The decimal point is at the |
## 
##    0 | 122223445667894888
##    2 | 234667833579
##    4 | 2571
##    6 | 077
##    8 | 48
##   10 | 
##   12 | 4
# Same display with scale = 2, which lengthens the plot so that the stems
# are split more finely.
stem(df_PA$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 12222344566789
##    1 | 4888
##    2 | 2346678
##    3 | 33579
##    4 | 257
##    5 | 1
##    6 | 077
##    7 | 
##    8 | 4
##    9 | 8
##   10 | 
##   11 | 
##   12 | 
##   13 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the distinct distance values, with cumulative
# percentages and a final "Total" row, rendered as a table.
# NOTE(review): the variable name `table` shadows base::table(); function
# calls to table() still resolve to the function, but another name would be
# clearer.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.09491 1 2.5 2.5 2.5 2.5
0.16894 1 2.5 2.5 5.0 5.0
0.18619 1 2.5 2.5 7.5 7.5
0.20365 1 2.5 2.5 10.0 10.0
0.21605 1 2.5 2.5 12.5 12.5
0.29869 1 2.5 2.5 15.0 15.0
0.35187 1 2.5 2.5 17.5 17.5
0.37999 1 2.5 2.5 20.0 20.0
0.48725 1 2.5 2.5 22.5 22.5
0.63948 1 2.5 2.5 25.0 25.0
0.64491 1 2.5 2.5 27.5 27.5
0.7476 1 2.5 2.5 30.0 30.0
0.7674 1 2.5 2.5 32.5 32.5
0.90169 1 2.5 2.5 35.0 35.0
1.41526 1 2.5 2.5 37.5 37.5
1.8033 1 2.5 2.5 40.0 40.0
1.80341 1 2.5 2.5 42.5 42.5
1.84596 1 2.5 2.5 45.0 45.0
2.24069 1 2.5 2.5 47.5 47.5
2.28589 1 2.5 2.5 50.0 50.0
2.44254 1 2.5 2.5 52.5 52.5
2.57852 1 2.5 2.5 55.0 55.0
2.59449 1 2.5 2.5 57.5 57.5
2.67409 1 2.5 2.5 60.0 60.0
2.81479 1 2.5 2.5 62.5 62.5
3.30848 1 2.5 2.5 65.0 65.0
3.33873 1 2.5 2.5 67.5 67.5
3.54386 1 2.5 2.5 70.0 70.0
3.66775 1 2.5 2.5 72.5 72.5
3.92621 1 2.5 2.5 75.0 75.0
4.18074 1 2.5 2.5 77.5 77.5
4.4563 1 2.5 2.5 80.0 80.0
4.74914 1 2.5 2.5 82.5 82.5
5.13913 1 2.5 2.5 85.0 85.0
5.97977 1 2.5 2.5 87.5 87.5
6.65185 1 2.5 2.5 90.0 90.0
6.74338 1 2.5 2.5 92.5 92.5
8.41491 1 2.5 2.5 95.0 95.0
9.837 1 2.5 2.5 97.5 97.5
13.40535 1 2.5 2.5 100.0 100.0
Total 40 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  41 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
##  $ val%   : num  2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
##  $ %cum   : num  2.5 5 7.5 10 12.5 15 17.5 20 22.5 25 ...
##  $ val%cum: num  2.5 5 7.5 10 12.5 15 17.5 20 22.5 25 ...
# Build a two-column data frame (value label, count) from the frequency
# table, leaving out its last row, which holds the "Total" line.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], stays
# correct when only one row is present (1:0 would count backwards).
names <- head(x, -1)
freqs <- head(y, -1)
# Keep the labels as a factor in table order; as plain character they would
# be sorted alphabetically on a plot axis ("13.40535" before "2.24069").
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.09491 1
0.16894 1
0.18619 1
0.20365 1
0.21605 1
0.29869 1
0.35187 1
0.37999 1
0.48725 1
0.63948 1
0.64491 1
0.7476 1
0.7674 1
0.90169 1
1.41526 1
1.8033 1
1.80341 1
1.84596 1
2.24069 1
2.28589 1
2.44254 1
2.57852 1
2.59449 1
2.67409 1
2.81479 1
3.30848 1
3.33873 1
3.54386 1
3.66775 1
3.92621 1
4.18074 1
4.4563 1
4.74914 1
5.13913 1
5.97977 1
6.65185 1
6.74338 1
8.41491 1
9.837 1
13.40535 1
library(ggplot2)

# Bar chart of the count of each distinct distance value; the x-axis labels
# are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n): use the ceiling, unless
# the ceiling is even, in which case use the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Breaks start at the minimum and reach beyond the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.09491  2.09491  4.09491  6.09491  8.09491 10.09491 12.09491 14.09491
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum value, which is exactly the
# first break, falls outside every class and is silently dropped as NA.
# NOTE: this replaces the numeric `distance` vector with a factor of classes.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0949,2.09] 17 0.4358974 17
(2.09,4.09] 12 0.3076923 29
(4.09,6.09] 5 0.1282051 34
(6.09,8.09] 2 0.0512821 36
(8.09,10.1] 2 0.0512821 38
(10.1,12.1] 0 0.0000000 38
(12.1,14.1] 1 0.0256410 39
# Inspect the structure (column types) of the grouped frequency table.
str(Freq_table)
## 'data.frame':    7 obs. of  4 variables:
##  $ distance: Factor w/ 7 levels "(0.0949,2.09]",..: 1 2 3 4 5 6 7
##  $ Freq    : int  17 12 5 2 2 0 1
##  $ Rel_Freq: num  0.4359 0.3077 0.1282 0.0513 0.0513 ...
##  $ Cum_Freq: int  17 29 34 36 38 38 39
# Two-column data frame (class interval, count) taken from the grouped
# frequency table, rendered as a table.
df <- data.frame(x = Freq_table[["distance"]], y = Freq_table[["Freq"]])
knitr::kable(df)
x y
(0.0949,2.09] 17
(2.09,4.09] 12
(4.09,6.09] 5
(6.09,8.09] 2
(8.09,10.1] 2
(10.1,12.1] 0
(12.1,14.1] 1
library(ggplot2)

# Bar chart of the number of observations in each class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Descriptive statistics for every column of df_PA.
# NOTE(review): the non-numeric columns only produce NA in this summary.
stat.desc(df_PA)
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          7.500000e+02   NA   NA             NA           NA           NA
## max          7.512000e+03   NA   NA             NA           NA           NA
## range        6.762000e+03   NA   NA             NA           NA           NA
## sum          2.479400e+05   NA   NA             NA           NA           NA
## median       7.450500e+03   NA   NA             NA           NA           NA
## mean         6.198500e+03   NA   NA             NA           NA           NA
## SE.mean      3.051354e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 6.171945e+02   NA   NA             NA           NA           NA
## var          3.724304e+06   NA   NA             NA           NA           NA
## std.dev      1.929846e+03   NA   NA             NA           NA           NA
## coef.var     3.113407e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 4.000000e+01   NA  40.0000000                   NA
## nbr.null        NA 8.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 0.000000e+00   NA   0.0949100                   NA
## max             NA 3.215010e+05   NA  13.4053500                   NA
## range           NA 3.215010e+05   NA  13.3104400                   NA
## sum             NA 2.041483e+06   NA 117.9301600                   NA
## median          NA 2.365500e+03   NA   2.3642150                   NA
## mean            NA 5.103707e+04   NA   2.9482540                   NA
## SE.mean         NA 1.674111e+04   NA   0.4634140                   NA
## CI.mean.0.95    NA 3.386208e+04   NA   0.9373432                   NA
## var             NA 1.121058e+10   NA   8.5900997                   NA
## std.dev         NA 1.058800e+05   NA   2.9308872                   NA
## coef.var        NA 2.074571e+00   NA   0.9941095                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       40.00000000  4.000000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min            7.59550000 -8.286620e+01          NA          NA             NA
## max            9.56760000 -7.908960e+01          NA          NA             NA
## range          1.97210000  3.776600e+00          NA          NA             NA
## sum          361.65930000 -3.232397e+03          NA          NA             NA
## median         9.07300000 -7.989490e+01          NA          NA             NA
## mean           9.04148250 -8.080992e+01          NA          NA             NA
## SE.mean        0.05632097  2.230672e-01          NA          NA             NA
## CI.mean.0.95   0.11391991  4.511960e-01          NA          NA             NA
## var            0.12688206  1.990359e+00          NA          NA             NA
## std.dev        0.35620508  1.410801e+00          NA          NA             NA
## coef.var       0.03939676 -1.745826e-02          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 28.000000 38.0000000          NA
## nbr.null                 NA      NA         NA 27.000000 33.0000000          NA
## nbr.na                   NA      NA         NA 12.000000  2.0000000          NA
## min                      NA      NA         NA  0.000000  0.0000000          NA
## max                      NA      NA         NA 45.000000  8.0000000          NA
## range                    NA      NA         NA 45.000000  8.0000000          NA
## sum                      NA      NA         NA 45.000000 23.0000000          NA
## median                   NA      NA         NA  0.000000  0.0000000          NA
## mean                     NA      NA         NA  1.607143  0.6052632          NA
## SE.mean                  NA      NA         NA  1.607143  0.3099568          NA
## CI.mean.0.95             NA      NA         NA  3.297585  0.6280321          NA
## var                      NA      NA         NA 72.321429  3.6507824          NA
## std.dev                  NA      NA         NA  8.504201  1.9107021          NA
## coef.var                 NA      NA         NA  5.291503  3.1568121          NA
##              source_link         prop         ypos
## nbr.val               NA  40.00000000   40.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA   0.08047984    0.7826497
## max                   NA  11.36719394   98.1106190
## range                 NA  11.28671410   97.3279694
## sum                   NA 100.00000000 1766.6812798
## median                NA   2.00475858   42.6414180
## mean                  NA   2.50000000   44.1670320
## SE.mean               NA   0.39295627    4.2497154
## CI.mean.0.95          NA   0.79482907    8.5958607
## var                   NA   6.17658511  722.4032223
## std.dev               NA   2.48527365   26.8775598
## coef.var              NA   0.99410946    0.6085435
# Horizontal box plot of the values stored in the `data` time series.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Bocas del Toro (Panamá)

library(readr)
library(knitr)
# Download and parse the catalog CSV again; `df` was reused for plotting
# data frames earlier in the report.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, keep only the rows whose state is
# "Bocas del Toro", and preview the first rows as a table.
names(df)[c(7, 9)] <- c("state", "city")
df_PA <- df[which(df$state == "Bocas del Toro"), ]
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
3168 3/2/11 Night NA Panama PA Bocas del Toro 8114 Almirante 8.41491 NA 9.3224 -82.4732 (9.3224, -82.473200000000006) Landslide Landslide Medium Downpour NA NA 0 NA http://www.newsroompanama.com/panama/2434-off-season-rain-creating-crop-worries-and-landslides.html
7489 6/21/15 Morning NA Panama PA Bocas del Toro 993 Punta Peña 0.29869 Above road 8.9070 -82.1867 (8.907, -82.186700000000002) Landslide Landslide Small Rain NA 0 0 tvn http://www.tvn-2.com/videos/noticias/Deslizamientos-Punta-Pena-Chiriqui-Grande_2_4237096253.html
7497 6/29/15 NA NA Panama PA Bocas del Toro 22900 Changuinola 5.97977 Above road 9.3880 -82.4858 (9.3879999999999999, -82.485799999999998) Landslide Landslide Small Downpour NA 0 0 La Prensa http://www.prensa.com/provincias/Continuan-fuertes-lluvias-Bocas-Toro_0_4243075788.html
7498 6/29/15 NA NA Panama PA Bocas del Toro 918 Valle del Risco 6.65185 Unknown 9.2102 -82.3705 (9.2102000000000004, -82.370500000000007) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.prensa.com/provincias/Continuan-fuertes-lluvias-Bocas-Toro_0_4243075788.html
7499 12/21/15 NA NA Panama PA Bocas del Toro 0 Pueblo Nuevo 3.92621 Above road 8.8543 -82.1825 (8.8543000000000003, -82.182500000000005) Landslide Mudslide Small Rain NA 0 0 Panama America http://www.panamaamerica.com.pa/provincias/despejan-hacia-bocas-del-toro-por-deslizamiento-1005781
7500 5/24/15 23:30 NA Panama PA Bocas del Toro 918 Valle del Risco 5.13913 Above road 9.2279 -82.3808 (9.2279, -82.380799999999994) Landslide Mudslide Medium Rain NA 0 0 Panama America http://www.panamaamerica.com.pa/provincias/deslizamiento-de-tierra-en-bocas-del-toro-provoca-obstruccion-de-ambos-panos-977372

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of `distance`, one fill colour per city.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of `distance`, one fill colour per city.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single stacked bar of `distance` drawn in polar coordinates,
# one fill colour per city.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Sort by city (descending), then add two columns:
#   prop - each row's share of the total distance, in percent
#   ypos - midpoint of the row's slice on the cumulative percentage scale,
#          used to place the labels of the pie chart
df_PA <- df_PA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if the package is missing; require() would only
# return FALSE and let later code break with a less obvious error.
library(scales)
# Pie chart of each row's share of the total distance, labelled with its
# percentage at the slice midpoint (`ypos`).
ggplot(df_PA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = scales::percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (it only triggers an "Unknown
  # palette" warning); "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Put the observations in chronological order before building the series:
# df_PA has just been sorted by city, not by date. Dates are stored as text
# such as "6/29/15" (month/day/two-digit year).
# NOTE(review): ts() still treats the events as evenly spaced monthly values
# starting in 2008, which the irregular event dates do not guarantee.
event_order <- order(as.Date(df_PA$date, format = "%m/%d/%y"))
data <- ts(df_PA$distance[event_order], frequency = 12, start = 2008)
knitr::kable(head(data))
x
6.65185
5.13913
0.29869
3.92621
5.97977
4.45630
# Line plot of the series with a title and axis labels, black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets the title of a
# colour legend; it does not colour the line.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (each value is named after the city of its row),
# drawn as a Pareto chart with cumulative-percentage ticks every 10 points.
# NOTE(review): a city appears once per event, so a name can repeat on the
# axis; confirm whether per-city totals were intended.
distance <- setNames(df_PA$distance, df_PA$city)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por ciudades",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                  
## Pareto chart analysis for distance
##                     Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Almirante         8.4149100   8.4149100  24.1344073   24.1344073
##   Valle del Risco   6.6518500  15.0667600  19.0778579   43.2122652
##   Changuinola       5.9797700  21.0465300  17.1502969   60.3625620
##   Valle del Risco   5.1391300  26.1856600  14.7392969   75.1018589
##   Cauchero          4.4563000  30.6419600  12.7809043   87.8827632
##   Pueblo Nuevo      3.9262100  34.5681700  11.2605781   99.1433413
##   Punta Peña        0.2986900  34.8668600   0.8566587  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_PA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3
##   2 | 9
##   4 | 51
##   6 | 07
##   8 | 4
# Print the first rows of df_PA, which by now also carries `prop` and `ypos`.
head(df_PA)
## # A tibble: 6 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  7498 6/29/15  <NA>    <NA>           Panama       PA           Boca~        918
## 2  7500 5/24/15  23:30   <NA>           Panama       PA           Boca~        918
## 3  7489 6/21/15  Morning <NA>           Panama       PA           Boca~        993
## 4  7499 12/21/15 <NA>    <NA>           Panama       PA           Boca~          0
## 5  7497 6/29/15  <NA>    <NA>           Panama       PA           Boca~      22900
## 6  7501 10/8/15  <NA>    <NA>           Panama       PA           Boca~          0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview of the first rows, rendered as a table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7498 6/29/15 NA NA Panama PA Bocas del Toro 918 Valle del Risco 6.65185 Unknown 9.2102 -82.3705 (9.2102000000000004, -82.370500000000007) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.prensa.com/provincias/Continuan-fuertes-lluvias-Bocas-Toro_0_4243075788.html 19.0778579 9.538929
7500 5/24/15 23:30 NA Panama PA Bocas del Toro 918 Valle del Risco 5.13913 Above road 9.2279 -82.3808 (9.2279, -82.380799999999994) Landslide Mudslide Medium Rain NA 0 0 Panama America http://www.panamaamerica.com.pa/provincias/deslizamiento-de-tierra-en-bocas-del-toro-provoca-obstruccion-de-ambos-panos-977372 14.7392969 26.447506
7489 6/21/15 Morning NA Panama PA Bocas del Toro 993 Punta Peña 0.29869 Above road 8.9070 -82.1867 (8.907, -82.186700000000002) Landslide Landslide Small Rain NA 0 0 tvn http://www.tvn-2.com/videos/noticias/Deslizamientos-Punta-Pena-Chiriqui-Grande_2_4237096253.html 0.8566587 34.245484
7499 12/21/15 NA NA Panama PA Bocas del Toro 0 Pueblo Nuevo 3.92621 Above road 8.8543 -82.1825 (8.8543000000000003, -82.182500000000005) Landslide Mudslide Small Rain NA 0 0 Panama America http://www.panamaamerica.com.pa/provincias/despejan-hacia-bocas-del-toro-por-deslizamiento-1005781 11.2605781 40.304102
7497 6/29/15 NA NA Panama PA Bocas del Toro 22900 Changuinola 5.97977 Above road 9.3880 -82.4858 (9.3879999999999999, -82.485799999999998) Landslide Landslide Small Downpour NA 0 0 La Prensa http://www.prensa.com/provincias/Continuan-fuertes-lluvias-Bocas-Toro_0_4243075788.html 17.1502969 54.509540
7501 10/8/15 NA NA Panama PA Bocas del Toro 0 Cauchero 4.45630 Above road 9.1261 -82.2996 (9.1260999999999992, -82.299599999999998) Landslide Landslide Small Rain NA 0 0 Mi Diario http://m.midiario.com/uhora/nacionales/se-registran-deslizamientos-en-chiriqu%C3%AD-grande 12.7809043 69.475141
# Stem-and-leaf display of the distance column (default scale).
stem(df_PA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3
##   2 | 9
##   4 | 51
##   6 | 07
##   8 | 4
# Same display with scale = 2, which lengthens the plot so that the stems
# are split more finely.
stem(df_PA$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 
##   2 | 
##   3 | 9
##   4 | 5
##   5 | 1
##   6 | 07
##   7 | 
##   8 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the distinct distance values, with cumulative
# percentages and a final "Total" row, rendered as a table.
# NOTE(review): the variable name `table` shadows base::table(); function
# calls to table() still resolve to the function, but another name would be
# clearer.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.29869 1 14.3 14.3 14.3 14.3
3.92621 1 14.3 14.3 28.6 28.6
4.4563 1 14.3 14.3 42.9 42.9
5.13913 1 14.3 14.3 57.1 57.1
5.97977 1 14.3 14.3 71.4 71.4
6.65185 1 14.3 14.3 85.7 85.7
8.41491 1 14.3 14.3 100.0 100.0
Total 7 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  8 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 7
##  $ %      : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ val%   : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ %cum   : num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
##  $ val%cum: num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
# Build a two-column data frame (value label, count) from the frequency
# table, leaving out its last row, which holds the "Total" line.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], stays
# correct when only one row is present (1:0 would count backwards).
names <- head(x, -1)
freqs <- head(y, -1)
# Keep the labels as a factor in table order; as plain character they would
# be sorted alphabetically rather than numerically on a plot axis.
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.29869 1
3.92621 1
4.4563 1
5.13913 1
5.97977 1
6.65185 1
8.41491 1
library(ggplot2)
# Bar chart of the count of each distinct distance value; the x-axis labels
# are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n): use the ceiling, unless
# the ceiling is even, in which case use the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Breaks start at the minimum and reach beyond the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.29869 3.29869 6.29869 9.29869
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum value, which is exactly the
# first break, falls outside every class and is silently dropped as NA.
# NOTE: this replaces the numeric `distance` vector with a factor of classes.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.299,3.3] 0 0.0000000 0
(3.3,6.3] 4 0.6666667 4
(6.3,9.3] 2 0.3333333 6
# Inspect the structure (column types) of the grouped frequency table.
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.299,3.3]",..: 1 2 3
##  $ Freq    : int  0 4 2
##  $ Rel_Freq: num  0 0.667 0.333
##  $ Cum_Freq: int  0 4 6
# Plotting data: class label on x, absolute frequency on y.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.299,3.3] 0
(3.3,6.3] 4
(6.3,9.3] 2
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only. stat.desc() yields an
# all-NA column for every non-numeric variable, so those columns are dropped
# first to keep the summary readable.
stat.desc(df_PA[vapply(df_PA, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      7.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.168000e+03   NA   NA             NA           NA           NA
## max          7.501000e+03   NA   NA             NA           NA           NA
## range        4.333000e+03   NA   NA             NA           NA           NA
## sum          4.815200e+04   NA   NA             NA           NA           NA
## median       7.498000e+03   NA   NA             NA           NA           NA
## mean         6.878857e+03   NA   NA             NA           NA           NA
## SE.mean      6.184780e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.513361e+03   NA   NA             NA           NA           NA
## var          2.677605e+06   NA   NA             NA           NA           NA
## std.dev      1.636339e+03   NA   NA             NA           NA           NA
## coef.var     2.378795e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 7.000000e+00   NA  7.0000000                   NA
## nbr.null        NA 2.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 0.000000e+00   NA  0.2986900                   NA
## max             NA 2.290000e+04   NA  8.4149100                   NA
## range           NA 2.290000e+04   NA  8.1162200                   NA
## sum             NA 3.384300e+04   NA 34.8668600                   NA
## median          NA 9.180000e+02   NA  5.1391300                   NA
## mean            NA 4.834714e+03   NA  4.9809800                   NA
## SE.mean         NA 3.197087e+03   NA  0.9626098                   NA
## CI.mean.0.95    NA 7.822989e+03   NA  2.3554214                   NA
## var             NA 7.154954e+07   NA  6.4863239                   NA
## std.dev         NA 8.458696e+03   NA  2.5468262                   NA
## coef.var        NA 1.749575e+00   NA  0.5113103                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       7.00000000  7.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min           8.85430000 -8.248580e+01          NA          NA             NA
## max           9.38800000 -8.218250e+01          NA          NA             NA
## range         0.53370000  3.033000e-01          NA          NA             NA
## sum          64.03590000 -5.763791e+02          NA          NA             NA
## median        9.21020000 -8.237050e+01          NA          NA             NA
## mean          9.14798571 -8.233987e+01          NA          NA             NA
## SE.mean       0.07607773  4.668383e-02          NA          NA             NA
## CI.mean.0.95  0.18615551  1.142312e-01          NA          NA             NA
## var           0.04051475  1.525566e-02          NA          NA             NA
## std.dev       0.20128276  1.235138e-01          NA          NA             NA
## coef.var      0.02200296 -1.500049e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        6          7          NA
## nbr.null                 NA      NA         NA        6          7          NA
## nbr.na                   NA      NA         NA        1          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA   7.0000000   7.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.8566587   9.5389289
## max                   NA  24.1344073  87.9327964
## range                 NA  23.2777486  78.3938674
## sum                   NA 100.0000000 322.4534988
## median                NA  14.7392969  40.3041025
## mean                  NA  14.2857143  46.0647855
## SE.mean               NA   2.7608160  10.0760751
## CI.mean.0.95          NA   6.7554733  24.6552675
## var                   NA  53.3547333 710.6910217
## std.dev               NA   7.3044324  26.6587888
## coef.var              NA   0.5113103   0.5787238
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Chiriquí (Panamá)

library(readr)
library(knitr)
# Reload the full catalog: `df` was overwritten earlier with a small plotting
# data frame, so the raw data has to be read again.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep the rows whose state is Chiriquí.
colnames(df)[c(7, 9)] <- c("state", "city")
df_PA <- subset(df, state == "Chiriquí")
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
905 11/25/08 NA NA Panama PA Chiriquí 6123 Boquete 9.83700 NA 8.7870 -82.3440 (8.7870000000000008, -82.343999999999994) Landslide Complex Medium Downpour NA NA 8 NA http://www.iht.com/articles/ap/2008/11/26/news/LT-Panama-Flooding.php
6699 9/12/14 NA NA Panama PA Chiriquí 11245 Volcán 0.35187 Above road 8.7636 -82.6342 (8.7636000000000003, -82.634200000000007) Landslide Landslide Small Rain NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/reportan-deslizamiento-tierra-chiriqui/23804230
6700 9/25/14 NA NA Panama PA Chiriquí 1908 Río Sereno 0.64491 Above road 8.8229 -82.8416 (8.8229000000000006, -82.8416) Landslide Landslide Medium Downpour NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/deslizamiento-tierra-obstruye-sereno/23807753
6704 8/17/14 Afternoon NA Panama PA Chiriquí 2957 Cerro Punta 3.33873 Unknown 8.8528 -82.5969 (8.8528000000000002, -82.596900000000005) Landslide Landslide Small Downpour NA 0 0 FullPassPanama http://fullpasspanama.net/situacion-en-cerro-punta-chiriqui/
7502 2/10/15 NA NA Panama PA Chiriquí 0 Boca de Balsa 2.44254 Above road 8.5121 -82.0513 (8.5121000000000002, -82.051299999999998) Landslide Landslide Medium Rain NA 0 0 NA http://www.telemetro.com/nacionales/Crecida-Chiriqui-provoca-puente-potabilizadoras_0_860014389.html
7503 11/23/15 NA NA Panama PA Chiriquí 1908 Río Sereno 3.66775 Unknown 8.7868 -82.8662 (8.7867999999999995, -82.866200000000006) Landslide Landslide Medium Rain NA 0 0 La Estrella http://laestrella.com.pa/panama/nacional/inundaciones-deslizamientos-afectan-chiriqui/23905610

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance per city within the state.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities piled into one bar per state.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar drawn in polar coordinates, with
# the bar height mapped to the angle.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels.
# Rows are sorted by descending city name; `prop` is each row's share of the
# total distance (in percent) and `ypos` is the midpoint of its slice, used to
# place the label.
df_PA <- df_PA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_PA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown palette Set4");
  # "Set3" is a valid qualitative palette with 12 colours.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build the series in chronological order. df_PA was re-sorted by city name
# for the pie chart, so the distances are first ordered by event date
# (month/day/two-digit year, e.g. "9/12/14").
# NOTE(review): the events are not evenly spaced in time, so frequency = 12 and
# start = 2008 only give the plot an index axis, not real calendar dates --
# confirm this is intended.
data <- ts(
  df_PA$distance[order(as.Date(df_PA$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
0.35187
0.64491
3.66775
0.90169
6.74338
3.33873
# Line plot of the series with a title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only names a colour legend; it
# does not colour the line -- confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of distance by city: bars in decreasing order plus the
# cumulative percentage curve. The distances are named after their city so the
# chart can label each bar.
distance <- setNames(df_PA$distance, df_PA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                 
## Pareto chart analysis for distance
##                   Frequency  Cum.Freq. Percentage Cum.Percent.
##   Boquete          9.837000   9.837000  34.750066    34.750066
##   Monte Lirio      6.743380  16.580380  23.821582    58.571648
##   Río Sereno       3.667750  20.248130  12.956649    71.528296
##   Cerro Punta      3.338730  23.586860  11.794357    83.322653
##   Boca de Balsa    2.442540  26.029400   8.628487    91.951140
##   Palmira Centro   0.901690  26.931090   3.185299    95.136439
##   Río Sereno       0.644910  27.576000   2.278201    97.414640
##   Breñón           0.379990  27.955990   1.342348    98.756988
##   Volcán           0.351870  28.307860   1.243012   100.000000
# Stem-and-leaf display of the distances (default scale).
stem(df_PA$distance)
## 
##   The decimal point is at the |
## 
##   0 | 4469
##   2 | 437
##   4 | 
##   6 | 7
##   8 | 8
# Preview the first rows; the tibble now also carries the prop and ypos columns.
head(df_PA)
## # A tibble: 6 x 25
##      id date     time      continent_code country_name country_code state population
##   <dbl> <chr>    <chr>     <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6699 9/12/14  <NA>      <NA>           Panama       PA           Chir~      11245
## 2  6700 9/25/14  <NA>      <NA>           Panama       PA           Chir~       1908
## 3  7503 11/23/15 <NA>      <NA>           Panama       PA           Chir~       1908
## 4  7504 11/22/15 <NA>      <NA>           Panama       PA           Chir~          0
## 5  7507 11/11/15 <NA>      <NA>           Panama       PA           Chir~       2823
## 6  6704 8/17/14  Afternoon <NA>           Panama       PA           Chir~       2957
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6699 9/12/14 NA NA Panama PA Chiriquí 11245 Volcán 0.35187 Above road 8.7636 -82.6342 (8.7636000000000003, -82.634200000000007) Landslide Landslide Small Rain NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/reportan-deslizamiento-tierra-chiriqui/23804230 1.243012 0.6215058
6700 9/25/14 NA NA Panama PA Chiriquí 1908 Río Sereno 0.64491 Above road 8.8229 -82.8416 (8.8229000000000006, -82.8416) Landslide Landslide Medium Downpour NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/deslizamiento-tierra-obstruye-sereno/23807753 2.278201 2.3821122
7503 11/23/15 NA NA Panama PA Chiriquí 1908 Río Sereno 3.66775 Unknown 8.7868 -82.8662 (8.7867999999999995, -82.866200000000006) Landslide Landslide Medium Rain NA 0 0 La Estrella http://laestrella.com.pa/panama/nacional/inundaciones-deslizamientos-afectan-chiriqui/23905610 12.956649 9.9995372
7504 11/22/15 NA NA Panama PA Chiriquí 0 Palmira Centro 0.90169 Above road 8.7463 -82.4569 (8.7462999999999997, -82.456900000000005) Landslide Landslide Medium Rain NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/inundaciones-deslizamientos-afectan-chiriqui/23905610 3.185299 18.0705112
7507 11/11/15 NA NA Panama PA Chiriquí 2823 Monte Lirio 6.74338 Above road 8.8330 -82.8014 (8.8330000000000002, -82.801400000000001) Landslide Landslide Small Downpour NA 0 0 Bugaba http://www.soydebugaba.com/noticias/fuertes-lluvias-causaron-inundaciones-en-boquete-y-deslizamientos-en-renacimiento 23.821582 31.5739515
6704 8/17/14 Afternoon NA Panama PA Chiriquí 2957 Cerro Punta 3.33873 Unknown 8.8528 -82.5969 (8.8528000000000002, -82.596900000000005) Landslide Landslide Small Downpour NA 0 0 FullPassPanama http://fullpasspanama.net/situacion-en-cerro-punta-chiriqui/ 11.794357 49.3819208
# Stem-and-leaf display of the distances (default scale), shown again for
# comparison with the scale = 2 version.
stem(df_PA$distance)
## 
##   The decimal point is at the |
## 
##   0 | 4469
##   2 | 437
##   4 | 
##   6 | 7
##   8 | 8
# Stem-and-leaf display with scale = 2, which makes the plot roughly twice as long.
stem(df_PA$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 4469
##   1 | 
##   2 | 4
##   3 | 37
##   4 | 
##   5 | 
##   6 | 7
##   7 | 
##   8 | 
##   9 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the raw `distance` values: counts, percentages and
# cumulative percentages, with a final "Total" row.
# The result is stored under the name `table`; later calls such as
# table(distance) still reach base::table because R skips non-function
# bindings when it looks up a function.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.35187 1 11.1 11.1 11.1 11.1
0.37999 1 11.1 11.1 22.2 22.2
0.64491 1 11.1 11.1 33.3 33.3
0.90169 1 11.1 11.1 44.4 44.4
2.44254 1 11.1 11.1 55.6 55.6
3.33873 1 11.1 11.1 66.7 66.7
3.66775 1 11.1 11.1 77.8 77.8
6.74338 1 11.1 11.1 88.9 88.9
9.837 1 11.1 11.1 100.0 100.0
Total 9 100.0 100.0 100.0 100.0
# Show the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  10 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 9
##  $ %      : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ val%   : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ %cum   : num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
##  $ val%cum: num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
# Split the frequency table into labels and counts, dropping the trailing
# "Total" row.
# head(v, -1) replaces v[1:(length(v) - 1)]: with a single row, 1:0 counts
# down to c(1, 0) and the row that should be dropped would be kept.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
# NOTE: this overwrites `df` (the full catalog) with the small data frame used
# for plotting.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.35187 1
0.37999 1
0.64491 1
0.90169 1
2.44254 1
3.33873 1
3.66775 1
6.74338 1
9.837 1
library(ggplot2)
# Bar chart with one bar per distinct distance value; the x-axis labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesf <- floor(n_sturges)
n_sturgesc <- ceiling(n_sturges)

# Use the ceiling unless it is even, in which case fall back to the floor.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and advance by `w` up to max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.35187  2.35187  4.35187  6.35187  8.35187 10.35187
# Bin the distances into the classes defined by `bins`.
# include.lowest = TRUE closes the first interval on the left. Without it every
# interval is (a, b], so the minimum distance -- which is the first break --
# falls outside all classes, becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.352,2.35] 3 0.375 3
(2.35,4.35] 3 0.375 6
(4.35,6.35] 0 0.000 6
(6.35,8.35] 1 0.125 7
(8.35,10.4] 1 0.125 8
# Show the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.352,2.35]",..: 1 2 3 4 5
##  $ Freq    : int  3 3 0 1 1
##  $ Rel_Freq: num  0.375 0.375 0 0.125 0.125
##  $ Cum_Freq: int  3 6 6 7 8
# Plotting data: class label on x, absolute frequency on y.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.352,2.35] 3
(2.35,4.35] 3
(4.35,6.35] 0
(6.35,8.35] 1
(8.35,10.4] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only. stat.desc() yields an
# all-NA column for every non-numeric variable, so those columns are dropped
# first to keep the summary readable.
stat.desc(df_PA[vapply(df_PA, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      9.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          9.050000e+02   NA   NA             NA           NA           NA
## max          7.507000e+03   NA   NA             NA           NA           NA
## range        6.602000e+03   NA   NA             NA           NA           NA
## sum          5.852900e+04   NA   NA             NA           NA           NA
## median       7.502000e+03   NA   NA             NA           NA           NA
## mean         6.503222e+03   NA   NA             NA           NA           NA
## SE.mean      7.116808e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.641139e+03   NA   NA             NA           NA           NA
## var          4.558406e+06   NA   NA             NA           NA           NA
## std.dev      2.135042e+03   NA   NA             NA           NA           NA
## coef.var     3.283053e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 9.000000e+00   NA  9.000000                   NA  9.00000000
## nbr.null        NA 3.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 0.000000e+00   NA  0.351870                   NA  8.51210000
## max             NA 1.124500e+04   NA  9.837000                   NA  8.85280000
## range           NA 1.124500e+04   NA  9.485130                   NA  0.34070000
## sum             NA 2.696400e+04   NA 28.307860                   NA 78.68170000
## median          NA 1.908000e+03   NA  2.442540                   NA  8.78680000
## mean            NA 2.996000e+03   NA  3.145318                   NA  8.74241111
## SE.mean         NA 1.219922e+03   NA  1.088227                   NA  0.03937552
## CI.mean.0.95    NA 2.813145e+03   NA  2.509457                   NA  0.09080011
## var             NA 1.339389e+07   NA 10.658149                   NA  0.01395388
## std.dev         NA 3.659766e+03   NA  3.264682                   NA  0.11812656
## coef.var        NA 1.221551e+00   NA  1.037950                   NA  0.01351190
##                  longitude geolocation hazard_type landslide_type
## nbr.val       9.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.286620e+01          NA          NA             NA
## max          -8.205130e+01          NA          NA             NA
## range         8.149000e-01          NA          NA             NA
## sum          -7.434157e+02          NA          NA             NA
## median       -8.263420e+01          NA          NA             NA
## mean         -8.260174e+01          NA          NA             NA
## SE.mean       9.188235e-02          NA          NA             NA
## CI.mean.0.95  2.118811e-01          NA          NA             NA
## var           7.598130e-02          NA          NA             NA
## std.dev       2.756470e-01          NA          NA             NA
## coef.var     -3.337061e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        8  9.0000000          NA
## nbr.null                 NA      NA         NA        8  8.0000000          NA
## nbr.na                   NA      NA         NA        1  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  8.0000000          NA
## range                    NA      NA         NA        0  8.0000000          NA
## sum                      NA      NA         NA        0  8.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.8888889          NA
## SE.mean                  NA      NA         NA        0  0.8888889          NA
## CI.mean.0.95             NA      NA         NA        0  2.0497815          NA
## var                      NA      NA         NA        0  7.1111111          NA
## std.dev                  NA      NA         NA        0  2.6666667          NA
## coef.var                 NA      NA         NA      NaN  3.0000000          NA
##              source_link       prop         ypos
## nbr.val               NA   9.000000    9.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA   1.243012    0.6215058
## max                   NA  34.750066   95.6857565
## range                 NA  33.507054   95.0642507
## sum                   NA 100.000000  337.6620486
## median                NA   8.628487   31.5739515
## mean                  NA  11.111111   37.5180054
## SE.mean               NA   3.844259   11.1563092
## CI.mean.0.95          NA   8.864877   25.7264952
## var                   NA 133.004927 1120.1691177
## std.dev               NA  11.532776   33.4689276
## coef.var              NA   1.037950    0.8920764
# Horizontal box plot of the `data` series, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Colón (Panamá)

library(readr)
library(knitr)
# Reload the full catalog: `df` was overwritten earlier with a small plotting
# data frame, so the raw data has to be read again.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names.
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
# Keep only Panama's Colón province. Filtering on the state name alone also
# matched Colón in Honduras (city Cusuna, distance 36.4), and that single row
# then dominated every chart and table in this section.
df_PA <- subset(df, country_name == "Panama" & state == "Colón")
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2652 10/25/10 NA NA Honduras HN Colón 1441 Cusuna 36.37629 NA 15.5227 -85.2650 (15.5227, -85.265000000000001) Landslide Landslide Medium Tropical cyclone Tropical Storm Richard NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/LSGZ-8ALBZE-full_report.pdf/$File/full_report.pdf
2794 12/8/10 NA NA Panama PA Colón 1310 El Giral 1.80330 NA 9.2332 -79.6961 (9.2332000000000001, -79.696100000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.portworld.com/news/i98731/Panama_Canal_disrupted_by_floods
2795 12/9/10 Morning NA Panama PA Colón 1274 Portobelo 0.09491 Deforested slope 9.5493 -79.6505 (9.5493000000000006, -79.650499999999994) Landslide Mudslide Medium Downpour NA NA 8 Fox News Latino http://latino.foxnews.com/latino/news/2010/12/09/heavy-rains-kill-panama-force-canal-close/
4632 11/25/12 NA NA Panama PA Colón 76643 Colón 0.16894 NA 9.3600 -79.9001 (9.36, -79.900099999999995) Landslide Landslide Medium Rain NA NA 2 NA http://www.ndtv.com/article/world/two-dead-in-landslide-as-floods-hit-panama-297138
4879 5/28/13 NA NA Panama PA Colón 1274 Portobelo 2.67409 NA 9.5676 -79.6667 (9.5676000000000005, -79.666700000000006) Landslide Landslide Medium Downpour NA NA 0 www.newsroompanama.com http://www.newsroompanama.com/panama/5804-rain-brings-floods-landslides-and-traffic-chaos-to-colon.html
4880 5/28/13 NA NA Panama PA Colón 3302 Margarita 0.74760 NA 9.3381 -79.8897 (9.3381000000000007, -79.889700000000005) Landslide Landslide Medium Downpour NA NA 0 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance per city within the state.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities piled into one bar per state.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar drawn in polar coordinates, with
# the bar height mapped to the angle.
ggplot(df_PA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels.
# Rows are sorted by descending city name; `prop` is each row's share of the
# total distance (in percent) and `ypos` is the midpoint of its slice, used to
# place the label.
df_PA <- df_PA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_PA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown palette Set4");
  # "Set3" is a valid qualitative palette with 12 colours.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build the series in chronological order. df_PA was re-sorted by city name
# for the pie chart, so the distances are first ordered by event date
# (month/day/two-digit year, e.g. "12/9/10").
# NOTE(review): the events are not evenly spaced in time, so frequency = 12 and
# start = 2008 only give the plot an index axis, not real calendar dates --
# confirm this is intended.
data <- ts(
  df_PA$distance[order(as.Date(df_PA$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
0.09491
2.67409
4.74914
0.18619
2.28589
0.74760
# Line plot of the series with a title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only names a colour legend; it
# does not colour the line -- confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of distance by city: bars in decreasing order plus the
# cumulative percentage curve. The distances are named after their city so the
# chart can label each bar.
distance <- setNames(df_PA$distance, df_PA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                    
## Pareto chart analysis for distance
##                       Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Cusuna             36.3762900  36.3762900  71.7525220   71.7525220
##   Nuevo San Juan      4.7491400  41.1254300   9.3677165   81.1202386
##   Portobelo           2.6740900  43.7995200   5.2746638   86.3949024
##   María Chiquita      2.2858900  46.0854100   4.5089363   90.9038387
##   El Giral            1.8033000  47.8887100   3.5570236   94.4608623
##   Margarita           0.7674000  48.6561100   1.5137026   95.9745649
##   Margarita           0.7476000  49.4037100   1.4746470   97.4492119
##   Cativá              0.6394800  50.0431900   1.2613794   98.7105913
##   Colón               0.2036500  50.2468400   0.4017012   99.1122925
##   Nueva Providencia   0.1861900  50.4330300   0.3672613   99.4795538
##   Colón               0.1689400  50.6019700   0.3332355   99.8127893
##   Portobelo           0.0949100  50.6968800   0.1872107  100.0000000
# Stem-and-leaf display of the distances (default scale).
stem(df_PA$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 00001112235
##   1 | 
##   2 | 
##   3 | 6
# Preview the first rows; the tibble now also carries the prop and ypos columns.
head(df_PA)
## # A tibble: 6 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2795 12/9/10 Morning <NA>           Panama       PA           Colón       1274
## 2  4879 5/28/13 <NA>    <NA>           Panama       PA           Colón       1274
## 3  6702 5/9/14  <NA>    <NA>           Panama       PA           Colón       1232
## 4  7450 9/7/15  <NA>    <NA>           Panama       PA           Colón          0
## 5  7451 7/2/15  <NA>    <NA>           Panama       PA           Colón       1146
## 6  4880 5/28/13 <NA>    <NA>           Panama       PA           Colón       3302
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2795 12/9/10 Morning NA Panama PA Colón 1274 Portobelo 0.09491 Deforested slope 9.5493 -79.6505 (9.5493000000000006, -79.650499999999994) Landslide Mudslide Medium Downpour NA NA 8 Fox News Latino http://latino.foxnews.com/latino/news/2010/12/09/heavy-rains-kill-panama-force-canal-close/ 0.1872107 0.0936054
4879 5/28/13 NA NA Panama PA Colón 1274 Portobelo 2.67409 NA 9.5676 -79.6667 (9.5676000000000005, -79.666700000000006) Landslide Landslide Medium Downpour NA NA 0 www.newsroompanama.com http://www.newsroompanama.com/panama/5804-rain-brings-floods-landslides-and-traffic-chaos-to-colon.html 5.2746638 2.8245427
6702 5/9/14 NA NA Panama PA Colón 1232 Nuevo San Juan 4.74914 Below road 9.2924 -79.7478 (9.2924000000000007, -79.747799999999998) Landslide Landslide Small Downpour NA 0 0 NEXtv http://www.nexpanama.com/videos/deslizamiento-de-tierra-en-carretera-boyd-roosevelt-3800 9.3677165 10.1457328
7450 9/7/15 NA NA Panama PA Colón 0 Nueva Providencia 0.18619 Unknown 9.2619 -79.8164 (9.2619000000000007, -79.816400000000002) Landslide Landslide Medium Downpour NA 0 0 Panamá América http://www.panamaamerica.com.pa/provincias/un-colegio-y-20-casas-afectadas-por-las-lluvias-en-colon-991260 0.3672613 15.0132217
7451 7/2/15 NA NA Panama PA Colón 1146 María Chiquita 2.28589 Mine construction 9.4405 -79.7754 (9.4405000000000001, -79.775400000000005) Landslide Other Medium Construction NA 0 1 El Siglo http://elsiglo.com/panama/alud-tierra-obrero-colon-muere/23876592 4.5089363 17.4513205
4880 5/28/13 NA NA Panama PA Colón 3302 Margarita 0.74760 NA 9.3381 -79.8897 (9.3381000000000007, -79.889700000000005) Landslide Landslide Medium Downpour NA NA 0 NA NA 1.4746470 20.4431121
# Stem-and-leaf display of the distances (default scale), shown again for
# comparison with the scale = 2 version.
stem(df_PA$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 00001112235
##   1 | 
##   2 | 
##   3 | 6
# Stem-and-leaf display with scale = 2, which makes the plot roughly twice as long.
stem(df_PA$distance, scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0000111223
##   0 | 5
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 6

Tablas de frecuencia

library(questionr)
# Frequency table of the raw `distance` values: counts, percentages and
# cumulative percentages, with a final "Total" row.
# The result is stored under the name `table`; later calls such as
# table(distance) still reach base::table because R skips non-function
# bindings when it looks up a function.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.09491 1 8.3 8.3 8.3 8.3
0.16894 1 8.3 8.3 16.7 16.7
0.18619 1 8.3 8.3 25.0 25.0
0.20365 1 8.3 8.3 33.3 33.3
0.63948 1 8.3 8.3 41.7 41.7
0.7476 1 8.3 8.3 50.0 50.0
0.7674 1 8.3 8.3 58.3 58.3
1.8033 1 8.3 8.3 66.7 66.7
2.28589 1 8.3 8.3 75.0 75.0
2.67409 1 8.3 8.3 83.3 83.3
4.74914 1 8.3 8.3 91.7 91.7
36.37629 1 8.3 8.3 100.0 100.0
Total 12 100.0 100.0 100.0 100.0
# Show the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  13 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ val%   : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ %cum   : num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
##  $ val%cum: num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
# Category labels (the observed distances, as text) and their counts come from
# the frequency table. Its last row is the "Total" summary, so it is dropped.
# head(, -1) is used instead of 1:(length - 1): the latter counts backwards
# (1, 0) when only the "Total" row exists and would keep that row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.09491 1
0.16894 1
0.18619 1
0.20365 1
0.63948 1
0.7476 1
0.7674 1
1.8033 1
2.28589 1
2.67409 1
4.74914 1
36.37629 1
library(ggplot2)
# The categories in `x` are numbers stored as text. Left as-is, ggplot2 orders
# them alphabetically (e.g. "36.37629" before "4.74914"), so the axis is
# explicitly ordered by numeric value.
ggplot(
  data = df,
  aes(x = reorder(x, as.numeric(as.character(x))), y = y)
) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: range divided by the number of classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# A zero range (a single observation, or all values equal) gives w == 0.
# seq() would then return a single break, which cut() interprets as a *count*
# of intervals rather than as cut points. Fall back to a unit width so there
# are always at least two breaks.
if (w == 0) {
  w <- 1
}
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.09491  8.09491 16.09491 24.09491 32.09491 40.09491
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum, which equals the first break,
# falls outside every (a, b] class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0949,8.09] 10 0.9090909 10
(8.09,16.1] 0 0.0000000 10
(16.1,24.1] 0 0.0000000 10
(24.1,32.1] 0 0.0000000 10
(32.1,40.1] 1 0.0909091 11
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.0949,8.09]",..: 1 2 3 4 5
##  $ Freq    : int  10 0 0 0 1
##  $ Rel_Freq: num  0.9091 0 0 0 0.0909
##  $ Cum_Freq: int  10 10 10 10 11
# Two-column data frame (class label, count) passed to the bar chart below.
df <- data.frame(x = Freq_table[["distance"]], y = Freq_table[["Freq"]])
knitr::kable(df)
x y
(0.0949,8.09] 10
(8.09,16.1] 0
(16.1,24.1] 0
(24.1,32.1] 0
(32.1,40.1] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class, with
# bar height taken directly from the count column.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_PA. Non-numeric columns are
# passed in as well and come back as all-NA columns in the printed table.
df_PA %>% stat.desc()
##                        id date time continent_code country_name country_code
## nbr.val      1.200000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.652000e+03   NA   NA             NA           NA           NA
## max          7.452000e+03   NA   NA             NA           NA           NA
## range        4.800000e+03   NA   NA             NA           NA           NA
## sum          6.181400e+04   NA   NA             NA           NA           NA
## median       4.880500e+03   NA   NA             NA           NA           NA
## mean         5.151167e+03   NA   NA             NA           NA           NA
## SE.mean      5.231425e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.151429e+03   NA   NA             NA           NA           NA
## var          3.284136e+06   NA   NA             NA           NA           NA
## std.dev      1.812219e+03   NA   NA             NA           NA           NA
## coef.var     3.518074e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.200000e+01   NA  12.000000                   NA
## nbr.null        NA 1.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 0.000000e+00   NA   0.094910                   NA
## max             NA 7.664300e+04   NA  36.376290                   NA
## range           NA 7.664300e+04   NA  36.281380                   NA
## sum             NA 1.971740e+05   NA  50.696880                   NA
## median          NA 1.375500e+03   NA   0.757500                   NA
## mean            NA 1.643117e+04   NA   4.224740                   NA
## SE.mean         NA 8.446243e+03   NA   2.950226                   NA
## CI.mean.0.95    NA 1.859006e+04   NA   6.493404                   NA
## var             NA 8.560683e+08   NA 104.446004                   NA
## std.dev         NA 2.925864e+04   NA  10.219883                   NA
## coef.var        NA 1.780680e+00   NA   2.419056                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       12.0000000   12.00000000          NA          NA             NA
## nbr.null       0.0000000    0.00000000          NA          NA             NA
## nbr.na         0.0000000    0.00000000          NA          NA             NA
## min            9.2332000  -85.26500000          NA          NA             NA
## max           15.5227000  -79.65050000          NA          NA             NA
## range          6.2895000    5.61450000          NA          NA             NA
## sum          118.6112000 -963.01940000          NA          NA             NA
## median         9.3590500  -79.81925000          NA          NA             NA
## mean           9.8842667  -80.25161667          NA          NA             NA
## SE.mean        0.5134407    0.45651767          NA          NA             NA
## CI.mean.0.95   1.1300754    1.00478862          NA          NA             NA
## var            3.1634562    2.50090061          NA          NA             NA
## std.dev        1.7786107    1.58142360          NA          NA             NA
## coef.var       0.1799436   -0.01970582          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4 12.0000000          NA
## nbr.null                 NA      NA         NA        4  8.0000000          NA
## nbr.na                   NA      NA         NA        8  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  8.0000000          NA
## range                    NA      NA         NA        0  8.0000000          NA
## sum                      NA      NA         NA        0 15.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  1.2500000          NA
## SE.mean                  NA      NA         NA        0  0.7084447          NA
## CI.mean.0.95             NA      NA         NA        0  1.5592763          NA
## var                      NA      NA         NA        0  6.0227273          NA
## std.dev                  NA      NA         NA        0  2.4541245          NA
## coef.var                 NA      NA         NA      NaN  1.9632996          NA
##              source_link        prop         ypos
## nbr.val               NA  12.0000000 1.200000e+01
## nbr.null              NA   0.0000000 0.000000e+00
## nbr.na                NA   0.0000000 0.000000e+00
## min                   NA   0.1872107 9.360537e-02
## max                   NA  71.7525220 9.936931e+01
## range                 NA  71.5653113 9.927570e+01
## sum                   NA 100.0000000 4.705863e+02
## median                NA   1.4941748 2.119020e+01
## mean                  NA   8.3333333 3.921552e+01
## SE.mean               NA   5.8193444 1.127004e+01
## CI.mean.0.95          NA  12.8082906 2.480520e+01
## var                   NA 406.3772270 1.524167e+03
## std.dev               NA  20.1588002 3.904058e+01
## coef.var              NA   2.4190560 9.955389e-01
# Horizontal green box plot of `data` (defined earlier in the document).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Los Santos (Panamá)

library(readr)
library(knitr)
# Load the landslide catalogue straight from the course repository. `df` is
# reassigned further down, which is why the file is read again here.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used in the rest of the analysis
# (they are renamed by position, not by their original names).
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose state is "Los Santos".
df_PA <- subset(df, subset = state == "Los Santos")
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
3167 3/2/11 Night NA Panama PA Los Santos 1283 La Palma 13.40535 NA 7.5955 -80.38 (7.5955000000000004, -80.38) Landslide Landslide Medium Downpour NA NA 0 NA http://www.newsroompanama.com/panama/2434-off-season-rain-creating-crop-worries-and-landslides.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on the y axis, one bar per row, coloured by city
# and grouped by state.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the same mapping as the grouped chart, with the distances
# piled on top of each other within each state.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar per `state` value, wrapped into polar
# coordinates so that each distance becomes a slice.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each row (`prop`) and the
# mid-point of its slice on the cumulative scale (`ypos`), used to position
# the labels.
df_PA <- df_PA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: stacked bar in polar coordinates, no axes or legend.
ggplot(df_PA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown palette Set4"
  # and fell back to a default); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a time-series object with 12 observations per
# unit of time, starting at 2008.
# NOTE(review): the values are taken in the current row order of df_PA and the
# `date` column is not used — confirm this is the intended series.
data <- ts(data = df_PA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
13.40535
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` is passed to labs(), where it acts as a
# label for the colour scale rather than as the line colour — presumably a
# green line was intended; confirm.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Diagrama de Pareto

library(qcc)
# Distances as a numeric vector whose element names are the cities; the names
# become the bar labels of the Pareto chart.
distance <- setNames(df_PA$distance, df_PA$city)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por ciudades",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##           
## Pareto chart analysis for distance
##            Frequency Cum.Freq. Percentage Cum.Percent.
##   La Palma  13.40535  13.40535  100.00000    100.00000
# Stem-and-leaf display of the distance column, then the first rows of df_PA
# (which now also carries the prop and ypos columns).
stem(df_PA$distance)
head(df_PA)
## # A tibble: 1 x 25
##      id date   time  continent_code country_name country_code state      population
##   <dbl> <chr>  <chr> <chr>          <chr>        <chr>        <chr>           <dbl>
## 1  3167 3/2/11 Night <NA>           Panama       PA           Los Santos       1283
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_PA as a formatted table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3167 3/2/11 Night NA Panama PA Los Santos 1283 La Palma 13.40535 NA 7.5955 -80.38 (7.5955000000000004, -80.38) Landslide Landslide Medium Downpour NA NA 0 NA http://www.newsroompanama.com/panama/2434-off-season-rain-creating-crop-worries-and-landslides.html 100 50
# Stem-and-leaf display of the distance column, at the default scale and at
# scale = 2.
stem(df_PA$distance)
stem(df_PA$distance, scale = 2)

Tablas de frecuencia

library(questionr)
# One-way frequency table of `distance` with cumulative percentages, rows
# sorted by decreasing count and a final "Total" row.
# NOTE(review): this object shares its name with base R's table() function,
# which is still called later in the document.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
13.40535 1 100 100 100 100
Total 1 100 100 100 100
# Inspect the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  2 obs. of  5 variables:
##  $ n      : num  1 1
##  $ %      : num  100 100
##  $ val%   : num  100 100
##  $ %cum   : num  100 100
##  $ val%cum: num  100 100
# Category labels (the observed distances, as text) and their counts come from
# the frequency table. Its last row is the "Total" summary, so it is dropped.
# head(, -1) is used instead of 1:(length - 1): the latter counts backwards
# (1, 0) when only the "Total" row exists and would keep that row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
13.40535 1
library(ggplot2)
# The categories in `x` are numbers stored as text. Left as-is, ggplot2 orders
# them alphabetically (e.g. "10" before "9"), so the axis is explicitly
# ordered by numeric value.
ggplot(
  data = df,
  aes(x = reorder(x, as.numeric(as.character(x))), y = y)
) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: range divided by the number of classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# A zero range (a single observation, or all values equal) gives w == 0.
# seq() would then return a single break, which cut() interprets as a *count*
# of intervals rather than as cut points — that is how one observation ends
# up spread over 13 artificial classes. Fall back to a unit width so there are
# always at least two breaks.
if (w == 0) {
  w <- 1
}
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 13.40535
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it a minimum that equals the first break
# falls outside every (a, b] class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(13.392,13.394] 0 0 0
(13.394,13.396] 0 0 0
(13.396,13.398] 0 0 0
(13.398,13.4] 0 0 0
(13.4,13.402] 0 0 0
(13.402,13.404] 0 0 0
(13.404,13.406] 1 1 1
(13.406,13.408] 0 0 1
(13.408,13.411] 0 0 1
(13.411,13.413] 0 0 1
(13.413,13.415] 0 0 1
(13.415,13.417] 0 0 1
(13.417,13.419] 0 0 1
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    13 obs. of  4 variables:
##  $ distance: Factor w/ 13 levels "(13.392,13.394]",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Freq    : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Rel_Freq: num  0 0 0 0 0 0 1 0 0 0 ...
##  $ Cum_Freq: int  0 0 0 0 0 0 1 1 1 1 ...
# Two-column data frame (class label, count) passed to the bar chart below.
df <- data.frame(x = Freq_table[["distance"]], y = Freq_table[["Freq"]])
knitr::kable(df)
x y
(13.392,13.394] 0
(13.394,13.396] 0
(13.396,13.398] 0
(13.398,13.4] 0
(13.4,13.402] 0
(13.402,13.404] 0
(13.404,13.406] 1
(13.406,13.408] 0
(13.408,13.411] 0
(13.411,13.413] 0
(13.413,13.415] 0
(13.415,13.417] 0
(13.417,13.419] 0
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class, with
# bar height taken directly from the count column.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_PA. Non-numeric columns are
# passed in as well and come back as all-NA columns in the printed table.
df_PA %>% stat.desc()
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                id date time continent_code country_name country_code state
## nbr.val         1   NA   NA             NA           NA           NA    NA
## nbr.null        0   NA   NA             NA           NA           NA    NA
## nbr.na          0   NA   NA             NA           NA           NA    NA
## min          3167   NA   NA             NA           NA           NA    NA
## max          3167   NA   NA             NA           NA           NA    NA
## range           0   NA   NA             NA           NA           NA    NA
## sum          3167   NA   NA             NA           NA           NA    NA
## median       3167   NA   NA             NA           NA           NA    NA
## mean         3167   NA   NA             NA           NA           NA    NA
## SE.mean        NA   NA   NA             NA           NA           NA    NA
## CI.mean.0.95  NaN   NA   NA             NA           NA           NA    NA
## var            NA   NA   NA             NA           NA           NA    NA
## std.dev        NA   NA   NA             NA           NA           NA    NA
## coef.var       NA   NA   NA             NA           NA           NA    NA
##              population city distance location_description latitude longitude
## nbr.val               1   NA  1.00000                   NA   1.0000      1.00
## nbr.null              0   NA  0.00000                   NA   0.0000      0.00
## nbr.na                0   NA  0.00000                   NA   0.0000      0.00
## min                1283   NA 13.40535                   NA   7.5955    -80.38
## max                1283   NA 13.40535                   NA   7.5955    -80.38
## range                 0   NA  0.00000                   NA   0.0000      0.00
## sum                1283   NA 13.40535                   NA   7.5955    -80.38
## median             1283   NA 13.40535                   NA   7.5955    -80.38
## mean               1283   NA 13.40535                   NA   7.5955    -80.38
## SE.mean              NA   NA       NA                   NA       NA        NA
## CI.mean.0.95        NaN   NA      NaN                   NA      NaN       NaN
## var                  NA   NA       NA                   NA       NA        NA
## std.dev              NA   NA       NA                   NA       NA        NA
## coef.var             NA   NA       NA                   NA       NA        NA
##              geolocation hazard_type landslide_type landslide_size trigger
## nbr.val               NA          NA             NA             NA      NA
## nbr.null              NA          NA             NA             NA      NA
## nbr.na                NA          NA             NA             NA      NA
## min                   NA          NA             NA             NA      NA
## max                   NA          NA             NA             NA      NA
## range                 NA          NA             NA             NA      NA
## sum                   NA          NA             NA             NA      NA
## median                NA          NA             NA             NA      NA
## mean                  NA          NA             NA             NA      NA
## SE.mean               NA          NA             NA             NA      NA
## CI.mean.0.95          NA          NA             NA             NA      NA
## var                   NA          NA             NA             NA      NA
## std.dev               NA          NA             NA             NA      NA
## coef.var              NA          NA             NA             NA      NA
##              storm_name injuries fatalities source_name source_link prop ypos
## nbr.val              NA        0          1          NA          NA    1    1
## nbr.null             NA        0          1          NA          NA    0    0
## nbr.na               NA        1          0          NA          NA    0    0
## min                  NA      Inf          0          NA          NA  100   50
## max                  NA     -Inf          0          NA          NA  100   50
## range                NA     -Inf          0          NA          NA    0    0
## sum                  NA        0          0          NA          NA  100   50
## median               NA       NA          0          NA          NA  100   50
## mean                 NA      NaN          0          NA          NA  100   50
## SE.mean              NA       NA         NA          NA          NA   NA   NA
## CI.mean.0.95         NA      NaN        NaN          NA          NA  NaN  NaN
## var                  NA       NA         NA          NA          NA   NA   NA
## std.dev              NA       NA         NA          NA          NA   NA   NA
## coef.var             NA       NA         NA          NA          NA   NA   NA
# Horizontal green box plot of `data`, the time-series object built from the
# distance column.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Panamá (Panamá)

library(readr)
library(knitr)
# Load the landslide catalogue straight from the course repository. `df` is
# reassigned further down, which is why the file is read again here.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used in the rest of the analysis
# (they are renamed by position, not by their original names).
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose state is "Panamá".
df_PA <- subset(df, subset = state == "Panamá")
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
750 8/26/08 NA NA Panama PA Panamá 1287 Cerro Azul 1.84596 NA 9.1559 -79.4295 (9.1559000000000008, -79.429500000000004) Landslide Landslide Medium Downpour NA NA NA NA http://news.xinhuanet.com/english/2008-08/27/content_9721298.htm
5562 9/29/13 NA NA Panama PA Panamá 69102 Las Cumbres 2.81479 NA 9.0655 -79.5516 (9.0655000000000001, -79.551599999999993) Landslide Landslide Medium Downpour NA NA NA www.newsroompanama.com http://www.newsroompanama.com/panama/6467--nearly-800-affeccted-by-floods-in-panama-city-region.html
6701 9/14/14 Morning NA Panama PA Panamá 19782 Alcaldedíaz 1.41526 Urban area 9.1139 -79.5626 (9.1138999999999992, -79.562600000000003) Landslide Other Small Rain NA 0 0 Estrella de Panama http://laestrella.com.pa/panama/nacional/colapsa-pared-vivienda-alcade-diaz/23804690/foto/50372
6703 6/17/14 NA NA Panama PA Panamá 321501 San Miguelito 4.18074 Unknown 9.0329 -79.5380 (9.0328999999999997, -79.537999999999997) Landslide Mudslide Medium Unknown NA 0 0 PanamaAmerica http://www.panamaamerica.com.pa/nacion/reportan-deslizamiento-en-autopista-panama-colon#
6705 6/24/14 NA NA Panama PA Panamá 9169 Ancón 2.24069 Below road 8.9763 -79.5391 (8.9763000000000002, -79.539100000000005) Landslide Landslide Small Unknown NA 0 0 Ministerio de Obras Públicas de Panamá http://www.mop.gob.pa/poste-electrico-de-la-avenida-frangipany-cae-sobre-camion-del-mop/
7453 6/5/15 Afternoon NA Panama PA Panamá 0 Las Margaritas 0.48725 Above road 9.1805 -79.0896 (9.1805000000000003, -79.089600000000004) Landslide Landslide Medium Downpour NA 0 0 La Estrella de Panama http://laestrella.com.pa/panama/nacional/deslizamiento-tierra-bayano-inundaciones-colon/23870997/foto/136658

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on the y axis, one bar per row, coloured by city
# and grouped by state.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the same mapping as the grouped chart, with the distances
# piled on top of each other within each state.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar per `state` value, wrapped into polar
# coordinates so that each distance becomes a slice.
ggplot(data = df_PA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each row (`prop`) and the
# mid-point of its slice on the cumulative scale (`ypos`), used to position
# the labels.
df_PA <- df_PA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: stacked bar in polar coordinates, no axes or legend.
ggplot(df_PA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown palette Set4"
  # and fell back to a default); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a time-series object with 12 observations per
# unit of time, starting at 2008.
# NOTE(review): the values are taken in the current row order of df_PA (sorted
# by city, descending, just above) and the `date` column is not used — confirm
# this is the intended series.
data <- ts(data = df_PA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
4.18074
2.57852
2.59449
3.30848
3.54386
0.48725
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` is passed to labs(), where it acts as a
# label for the colour scale rather than as the line colour — presumably a
# green line was intended; confirm.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a numeric vector whose element names are the cities; the names
# become the bar labels of the Pareto chart (a city appears once per row, so
# labels can repeat).
distance <- setNames(df_PA$distance, df_PA$city)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por ciudades",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                 
## Pareto chart analysis for distance
##                   Frequency  Cum.Freq. Percentage Cum.Percent.
##   San Miguelito    4.180740   4.180740  15.591951    15.591951
##   San Miguelito    3.543860   7.724600  13.216725    28.808676
##   San Miguelito    3.308480  11.033080  12.338882    41.147558
##   Las Cumbres      2.814790  13.847870  10.497679    51.645238
##   San Miguelito    2.594490  16.442360   9.676077    61.321314
##   San Miguelito    2.578520  19.020880   9.616517    70.937832
##   Ancón            2.240690  21.261570   8.356590    79.294421
##   Cerro Azul       1.845960  23.107530   6.884455    86.178877
##   Arraiján         1.803410  24.910940   6.725766    92.904643
##   Alcaldedíaz      1.415260  26.326200   5.278172    98.182815
##   Las Margaritas   0.487250  26.813450   1.817185   100.000000
# Stem-and-leaf display of the distance column of df_PA.
stem(df_PA$distance)
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 488
##   2 | 2668
##   3 | 35
##   4 | 2
# Print the first rows of df_PA, which now also carries the prop and ypos
# columns added for the pie chart.
head(df_PA)
## # A tibble: 6 x 25
##      id date    time      continent_code country_name country_code state  population
##   <dbl> <chr>   <chr>     <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  6703 6/17/14 <NA>      <NA>           Panama       PA           Panamá     321501
## 2  7506 9/10/15 <NA>      <NA>           Panama       PA           Panamá     321501
## 3  7509 9/10/15 <NA>      <NA>           Panama       PA           Panamá     321501
## 4  7510 8/31/15 <NA>      <NA>           Panama       PA           Panamá     321501
## 5  7511 8/31/15 <NA>      <NA>           Panama       PA           Panamá     321501
## 6  7453 6/5/15  Afternoon <NA>           Panama       PA           Panamá          0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_PA as a formatted table.
knitr::kable(head(df_PA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6703 6/17/14 NA NA Panama PA Panamá 321501 San Miguelito 4.18074 Unknown 9.0329 -79.5380 (9.0328999999999997, -79.537999999999997) Landslide Mudslide Medium Unknown NA 0 0 PanamaAmerica http://www.panamaamerica.com.pa/nacion/reportan-deslizamiento-en-autopista-panama-colon# 15.591951 7.795975
7506 9/10/15 NA NA Panama PA Panamá 321501 San Miguelito 2.57852 Unknown 9.0539 -79.4945 (9.0539000000000005, -79.494500000000002) Landslide Landslide Large Downpour NA 45 0 IDAAN http://www.idaan.gob.pa/noticias/idaan-prest%C3%B3-apoyo-los-afectados-por-inundaciones-y-deslizamientos-de-tierra 9.616517 20.400210
7509 9/10/15 NA NA Panama PA Panamá 321501 San Miguelito 2.59449 Urban area 9.0417 -79.4487 (9.0417000000000005, -79.448700000000002) Landslide Landslide Large Downpour NA 0 0 Cuerpos de Bomberos http://www.bomberos.gob.pa/2015/09/gobierno-de-panama-reitera-el-apoyo-a-todos-los-afectados-por-inundaciones-en-el-pais/ 9.676077 30.046506
7510 8/31/15 NA NA Panama PA Panamá 321501 San Miguelito 3.30848 Urban area 9.0688 -79.4944 (9.0687999999999995, -79.494399999999999) Landslide Landslide Medium Downpour NA 0 0 Critica http://www.critica.com.pa/nacional/deslizamientos-de-tierra-causan-afectaciones-3-casas-404174 12.338882 41.053986
7511 8/31/15 NA NA Panama PA Panamá 321501 San Miguelito 3.54386 Urban area 9.0772 -79.4882 (9.0771999999999995, -79.488200000000006) Landslide Landslide Medium Downpour NA 0 0 Critica http://www.critica.com.pa/nacional/deslizamientos-de-tierra-causan-afectaciones-3-casas-404174 13.216725 53.831790
7453 6/5/15 Afternoon NA Panama PA Panamá 0 Las Margaritas 0.48725 Above road 9.1805 -79.0896 (9.1805000000000003, -79.089600000000004) Landslide Landslide Medium Downpour NA 0 0 La Estrella de Panama http://laestrella.com.pa/panama/nacional/deslizamiento-tierra-bayano-inundaciones-colon/23870997/foto/136658 1.817185 61.348745
# Stem-and-leaf display of the distance column (default scale).
stem(df_PA$distance)
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 488
##   2 | 2668
##   3 | 35
##   4 | 2
# Same stem-and-leaf display, drawn with scale = 2.
stem(df_PA$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 
##   0 | 5
##   1 | 4
##   1 | 88
##   2 | 2
##   2 | 668
##   3 | 3
##   3 | 5
##   4 | 2

Tablas de frecuencia

library(questionr)
# One-way frequency table of `distance` with cumulative percentages, rows
# sorted by decreasing count and a final "Total" row.
# NOTE(review): this object shares its name with base R's table() function,
# which is still called later in the document.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.48725 1 9.1 9.1 9.1 9.1
1.41526 1 9.1 9.1 18.2 18.2
1.80341 1 9.1 9.1 27.3 27.3
1.84596 1 9.1 9.1 36.4 36.4
2.24069 1 9.1 9.1 45.5 45.5
2.57852 1 9.1 9.1 54.5 54.5
2.59449 1 9.1 9.1 63.6 63.6
2.81479 1 9.1 9.1 72.7 72.7
3.30848 1 9.1 9.1 81.8 81.8
3.54386 1 9.1 9.1 90.9 90.9
4.18074 1 9.1 9.1 100.0 100.0
Total 11 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  12 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
##  $ val%   : num  9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
##  $ %cum   : num  9.1 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 ...
##  $ val%cum: num  9.1 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 ...
# Category labels (the observed distances, as text) and their counts come from
# the frequency table. Its last row is the "Total" summary, so it is dropped.
# head(, -1) is used instead of 1:(length - 1): the latter counts backwards
# (1, 0) when only the "Total" row exists and would keep that row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.48725 1
1.41526 1
1.80341 1
1.84596 1
2.24069 1
2.57852 1
2.59449 1
2.81479 1
3.30848 1
3.54386 1
4.18074 1
library(ggplot2)
# The categories in `x` are numbers stored as text. Left as-is, ggplot2 orders
# them alphabetically (e.g. "10" before "9"), so the axis is explicitly
# ordered by numeric value.
ggplot(
  data = df,
  aes(x = reorder(x, as.numeric(as.character(x))), y = y)
) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: range divided by the number of classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# A zero range (a single observation, or all values equal) gives w == 0.
# seq() would then return a single break, which cut() interprets as a *count*
# of intervals rather than as cut points. Fall back to a unit width so there
# are always at least two breaks.
if (w == 0) {
  w <- 1
}
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.48725 1.48725 2.48725 3.48725 4.48725
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum, which equals the first break,
# falls outside every (a, b] class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.487,1.49] 1 0.1 1
(1.49,2.49] 3 0.3 4
(2.49,3.49] 4 0.4 8
(3.49,4.49] 2 0.2 10
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.487,1.49]",..: 1 2 3 4
##  $ Freq    : int  1 3 4 2
##  $ Rel_Freq: num  0.1 0.3 0.4 0.2
##  $ Cum_Freq: int  1 4 8 10
# Two-column data frame (class label, count) passed to the bar chart below.
df <- data.frame(x = Freq_table[["distance"]], y = Freq_table[["Freq"]])
knitr::kable(df)
x y
(0.487,1.49] 1
(1.49,2.49] 3
(2.49,3.49] 4
(3.49,4.49] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class, with
# bar height taken directly from the count column.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_PA. Non-numeric columns are
# passed in as well and come back as all-NA columns in the printed table.
df_PA %>% stat.desc()
##                        id date time continent_code country_name country_code
## nbr.val      1.100000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          7.500000e+02   NA   NA             NA           NA           NA
## max          7.512000e+03   NA   NA             NA           NA           NA
## range        6.762000e+03   NA   NA             NA           NA           NA
## sum          7.142200e+04   NA   NA             NA           NA           NA
## median       7.453000e+03   NA   NA             NA           NA           NA
## mean         6.492909e+03   NA   NA             NA           NA           NA
## SE.mean      6.035510e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.344795e+03   NA   NA             NA           NA           NA
## var          4.007012e+06   NA   NA             NA           NA           NA
## std.dev      2.001752e+03   NA   NA             NA           NA           NA
## coef.var     3.082982e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.100000e+01   NA 11.0000000                   NA
## nbr.null        NA 1.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 0.000000e+00   NA  0.4872500                   NA
## max             NA 3.215010e+05   NA  4.1807400                   NA
## range           NA 3.215010e+05   NA  3.6934900                   NA
## sum             NA 1.783660e+06   NA 26.8134500                   NA
## median          NA 7.681500e+04   NA  2.5785200                   NA
## mean            NA 1.621509e+05   NA  2.4375864                   NA
## SE.mean         NA 4.658766e+04   NA  0.3140292                   NA
## CI.mean.0.95    NA 1.038038e+05   NA  0.6997007                   NA
## var             NA 2.387451e+10   NA  1.0847578                   NA
## std.dev         NA 1.545138e+05   NA  1.0415171                   NA
## coef.var        NA 9.529011e-01   NA  0.4272739                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      11.000000000  1.100000e+01          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min           8.949600000 -7.963360e+01          NA          NA             NA
## max           9.180500000 -7.908960e+01          NA          NA             NA
## range         0.230900000  5.440000e-01          NA          NA             NA
## sum          99.716200000 -8.742698e+02          NA          NA             NA
## median        9.065500000 -7.949450e+01          NA          NA             NA
## mean          9.065109091 -7.947907e+01          NA          NA             NA
## SE.mean       0.020679157  4.251380e-02          NA          NA             NA
## CI.mean.0.95  0.046076033  9.472666e-02          NA          NA             NA
## var           0.004703903  1.988166e-02          NA          NA             NA
## std.dev       0.068585005  1.410023e-01          NA          NA             NA
## coef.var      0.007565822 -1.774081e-03          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA   9.00000          9          NA
## nbr.null                 NA      NA         NA   8.00000          9          NA
## nbr.na                   NA      NA         NA   2.00000          2          NA
## min                      NA      NA         NA   0.00000          0          NA
## max                      NA      NA         NA  45.00000          0          NA
## range                    NA      NA         NA  45.00000          0          NA
## sum                      NA      NA         NA  45.00000          0          NA
## median                   NA      NA         NA   0.00000          0          NA
## mean                     NA      NA         NA   5.00000          0          NA
## SE.mean                  NA      NA         NA   5.00000          0          NA
## CI.mean.0.95             NA      NA         NA  11.53002          0          NA
## var                      NA      NA         NA 225.00000          0          NA
## std.dev                  NA      NA         NA  15.00000          0          NA
## coef.var                 NA      NA         NA   3.00000        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA  11.0000000  11.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   1.8171850   7.7959755
## max                   NA  15.5919511  97.3609140
## range                 NA  13.7747660  89.5649385
## sum                   NA 100.0000000 629.0874356
## median                NA   9.6165171  61.3487448
## mean                  NA   9.0909091  57.1897669
## SE.mean               NA   1.1711630   8.8826226
## CI.mean.0.95          NA   2.6095138  19.7917165
## var                   NA  15.0878511 867.9108284
## std.dev               NA   3.8843083  29.4603263
## coef.var              NA   0.4272739   0.5151328
# Horizontal box plot of `data`.
# NOTE(review): `data` is defined outside this chunk -- presumably the
# Panama distance series; confirm.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para México

library(readr)
library(knitr)
# Download the full catalog (all countries) from the course repository.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names that are easy to use in code.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records reported for Mexico and preview them.
df_MX <- filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per record, coloured by state.
ggplot(df_MX, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Single stacked bar: distances piled on top of each other, coloured by state.
ggplot(df_MX, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped around polar coordinates on y.
ggplot(df_MX, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() instead of require(): a missing package should stop here rather
# than return FALSE and fail later at percent().
library(scales)

# prop: each record's share of the total distance, in percent.
# ypos: centre of the record's slice on the cumulative scale (label anchor).
df_MX <- df_MX %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_MX, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette, and the fallback palette has fewer
  # colours than there are states. Generate exactly one qualitative colour per
  # state instead.
  scale_fill_manual(
    values = grDevices::hcl.colors(n_distinct(df_MX$state), palette = "Set 3")
  )
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly series labelled from 2008 onwards.
# NOTE(review): values are taken in the current row order of df_MX (sorted by
# state in the pie-chart step), not by `date`, so the time index is nominal --
# confirm this is intended.
data <- ts(data = df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
9.51003
8.28739
1.27837
1.52983
2.85382
3.73160
# Line plot of the series. The colour is passed to autoplot() so the line is
# actually drawn in green; inside labs(), `colour = "green"` only sets a legend
# title and leaves the line colour unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Raw distances labelled by state. `distance` stays a global because the
# frequency tables further down reuse it.
distance <- df_MX$distance
names(distance) <- df_MX$state
# A Pareto chart needs one bar per category. Charting the raw vector draws one
# bar per record with repeated state labels, so add up the distances of each
# state first.
distance_by_state <- vapply(split(distance, names(distance)), sum, numeric(1))
pareto.chart(distance_by_state,
             ylab = "distance",
             col = heat.colors(length(distance_by_state)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por estados"
)

##                       
## Pareto chart analysis for distance
##                           Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Guerrero              32.12708000  32.12708000   7.76637116   7.76637116
##   Oaxaca                24.67589000  56.80297000   5.96512725  13.73149841
##   Nayarit               21.80060000  78.60357000   5.27005726  19.00155566
##   Nayarit               19.41353000  98.01710000   4.69300912  23.69456478
##   Oaxaca                16.16369000 114.18079000   3.90739575  27.60196053
##   Oaxaca                15.74984000 129.93063000   3.80735203  31.40931256
##   Tabasco               15.22260000 145.15323000   3.67989751  35.08921007
##   Guerrero              14.04274000 159.19597000   3.39467922  38.48388929
##   Baja California       12.53758000 171.73355000   3.03082321  41.51471249
##   Baja California       12.36500000 184.09855000   2.98910388  44.50381637
##   Guerrero              12.33417000 196.43272000   2.98165106  47.48546743
##   Oaxaca                11.83490000 208.26762000   2.86095798  50.34642541
##   Sinaloa               10.88351000 219.15113000   2.63096983  52.97739524
##   Hidalgo                9.78251000 228.93364000   2.36481509  55.34221033
##   Oaxaca                 9.56829000 238.50193000   2.31302974  57.65524007
##   Veracruz-Llave         9.51003000 248.01196000   2.29894602  59.95418609
##   Tabasco                8.93271000 256.94467000   2.15938521  62.11357130
##   Chiapas                8.46579000 265.41046000   2.04651239  64.16008369
##   Veracruz-Llave         8.28739000 273.69785000   2.00338614  66.16346982
##   Chiapas                7.93996000 281.63781000   1.91939872  68.08286855
##   Puebla                 7.93258000 289.57039000   1.91761469  70.00048324
##   Guerrero               7.07138000 296.64177000   1.70942898  71.70991222
##   Guerrero               6.80950000 303.45127000   1.64612235  73.35603457
##   Baja California        6.46156000 309.91283000   1.56201165  74.91804622
##   Puebla                 5.24855000 315.16138000   1.26877971  76.18682593
##   Tabasco                4.81680000 319.97818000   1.16440886  77.35123479
##   Chiapas                4.68443000 324.66261000   1.13240986  78.48364465
##   Veracruz-Llave         4.51820000 329.18081000   1.09222557  79.57587021
##   México                 4.40801000 333.58882000   1.06558834  80.64145855
##   Tabasco                4.32007000 337.90889000   1.04432980  81.68578835
##   Tabasco                4.19108000 342.09997000   1.01314787  82.69893622
##   Michoacán              4.18059000 346.28056000   1.01061203  83.70954826
##   Guerrero               4.10830000 350.38886000   0.99313671  84.70268497
##   Chiapas                3.74149000 354.13035000   0.90446440  85.60714937
##   Veracruz-Llave         3.73160000 357.86195000   0.90207360  86.50922296
##   Oaxaca                 3.64682000 361.50877000   0.88157896  87.39080192
##   The Federal District   3.49173000 365.00050000   0.84408764  88.23488956
##   Michoacán              3.42740000 368.42790000   0.82853656  89.06342613
##   Michoacán              3.36905000 371.79695000   0.81443109  89.87785721
##   Nuevo León             3.30074000 375.09769000   0.79791789  90.67577511
##   Chihuahua              3.05542000 378.15311000   0.73861446  91.41438956
##   Veracruz-Llave         2.85382000 381.00693000   0.68987986  92.10426942
##   México                 2.59637000 383.60330000   0.62764413  92.73191355
##   Baja California        2.49770000 386.10100000   0.60379173  93.33570528
##   Veracruz               2.47800000 388.57900000   0.59902947  93.93473475
##   Tabasco                2.15703000 390.73603000   0.52143847  94.45617322
##   Chiapas                2.06743000 392.80346000   0.49977865  94.95595187
##   Veracruz               1.93516000 394.73862000   0.46780382  95.42375570
##   Chiapas                1.73469000 396.47331000   0.41934239  95.84309808
##   Michoacán              1.69508000 398.16839000   0.40976710  96.25286518
##   Puebla                 1.68294000 399.85133000   0.40683239  96.65969757
##   Veracruz-Llave         1.52983000 401.38116000   0.36981972  97.02951729
##   Chiapas                1.41805000 402.79921000   0.34279812  97.37231541
##   Veracruz-Llave         1.27837000 404.07758000   0.30903200  97.68134741
##   México                 1.06048000 405.13806000   0.25635947  97.93770688
##   Chiapas                0.94118000 406.07924000   0.22752000  98.16522688
##   Guerrero               0.90692000 406.98616000   0.21923802  98.38446490
##   Guerrero               0.88149000 407.86765000   0.21309059  98.59755549
##   Oaxaca                 0.78340000 408.65105000   0.18937840  98.78693390
##   Chiapas                0.76257000 409.41362000   0.18434298  98.97127688
##   México                 0.66626000 410.07988000   0.16106109  99.13233796
##   Hidalgo                0.64483000 410.72471000   0.15588062  99.28821858
##   Oaxaca                 0.63550000 411.36021000   0.15362519  99.44184377
##   Veracruz-Llave         0.50188000 411.86209000   0.12132402  99.56316779
##   Colima                 0.36051000 412.22260000   0.08714936  99.65031715
##   Chiapas                0.31118000 412.53378000   0.07522437  99.72554152
##   Puebla                 0.30326000 412.83704000   0.07330980  99.79885132
##   Oaxaca                 0.28905000 413.12609000   0.06987468  99.86872600
##   The Federal District   0.15208000 413.27817000   0.03676368  99.90548968
##   The Federal District   0.15208000 413.43025000   0.03676368  99.94225337
##   Veracruz-Llave         0.09971000 413.52996000   0.02410380  99.96635717
##   Jalisco                0.08269000 413.61265000   0.01998941  99.98634658
##   Baja California Sur    0.05648000 413.66913000   0.01365342 100.00000000
# Stem-and-leaf display of the Mexican distances at the default scale.
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000000000111111111111222222222333333344444444
##   0 | 555567788889
##   1 | 000122234
##   1 | 5669
##   2 | 2
##   2 | 5
##   3 | 2
# First six rows, printed with the tibble printer.
head(df_MX, n = 6L)
## # A tibble: 6 x 25
##      id date    time     continent_code country_name country_code state population
##   <dbl> <chr>   <chr>    <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   115 7/4/07  <NA>     <NA>           Mexico       MX           Vera~       1947
## 2  2438 9/17/10 <NA>     <NA>           Mexico       MX           Vera~       1324
## 3  3684 7/1/11  <NA>     <NA>           Mexico       MX           Vera~     425148
## 4  5403 8/26/13 20:20:00 <NA>           Mexico       MX           Vera~      30607
## 5  5405 8/26/13 <NA>     <NA>           Mexico       MX           Vera~      15800
## 6  5406 8/26/13 <NA>     <NA>           Mexico       MX           Vera~       3198
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same six rows as a formatted table, now including `prop` and `ypos`.
knitr::kable(head(df_MX, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin 2.2989460 1.149473
2438 9/17/10 NA NA Mexico MX Veracruz-Llave 1324 El Hatito 8.28739 NA 19.2818 -96.3149 (19.2818, -96.314899999999994) Landslide Landslide Medium Tropical cyclone Hurricane Karl NA 2 NA http://www.lfpress.com/news/world/2010/09/17/15382976-reuters.html 2.0033861 3.300639
3684 7/1/11 NA NA Mexico MX Veracruz-Llave 425148 Xalapa de Enríquez 1.27837 NA 19.5426 -96.9137 (19.5426, -96.913700000000006) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arlene NA 0 NA http://edition.cnn.com/2011/WORLD/americas/07/04/mexico.arlene.deaths/ 0.3090320 4.456848
5403 8/26/13 20:20:00 NA Mexico MX Veracruz-Llave 30607 Coatzintla 1.52983 NA 20.5004 -97.4647 (20.500399999999999, -97.464699999999993) Landslide Landslide Medium Tropical cyclone Fernand NA 3 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491 0.3698197 4.796274
5405 8/26/13 NA NA Mexico MX Veracruz-Llave 15800 Altotonga 2.85382 NA 19.7906 -97.2428 (19.790600000000001, -97.242800000000003) Landslide Landslide Medium Tropical cyclone Fernand NA 1 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491 0.6898799 5.326124
5406 8/26/13 NA NA Mexico MX Veracruz-Llave 3198 Yecuatla 3.73160 NA 19.8413 -96.8005 (19.8413, -96.8005) Landslide Landslide Medium Downpour NA NA 9 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491 0.9020736 6.122101
# Stem-and-leaf display at the default scale, for comparison with scale = 2.
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000000000111111111111222222222333333344444444
##   0 | 555567788889
##   1 | 000122234
##   1 | 5669
##   2 | 2
##   2 | 5
##   3 | 2
# Same display with the plot length doubled, giving finer stems.
stem(df_MX[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 11122333456678899913457779
##    2 | 12556913445677
##    4 | 122345782
##    6 | 58199
##    8 | 359568
##   10 | 98
##   12 | 345
##   14 | 027
##   16 | 2
##   18 | 4
##   20 | 8
##   22 | 
##   24 | 7
##   26 | 
##   28 | 
##   30 | 
##   32 | 1

Tablas de frecuencia

library(questionr)
# Frequency of each distinct distance, sorted by decreasing count, with
# cumulative percentages and a final "Total" row.
# NOTE(review): the result is stored as `table`, which masks base::table for
# value lookups; later chunks read it under this name.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.15208 2 2.7 2.7 2.7 2.7
0.05648 1 1.4 1.4 4.1 4.1
0.08269 1 1.4 1.4 5.5 5.5
0.09971 1 1.4 1.4 6.8 6.8
0.28905 1 1.4 1.4 8.2 8.2
0.30326 1 1.4 1.4 9.6 9.6
0.31118 1 1.4 1.4 11.0 11.0
0.36051 1 1.4 1.4 12.3 12.3
0.50188 1 1.4 1.4 13.7 13.7
0.6355 1 1.4 1.4 15.1 15.1
0.64483 1 1.4 1.4 16.4 16.4
0.66626 1 1.4 1.4 17.8 17.8
0.76257 1 1.4 1.4 19.2 19.2
0.7834 1 1.4 1.4 20.5 20.5
0.88149 1 1.4 1.4 21.9 21.9
0.90692 1 1.4 1.4 23.3 23.3
0.94118 1 1.4 1.4 24.7 24.7
1.06048 1 1.4 1.4 26.0 26.0
1.27837 1 1.4 1.4 27.4 27.4
1.41805 1 1.4 1.4 28.8 28.8
1.52983 1 1.4 1.4 30.1 30.1
1.68294 1 1.4 1.4 31.5 31.5
1.69508 1 1.4 1.4 32.9 32.9
1.73469 1 1.4 1.4 34.2 34.2
1.93516 1 1.4 1.4 35.6 35.6
2.06743 1 1.4 1.4 37.0 37.0
2.15703 1 1.4 1.4 38.4 38.4
2.478 1 1.4 1.4 39.7 39.7
2.4977 1 1.4 1.4 41.1 41.1
2.59637 1 1.4 1.4 42.5 42.5
2.85382 1 1.4 1.4 43.8 43.8
3.05542 1 1.4 1.4 45.2 45.2
3.30074 1 1.4 1.4 46.6 46.6
3.36905 1 1.4 1.4 47.9 47.9
3.4274 1 1.4 1.4 49.3 49.3
3.49173 1 1.4 1.4 50.7 50.7
3.64682 1 1.4 1.4 52.1 52.1
3.7316 1 1.4 1.4 53.4 53.4
3.74149 1 1.4 1.4 54.8 54.8
4.1083 1 1.4 1.4 56.2 56.2
4.18059 1 1.4 1.4 57.5 57.5
4.19108 1 1.4 1.4 58.9 58.9
4.32007 1 1.4 1.4 60.3 60.3
4.40801 1 1.4 1.4 61.6 61.6
4.5182 1 1.4 1.4 63.0 63.0
4.68443 1 1.4 1.4 64.4 64.4
4.8168 1 1.4 1.4 65.8 65.8
5.24855 1 1.4 1.4 67.1 67.1
6.46156 1 1.4 1.4 68.5 68.5
6.8095 1 1.4 1.4 69.9 69.9
7.07138 1 1.4 1.4 71.2 71.2
7.93258 1 1.4 1.4 72.6 72.6
7.93996 1 1.4 1.4 74.0 74.0
8.28739 1 1.4 1.4 75.3 75.3
8.46579 1 1.4 1.4 76.7 76.7
8.93271 1 1.4 1.4 78.1 78.1
9.51003 1 1.4 1.4 79.5 79.5
9.56829 1 1.4 1.4 80.8 80.8
9.78251 1 1.4 1.4 82.2 82.2
10.88351 1 1.4 1.4 83.6 83.6
11.8349 1 1.4 1.4 84.9 84.9
12.33417 1 1.4 1.4 86.3 86.3
12.365 1 1.4 1.4 87.7 87.7
12.53758 1 1.4 1.4 89.0 89.0
14.04274 1 1.4 1.4 90.4 90.4
15.2226 1 1.4 1.4 91.8 91.8
15.74984 1 1.4 1.4 93.2 93.2
16.16369 1 1.4 1.4 94.5 94.5
19.41353 1 1.4 1.4 95.9 95.9
21.8006 1 1.4 1.4 97.3 97.3
24.67589 1 1.4 1.4 98.6 98.6
32.12708 1 1.4 1.4 100.0 100.0
Total 73 100.0 100.0 100.0 100.0
# Show the classes and column types of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  73 obs. of  5 variables:
##  $ n      : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  2.7 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
##  $ val%   : num  2.7 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
##  $ %cum   : num  2.7 4.1 5.5 6.8 8.2 9.6 11 12.3 13.7 15.1 ...
##  $ val%cum: num  2.7 4.1 5.5 6.8 8.2 9.6 11 12.3 13.7 15.1 ...
# Labels (row names) and counts from the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(, -1) is used instead of
# 1:(length - 1) because 1:0 counts down and would keep the first element when
# the table holds nothing but the total row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.15208 2
0.05648 1
0.08269 1
0.09971 1
0.28905 1
0.30326 1
0.31118 1
0.36051 1
0.50188 1
0.6355 1
0.64483 1
0.66626 1
0.76257 1
0.7834 1
0.88149 1
0.90692 1
0.94118 1
1.06048 1
1.27837 1
1.41805 1
1.52983 1
1.68294 1
1.69508 1
1.73469 1
1.93516 1
2.06743 1
2.15703 1
2.478 1
2.4977 1
2.59637 1
2.85382 1
3.05542 1
3.30074 1
3.36905 1
3.4274 1
3.49173 1
3.64682 1
3.7316 1
3.74149 1
4.1083 1
4.18059 1
4.19108 1
4.32007 1
4.40801 1
4.5182 1
4.68443 1
4.8168 1
5.24855 1
6.46156 1
6.8095 1
7.07138 1
7.93258 1
7.93996 1
8.28739 1
8.46579 1
8.93271 1
9.51003 1
9.56829 1
9.78251 1
10.88351 1
11.8349 1
12.33417 1
12.365 1
12.53758 1
14.04274 1
15.2226 1
15.74984 1
16.16369 1
19.41353 1
21.8006 1
24.67589 1
32.12708 1
library(ggplot2)
# One bar per distinct distance value; bar height is its count.
# X tick labels are rotated 90 degrees so the many labels do not overlap.
ggplot(df, aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n), kept as both its ceiling and its floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# The class width was computed but never applied, so the plot below used the
# ungrouped `df` from the previous section. Build the classes and the grouped
# table here. include.lowest = TRUE keeps the minimum, which equals bins[1],
# inside the first class.
bins <- seq(min(distance), max(distance) + w, by = w)
distance_class <- cut(distance, bins, include.lowest = TRUE)
# base:: is explicit because a data frame named `table` exists at this point.
Freq_table <- transform(
  base::table(distance = distance_class),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)

library(ggplot2)

# Bar chart of the grouped table: one bar per distance class.
ggplot(data = df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Rango de deslizamiento") +
  ylab("Frecuencia") +
  theme(axis.text.x = element_text(angle = 90))

library(pastecs)
# stat.desc() returns an all-NA column for every non-numeric column, which
# buries the useful figures. Summarise the numeric columns only.
stat.desc(df_MX[vapply(df_MX, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      7.300000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.150000e+02   NA   NA             NA           NA           NA
## max          7.518000e+03   NA   NA             NA           NA           NA
## range        7.403000e+03   NA   NA             NA           NA           NA
## sum          2.913040e+05   NA   NA             NA           NA           NA
## median       3.834000e+03   NA   NA             NA           NA           NA
## mean         3.990466e+03   NA   NA             NA           NA           NA
## SE.mean      2.764651e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 5.511232e+02   NA   NA             NA           NA           NA
## var          5.579607e+06   NA   NA             NA           NA           NA
## std.dev      2.362119e+03   NA   NA             NA           NA           NA
## coef.var     5.919407e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 7.300000e+01   NA  73.0000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 1.005000e+03   NA   0.0564800                   NA
## max             NA 1.229419e+07   NA  32.1270800                   NA
## range           NA 1.229319e+07   NA  32.0706000                   NA
## sum             NA 3.298622e+07   NA 413.6691300                   NA
## median          NA 6.089000e+03   NA   3.4917300                   NA
## mean            NA 4.518661e+05   NA   5.6667004                   NA
## SE.mean         NA 2.369285e+05   NA   0.7442396                   NA
## CI.mean.0.95    NA 4.723084e+05   NA   1.4836145                   NA
## var             NA 4.097865e+12   NA  40.4341560                   NA
## std.dev         NA 2.024318e+06   NA   6.3587857                   NA
## coef.var        NA 4.479908e+00   NA   1.1221320                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        73.0000000  7.300000e+01          NA          NA             NA
## nbr.null        0.0000000  0.000000e+00          NA          NA             NA
## nbr.na          0.0000000  0.000000e+00          NA          NA             NA
## min            15.0337000 -1.170898e+02          NA          NA             NA
## max            32.5755000 -9.129880e+01          NA          NA             NA
## range          17.5418000  2.579100e+01          NA          NA             NA
## sum          1427.3326000 -7.208526e+03          NA          NA             NA
## median         19.0294000 -9.749970e+01          NA          NA             NA
## mean           19.5525014 -9.874694e+01          NA          NA             NA
## SE.mean         0.4625032  6.876754e-01          NA          NA             NA
## CI.mean.0.95    0.9219832  1.370856e+00          NA          NA             NA
## var            15.6153711  3.452151e+01          NA          NA             NA
## std.dev         3.9516289  5.875501e+00          NA          NA             NA
## coef.var        0.2021035 -5.950059e-02          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 19.0000000  70.000000
## nbr.null                 NA      NA         NA 15.0000000  27.000000
## nbr.na                   NA      NA         NA 54.0000000   3.000000
## min                      NA      NA         NA  0.0000000   0.000000
## max                      NA      NA         NA  8.0000000  71.000000
## range                    NA      NA         NA  8.0000000  71.000000
## sum                      NA      NA         NA 15.0000000 284.000000
## median                   NA      NA         NA  0.0000000   2.000000
## mean                     NA      NA         NA  0.7894737   4.057143
## SE.mean                  NA      NA         NA  0.4625062   1.098632
## CI.mean.0.95             NA      NA         NA  0.9716894   2.191711
## var                      NA      NA         NA  4.0643275  84.489441
## std.dev                  NA      NA         NA  2.0160177   9.191814
## coef.var                 NA      NA         NA  2.5536225   2.265588
##              source_name source_link         prop         ypos
## nbr.val               NA          NA  73.00000000   73.0000000
## nbr.null              NA          NA   0.00000000    0.0000000
## nbr.na                NA          NA   0.00000000    0.0000000
## min                   NA          NA   0.01365342    1.1494730
## max                   NA          NA   7.76637116   99.2189942
## range                 NA          NA   7.75271773   98.0695212
## sum                   NA          NA 100.00000000 3545.1804284
## median                NA          NA   0.84408764   54.2381528
## mean                  NA          NA   1.36986301   48.5641155
## SE.mean               NA          NA   0.17991180    3.7293810
## CI.mean.0.95          NA          NA   0.35864762    7.4343851
## var                   NA          NA   2.36288267 1015.3046257
## std.dev               NA          NA   1.53716709   31.8638451
## coef.var              NA          NA   1.12213198    0.6561191
# Horizontal box plot of `data`, the series built from df_MX$distance.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Baja California (México)

library(readr)
library(knitr)
# Download the full catalog again so this section starts from unmodified data.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names that are easy to use in code.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records reported for Mexico and preview them.
df_MX <- filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Narrow down to Baja California. The filter runs on the whole catalog by
# state name alone and replaces the Mexico-wide df_MX built just above.
df_MX <- filter(df, state == "Baja California")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
417 1/7/08 NA NA Mexico MX Baja California 1376457 Tijuana 12.53758 NA 32.5755 -116.9016 (32.575499999999998, -116.9016) Landslide Landslide Medium Rain NA NA 1 NA http://www.signonsandiego.com/news/mexico/tijuana/20080108-1858-bn08tj.html
1838 5/5/10 NA NA Mexico MX Baja California 1376457 Tijuana 2.49770 NA 32.5250 -116.9999 (32.524999999999999, -116.9999) Landslide Mudslide Medium Rain NA NA 0 NA http://hisz.rsoe.hu/alertmap/woalert_read.php?lang=eng&cid=25962&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+RsoeEdis-EmergencyAndDisasterInformation+%28RSOE+EDIS+-+Emergency+and+Disaster+Information%29
5731 12/28/13 2:00:00 NA Mexico MX Baja California 9085 El Sauzal 12.36500 Below road 31.9780 -116.7776 (31.978000000000002, -116.77760000000001) Landslide Mudslide Medium Downpour NA NA 0 www.globalpost.com http://www.globalpost.com/dispatch/news/agencia-efe/131230/mudslide-closes-major-road-northwestern-mexico
7056 5/17/15 NA NA Mexico MX Baja California 1173 La Esperanza [Granjas Familiares] 6.46156 Urban area 32.5196 -117.0898 (32.519599999999997, -117.0898) Landslide Landslide Medium Rain NA 0 0 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# One bar per landslide record, placed side by side within the state and
# coloured by city; bar height is the recorded distance.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, but the per-city distances are stacked into
# a single bar for the state.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A pie chart is a stacked bar drawn in polar coordinates: the distance axis
# is mapped to the angle.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: each record's share of the total distance, in percent.
# ypos: mid-point of each slice on the cumulative scale, used to place labels.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" does not exist in ColorBrewer; "Set3" is a valid qualitative
  # palette with up to 12 colours.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# df_MX is currently sorted by city (for the pie chart), so put the
# observations in chronological order before building the series.
event_dates <- as.Date(df_MX$date, format = "%m/%d/%y")
# NOTE(review): frequency = 12 and start = 2008 treat the events as consecutive
# monthly observations, which the irregular event dates do not support; confirm
# that this is the intended time axis.
data <- ts(df_MX$distance[order(event_dates)], frequency = 12, start = 2008)
knitr::kable(head(data))
x
12.53758
2.49770
6.46156
12.36500
# The colour is passed to autoplot() so the line itself is drawn in green;
# given to labs(), `colour` would only name a colour legend.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_MX$distance, df_MX$city)
# Pareto chart of the distances, with cumulative-percentage ticks every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                                    
## Pareto chart analysis for distance
##                                     Frequency Cum.Freq. Percentage Cum.Percent.
##   Tijuana                            12.53758  12.53758   37.02569     37.02569
##   El Sauzal                          12.36500  24.90258   36.51603     73.54172
##   La Esperanza [Granjas Familiares]   6.46156  31.36414   19.08213     92.62385
##   Tijuana                             2.49770  33.86184    7.37615    100.00000
# Stem-and-leaf display of the distances
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 6
##   1 | 23
# Preview df_MX, which now also carries the prop and ypos columns
head(df_MX)
## # A tibble: 4 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   417 1/7/08   <NA>    <NA>           Mexico       MX           Baja~    1376457
## 2  1838 5/5/10   <NA>    <NA>           Mexico       MX           Baja~    1376457
## 3  7056 5/17/15  <NA>    <NA>           Mexico       MX           Baja~       1173
## 4  5731 12/28/13 2:00:00 <NA>           Mexico       MX           Baja~       9085
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
417 1/7/08 NA NA Mexico MX Baja California 1376457 Tijuana 12.53758 NA 32.5755 -116.9016 (32.575499999999998, -116.9016) Landslide Landslide Medium Rain NA NA 1 NA http://www.signonsandiego.com/news/mexico/tijuana/20080108-1858-bn08tj.html 37.02569 18.51285
1838 5/5/10 NA NA Mexico MX Baja California 1376457 Tijuana 2.49770 NA 32.5250 -116.9999 (32.524999999999999, -116.9999) Landslide Mudslide Medium Rain NA NA 0 NA http://hisz.rsoe.hu/alertmap/woalert_read.php?lang=eng&cid=25962&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+RsoeEdis-EmergencyAndDisasterInformation+%28RSOE+EDIS+-+Emergency+and+Disaster+Information%29 7.37615 40.71377
7056 5/17/15 NA NA Mexico MX Baja California 1173 La Esperanza [Granjas Familiares] 6.46156 Urban area 32.5196 -117.0898 (32.519599999999997, -117.0898) Landslide Landslide Medium Rain NA 0 0 NA NA 19.08213 53.94290
5731 12/28/13 2:00:00 NA Mexico MX Baja California 9085 El Sauzal 12.36500 Below road 31.9780 -116.7776 (31.978000000000002, -116.77760000000001) Landslide Mudslide Medium Downpour NA NA 0 www.globalpost.com http://www.globalpost.com/dispatch/news/agencia-efe/131230/mudslide-closes-major-road-northwestern-mexico 36.51603 81.74198
# Stem-and-leaf display of the distances (default scale)
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 6
##   1 | 23
# Same display with scale = 2, which spreads the data over more stems
stem(df_MX[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    2 | 5
##    4 | 
##    6 | 5
##    8 | 
##   10 | 
##   12 | 45

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values with cumulative percentages and a
# final "Total" row. Note: the variable name `table` is the same as base R's
# table() function.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.4977 1 25 25 25 25
6.46156 1 25 25 50 50
12.365 1 25 25 75 75
12.53758 1 25 25 100 100
Total 4 100 100 100 100
# Inspect the structure of the frequency table returned by questionr::freq()
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Build a plotting data frame from the frequency table: row names hold the
# distance values and column n holds the counts.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(., -1) is used instead of
# 1:(length(.) - 1), which misbehaves when only one row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.4977 1
6.46156 1
12.365 1
12.53758 1
library(ggplot2)
# df$x holds the distances as text, so order the bars by numeric value;
# otherwise they are sorted alphabetically (e.g. "12.365" before "2.4977").
ggplot(data = df, aes(x = reorder(x, as.numeric(x)), y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n). log2() is used instead
# of log(n) / log(2) so that rounding error in the ratio cannot shift the
# result of ceiling() / floor().
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even, in which case use the
# rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range divided by the number of classes, rounded up to a
# whole number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and run one width past the maximum, so the last
# break always lies above the largest observation.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  2.4977  6.4977 10.4977 14.4977
# The first break equals min(distance). include.lowest = TRUE closes the first
# interval on the left so the minimum observation is counted instead of being
# turned into NA and dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(2.5,6.5] 1 0.3333333 1
(6.5,10.5] 0 0.0000000 1
(10.5,14.5] 2 0.6666667 3
# Inspect the structure of the grouped frequency table
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(2.5,6.5]","(6.5,10.5]",..: 1 2 3
##  $ Freq    : int  1 0 2
##  $ Rel_Freq: num  0.333 0 0.667
##  $ Cum_Freq: int  1 1 3
# Two-column plotting frame: class interval (x) and its absolute frequency (y)
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(2.5,6.5] 1
(6.5,10.5] 0
(10.5,14.5] 2
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval
ggplot(data = df, aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX; the non-numeric columns
# come out as NA in the result (see the output below).
stat.desc(df_MX)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          4.170000e+02   NA   NA             NA           NA           NA
## max          7.056000e+03   NA   NA             NA           NA           NA
## range        6.639000e+03   NA   NA             NA           NA           NA
## sum          1.504200e+04   NA   NA             NA           NA           NA
## median       3.784500e+03   NA   NA             NA           NA           NA
## mean         3.760500e+03   NA   NA             NA           NA           NA
## SE.mean      1.571045e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 4.999766e+03   NA   NA             NA           NA           NA
## var          9.872730e+06   NA   NA             NA           NA           NA
## std.dev      3.142090e+03   NA   NA             NA           NA           NA
## coef.var     8.355511e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.173000e+03   NA  2.4977000                   NA
## max             NA 1.376457e+06   NA 12.5375800                   NA
## range           NA 1.375284e+06   NA 10.0398800                   NA
## sum             NA 2.763172e+06   NA 33.8618400                   NA
## median          NA 6.927710e+05   NA  9.4132800                   NA
## mean            NA 6.907930e+05   NA  8.4654600                   NA
## SE.mean         NA 3.958716e+05   NA  2.4395756                   NA
## CI.mean.0.95    NA 1.259840e+06   NA  7.7638183                   NA
## var             NA 6.268573e+11   NA 23.8061160                   NA
## std.dev         NA 7.917432e+05   NA  4.8791512                   NA
## coef.var        NA 1.146137e+00   NA  0.5763598                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      4.000000e+00  4.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          3.197800e+01 -1.170898e+02          NA          NA             NA
## max          3.257550e+01 -1.167776e+02          NA          NA             NA
## range        5.975000e-01  3.122000e-01          NA          NA             NA
## sum          1.295981e+02 -4.677689e+02          NA          NA             NA
## median       3.252230e+01 -1.169507e+02          NA          NA             NA
## mean         3.239952e+01 -1.169422e+02          NA          NA             NA
## SE.mean      1.410711e-01  6.699289e-02          NA          NA             NA
## CI.mean.0.95 4.489511e-01  2.132013e-01          NA          NA             NA
## var          7.960417e-02  1.795219e-02          NA          NA             NA
## std.dev      2.821421e-01  1.339858e-01          NA          NA             NA
## coef.var     8.708217e-03 -1.145743e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  4.0000000          NA
## nbr.null                 NA      NA         NA        1  3.0000000          NA
## nbr.na                   NA      NA         NA        3  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  1.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.2500000          NA
## SE.mean                  NA      NA         NA       NA  0.2500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.7956116          NA
## var                      NA      NA         NA       NA  0.2500000          NA
## std.dev                  NA      NA         NA       NA  0.5000000          NA
## coef.var                 NA      NA         NA       NA  2.0000000          NA
##              source_link        prop        ypos
## nbr.val               NA   4.0000000   4.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   7.3761497  18.5128451
## max                   NA  37.0256903  81.7419845
## range                 NA  29.6495406  63.2291393
## sum                   NA 100.0000000 194.9114992
## median                NA  27.7990800  47.3283348
## mean                  NA  25.0000000  48.7278748
## SE.mean               NA   7.2044980  13.2107972
## CI.mean.0.95          NA  22.9279280  42.0426527
## var                   NA 207.6191648 698.1006481
## std.dev               NA  14.4089960  26.4215944
## coef.var              NA   0.5763598   0.5422275
# Horizontal box plot of the distance series
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Chiapas (México)

library(readr)
library(knitr)
# Reload the full catalog: `df` was overwritten with a frequency data frame
# earlier in the document.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used throughout the analysis
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records for Mexico and preview them
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Filter on country as well as state: a state name on its own is not
# guaranteed to be unique across the countries in the catalog.
df_MX <- subset(df, country_name == "Mexico" & state == "Chiapas")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
2114 7/22/10 Overnight NA Mexico MX Chiapas 9570 Escuintla 1.41805 NA 15.3258 -92.6698 (15.325799999999999, -92.669799999999995) Landslide Mudslide Medium Downpour NA NA 3 NA http://www.laht.com/article.asp?ArticleId=361041&CategoryId=14091
2115 7/22/10 NA NA Mexico MX Chiapas 128996 San Cristóbal de las Casas 0.94118 NA 16.7294 -92.6389 (16.729399999999998, -92.638900000000007) Landslide Rockfall Small Downpour NA NA 0 NA http://www.laht.com/article.asp?ArticleId=361041&CategoryId=14091
2520 9/29/10 NA NA Mexico MX Chiapas 3947 Amatán 0.31118 NA 17.3738 -92.8213 (17.373799999999999, -92.821299999999994) Landslide Mudslide Large Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 16 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm
2521 9/29/10 NA NA Mexico MX Chiapas 1088 Chihuahua 8.46579 NA 16.0203 -91.8946 (16.020299999999999, -91.894599999999997) Landslide Complex Medium Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 3 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm
3832 7/22/11 NA NA Mexico MX Chiapas 1005 Nueva Libertad 7.93996 NA 16.1514 -92.7704 (16.151399999999999, -92.770399999999995) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA http://www.china.org.cn/environment/2011-07/22/content_23044018.htm

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# One bar per landslide record, placed side by side within the state and
# coloured by city; bar height is the recorded distance.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, but the per-city distances are stacked into
# a single bar for the state.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A pie chart is a stacked bar drawn in polar coordinates: the distance axis
# is mapped to the angle.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: each record's share of the total distance, in percent.
# ypos: mid-point of each slice on the cumulative scale, used to place labels.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" does not exist in ColorBrewer; "Set3" is a valid qualitative
  # palette with up to 12 colours, enough for the 10 cities plotted here.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# df_MX is currently sorted by city (for the pie chart), so put the
# observations in chronological order before building the series.
event_dates <- as.Date(df_MX$date, format = "%m/%d/%y")
# NOTE(review): frequency = 12 and start = 2008 treat the events as consecutive
# monthly observations starting in 2008, although the earliest event shown for
# this state is dated 11/4/07 and the events are irregularly spaced; confirm
# that this is the intended time axis.
data <- ts(df_MX$distance[order(event_dates)], frequency = 12, start = 2008)
knitr::kable(head(data))
x
1.73469
0.76257
0.94118
3.74149
7.93996
2.06743
# The colour is passed to autoplot() so the line itself is drawn in green;
# given to labs(), `colour` would only name a colour legend.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_MX$distance, df_MX$city)
# Pareto chart of the distances, with cumulative-percentage ticks every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                             
## Pareto chart analysis for distance
##                                Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Chihuahua                    8.4657900   8.4657900  26.4005074   26.4005074
##   Nueva Libertad               7.9399600  16.4057500  24.7607102   51.1612177
##   Emiliano Zapata              4.6844300  21.0901800  14.6083625   65.7695802
##   Ostuacán                     3.7414900  24.8316700  11.6678106   77.4373908
##   Motozintla de Mendoza        2.0674300  26.8991000   6.4472661   83.8846569
##   Tila                         1.7346900  28.6337900   5.4096187   89.2942757
##   Escuintla                    1.4180500  30.0518400   4.4221791   93.7164548
##   San Cristóbal de las Casas   0.9411800  30.9930200   2.9350633   96.6515181
##   Santo Domingo                0.7625700  31.7555900   2.3780693   99.0295873
##   Amatán                       0.3111800  32.0667700   0.9704127  100.0000000
# Stem-and-leaf display of the distances
stem(df_MX[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 38947
##   2 | 17
##   4 | 7
##   6 | 9
##   8 | 5
# Preview df_MX, which now also carries the prop and ypos columns
head(df_MX)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state   population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1  7517 9/12/15  <NA>  <NA>           Mexico       MX           Chiapas       6089
## 2  7514 5/23/15  <NA>  <NA>           Mexico       MX           Chiapas       3796
## 3  2115 7/22/10  <NA>  <NA>           Mexico       MX           Chiapas     128996
## 4   346 11/4/07  <NA>  <NA>           Mexico       MX           Chiapas       3183
## 5  3832 7/22/11  <NA>  <NA>           Mexico       MX           Chiapas       1005
## 6  7515 10/21/15 <NA>  <NA>           Mexico       MX           Chiapas      19092
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7517 9/12/15 NA NA Mexico MX Chiapas 6089 Tila 1.73469 Below road 17.3112 -92.4393 (17.311199999999999, -92.439300000000003) Landslide Landslide Medium Rain NA 0 0 NVI noticias http://www.noticiasnet.mx/portal/chiapas/general/agua/317470-lluvias-causan-deslaves-tramos-carreteros 5.409619 2.704809
7514 5/23/15 NA NA Mexico MX Chiapas 3796 Santo Domingo 0.76257 Below road 15.0337 -92.1124 (15.0337, -92.112399999999994) Landslide Landslide Medium Rain NA 0 0 Cuarto Poder http://www.cuartopoder.mx/deslaveafectacarreteradejandouncarril-116564.html 2.378069 6.598653
2115 7/22/10 NA NA Mexico MX Chiapas 128996 San Cristóbal de las Casas 0.94118 NA 16.7294 -92.6389 (16.729399999999998, -92.638900000000007) Landslide Rockfall Small Downpour NA NA 0 NA http://www.laht.com/article.asp?ArticleId=361041&CategoryId=14091 2.935063 9.255220
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_ 11.667811 16.556657
3832 7/22/11 NA NA Mexico MX Chiapas 1005 Nueva Libertad 7.93996 NA 16.1514 -92.7704 (16.151399999999999, -92.770399999999995) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA http://www.china.org.cn/environment/2011-07/22/content_23044018.htm 24.760710 34.770917
7515 10/21/15 NA NA Mexico MX Chiapas 19092 Motozintla de Mendoza 2.06743 Above road 15.3482 -92.2523 (15.3482, -92.252300000000005) Landslide Landslide Medium Rain NA 0 0 azteca noticias http://www.aztecanoticias.com.mx/notas/estados/234140/suman-16-deslaves-por-lluvias-en-chiapas 6.447266 50.374905
# Stem-and-leaf display of the distances (default scale)
stem(df_MX[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 38947
##   2 | 17
##   4 | 7
##   6 | 9
##   8 | 5
# Same display with scale = 2, which spreads the data over more stems
stem(df_MX[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 389
##   1 | 47
##   2 | 1
##   3 | 7
##   4 | 7
##   5 | 
##   6 | 
##   7 | 9
##   8 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values with cumulative percentages and a
# final "Total" row. Note: the variable name `table` is the same as base R's
# table() function.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.31118 1 10 10 10 10
0.76257 1 10 10 20 20
0.94118 1 10 10 30 30
1.41805 1 10 10 40 40
1.73469 1 10 10 50 50
2.06743 1 10 10 60 60
3.74149 1 10 10 70 70
4.68443 1 10 10 80 80
7.93996 1 10 10 90 90
8.46579 1 10 10 100 100
Total 10 100 100 100 100
# Inspect the structure of the frequency table returned by questionr::freq()
str(table)
## Classes 'freqtab' and 'data.frame':  11 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ val%   : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ %cum   : num  10 20 30 40 50 60 70 80 90 100 ...
##  $ val%cum: num  10 20 30 40 50 60 70 80 90 100 ...
# Build a plotting data frame from the frequency table: row names hold the
# distance values and column n holds the counts.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(., -1) is used instead of
# 1:(length(.) - 1), which misbehaves when only one row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.31118 1
0.76257 1
0.94118 1
1.41805 1
1.73469 1
2.06743 1
3.74149 1
4.68443 1
7.93996 1
8.46579 1
library(ggplot2)
# df$x holds the distances as text, so order the bars by numeric value rather
# than alphabetically.
ggplot(data = df, aes(x = reorder(x, as.numeric(x)), y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n). log2() is used instead
# of log(n) / log(2) so that rounding error in the ratio cannot shift the
# result of ceiling() / floor().
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even, in which case use the
# rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range divided by the number of classes, rounded up to a
# whole number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and run one width past the maximum, so the last
# break always lies above the largest observation.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.31118  2.31118  4.31118  6.31118  8.31118 10.31118
# The first break equals min(distance). include.lowest = TRUE closes the first
# interval on the left so the minimum observation is counted instead of being
# turned into NA and dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.311,2.31] 5 0.5555556 5
(2.31,4.31] 1 0.1111111 6
(4.31,6.31] 1 0.1111111 7
(6.31,8.31] 1 0.1111111 8
(8.31,10.3] 1 0.1111111 9
# Inspect the structure of the grouped frequency table
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.311,2.31]",..: 1 2 3 4 5
##  $ Freq    : int  5 1 1 1 1
##  $ Rel_Freq: num  0.556 0.111 0.111 0.111 0.111
##  $ Cum_Freq: int  5 6 7 8 9
# Two-column plotting frame: class interval (x) and its absolute frequency (y)
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.311,2.31] 5
(2.31,4.31] 1
(4.31,6.31] 1
(6.31,8.31] 1
(8.31,10.3] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval
ggplot(data = df, aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX; the non-numeric columns
# come out as NA in the result (see the output below).
stat.desc(df_MX)
##                        id date time continent_code country_name country_code
## nbr.val      1.000000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.460000e+02   NA   NA             NA           NA           NA
## max          7.517000e+03   NA   NA             NA           NA           NA
## range        7.171000e+03   NA   NA             NA           NA           NA
## sum          4.351000e+04   NA   NA             NA           NA           NA
## median       3.176500e+03   NA   NA             NA           NA           NA
## mean         4.351000e+03   NA   NA             NA           NA           NA
## SE.mean      9.010547e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.038327e+03   NA   NA             NA           NA           NA
## var          8.118995e+06   NA   NA             NA           NA           NA
## std.dev      2.849385e+03   NA   NA             NA           NA           NA
## coef.var     6.548805e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.000000e+01   NA 10.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.005000e+03   NA  0.3111800                   NA
## max             NA 1.289960e+05   NA  8.4657900                   NA
## range           NA 1.279910e+05   NA  8.1546100                   NA
## sum             NA 1.953740e+05   NA 32.0667700                   NA
## median          NA 5.018000e+03   NA  1.9010600                   NA
## mean            NA 1.953740e+04   NA  3.2066770                   NA
## SE.mean         NA 1.233991e+04   NA  0.9350334                   NA
## CI.mean.0.95    NA 2.791482e+04   NA  2.1151925                   NA
## var             NA 1.522734e+09   NA  8.7428745                   NA
## std.dev         NA 3.902222e+04   NA  2.9568352                   NA
## coef.var        NA 1.997309e+00   NA  0.9220870                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       10.00000000  1.000000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           15.03370000 -9.330600e+01          NA          NA             NA
## max           17.70550000 -9.174160e+01          NA          NA             NA
## range          2.67180000  1.564400e+00          NA          NA             NA
## sum          164.38930000 -9.246466e+02          NA          NA             NA
## median        16.44040000 -9.253910e+01          NA          NA             NA
## mean          16.43893000 -9.246466e+01          NA          NA             NA
## SE.mean        0.31427677  1.497619e-01          NA          NA             NA
## CI.mean.0.95   0.71094344  3.387849e-01          NA          NA             NA
## var            0.98769885  2.242861e-01          NA          NA             NA
## std.dev        0.99383039  4.735886e-01          NA          NA             NA
## coef.var       0.06045591 -5.121833e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4  10.000000          NA
## nbr.null                 NA      NA         NA        4   6.000000          NA
## nbr.na                   NA      NA         NA        6   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  16.000000          NA
## range                    NA      NA         NA        0  16.000000          NA
## sum                      NA      NA         NA        0  27.000000          NA
## median                   NA      NA         NA        0   0.000000          NA
## mean                     NA      NA         NA        0   2.700000          NA
## SE.mean                  NA      NA         NA        0   1.584999          NA
## CI.mean.0.95             NA      NA         NA        0   3.585517          NA
## var                      NA      NA         NA        0  25.122222          NA
## std.dev                  NA      NA         NA        0   5.012207          NA
## coef.var                 NA      NA         NA      NaN   1.856373          NA
##              source_link        prop         ypos
## nbr.val               NA  10.0000000   10.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.9704127    2.7048094
## max                   NA  26.4005074   99.5147937
## range                 NA  25.4300948   96.8099843
## sum                   NA 100.0000000  426.7398151
## median                NA   5.9284424   42.5729111
## mean                  NA  10.0000000   42.6739815
## SE.mean               NA   2.9158952   10.8517928
## CI.mean.0.95          NA   6.5962131   24.5484607
## var                   NA  85.0244464 1177.6140634
## std.dev               NA   9.2208702   34.3163819
## coef.var              NA   0.9220870    0.8041523
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Guerrero (México)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)

# Give the 7th and 9th catalog columns the short names `state` and `city`
# that the rest of the analysis refers to.
names(df)[c(7, 9)] <- c("state", "city")

# Keep only the rows recorded for Mexico and preview the first six.
df_MX <- dplyr::filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)

# Select the Guerrero landslides. The country is tested together with the
# state so that a same-named state in another country cannot slip in; filtering
# the whole catalog by state name alone ignored the Mexico restriction.
df_MX <- subset(df, country_name == "Mexico" & state == "Guerrero")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
3834 7/22/11 NA NA Mexico MX Guerrero 165250 Chilpancingo de los Bravos 6.80950 NA 17.4974 -99.5380 (17.497399999999999, -99.537999999999997) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA
5530 9/15/13 NA NA Mexico MX Guerrero 652136 Acapulco 0.90692 NA 16.8638 -99.8816 (16.863800000000001, -99.881600000000006) Landslide Mudslide Very_large Downpour NA NA NA www.trust.org http://www.trust.org/item/20131014162052-6ou24/?source=gep
5534 9/16/13 15:30 NA Mexico MX Guerrero 21407 Atoyac de Álvarez 32.12708 Deforested slope 17.3461 -100.1681 (17.3461, -100.1681) Landslide Mudslide Medium Tropical cyclone Manuel 4 71 Vice News https://news.vice.com/article/a-mexican-town-mourns-its-missing-one-year-after-being-engulfed-by-mud
5538 9/16/13 NA NA Mexico MX Guerrero 1252 San Pablo Atzompa 14.04274 NA 17.2250 -98.5000 (17.225000000000001, -98.5) Landslide Landslide Medium Downpour NA NA 0 america.aljazeera.com http://america.aljazeera.com/articles/2013/11/19/after-landslide-poorestofthepoorleftoutinthecoldinmexico.html
5543 9/16/13 NA NA Mexico MX Guerrero 165250 Chilpancingo de los Bravos 0.88149 NA 17.5482 -99.5137 (17.548200000000001, -99.5137) Landslide Mudslide Medium Tropical cyclone Ingrid NA 4 www.washingtonpost.com http://www.washingtonpost.com/world/two-storms-batter-mexico-killing-dozens/2013/09/17/5cd3d81e-1f8e-11e3-b7d1-7153ad47b549_gallery.html#photo=6
6269 10/18/14 NA NA Mexico MX Guerrero 1018 Colonia Alborada 4.10830 Above road 16.8850 -99.8562 (16.885000000000002, -99.856200000000001) Landslide Landslide Small Tropical cyclone Trudy 0 0 Thomson Reuters Foundation http://www.trust.org/item/20141018173449-r3rca/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)

# One bar per record, drawn side by side within the state and coloured by city.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)

# Same data as the grouped chart, with the records stacked into a single bar.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)

# A full-width stacked bar bent into polar coordinates along y gives a pie
# whose slices are proportional to distance.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# `prop` is each record's share of the total distance (in percent) and `ypos`
# is the mid-point of its slice along the stacked axis, where the percentage
# label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
  # warning; "Set3" is a qualitative palette with 12 colours.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a `ts` object declared as monthly (frequency 12)
# starting in 2008, then preview its first values.
# NOTE(review): the events' real dates are not used here, so the time axis is
# nominal — confirm that this is intended.
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
14.04274
12.33417
4.10830
6.80950
0.88149
32.12708
# Draw the series as a green line. `colour` is given to autoplot() so that it
# styles the line itself; inside labs() it only set the title of a colour
# legend that this plot does not have.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)

# Distances labelled by city, used as the input of the Pareto chart.
distance <- setNames(df_MX$distance, df_MX$city)

pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                             
## Pareto chart analysis for distance
##                               Frequency  Cum.Freq. Percentage Cum.Percent.
##   Atoyac de Álvarez           32.127080  32.127080  41.040408    41.040408
##   San Pablo Atzompa           14.042740  46.169820  17.938754    58.979162
##   Pochutla                    12.334170  58.503990  15.756159    74.735321
##   Acapulco de Juárez           7.071380  65.575370   9.033262    83.768583
##   Chilpancingo de los Bravos   6.809500  72.384870   8.698726    92.467308
##   Colonia Alborada             4.108300  76.493170   5.248106    97.715414
##   Acapulco                     0.906920  77.400090   1.158536    98.873950
##   Chilpancingo de los Bravos   0.881490  78.281580   1.126050   100.000000
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 11477
##   1 | 24
##   2 | 
##   3 | 2
head(df_MX)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state    population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1  5538 9/16/13  <NA>  <NA>           Mexico       MX           Guerrero       1252
## 2  7479 10/3/15  Night <NA>           Mexico       MX           Guerrero       1201
## 3  6269 10/18/14 <NA>  <NA>           Mexico       MX           Guerrero       1018
## 4  3834 7/22/11  <NA>  <NA>           Mexico       MX           Guerrero     165250
## 5  5543 9/16/13  <NA>  <NA>           Mexico       MX           Guerrero     165250
## 6  5534 9/16/13  15:30 <NA>           Mexico       MX           Guerrero      21407
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
5538 9/16/13 NA NA Mexico MX Guerrero 1252 San Pablo Atzompa 14.04274 NA 17.2250 -98.5000 (17.225000000000001, -98.5) Landslide Landslide Medium Downpour NA NA 0 america.aljazeera.com http://america.aljazeera.com/articles/2013/11/19/after-landslide-poorestofthepoorleftoutinthecoldinmexico.html 17.938754 8.969377
7479 10/3/15 Night NA Mexico MX Guerrero 1201 Pochutla 12.33417 Unknown 17.5586 -98.9483 (17.558599999999998, -98.948300000000003) Landslide Landslide Medium Tropical cyclone Marty 0 0 Tiempo http://www.tiempoenlinea.com.mx/index.php/oaxaca-2/52715-deslave-afecta-62-viviendas-en-la-montana-de-guerrero 15.756159 25.816833
6269 10/18/14 NA NA Mexico MX Guerrero 1018 Colonia Alborada 4.10830 Above road 16.8850 -99.8562 (16.885000000000002, -99.856200000000001) Landslide Landslide Small Tropical cyclone Trudy 0 0 Thomson Reuters Foundation http://www.trust.org/item/20141018173449-r3rca/ 5.248106 36.318965
3834 7/22/11 NA NA Mexico MX Guerrero 165250 Chilpancingo de los Bravos 6.80950 NA 17.4974 -99.5380 (17.497399999999999, -99.537999999999997) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA 8.698726 43.292381
5543 9/16/13 NA NA Mexico MX Guerrero 165250 Chilpancingo de los Bravos 0.88149 NA 17.5482 -99.5137 (17.548200000000001, -99.5137) Landslide Mudslide Medium Tropical cyclone Ingrid NA 4 www.washingtonpost.com http://www.washingtonpost.com/world/two-storms-batter-mexico-killing-dozens/2013/09/17/5cd3d81e-1f8e-11e3-b7d1-7153ad47b549_gallery.html#photo=6 1.126050 48.204769
5534 9/16/13 15:30 NA Mexico MX Guerrero 21407 Atoyac de Álvarez 32.12708 Deforested slope 17.3461 -100.1681 (17.3461, -100.1681) Landslide Mudslide Medium Tropical cyclone Manuel 4 71 Vice News https://news.vice.com/article/a-mexican-town-mourns-its-missing-one-year-after-being-engulfed-by-mud 41.040408 69.287999
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 11477
##   1 | 24
##   2 | 
##   3 | 2
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 114
##   0 | 77
##   1 | 24
##   1 | 
##   2 | 
##   2 | 
##   3 | 2

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which is also the
# name of base R's table() function. Later calls such as table(distance) still
# reach the function, but the shared name is easy to misread.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.88149 1 12.5 12.5 12.5 12.5
0.90692 1 12.5 12.5 25.0 25.0
4.1083 1 12.5 12.5 37.5 37.5
6.8095 1 12.5 12.5 50.0 50.0
7.07138 1 12.5 12.5 62.5 62.5
12.33417 1 12.5 12.5 75.0 75.0
14.04274 1 12.5 12.5 87.5 87.5
32.12708 1 12.5 12.5 100.0 100.0
Total 8 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  9 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 8
##  $ %      : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ val%   : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ %cum   : num  12.5 25 37.5 50 62.5 75 87.5 100 100
##  $ val%cum: num  12.5 25 37.5 50 62.5 75 87.5 100 100
# Turn the frequency table into a two-column data frame for plotting, leaving
# out the final "Total" row. head(v, -1) drops the last element and, unlike
# 1:(length(v) - 1), stays correct for an empty vector; it also avoids creating
# a variable called `names`, which shadows base::names().
x <- row.names(table)
y <- table$n
value_labels <- head(x, -1)
freqs <- head(y, -1)
# Keep the table's row order on the plot axis: plain character labels would be
# sorted alphabetically by ggplot2 (e.g. "12.33417" before "4.1083").
df <- data.frame(x = factor(value_labels, levels = value_labels), y = freqs)
knitr::kable(df)
x y
0.88149 1
0.90692 1
4.1083 1
6.8095 1
7.07138 1
12.33417 1
14.04274 1
32.12708 1
library(ggplot2)

# Bar chart of how often each distance value occurs; the x labels are rotated
# by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n).
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split into n_clases parts, rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and extend past the maximum, so the
# largest observation falls inside the last class.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.88149  8.88149 16.88149 24.88149 32.88149
# include.lowest = TRUE closes the first class on the left. cut() builds
# (a, b] intervals by default, so without it the minimum observation (equal to
# the first break) became NA and was missing from the counts.
# NOTE: the table rendered after this chunk predates this fix; re-knit to
# refresh it.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.881,8.88] 4 0.5714286 4
(8.88,16.9] 2 0.2857143 6
(16.9,24.9] 0 0.0000000 6
(24.9,32.9] 1 0.1428571 7
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.881,8.88]",..: 1 2 3 4
##  $ Freq    : int  4 2 0 1
##  $ Rel_Freq: num  0.571 0.286 0 0.143
##  $ Cum_Freq: int  4 6 6 7
# Plotting frame: class interval on x, absolute frequency on y.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.881,8.88] 4
(8.88,16.9] 2
(16.9,24.9] 0
(24.9,32.9] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX. The non-numeric columns
# (date, state, city, ...) come out as all-NA columns in the result.
stat.desc(df_MX)
##                        id date time continent_code country_name country_code
## nbr.val      8.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.834000e+03   NA   NA             NA           NA           NA
## max          7.479000e+03   NA   NA             NA           NA           NA
## range        3.645000e+03   NA   NA             NA           NA           NA
## sum          4.719200e+04   NA   NA             NA           NA           NA
## median       5.540500e+03   NA   NA             NA           NA           NA
## mean         5.899000e+03   NA   NA             NA           NA           NA
## SE.mean      4.199559e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 9.930378e+02   NA   NA             NA           NA           NA
## var          1.410903e+06   NA   NA             NA           NA           NA
## std.dev      1.187815e+03   NA   NA             NA           NA           NA
## coef.var     2.013586e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 8.000000e+00   NA   8.000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 1.018000e+03   NA   0.881490                   NA
## max             NA 6.521360e+05   NA  32.127080                   NA
## range           NA 6.511180e+05   NA  31.245590                   NA
## sum             NA 1.659650e+06   NA  78.281580                   NA
## median          NA 9.332850e+04   NA   6.940440                   NA
## mean            NA 2.074563e+05   NA   9.785198                   NA
## SE.mean         NA 1.001192e+05   NA   3.611317                   NA
## CI.mean.0.95    NA 2.367442e+05   NA   8.539408                   NA
## var             NA 8.019077e+10   NA 104.332887                   NA
## std.dev         NA 2.831797e+05   NA  10.214347                   NA
## coef.var        NA 1.365009e+00   NA   1.043857                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        8.00000000  8.000000e+00          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           16.86380000 -1.001681e+02          NA          NA             NA
## max           17.55860000 -9.850000e+01          NA          NA             NA
## range          0.69480000  1.668100e+00          NA          NA             NA
## sum          137.82300000 -7.963511e+02          NA          NA             NA
## median        17.28555000 -9.969710e+01          NA          NA             NA
## mean          17.22787500 -9.954389e+01          NA          NA             NA
## SE.mean        0.10831128  1.985507e-01          NA          NA             NA
## CI.mean.0.95   0.25611548  4.694978e-01          NA          NA             NA
## var            0.09385067  3.153790e-01          NA          NA             NA
## std.dev        0.30635057  5.615862e-01          NA          NA             NA
## coef.var       0.01778226 -5.641594e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA 4.000000   7.000000          NA
## nbr.null                 NA      NA         NA 3.000000   5.000000          NA
## nbr.na                   NA      NA         NA 4.000000   1.000000          NA
## min                      NA      NA         NA 0.000000   0.000000          NA
## max                      NA      NA         NA 4.000000  71.000000          NA
## range                    NA      NA         NA 4.000000  71.000000          NA
## sum                      NA      NA         NA 4.000000  75.000000          NA
## median                   NA      NA         NA 0.000000   0.000000          NA
## mean                     NA      NA         NA 1.000000  10.714286          NA
## SE.mean                  NA      NA         NA 1.000000  10.063404          NA
## CI.mean.0.95             NA      NA         NA 3.182446  24.624264          NA
## var                      NA      NA         NA 4.000000 708.904762          NA
## std.dev                  NA      NA         NA 2.000000  26.625265          NA
## coef.var                 NA      NA         NA 2.000000   2.485025          NA
##              source_link       prop         ypos
## nbr.val               NA   8.000000    8.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA   1.126050    8.9693770
## max                   NA  41.040408   99.4207322
## range                 NA  39.914358   90.4513552
## sum                   NA 100.000000  425.6358903
## median                NA   8.865994   45.7485752
## mean                  NA  12.500000   53.2044863
## SE.mean               NA   4.613240   11.3394853
## CI.mean.0.95          NA  10.908579   26.8136220
## var                   NA 170.255856 1028.6714218
## std.dev               NA  13.048213   32.0729079
## coef.var              NA   1.043857    0.6028234
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Hidalgo (México)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)

# Give the 7th and 9th catalog columns the short names `state` and `city`
# that the rest of the analysis refers to.
names(df)[c(7, 9)] <- c("state", "city")

# Keep only the rows recorded for Mexico and preview the first six.
df_MX <- dplyr::filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)

# Select the Hidalgo landslides. The country is tested together with the state
# so that a same-named state in another country cannot slip in; filtering the
# whole catalog by state name alone ignored the Mexico restriction.
df_MX <- subset(df, country_name == "Mexico" & state == "Hidalgo")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
4874 5/26/13 NA NA Mexico MX Hidalgo 33196 Tepeji de Ocampo 0.64483 NA 19.9006 -99.3417 (19.900600000000001, -99.341700000000003) Landslide Landslide Large Rain NA NA 7 en.apa.az http://en.apa.az/xeber_7_killed_in_landslide_in_central_mexico_193672.html
5527 9/15/13 NA NA Mexico MX Hidalgo 1236 Fontezuelas 9.78251 NA 20.5402 -98.9492 (20.540199999999999, -98.949200000000005) Landslide Landslide Very_large Tropical cyclone Ingrid NA 3 www.recordonline.com http://www.recordonline.com/apps/pbcs.dll/article?AID=/20130915/NEWS/130919814/-1/SITEMAP

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)

# One bar per record, drawn side by side within the state and coloured by city.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)

# Same data as the grouped chart, with the records stacked into a single bar.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)

# A full-width stacked bar bent into polar coordinates along y gives a pie
# whose slices are proportional to distance.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# `prop` is each record's share of the total distance (in percent) and `ypos`
# is the mid-point of its slice along the stacked axis, where the percentage
# label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
  # warning; "Set3" is a qualitative palette with 12 colours.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a `ts` object declared as monthly (frequency 12)
# starting in 2008, then preview its first values.
# NOTE(review): the events' real dates are not used here, so the time axis is
# nominal — confirm that this is intended.
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
0.64483
9.78251
# Draw the series as a green line. `colour` is given to autoplot() so that it
# styles the line itself; inside labs() it only set the title of a colour
# legend that this plot does not have.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)

# Distances labelled by city, used as the input of the Pareto chart.
distance <- setNames(df_MX$distance, df_MX$city)

pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                     Frequency  Cum.Freq. Percentage Cum.Percent.
##   Fontezuelas        9.782510   9.782510  93.815968    93.815968
##   Tepeji de Ocampo   0.644830  10.427340   6.184032   100.000000
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 6
##   2 | 
##   4 | 
##   6 | 
##   8 | 8
head(df_MX)
## # A tibble: 2 x 25
##      id date    time  continent_code country_name country_code state   population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1  4874 5/26/13 <NA>  <NA>           Mexico       MX           Hidalgo      33196
## 2  5527 9/15/13 <NA>  <NA>           Mexico       MX           Hidalgo       1236
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4874 5/26/13 NA NA Mexico MX Hidalgo 33196 Tepeji de Ocampo 0.64483 NA 19.9006 -99.3417 (19.900600000000001, -99.341700000000003) Landslide Landslide Large Rain NA NA 7 en.apa.az http://en.apa.az/xeber_7_killed_in_landslide_in_central_mexico_193672.html 6.184032 3.092016
5527 9/15/13 NA NA Mexico MX Hidalgo 1236 Fontezuelas 9.78251 NA 20.5402 -98.9492 (20.540199999999999, -98.949200000000005) Landslide Landslide Very_large Tropical cyclone Ingrid NA 3 www.recordonline.com http://www.recordonline.com/apps/pbcs.dll/article?AID=/20130915/NEWS/130919814/-1/SITEMAP 93.815968 53.092016
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 6
##   2 | 
##   4 | 
##   6 | 
##   8 | 8
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 6
##   1 | 
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 
##   9 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which is also the
# name of base R's table() function. Later calls such as table(distance) still
# reach the function, but the shared name is easy to misread.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.64483 1 50 50 50 50
9.78251 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Turn the frequency table into a two-column data frame for plotting, leaving
# out the final "Total" row. head(v, -1) drops the last element and, unlike
# 1:(length(v) - 1), stays correct for an empty vector; it also avoids creating
# a variable called `names`, which shadows base::names().
x <- row.names(table)
y <- table$n
value_labels <- head(x, -1)
freqs <- head(y, -1)
# Keep the table's row order on the plot axis: plain character labels would be
# sorted alphabetically by ggplot2 rather than in table order.
df <- data.frame(x = factor(value_labels, levels = value_labels), y = freqs)
knitr::kable(df)
x y
0.64483 1
9.78251 1
library(ggplot2)

# Bar chart of how often each distance value occurs; the x labels are rotated
# by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n).
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split into n_clases parts, rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and extend past the maximum, so the
# largest observation falls inside the last class.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.64483  5.64483 10.64483
# include.lowest = TRUE closes the first class on the left. cut() builds
# (a, b] intervals by default, so without it the minimum observation (equal to
# the first break) became NA and was missing from the counts.
# NOTE: the table rendered after this chunk predates this fix; re-knit to
# refresh it.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.645,5.64] 0 0 0
(5.64,10.6] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.645,5.64]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Plotting frame: class interval on x, absolute frequency on y.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.645,5.64] 0
(5.64,10.6] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX. The non-numeric columns
# (date, state, city, ...) come out as all-NA columns in the result, and
# `injuries`, which has no non-missing value here, triggers the min/max
# warnings shown below.
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          4.874000e+03   NA   NA             NA           NA           NA
## max          5.527000e+03   NA   NA             NA           NA           NA
## range        6.530000e+02   NA   NA             NA           NA           NA
## sum          1.040100e+04   NA   NA             NA           NA           NA
## median       5.200500e+03   NA   NA             NA           NA           NA
## mean         5.200500e+03   NA   NA             NA           NA           NA
## SE.mean      3.265000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.148576e+03   NA   NA             NA           NA           NA
## var          2.132045e+05   NA   NA             NA           NA           NA
## std.dev      4.617407e+02   NA   NA             NA           NA           NA
## coef.var     8.878776e-02   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 1.236000e+03   NA  0.644830                   NA 19.90060000
## max             NA 3.319600e+04   NA  9.782510                   NA 20.54020000
## range           NA 3.196000e+04   NA  9.137680                   NA  0.63960000
## sum             NA 3.443200e+04   NA 10.427340                   NA 40.44080000
## median          NA 1.721600e+04   NA  5.213670                   NA 20.22040000
## mean            NA 1.721600e+04   NA  5.213670                   NA 20.22040000
## SE.mean         NA 1.598000e+04   NA  4.568840                   NA  0.31980000
## CI.mean.0.95    NA 2.030452e+05   NA 58.052616                   NA  4.06344427
## var             NA 5.107208e+08   NA 41.748598                   NA  0.20454408
## std.dev         NA 2.259913e+04   NA  6.461315                   NA  0.45226550
## coef.var        NA 1.312682e+00   NA  1.239303                   NA  0.02236679
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.934170e+01          NA          NA             NA
## max          -9.894920e+01          NA          NA             NA
## range         3.925000e-01          NA          NA             NA
## sum          -1.982909e+02          NA          NA             NA
## median       -9.914545e+01          NA          NA             NA
## mean         -9.914545e+01          NA          NA             NA
## SE.mean       1.962500e-01          NA          NA             NA
## CI.mean.0.95  2.493593e+00          NA          NA             NA
## var           7.702812e-02          NA          NA             NA
## std.dev       2.775394e-01          NA          NA             NA
## coef.var     -2.799316e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  2.0000000          NA
## nbr.null                 NA      NA         NA        0  0.0000000          NA
## nbr.na                   NA      NA         NA        2  0.0000000          NA
## min                      NA      NA         NA      Inf  3.0000000          NA
## max                      NA      NA         NA     -Inf  7.0000000          NA
## range                    NA      NA         NA     -Inf  4.0000000          NA
## sum                      NA      NA         NA        0 10.0000000          NA
## median                   NA      NA         NA       NA  5.0000000          NA
## mean                     NA      NA         NA      NaN  5.0000000          NA
## SE.mean                  NA      NA         NA       NA  2.0000000          NA
## CI.mean.0.95             NA      NA         NA      NaN 25.4124095          NA
## var                      NA      NA         NA       NA  8.0000000          NA
## std.dev                  NA      NA         NA       NA  2.8284271          NA
## coef.var                 NA      NA         NA       NA  0.5656854          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    6.184032    3.092016
## max                   NA   93.815968   53.092016
## range                 NA   87.631937   50.000000
## sum                   NA  100.000000   56.184032
## median                NA   50.000000   28.092016
## mean                  NA   50.000000   28.092016
## SE.mean               NA   43.815968   25.000000
## CI.mean.0.95          NA  556.734665  317.655118
## var                   NA 3839.678175 1250.000000
## std.dev               NA   61.965137   35.355339
## coef.var              NA    1.239303    1.258555
boxplot(data, horizontal=TRUE, col='green')

Gráfico para México (Mexico)

# Attach the reader/table packages and read the landslide catalog CSV.
library(readr)
library(knitr)
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# whose country is Mexico and preview them.
colnames(df)[c(7, 9)] <- c("state", "city")
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Keep the landslides recorded in the Mexican state of México. The country
# condition is stated explicitly: filtering `df` on `state` alone would not
# carry over the country restriction applied in the previous step.
df_MX <- subset(df, country_name == "Mexico" & state == "México")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
744 8/25/08 NA NA Mexico MX México 7182 San Francisco Chimalpa 2.59637 NA 19.4360 -99.3650 (19.436, -99.364999999999995) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.alertnet.org/thenews/newsdesk/N26335664.htm
1469 2/5/10 NA NA Mexico MX México 505881 Toluca 0.66626 NA 19.2880 -99.6469 (19.288, -99.646900000000002) Landslide Landslide Large Downpour NA NA 11 NA http://www.trt.net.tr/trtinternational/en/newsDetail.aspx?HaberKodu=dc2169ea-80de-437d-a57a-eb11725c40e5
1477 2/4/10 22:00 NA Mexico MX México 2968 Zacazonapan 4.40801 Above road 19.1927 -99.8654 (19.192699999999999, -99.865399999999994) Landslide Mudslide Large Downpour NA NA 11 News Daily http://www.newsdaily.com/stories/tre6151we-us-mexico-mudslide/
2474 9/20/10 Late night NA Mexico MX México 9267 Villa Guerrero 1.06048 NA 18.9642 -99.6511 (18.964200000000002, -99.6511) Landslide Landslide Large Downpour NA NA 8 NA http://www.alertnet.org/thenews/newsdesk/N21168920.htm

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per city, bars placed side by side within the state.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance per city, piled on top of each other within the state.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: full-width bars of distance per city drawn on a polar y axis.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each row, plus the mid-point
# of its slice (cumulative percentage minus half the slice), used as the label
# position.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette;
# "Set4" is not a ColorBrewer palette name.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object with 12 observations per unit of time,
# starting at 2008, and preview its first values.
# NOTE(review): values are taken in df_MX's current row order -- confirm that
# this is the intended time order.
data <- ts(data = df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
4.40801
1.06048
0.66626
2.59637
# Inside labs(), `colour` only sets a legend title and does not colour
# anything, so the line colour is passed to autoplot() instead.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city: the names become the category labels of the chart.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                         
## Pareto chart analysis for distance
##                           Frequency  Cum.Freq. Percentage Cum.Percent.
##   Zacazonapan              4.408010   4.408010  50.486192    50.486192
##   San Francisco Chimalpa   2.596370   7.004380  29.736964    80.223156
##   Villa Guerrero           1.060480   8.064860  12.145979    92.369135
##   Toluca                   0.666260   8.731120   7.630865   100.000000
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 7
##   1 | 1
##   2 | 6
##   3 | 
##   4 | 4
head(df_MX)
## # A tibble: 4 x 25
##      id date    time       continent_code country_name country_code state  population
##   <dbl> <chr>   <chr>      <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  1477 2/4/10  22:00      <NA>           Mexico       MX           México       2968
## 2  2474 9/20/10 Late night <NA>           Mexico       MX           México       9267
## 3  1469 2/5/10  <NA>       <NA>           Mexico       MX           México     505881
## 4   744 8/25/08 <NA>       <NA>           Mexico       MX           México       7182
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1477 2/4/10 22:00 NA Mexico MX México 2968 Zacazonapan 4.40801 Above road 19.1927 -99.8654 (19.192699999999999, -99.865399999999994) Landslide Mudslide Large Downpour NA NA 11 News Daily http://www.newsdaily.com/stories/tre6151we-us-mexico-mudslide/ 50.486192 25.24310
2474 9/20/10 Late night NA Mexico MX México 9267 Villa Guerrero 1.06048 NA 18.9642 -99.6511 (18.964200000000002, -99.6511) Landslide Landslide Large Downpour NA NA 8 NA http://www.alertnet.org/thenews/newsdesk/N21168920.htm 12.145979 56.55918
1469 2/5/10 NA NA Mexico MX México 505881 Toluca 0.66626 NA 19.2880 -99.6469 (19.288, -99.646900000000002) Landslide Landslide Large Downpour NA NA 11 NA http://www.trt.net.tr/trtinternational/en/newsDetail.aspx?HaberKodu=dc2169ea-80de-437d-a57a-eb11725c40e5 7.630865 66.44760
744 8/25/08 NA NA Mexico MX México 7182 San Francisco Chimalpa 2.59637 NA 19.4360 -99.3650 (19.436, -99.364999999999995) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.alertnet.org/thenews/newsdesk/N26335664.htm 29.736964 85.13152
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 7
##   1 | 1
##   2 | 6
##   3 | 
##   4 | 4
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 7
##   1 | 1
##   1 | 
##   2 | 
##   2 | 6
##   3 | 
##   3 | 
##   4 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.66626 1 25 25 25 25
1.06048 1 25 25 50 50
2.59637 1 25 25 75 75
4.40801 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Build the plotting frame from the frequency table without its last ("Total")
# row. head(v, -1) replaces v[1:(length(v) - 1)], which would index with
# c(1, 0) when the table holds nothing but the "Total" row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.66626 1
1.06048 1
2.59637 1
4.40801 1
library(ggplot2)
# Bar chart of the count for each individual distance value; x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Class count from Sturges' rule, 1 + log2(n), with both roundings kept.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width (rounded up to a whole number) and the break points, running
# from the minimum to one width past the maximum.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.66626 2.66626 4.66626
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break equals min(distance), so with the
# default (a, b] intervals the minimum observation belongs to no class and is
# silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.666,2.67] 2 0.6666667 2
(2.67,4.67] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.666,2.67]",..: 1 2
##  $ Freq    : int  2 1
##  $ Rel_Freq: num  0.667 0.333
##  $ Cum_Freq: int  2 3
# Two-column frame: class label (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.666,2.67] 2
(2.67,4.67] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Describe only numeric columns holding at least one observed value: text
# columns produce nothing but NA, and an all-NA column makes min()/max() warn
# and return Inf/-Inf.
stat.desc(
  Filter(function(col) is.numeric(col) && !all(is.na(col)), df_MX)
)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          7.440000e+02   NA   NA             NA           NA           NA
## max          2.474000e+03   NA   NA             NA           NA           NA
## range        1.730000e+03   NA   NA             NA           NA           NA
## sum          6.164000e+03   NA   NA             NA           NA           NA
## median       1.473000e+03   NA   NA             NA           NA           NA
## mean         1.541000e+03   NA   NA             NA           NA           NA
## SE.mean      3.553142e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.130768e+03   NA   NA             NA           NA           NA
## var          5.049927e+05   NA   NA             NA           NA           NA
## std.dev      7.106284e+02   NA   NA             NA           NA           NA
## coef.var     4.611475e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 4.000000e+00   NA 4.0000000                   NA  4.00000000
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## min             NA 2.968000e+03   NA 0.6662600                   NA 18.96420000
## max             NA 5.058810e+05   NA 4.4080100                   NA 19.43600000
## range           NA 5.029130e+05   NA 3.7417500                   NA  0.47180000
## sum             NA 5.252980e+05   NA 8.7311200                   NA 76.88090000
## median          NA 8.224500e+03   NA 1.8284250                   NA 19.24035000
## mean            NA 1.313245e+05   NA 2.1827800                   NA 19.22022500
## SE.mean         NA 1.248590e+05   NA 0.8505927                   NA  0.09893547
## CI.mean.0.95    NA 3.973572e+05   NA 2.7069656                   NA  0.31485682
## var             NA 6.235912e+10   NA 2.8940318                   NA  0.03915291
## std.dev         NA 2.497181e+05   NA 1.7011854                   NA  0.19787094
## coef.var        NA 1.901535e+00   NA 0.7793664                   NA  0.01029493
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.986540e+01          NA          NA             NA
## max          -9.936500e+01          NA          NA             NA
## range         5.004000e-01          NA          NA             NA
## sum          -3.985284e+02          NA          NA             NA
## median       -9.964900e+01          NA          NA             NA
## mean         -9.963210e+01          NA          NA             NA
## SE.mean       1.026123e-01          NA          NA             NA
## CI.mean.0.95  3.265580e-01          NA          NA             NA
## var           4.211711e-02          NA          NA             NA
## std.dev       2.052245e-01          NA          NA             NA
## coef.var     -2.059824e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  4.0000000          NA
## nbr.null                 NA      NA         NA        0  0.0000000          NA
## nbr.na                   NA      NA         NA        4  0.0000000          NA
## min                      NA      NA         NA      Inf  1.0000000          NA
## max                      NA      NA         NA     -Inf 11.0000000          NA
## range                    NA      NA         NA     -Inf 10.0000000          NA
## sum                      NA      NA         NA        0 31.0000000          NA
## median                   NA      NA         NA       NA  9.5000000          NA
## mean                     NA      NA         NA      NaN  7.7500000          NA
## SE.mean                  NA      NA         NA       NA  2.3584953          NA
## CI.mean.0.95             NA      NA         NA      NaN  7.5057846          NA
## var                      NA      NA         NA       NA 22.2500000          NA
## std.dev                  NA      NA         NA       NA  4.7169906          NA
## coef.var                 NA      NA         NA       NA  0.6086439          NA
##              source_link        prop        ypos
## nbr.val               NA   4.0000000   4.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   7.6308652  25.2430960
## max                   NA  50.4861919  85.1315181
## range                 NA  42.8553267  59.8884221
## sum                   NA 100.0000000 233.3813989
## median                NA  20.9414714  61.5033925
## mean                  NA  25.0000000  58.3453497
## SE.mean               NA   9.7420801  12.5236274
## CI.mean.0.95          NA  31.0036469  39.8557719
## var                   NA 379.6325015 627.3649774
## std.dev               NA  19.4841603  25.0472549
## coef.var              NA   0.7793664   0.4292931
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Michoacán (Mexico)

# Attach the reader/table packages and read the landslide catalog CSV.
library(readr)
library(knitr)
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# whose country is Mexico and preview them.
colnames(df)[c(7, 9)] <- c("state", "city")
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Keep the landslides recorded in the Mexican state of Michoacán. The country
# condition is stated explicitly: filtering `df` on `state` alone would not
# carry over the country restriction applied in the previous step.
df_MX <- subset(df, country_name == "Mexico" & state == "Michoacán")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1470 2/5/10 NA NA Mexico MX Michoacán 78950 Heroica Zitácuaro 4.18059 NA 19.4000 -100.3667 (19.399999999999999, -100.36669999999999) Landslide Mudslide Medium Downpour NA NA 2 NA http://www.sott.net/articles/show/202533-Mexico-rainstorms-leave-at-least-15-dead
1471 2/5/10 NA NA Mexico MX Michoacán 2817 Jarácuaro 3.42740 NA 19.5670 -101.7098 (19.567, -101.7098) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.sott.net/articles/show/202533-Mexico-rainstorms-leave-at-least-15-dead
1472 2/5/10 NA NA Mexico MX Michoacán 4943 Mineral de Angangueo 1.69508 NA 19.6167 -100.3000 (19.616700000000002, -100.3) Landslide Mudslide Medium Downpour NA NA 3 NA http://www.aajtv.com/news/World/158635_detail.html
3835 7/22/11 NA NA Mexico MX Michoacán 2817 Jarácuaro 3.36905 NA 19.5805 -101.7023 (19.580500000000001, -101.70229999999999) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per city, bars placed side by side within the state.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance per city, piled on top of each other within the state.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: full-width bars of distance per city drawn on a polar y axis.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each row, plus the mid-point
# of its slice (cumulative percentage minus half the slice), used as the label
# position.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette;
# "Set4" is not a ColorBrewer palette name.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object with 12 observations per unit of time,
# starting at 2008, and preview its first values.
# NOTE(review): values are taken in df_MX's current row order -- confirm that
# this is the intended time order.
data <- ts(data = df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
1.69508
3.42740
3.36905
4.18059
# Inside labs(), `colour` only sets a legend title and does not colour
# anything, so the line colour is passed to autoplot() instead.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city: the names become the category labels of the chart.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                       
## Pareto chart analysis for distance
##                        Frequency Cum.Freq. Percentage Cum.Percent.
##   Heroica Zitácuaro      4.18059   4.18059   32.99045     32.99045
##   Jarácuaro              3.42740   7.60799   27.04678     60.03723
##   Jarácuaro              3.36905  10.97704   26.58632     86.62355
##   Mineral de Angangueo   1.69508  12.67212   13.37645    100.00000
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 7
##   2 | 
##   2 | 
##   3 | 44
##   3 | 
##   4 | 2
head(df_MX)
## # A tibble: 4 x 25
##      id date    time  continent_code country_name country_code state     population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  1472 2/5/10  <NA>  <NA>           Mexico       MX           Michoacán       4943
## 2  1471 2/5/10  <NA>  <NA>           Mexico       MX           Michoacán       2817
## 3  3835 7/22/11 <NA>  <NA>           Mexico       MX           Michoacán       2817
## 4  1470 2/5/10  <NA>  <NA>           Mexico       MX           Michoacán      78950
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1472 2/5/10 NA NA Mexico MX Michoacán 4943 Mineral de Angangueo 1.69508 NA 19.6167 -100.3000 (19.616700000000002, -100.3) Landslide Mudslide Medium Downpour NA NA 3 NA http://www.aajtv.com/news/World/158635_detail.html 13.37645 6.688226
1471 2/5/10 NA NA Mexico MX Michoacán 2817 Jarácuaro 3.42740 NA 19.5670 -101.7098 (19.567, -101.7098) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.sott.net/articles/show/202533-Mexico-rainstorms-leave-at-least-15-dead 27.04678 26.899840
3835 7/22/11 NA NA Mexico MX Michoacán 2817 Jarácuaro 3.36905 NA 19.5805 -101.7023 (19.580500000000001, -101.70229999999999) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA 26.58632 53.716387
1470 2/5/10 NA NA Mexico MX Michoacán 78950 Heroica Zitácuaro 4.18059 NA 19.4000 -100.3667 (19.399999999999999, -100.36669999999999) Landslide Mudslide Medium Downpour NA NA 2 NA http://www.sott.net/articles/show/202533-Mexico-rainstorms-leave-at-least-15-dead 32.99045 83.504773
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 7
##   2 | 
##   2 | 
##   3 | 44
##   3 | 
##   4 | 2
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 7
##   2 | 
##   2 | 
##   3 | 44
##   3 | 
##   4 | 2

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.69508 1 25 25 25 25
3.36905 1 25 25 50 50
3.4274 1 25 25 75 75
4.18059 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Build the plotting frame from the frequency table without its last ("Total")
# row. head(v, -1) replaces v[1:(length(v) - 1)], which would index with
# c(1, 0) when the table holds nothing but the "Total" row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.69508 1
3.36905 1
3.4274 1
4.18059 1
library(ggplot2)
# Bar chart of the count for each individual distance value; x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Class count from Sturges' rule, 1 + log2(n), with both roundings kept.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width (rounded up to a whole number) and the break points, running
# from the minimum to one width past the maximum.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 1.69508 2.69508 3.69508 4.69508
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break equals min(distance), so with the
# default (a, b] intervals the minimum observation belongs to no class and is
# silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.7,2.7] 0 0.0000000 0
(2.7,3.7] 2 0.6666667 2
(3.7,4.7] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(1.7,2.7]","(2.7,3.7]",..: 1 2 3
##  $ Freq    : int  0 2 1
##  $ Rel_Freq: num  0 0.667 0.333
##  $ Cum_Freq: int  0 2 3
# Two-column frame: class label (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.7,2.7] 0
(2.7,3.7] 2
(3.7,4.7] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Describe only numeric columns holding at least one observed value: text
# columns produce nothing but NA, and an all-NA column makes min()/max() warn
# and return Inf/-Inf.
stat.desc(
  Filter(function(col) is.numeric(col) && !all(is.na(col)), df_MX)
)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.470000e+03   NA   NA             NA           NA           NA
## max          3.835000e+03   NA   NA             NA           NA           NA
## range        2.365000e+03   NA   NA             NA           NA           NA
## sum          8.248000e+03   NA   NA             NA           NA           NA
## median       1.471500e+03   NA   NA             NA           NA           NA
## mean         2.062000e+03   NA   NA             NA           NA           NA
## SE.mean      5.910001e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.880826e+03   NA   NA             NA           NA           NA
## var          1.397125e+06   NA   NA             NA           NA           NA
## std.dev      1.182000e+03   NA   NA             NA           NA           NA
## coef.var     5.732300e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 2.817000e+03   NA  1.6950800                   NA
## max             NA 7.895000e+04   NA  4.1805900                   NA
## range           NA 7.613300e+04   NA  2.4855100                   NA
## sum             NA 8.952700e+04   NA 12.6721200                   NA
## median          NA 3.880000e+03   NA  3.3982250                   NA
## mean            NA 2.238175e+04   NA  3.1680300                   NA
## SE.mean         NA 1.886274e+04   NA  0.5246063                   NA
## CI.mean.0.95    NA 6.002966e+04   NA  1.6695314                   NA
## var             NA 1.423212e+09   NA  1.1008471                   NA
## std.dev         NA 3.772548e+04   NA  1.0492126                   NA
## coef.var        NA 1.685547e+00   NA  0.3311877                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.000000000  4.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          19.400000000 -1.017098e+02          NA          NA             NA
## max          19.616700000 -1.003000e+02          NA          NA             NA
## range         0.216700000  1.409800e+00          NA          NA             NA
## sum          78.164200000 -4.040788e+02          NA          NA             NA
## median       19.573750000 -1.010345e+02          NA          NA             NA
## mean         19.541050000 -1.010197e+02          NA          NA             NA
## SE.mean       0.048173065  3.965011e-01          NA          NA             NA
## CI.mean.0.95  0.153308192  1.261844e+00          NA          NA             NA
## var           0.009282577  6.288526e-01          NA          NA             NA
## std.dev       0.096346129  7.930023e-01          NA          NA             NA
## coef.var      0.004930448 -7.849977e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  4.0000000          NA
## nbr.null                 NA      NA         NA        0  1.0000000          NA
## nbr.na                   NA      NA         NA        4  0.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  3.0000000          NA
## range                    NA      NA         NA     -Inf  3.0000000          NA
## sum                      NA      NA         NA        0  6.0000000          NA
## median                   NA      NA         NA       NA  1.5000000          NA
## mean                     NA      NA         NA      NaN  1.5000000          NA
## SE.mean                  NA      NA         NA       NA  0.6454972          NA
## CI.mean.0.95             NA      NA         NA      NaN  2.0542603          NA
## var                      NA      NA         NA       NA  1.6666667          NA
## std.dev                  NA      NA         NA       NA  1.2909944          NA
## coef.var                 NA      NA         NA       NA  0.8606630          NA
##              source_link        prop         ypos
## nbr.val               NA   4.0000000    4.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  13.3764516    6.6882258
## max                   NA  32.9904546   83.5047727
## range                 NA  19.6140030   76.8165469
## sum                   NA 100.0000000  170.8092253
## median                NA  26.8165469   40.3081134
## mean                  NA  25.0000000   42.7023063
## SE.mean               NA   4.1398464   16.6655412
## CI.mean.0.95          NA  13.1748389   53.0371901
## var                   NA  68.5533130 1110.9610564
## std.dev               NA   8.2796928   33.3310824
## coef.var              NA   0.3311877    0.7805453
# Horizontal box plot of `data` (expected to be defined by an earlier chunk).
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Nayarit (Mexico)

# Load the reader and table packages, then fetch a fresh copy of the catalog
# CSV into `df`.
library(readr)
library(knitr)
catalog_url <- "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
df <- read_csv(file = catalog_url)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names `state` and `city`.
names(df)[c(7, 9)] <- c("state", "city")
# Rows recorded for Mexico, previewed as a table.
df_MX <- subset(df, subset = country_name == "Mexico")
knitr::kable(head(df_MX, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Keep only the rows whose state is Nayarit and preview them as a table.
df_MX <- df %>% subset(state == "Nayarit")
df_MX %>% head() %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
3639 6/22/11 NA NA Mexico MX Nayarit 4463 La Jarretadera 21.80060 NA 21.9814 -104.8381 (21.981400000000001, -104.8381) Landslide Landslide Medium Tropical cyclone Hurricane Beatriz NA 0 NA http://channel6newsonline.com/2011/06/missing-teen-found-dead-after-beatriz-slams-mexicos-pacific-coast/
3836 7/22/11 NA NA Mexico MX Nayarit 6361 Puga 19.41353 NA 21.7595 -104.8334 (21.759499999999999, -104.8334) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA

Gráfico de barras agrupados

library(ggplot2)
library(dplyr)
# Distance per state, one bar per city, bars placed side by side.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apilados

library(ggplot2)
library(dplyr)
# Distance per state, with the city segments stacked on top of each other.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent around the y axis, which yields a pie chart.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance per row (`prop`) and the mid-point of each
# slice on the stacked axis (`ypos`), which is where its label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# "Set4" is not a ColorBrewer palette and only triggers an
# "Unknown palette" warning; "Set3" is a valid qualitative palette.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as 12 observations per unit of
# time, starting at 2008, and preview the first values.
data <- ts(data = df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data, n = 6L))
x
19.41353
21.80060
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for a colour
# legend, not a line colour. Confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so each bar carries its city label.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                 
## Pareto chart analysis for distance
##                  Frequency Cum.Freq. Percentage Cum.Percent.
##   La Jarretadera  21.80060  21.80060   52.89594     52.89594
##   Puga            19.41353  41.21413   47.10406    100.00000
# Stem-and-leaf display of the distance column.
stem(df_MX[["distance"]])
## 
##   The decimal point is at the |
## 
##   19 | 4
##   19 | 
##   20 | 
##   20 | 
##   21 | 
##   21 | 8
# Print the first rows of the sorted data, including `prop` and `ypos`.
head(df_MX, n = 6L)
## # A tibble: 2 x 25
##      id date    time  continent_code country_name country_code state   population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1  3836 7/22/11 <NA>  <NA>           Mexico       MX           Nayarit       6361
## 2  3639 6/22/11 <NA>  <NA>           Mexico       MX           Nayarit       4463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_MX, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3836 7/22/11 NA NA Mexico MX Nayarit 6361 Puga 19.41353 NA 21.7595 -104.8334 (21.759499999999999, -104.8334) Landslide Landslide Medium Tropical cyclone Hurricane Dora NA 0 NA NA 47.10406 23.55203
3639 6/22/11 NA NA Mexico MX Nayarit 4463 La Jarretadera 21.80060 NA 21.9814 -104.8381 (21.981400000000001, -104.8381) Landslide Landslide Medium Tropical cyclone Hurricane Beatriz NA 0 NA http://channel6newsonline.com/2011/06/missing-teen-found-dead-after-beatriz-slams-mexicos-pacific-coast/ 52.89594 73.55203
# Stem-and-leaf display of the distance column (default scale).
stem(df_MX[["distance"]])
## 
##   The decimal point is at the |
## 
##   19 | 4
##   19 | 
##   20 | 
##   20 | 
##   21 | 
##   21 | 8
# Stem-and-leaf display again, this time requesting scale = 2.
stem(x = df_MX[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   19 | 4
##   19 | 
##   20 | 
##   20 | 
##   21 | 
##   21 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final total row.
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
19.41353 1 50 50 50 50
21.8006 1 50 50 100 100
Total 2 100 100 100 100
# Show the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build the plotting frame from the frequency table, leaving out the final
# "Total" row so that only observed values remain.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element; unlike v[1:(length(v) - 1)] it does not
# fall back to indexing c(1, 0) when the table holds nothing but the total.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
19.41353 1
21.8006 1
library(ggplot2)
# Bar chart of the value counts; x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- max(distance) - min(distance)
# Whole-number class width; at least 1 so that identical values (R == 0)
# still produce a valid sequence of breaks.
w <- max(ceiling(R / n_clases), 1)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 19.41353 21.41353 23.41353
# cut() builds (a, b] intervals, so without include.lowest = TRUE the minimum
# (which equals the first break) becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# base:: is spelled out because this session also holds a data frame named
# `table`.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(19.4,21.4] 0 0 0
(21.4,23.4] 1 1 1
# Show the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(19.4,21.4]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column plotting frame: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(19.4,21.4] 0
(21.4,23.4] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Describe only numeric columns that hold at least one value. Text columns
# produce all-NA summaries, and an all-missing numeric column (here
# `injuries`) makes min()/max() warn and return Inf/-Inf.
numeric_cols <- vapply(
  df_MX,
  function(col) is.numeric(col) && !all(is.na(col)),
  logical(1)
)
stat.desc(df_MX[numeric_cols])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.639000e+03   NA   NA             NA           NA           NA
## max          3.836000e+03   NA   NA             NA           NA           NA
## range        1.970000e+02   NA   NA             NA           NA           NA
## sum          7.475000e+03   NA   NA             NA           NA           NA
## median       3.737500e+03   NA   NA             NA           NA           NA
## mean         3.737500e+03   NA   NA             NA           NA           NA
## SE.mean      9.850000e+01   NA   NA             NA           NA           NA
## CI.mean.0.95 1.251561e+03   NA   NA             NA           NA           NA
## var          1.940450e+04   NA   NA             NA           NA           NA
## std.dev      1.393000e+02   NA   NA             NA           NA           NA
## coef.var     3.727091e-02   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 2.000000e+00   NA  2.00000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.00000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.00000000                   NA
## min             NA 4.463000e+03   NA 19.41353000                   NA
## max             NA 6.361000e+03   NA 21.80060000                   NA
## range           NA 1.898000e+03   NA  2.38707000                   NA
## sum             NA 1.082400e+04   NA 41.21413000                   NA
## median          NA 5.412000e+03   NA 20.60706500                   NA
## mean            NA 5.412000e+03   NA 20.60706500                   NA
## SE.mean         NA 9.490000e+02   NA  1.19353500                   NA
## CI.mean.0.95    NA 1.205819e+04   NA 15.16530007                   NA
## var             NA 1.801202e+06   NA  2.84905159                   NA
## std.dev         NA 1.342089e+03   NA  1.68791338                   NA
## coef.var        NA 2.479839e-01   NA  0.08190945                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          21.759500000 -1.048381e+02          NA          NA             NA
## max          21.981400000 -1.048334e+02          NA          NA             NA
## range         0.221900000  4.700000e-03          NA          NA             NA
## sum          43.740900000 -2.096715e+02          NA          NA             NA
## median       21.870450000 -1.048357e+02          NA          NA             NA
## mean         21.870450000 -1.048357e+02          NA          NA             NA
## SE.mean       0.110950000  2.350000e-03          NA          NA             NA
## CI.mean.0.95  1.409753415  2.985958e-02          NA          NA             NA
## var           0.024619805  1.104500e-05          NA          NA             NA
## std.dev       0.156906995  3.323402e-03          NA          NA             NA
## coef.var      0.007174383 -3.170104e-05          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          2          NA
## nbr.null                 NA      NA         NA        0          2          NA
## nbr.na                   NA      NA         NA        2          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link         prop         ypos
## nbr.val               NA   2.00000000    2.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA  47.10406358   23.5520318
## max                   NA  52.89593642   73.5520318
## range                 NA   5.79187284   50.0000000
## sum                   NA 100.00000000   97.1040636
## median                NA  50.00000000   48.5520318
## mean                  NA  50.00000000   48.5520318
## SE.mean               NA   2.89593642   25.0000000
## CI.mean.0.95          NA  36.79636103  317.6551184
## var                   NA  16.77289547 1250.0000000
## std.dev               NA   4.09547256   35.3553391
## coef.var              NA   0.08190945    0.7281948
# Horizontal box plot of `data`, the ts object built from the distances.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Oaxaca (Mexico)

# Load the reader and table packages, then fetch a fresh copy of the catalog
# CSV into `df`.
library(readr)
library(knitr)
catalog_url <- "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
df <- read_csv(file = catalog_url)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names `state` and `city`.
names(df)[c(7, 9)] <- c("state", "city")
# Rows recorded for Mexico, previewed as a table.
df_MX <- subset(df, subset = country_name == "Mexico")
knitr::kable(head(df_MX, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Keep only the rows whose state is Oaxaca and preview them as a table.
df_MX <- df %>% subset(state == "Oaxaca")
df_MX %>% head() %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2321 8/24/10 Morning NA Mexico MX Oaxaca 1099 Santiago Choapam 16.16369 NA 17.2568 -96.0289 (17.256799999999998, -96.028899999999993) Landslide Mudslide Medium Tropical cyclone Hurricane Frank NA 2 NA NA
2322 8/24/10 Morning NA Mexico MX Oaxaca 1701 Santa María la Asunción 0.63550 NA 18.1060 -96.8133 (18.106000000000002, -96.813299999999998) Landslide Mudslide Medium Tropical cyclone Hurricane Frank NA 2 NA NA
2514 9/28/10 4:00:00 NA Mexico MX Oaxaca 1813 Santo Domingo Tepuxtepec 15.74984 NA 17.0977 -96.0625 (17.0977, -96.0625) Landslide Mudslide Medium Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 11 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm
2524 9/30/10 Morning NA Mexico MX Oaxaca 1096 Tanetze de Zaragoza 24.67589 NA 17.1833 -96.1833 (17.183299999999999, -96.183300000000003) Landslide Mudslide Medium Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 2 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm
3795 7/17/11 NA NA Mexico MX Oaxaca 73648 Salina Cruz 0.78340 NA 16.1863 -95.1924 (16.186299999999999, -95.192400000000006) Landslide Mudslide Medium Downpour NA NA 0 NA http://latino.foxnews.com/latino/news/2011/07/18/3-die-180000-affected-by-rains-in-southern-mexico/
3796 7/17/11 NA NA Mexico MX Oaxaca 3294 San José Chiltepec 0.28905 NA 17.9484 -96.1674 (17.948399999999999, -96.167400000000001) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.thenews.com.pk/NewsDetail.aspx?ID=18895&title=Intense-rains-in-Mexico-affect-thousands

Gráfico de barras agrupados

library(ggplot2)
library(dplyr)
# Distance per state, one bar per city, bars placed side by side.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apilados

library(ggplot2)
library(dplyr)
# Distance per state, with the city segments stacked on top of each other.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent around the y axis, which yields a pie chart.
ggplot(data = df_MX, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance per row (`prop`) and the mid-point of each
# slice on the stacked axis (`ypos`), which is where its label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# "Set4" is not a ColorBrewer palette and only triggers an
# "Unknown palette" warning; "Set3" is a valid qualitative palette.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as 12 observations per unit of
# time, starting at 2008, and preview the first values.
data <- ts(data = df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data, n = 6L))
x
24.67589
15.74984
16.16369
0.63550
0.28905
11.83490
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for a colour
# legend, not a line colour. Confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so each bar carries its city label.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                           
## Pareto chart analysis for distance
##                              Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Tanetze de Zaragoza       24.6758900  24.6758900  29.6060776   29.6060776
##   Santiago Choapam          16.1636900  40.8395800  19.3931591   48.9992367
##   Santo Domingo Tepuxtepec  15.7498400  56.5894200  18.8966228   67.8958595
##   San Andrés Huayapam       11.8349000  68.4243200  14.1994865   82.0953460
##   Paso Real de Sarabia       9.5682900  77.9926100  11.4800129   93.5753589
##   Oaxaca                     3.6468200  81.6394300   4.3754465   97.9508054
##   Salina Cruz                0.7834000  82.4228300   0.9399216   98.8907270
##   Santa María la Asunción    0.6355000  83.0583300   0.7624715   99.6531985
##   San José Chiltepec         0.2890500  83.3473800   0.3468015  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0114
##   0 | 
##   1 | 02
##   1 | 66
##   2 | 
##   2 | 5
# Print the first rows of the sorted data, including `prop` and `ypos`.
head(df_MX, n = 6L)
## # A tibble: 6 x 25
##      id date    time    continent_code country_name country_code state  population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  2524 9/30/10 Morning <NA>           Mexico       MX           Oaxaca       1096
## 2  2514 9/28/10 4:00:00 <NA>           Mexico       MX           Oaxaca       1813
## 3  2321 8/24/10 Morning <NA>           Mexico       MX           Oaxaca       1099
## 4  2322 8/24/10 Morning <NA>           Mexico       MX           Oaxaca       1701
## 5  3796 7/17/11 <NA>    <NA>           Mexico       MX           Oaxaca       3294
## 6  5526 9/14/13 <NA>    <NA>           Mexico       MX           Oaxaca       3630
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_MX, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2524 9/30/10 Morning NA Mexico MX Oaxaca 1096 Tanetze de Zaragoza 24.67589 NA 17.1833 -96.1833 (17.183299999999999, -96.183300000000003) Landslide Mudslide Medium Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 2 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm 29.6060776 14.80304
2514 9/28/10 4:00:00 NA Mexico MX Oaxaca 1813 Santo Domingo Tepuxtepec 15.74984 NA 17.0977 -96.0625 (17.0977, -96.0625) Landslide Mudslide Medium Tropical cyclone Hurricane Karl and Tropical Storm Matthew NA 11 NA http://technews.tmcnet.com/topics/associated-press/articles/106209-death-toll-southern-mexico-mudslides-up-32.htm 18.8966228 39.05439
2321 8/24/10 Morning NA Mexico MX Oaxaca 1099 Santiago Choapam 16.16369 NA 17.2568 -96.0289 (17.256799999999998, -96.028899999999993) Landslide Mudslide Medium Tropical cyclone Hurricane Frank NA 2 NA NA 19.3931591 58.19928
2322 8/24/10 Morning NA Mexico MX Oaxaca 1701 Santa María la Asunción 0.63550 NA 18.1060 -96.8133 (18.106000000000002, -96.813299999999998) Landslide Mudslide Medium Tropical cyclone Hurricane Frank NA 2 NA NA 0.7624715 68.27710
3796 7/17/11 NA NA Mexico MX Oaxaca 3294 San José Chiltepec 0.28905 NA 17.9484 -96.1674 (17.948399999999999, -96.167400000000001) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.thenews.com.pk/NewsDetail.aspx?ID=18895&title=Intense-rains-in-Mexico-affect-thousands 0.3468015 68.83173
5526 9/14/13 NA NA Mexico MX Oaxaca 3630 San Andrés Huayapam 11.83490 NA 17.1933 -96.6045 (17.193300000000001, -96.604500000000002) Landslide Landslide Very_large Tropical cyclone Ingrid NA 1 www.recordonline.com http://www.recordonline.com/apps/pbcs.dll/article?AID=/20130915/NEWS/130919814/-1/SITEMAP 14.1994865 76.10488
# Stem-and-leaf display of the distance column (default scale).
stem(df_MX[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0114
##   0 | 
##   1 | 02
##   1 | 66
##   2 | 
##   2 | 5
# Stem-and-leaf display again, this time requesting scale = 2.
stem(x = df_MX[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0114
##   0 | 
##   1 | 02
##   1 | 66
##   2 | 
##   2 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final total row.
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.28905 1 11.1 11.1 11.1 11.1
0.6355 1 11.1 11.1 22.2 22.2
0.7834 1 11.1 11.1 33.3 33.3
3.64682 1 11.1 11.1 44.4 44.4
9.56829 1 11.1 11.1 55.6 55.6
11.8349 1 11.1 11.1 66.7 66.7
15.74984 1 11.1 11.1 77.8 77.8
16.16369 1 11.1 11.1 88.9 88.9
24.67589 1 11.1 11.1 100.0 100.0
Total 9 100.0 100.0 100.0 100.0
# Show the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  10 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 9
##  $ %      : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ val%   : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ %cum   : num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
##  $ val%cum: num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
# Build the plotting frame from the frequency table, leaving out the final
# "Total" row so that only observed values remain.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element; unlike v[1:(length(v) - 1)] it does not
# fall back to indexing c(1, 0) when the table holds nothing but the total.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.28905 1
0.6355 1
0.7834 1
3.64682 1
9.56829 1
11.8349 1
15.74984 1
16.16369 1
24.67589 1
library(ggplot2)
# Bar chart of the value counts; x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- max(distance) - min(distance)
# Whole-number class width; at least 1 so that identical values (R == 0)
# still produce a valid sequence of breaks.
w <- max(ceiling(R / n_clases), 1)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.28905  5.28905 10.28905 15.28905 20.28905 25.28905
# cut() builds (a, b] intervals, so without include.lowest = TRUE the minimum
# (which equals the first break) becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# base:: is spelled out because this session also holds a data frame named
# `table`.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.289,5.29] 3 0.375 3
(5.29,10.3] 1 0.125 4
(10.3,15.3] 1 0.125 5
(15.3,20.3] 2 0.250 7
(20.3,25.3] 1 0.125 8
# Show the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.289,5.29]",..: 1 2 3 4 5
##  $ Freq    : int  3 1 1 2 1
##  $ Rel_Freq: num  0.375 0.125 0.125 0.25 0.125
##  $ Cum_Freq: int  3 4 5 7 8
# Two-column plotting frame: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.289,5.29] 3
(5.29,10.3] 1
(10.3,15.3] 1
(15.3,20.3] 2
(20.3,25.3] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Describe only numeric columns that hold at least one value. Text columns
# produce all-NA summaries, and an all-missing numeric column makes
# min()/max() warn and return Inf/-Inf.
numeric_cols <- vapply(
  df_MX,
  function(col) is.numeric(col) && !all(is.na(col)),
  logical(1)
)
stat.desc(df_MX[numeric_cols])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      9.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.321000e+03   NA   NA             NA           NA           NA
## max          5.526000e+03   NA   NA             NA           NA           NA
## range        3.205000e+03   NA   NA             NA           NA           NA
## sum          3.100700e+04   NA   NA             NA           NA           NA
## median       3.795000e+03   NA   NA             NA           NA           NA
## mean         3.445222e+03   NA   NA             NA           NA           NA
## SE.mean      3.703370e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 8.539986e+02   NA   NA             NA           NA           NA
## var          1.234345e+06   NA   NA             NA           NA           NA
## std.dev      1.111011e+03   NA   NA             NA           NA           NA
## coef.var     3.224787e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 9.000000e+00   NA  9.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.019000e+03   NA  0.2890500                   NA
## max             NA 2.625660e+05   NA 24.6758900                   NA
## range           NA 2.615470e+05   NA 24.3868400                   NA
## sum             NA 3.498660e+05   NA 83.3473800                   NA
## median          NA 1.813000e+03   NA  9.5682900                   NA
## mean            NA 3.887400e+04   NA  9.2608200                   NA
## SE.mean         NA 2.905882e+04   NA  2.8683480                   NA
## CI.mean.0.95    NA 6.700977e+04   NA  6.6144223                   NA
## var             NA 7.599737e+09   NA 74.0467814                   NA
## std.dev         NA 8.717647e+04   NA  8.6050440                   NA
## coef.var        NA 2.242539e+00   NA  0.9291881                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        9.00000000  9.000000e+00          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           16.18630000 -9.681330e+01          NA          NA             NA
## max           18.10600000 -9.500000e+01          NA          NA             NA
## range          1.91970000  1.813300e+00          NA          NA             NA
## sum          155.05720000 -8.648023e+02          NA          NA             NA
## median        17.18330000 -9.616740e+01          NA          NA             NA
## mean          17.22857778 -9.608914e+01          NA          NA             NA
## SE.mean        0.18515476  2.120154e-01          NA          NA             NA
## CI.mean.0.95   0.42696764  4.889083e-01          NA          NA             NA
## var            0.30854056  4.045547e-01          NA          NA             NA
## std.dev        0.55546428  6.360461e-01          NA          NA             NA
## coef.var       0.03224087 -6.619334e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   9.000000          NA
## nbr.null                 NA      NA         NA        0   2.000000          NA
## nbr.na                   NA      NA         NA        9   0.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf  11.000000          NA
## range                    NA      NA         NA     -Inf  11.000000          NA
## sum                      NA      NA         NA        0  23.000000          NA
## median                   NA      NA         NA       NA   2.000000          NA
## mean                     NA      NA         NA      NaN   2.555556          NA
## SE.mean                  NA      NA         NA       NA   1.106937          NA
## CI.mean.0.95             NA      NA         NA      NaN   2.552600          NA
## var                      NA      NA         NA       NA  11.027778          NA
## std.dev                  NA      NA         NA       NA   3.320810          NA
## coef.var                 NA      NA         NA       NA   1.299447          NA
##              source_link        prop        ypos
## nbr.val               NA   9.0000000   9.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.3468015  14.8030388
## max                   NA  29.6060776  97.8122768
## range                 NA  29.2592761  83.0092380
## sum                   NA 100.0000000 596.6418141
## median                NA  11.4800129  68.8317317
## mean                  NA  11.1111111  66.2935349
## SE.mean               NA   3.4414375   8.6742485
## CI.mean.0.95          NA   7.9359691  20.0028529
## var                   NA 106.5914281 677.1832815
## std.dev               NA  10.3243125  26.0227455
## coef.var              NA   0.9291881   0.3925382
# Horizontal green box plot of `data`.
# NOTE(review): `data` is presumably the time series built from
# df_MX$distance earlier in this section -- confirm.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puebla (México)

library(readr)
library(knitr)
# Reload the full catalog from the remote CSV. This overwrites `df`, which the
# previous section had reused as a small plotting frame.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the records whose
# country is Mexico and preview the first rows.
colnames(df)[c(7, 9)] <- c("state", "city")
df_MX <- dplyr::filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Narrow the data to the state of Puebla. The country is part of the condition
# so that a same-named state in another country cannot leak in; the original
# filtered the whole catalog by state name alone, discarding the Mexico filter
# applied just before.
df_MX <- subset(df, country_name == "Mexico" & state == "Puebla")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
5528 9/15/13 NA NA Mexico MX Puebla 9720 Tlatlauquitepec 0.30326 NA 19.8490 -97.4997 (19.849, -97.499700000000004) Landslide Landslide Very_large Tropical cyclone Ingrid 1 3 www.recordonline.com http://www.recordonline.com/apps/pbcs.dll/article?AID=/20130915/NEWS/130919814/-1/SITEMAP
5539 9/16/13 NA NA Mexico MX Puebla 1123 Atzala 5.24855 NA 18.5938 -98.5539 (18.593800000000002, -98.553899999999999) Landslide Landslide Medium Tropical cyclone Manyi NA 0 america.aljazeera.com http://america.aljazeera.com/articles/2013/11/19/after-landslide-poorestofthepoorleftoutinthecoldinmexico.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance on the y-axis, bars placed side by side within the
# state and filled by city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances are piled on top of each other within the state,
# each segment filled by city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a single full-width stacked bar of distances, filled by
# city and wrapped into polar coordinates along the y-axis.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each record (prop) and the
# mid-point of its slice on the cumulative percentage scale (ypos), which is
# where the slice label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: stacked bar of the percentages wrapped into polar
# coordinates. "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown
# palette Set4"), so the qualitative palette "Set3" is used instead.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a time series with 12 observations per year that
# starts in 2008, then preview its first values.
# NOTE(review): the observations are taken in row order, not by the `date`
# column -- confirm that a regular monthly series is really intended.
data <- ts(df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
1.68294
7.93258
0.30326
5.24855
# Line plot of the series. The colour is passed to autoplot() so that it is
# applied to the line itself; inside labs(), `colour = "green"` only sets the
# title of a colour legend and does not colour anything.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (names = cities) so that each Pareto bar is
# labelled with its city.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                             
## Pareto chart analysis for distance
##                               Frequency  Cum.Freq. Percentage Cum.Percent.
##   Xaltepuxtla                  7.932580   7.932580  52.300438    52.300438
##   Atzala                       5.248550  13.181130  34.604311    86.904749
##   Xochitlaxco (San Baltazar)   1.682940  14.864070  11.095822    98.000571
##   Tlatlauquitepec              0.303260  15.167330   1.999429   100.000000
# Stem-and-leaf display of the distances.
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 37
##   2 | 
##   4 | 2
##   6 | 9
# Print the first rows; the tibble now also carries the prop and ypos columns.
head(df_MX)
## # A tibble: 4 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1   201 8/23/07 <NA>  <NA>           Mexico       MX           Puebla       1414
## 2   284 9/28/07 <NA>  <NA>           Mexico       MX           Puebla       3761
## 3  5528 9/15/13 <NA>  <NA>           Mexico       MX           Puebla       9720
## 4  5539 9/16/13 <NA>  <NA>           Mexico       MX           Puebla       1123
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452 11.095822 5.547911
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207 52.300438 37.246041
5528 9/15/13 NA NA Mexico MX Puebla 9720 Tlatlauquitepec 0.30326 NA 19.8490 -97.4997 (19.849, -97.499700000000004) Landslide Landslide Very_large Tropical cyclone Ingrid 1 3 www.recordonline.com http://www.recordonline.com/apps/pbcs.dll/article?AID=/20130915/NEWS/130919814/-1/SITEMAP 1.999429 64.395975
5539 9/16/13 NA NA Mexico MX Puebla 1123 Atzala 5.24855 NA 18.5938 -98.5539 (18.593800000000002, -98.553899999999999) Landslide Landslide Medium Tropical cyclone Manyi NA 0 america.aljazeera.com http://america.aljazeera.com/articles/2013/11/19/after-landslide-poorestofthepoorleftoutinthecoldinmexico.html 34.604311 82.697845
# Stem-and-leaf display of the distances with the default scale.
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 37
##   2 | 
##   4 | 2
##   6 | 9
# Same display with scale = 2, which spreads the values over more stems.
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 7
##   2 | 
##   3 | 
##   4 | 
##   5 | 2
##   6 | 
##   7 | 9

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in a variable called `table`, the same
# name as base R's table() function; later lines rely on this name.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.30326 1 25 25 25 25
1.68294 1 25 25 50 50
5.24855 1 25 25 75 75
7.93258 1 25 25 100 100
Total 4 100 100 100 100
# Show the structure of the frequency table returned by questionr::freq().
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Build the plotting frame from the frequency table, leaving out the trailing
# "Total" row added by freq(total = TRUE). head(v, -1) is used instead of
# v[1:(length(v) - 1)] because 1:0 counts down (c(1, 0)): with only the Total
# row present, the index form would keep that row instead of returning nothing.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.30326 1
1.68294 1
5.24855 1
7.93258 1
library(ggplot2)
# Bar chart of the per-value frequency table: one bar per distinct distance,
# with the x-axis labels rotated 90 degrees so they do not overlap.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule (1 + log2(n)). The rounding keeps the
# script's convention: use the ceiling unless it is even, otherwise the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split over the classes, rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and advance by w, never past max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.30326 3.30326 6.30326 9.30326
# Bin the distances into the classes delimited by `bins`. The first break is
# min(distance) and cut() builds left-open intervals (a, b] by default, so
# without include.lowest = TRUE the smallest observation belongs to no class,
# turns into NA and silently disappears from the frequency table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.303,3.3] 1 0.3333333 1
(3.3,6.3] 1 0.3333333 2
(6.3,9.3] 1 0.3333333 3
# Show the structure (column names and types) of the grouped frequency table.
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.303,3.3]",..: 1 2 3
##  $ Freq    : int  1 1 1
##  $ Rel_Freq: num  0.333 0.333 0.333
##  $ Cum_Freq: int  1 2 3
# Two-column plotting frame: class interval (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.303,3.3] 1
(3.3,6.3] 1
(6.3,9.3] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics are only meaningful for numeric columns. Passing the
# character columns (date, state, city, ...) just adds all-NA columns to the
# result, so the summary is restricted to the numeric ones.
stat.desc(df_MX[vapply(df_MX, is.numeric, logical(1))])
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.010000e+02   NA   NA             NA           NA           NA
## max          5.539000e+03   NA   NA             NA           NA           NA
## range        5.338000e+03   NA   NA             NA           NA           NA
## sum          1.155200e+04   NA   NA             NA           NA           NA
## median       2.906000e+03   NA   NA             NA           NA           NA
## mean         2.888000e+03   NA   NA             NA           NA           NA
## SE.mean      1.527476e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 4.861110e+03   NA   NA             NA           NA           NA
## var          9.332729e+06   NA   NA             NA           NA           NA
## std.dev      3.054951e+03   NA   NA             NA           NA           NA
## coef.var     1.057809e+00   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.123000e+03   NA  0.3032600                   NA
## max             NA 9.720000e+03   NA  7.9325800                   NA
## range           NA 8.597000e+03   NA  7.6293200                   NA
## sum             NA 1.601800e+04   NA 15.1673300                   NA
## median          NA 2.587500e+03   NA  3.4657450                   NA
## mean            NA 4.004500e+03   NA  3.7918325                   NA
## SE.mean         NA 1.994575e+03   NA  1.7292913                   NA
## CI.mean.0.95    NA 6.347629e+03   NA  5.5033768                   NA
## var             NA 1.591332e+07   NA 11.9617938                   NA
## std.dev         NA 3.989150e+03   NA  3.4585826                   NA
## coef.var        NA 9.961669e-01   NA  0.9121138                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.00000000  4.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          18.59380000 -9.855390e+01          NA          NA             NA
## max          20.20000000 -9.749970e+01          NA          NA             NA
## range         1.60620000  1.054200e+00          NA          NA             NA
## sum          78.57610000 -3.918036e+02          NA          NA             NA
## median       19.89115000 -9.787500e+01          NA          NA             NA
## mean         19.64402500 -9.795090e+01          NA          NA             NA
## SE.mean       0.35797760  2.198412e-01          NA          NA             NA
## CI.mean.0.95  1.13924448  6.996329e-01          NA          NA             NA
## var           0.51259184  1.933207e-01          NA          NA             NA
## std.dev       0.71595520  4.396825e-01          NA          NA             NA
## coef.var      0.03644646 -4.488805e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   4.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        3   0.000000          NA
## min                      NA      NA         NA        1   0.000000          NA
## max                      NA      NA         NA        1   6.000000          NA
## range                    NA      NA         NA        0   6.000000          NA
## sum                      NA      NA         NA        1  10.000000          NA
## median                   NA      NA         NA        1   2.000000          NA
## mean                     NA      NA         NA        1   2.500000          NA
## SE.mean                  NA      NA         NA       NA   1.322876          NA
## CI.mean.0.95             NA      NA         NA      NaN   4.209981          NA
## var                      NA      NA         NA       NA   7.000000          NA
## std.dev                  NA      NA         NA       NA   2.645751          NA
## coef.var                 NA      NA         NA       NA   1.058301          NA
##              source_link        prop         ypos
## nbr.val               NA   4.0000000    4.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   1.9994290    5.5479112
## max                   NA  52.3004378   82.6978446
## range                 NA  50.3010088   77.1499334
## sum                   NA 100.0000000  189.8877719
## median                NA  22.8500666   50.8210080
## mean                  NA  25.0000000   47.4719430
## SE.mean               NA  11.4014221   16.8064484
## CI.mean.0.95          NA  36.2844136   53.4856197
## var                   NA 519.9697034 1129.8268351
## std.dev               NA  22.8028442   33.6128969
## coef.var              NA   0.9121138    0.7080582
# Horizontal green box plot of `data`, the time series built from
# df_MX$distance in the time-series chunk of this section.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Tabasco (México)

library(readr)
library(knitr)
# Reload the full catalog from the remote CSV. This overwrites `df`, which the
# previous section had reused as a small plotting frame.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the records whose
# country is Mexico and preview the first rows.
colnames(df)[c(7, 9)] <- c("state", "city")
df_MX <- dplyr::filter(df, country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Narrow the data to the state of Tabasco. The country is part of the
# condition so that a same-named state in another country cannot leak in; the
# original filtered the whole catalog by state name alone, discarding the
# Mexico filter applied just before.
df_MX <- subset(df, country_name == "Mexico" & state == "Tabasco")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
5693 12/16/13 NA NA Mexico MX Tabasco 1732 Olcuatitán 4.32007 NA 18.1563 -92.9790 (18.156300000000002, -92.978999999999999) Landslide Landslide Medium Continuous rain NA NA NA www.aljazeera.com http://www.aljazeera.com/weather/2013/12/rain-snow-floods-hit-mexico-20131229105245837299.html
7430 10/7/15 NA NA Mexico MX Tabasco 1042 Libertad 4.81680 Unknown 17.3768 -92.7439 (17.376799999999999, -92.743899999999996) Landslide Landslide Medium Rain NA 0 0 Tabasco Hoy http://www.tabascohoy.com/2/notas/273505/dejan-lluvias-deslizamientos-e-inundaciones-en-la-sierra
7431 10/7/15 NA NA Mexico MX Tabasco 2500 Huapinol 2.15703 Unknown 17.9100 -92.8956 (17.91, -92.895600000000002) Landslide Landslide Medium Rain NA 0 0 Tabasco Hoy http://www.tabascohoy.com/2/notas/273505/dejan-lluvias-deslizamientos-e-inundaciones-en-la-sierra
7469 12/16/15 NA NA Mexico MX Tabasco 1732 Olcuatitán 8.93271 Below road 18.2341 -92.8901 (18.234100000000002, -92.890100000000004) Landslide Riverbank collapse Medium Flooding NA 0 0 Diario Presente http://www.diariopresente.com.mx/noticia/tabasco/145872/deslave-carretera-punto-dejar-incomunicados-comunidades-nacajuca/
7518 10/15/15 NA NA Mexico MX Tabasco 32415 Tenosique de Pino Suárez 15.22260 Below road 17.4054 -91.2988 (17.4054, -91.2988) Landslide Landslide Medium Flooding NA 0 0 ver tiente global http://vertienteglobal.com/?p=49401

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance on the y-axis, bars placed side by side within the
# state and filled by city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances are piled on top of each other within the state,
# each segment filled by city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a single full-width stacked bar of distances, filled by
# city and wrapped into polar coordinates along the y-axis.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each record (prop) and the
# mid-point of its slice on the cumulative percentage scale (ypos), which is
# where the slice label is drawn.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: stacked bar of the percentages wrapped into polar
# coordinates. "Set4" is not a ColorBrewer palette (ggplot2 warned "Unknown
# palette Set4"), so the qualitative palette "Set3" is used instead.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a time series with 12 observations per year that
# starts in 2008, then preview its first values.
# NOTE(review): the observations are taken in row order, not by the `date`
# column -- confirm that a regular monthly series is really intended.
data <- ts(df_MX$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
15.22260
4.32007
8.93271
4.81680
2.15703
4.19108
# Line plot of the series. The colour is passed to autoplot() so that it is
# applied to the line itself; inside labs(), `colour = "green"` only sets the
# title of a colour legend and does not colour anything.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (names = cities) so that each Pareto bar is
# labelled with its city.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                           
## Pareto chart analysis for distance
##                             Frequency  Cum.Freq. Percentage Cum.Percent.
##   Tenosique de Pino Suárez  15.222600  15.222600  38.401838    38.401838
##   Olcuatitán                 8.932710  24.155310  22.534421    60.936260
##   Libertad                   4.816800  28.972110  12.151273    73.087533
##   Olcuatitán                 4.320070  33.292180  10.898180    83.985713
##   Buenavista                 4.191080  37.483260  10.572778    94.558491
##   Huapinol                   2.157030  39.640290   5.441509   100.000000
# Stem-and-leaf display of the distances.
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 244
##   0 | 59
##   1 | 
##   1 | 5
# Print the first rows; the tibble now also carries the prop and ypos columns.
head(df_MX)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state   population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1  7518 10/15/15 <NA>  <NA>           Mexico       MX           Tabasco      32415
## 2  5693 12/16/13 <NA>  <NA>           Mexico       MX           Tabasco       1732
## 3  7469 12/16/15 <NA>  <NA>           Mexico       MX           Tabasco       1732
## 4  7430 10/7/15  <NA>  <NA>           Mexico       MX           Tabasco       1042
## 5  7431 10/7/15  <NA>  <NA>           Mexico       MX           Tabasco       2500
## 6   342 10/31/07 <NA>  <NA>           Mexico       MX           Tabasco       4468
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7518 10/15/15 NA NA Mexico MX Tabasco 32415 Tenosique de Pino Suárez 15.22260 Below road 17.4054 -91.2988 (17.4054, -91.2988) Landslide Landslide Medium Flooding NA 0 0 ver tiente global http://vertienteglobal.com/?p=49401 38.401838 19.20092
5693 12/16/13 NA NA Mexico MX Tabasco 1732 Olcuatitán 4.32007 NA 18.1563 -92.9790 (18.156300000000002, -92.978999999999999) Landslide Landslide Medium Continuous rain NA NA NA www.aljazeera.com http://www.aljazeera.com/weather/2013/12/rain-snow-floods-hit-mexico-20131229105245837299.html 10.898180 43.85093
7469 12/16/15 NA NA Mexico MX Tabasco 1732 Olcuatitán 8.93271 Below road 18.2341 -92.8901 (18.234100000000002, -92.890100000000004) Landslide Riverbank collapse Medium Flooding NA 0 0 Diario Presente http://www.diariopresente.com.mx/noticia/tabasco/145872/deslave-carretera-punto-dejar-incomunicados-comunidades-nacajuca/ 22.534421 60.56723
7430 10/7/15 NA NA Mexico MX Tabasco 1042 Libertad 4.81680 Unknown 17.3768 -92.7439 (17.376799999999999, -92.743899999999996) Landslide Landslide Medium Rain NA 0 0 Tabasco Hoy http://www.tabascohoy.com/2/notas/273505/dejan-lluvias-deslizamientos-e-inundaciones-en-la-sierra 12.151273 77.91008
7431 10/7/15 NA NA Mexico MX Tabasco 2500 Huapinol 2.15703 Unknown 17.9100 -92.8956 (17.91, -92.895600000000002) Landslide Landslide Medium Rain NA 0 0 Tabasco Hoy http://www.tabascohoy.com/2/notas/273505/dejan-lluvias-deslizamientos-e-inundaciones-en-la-sierra 5.441509 86.70647
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453 10.572778 94.71361
# Stem-and-leaf display of the distances with the default scale.
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 244
##   0 | 59
##   1 | 
##   1 | 5
# Same display with scale = 2, which spreads the values over more stems.
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    2 | 2
##    4 | 238
##    6 | 
##    8 | 9
##   10 | 
##   12 | 
##   14 | 2

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distances, sorted by decreasing count, with
# cumulative percentages and a trailing "Total" row.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
2.15703 1 16.7 16.7 16.7 16.7
4.19108 1 16.7 16.7 33.3 33.3
4.32007 1 16.7 16.7 50.0 50.0
4.8168 1 16.7 16.7 66.7 66.7
8.93271 1 16.7 16.7 83.3 83.3
15.2226 1 16.7 16.7 100.0 100.0
Total 6 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  7 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 6
##  $ %      : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ val%   : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ %cum   : num  16.7 33.3 50 66.7 83.3 100 100
##  $ val%cum: num  16.7 33.3 50 66.7 83.3 100 100
# Build a plain two-column data frame (value, count) from the questionr
# frequency table so it can be plotted.
x <- row.names(table)
y <- table$n
# freq(..., total = TRUE) appends a "Total" row; drop it with head(, -1).
# Unlike x[1:(length(x) - 1)], this gives an empty vector (rather than
# re-selecting the first element) when the total row is the only row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.15703 1
4.19108 1
4.32007 1
4.8168 1
8.93271 1
15.2226 1
library(ggplot2)
# Bar chart of the ungrouped frequency table built above (one bar per row).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n). When the ceiling of
# that value is even, the floor is used instead.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Integer class width covering the range, then the break points starting at
# the smallest observation.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  2.15703  7.15703 12.15703 17.15703
# include.lowest = TRUE closes the first interval on the left. bins starts at
# min(distance), so with cut()'s default (a, b] intervals the smallest
# observation fell outside every class and was silently dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[2.16,7.16] 4 0.6666667 4
(7.16,12.2] 1 0.1666667 5
(12.2,17.2] 1 0.1666667 6
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[2.16,7.16]",..: 1 2 3
##  $ Freq    : int  4 1 1
##  $ Rel_Freq: num  0.667 0.167 0.167
##  $ Cum_Freq: int  4 5 6
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[2.16,7.16] 4
(7.16,12.2] 1
(12.2,17.2] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_MX)
##                        id date time continent_code country_name country_code
## nbr.val      6.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.420000e+02   NA   NA             NA           NA           NA
## max          7.518000e+03   NA   NA             NA           NA           NA
## range        7.176000e+03   NA   NA             NA           NA           NA
## sum          3.588300e+04   NA   NA             NA           NA           NA
## median       7.430500e+03   NA   NA             NA           NA           NA
## mean         5.980500e+03   NA   NA             NA           NA           NA
## SE.mean      1.164186e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 2.992636e+03   NA   NA             NA           NA           NA
## var          8.131976e+06   NA   NA             NA           NA           NA
## std.dev      2.851662e+03   NA   NA             NA           NA           NA
## coef.var     4.768267e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 6.000000e+00   NA  6.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.042000e+03   NA  2.1570300                   NA
## max             NA 3.241500e+04   NA 15.2226000                   NA
## range           NA 3.137300e+04   NA 13.0655700                   NA
## sum             NA 4.388900e+04   NA 39.6402900                   NA
## median          NA 2.116000e+03   NA  4.5684350                   NA
## mean            NA 7.314833e+03   NA  6.6067150                   NA
## SE.mean         NA 5.043114e+03   NA  1.9468886                   NA
## CI.mean.0.95    NA 1.296374e+04   NA  5.0046365                   NA
## var             NA 1.525980e+08   NA 22.7422514                   NA
## std.dev         NA 1.235306e+04   NA  4.7688837                   NA
## coef.var        NA 1.688768e+00   NA  0.7218237                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        6.00000000  6.000000e+00          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           17.37680000 -9.297900e+01          NA          NA             NA
## max           18.23410000 -9.129880e+01          NA          NA             NA
## range          0.85730000  1.680200e+00          NA          NA             NA
## sum          107.03190000 -5.553608e+02          NA          NA             NA
## median        17.92965000 -9.281700e+01          NA          NA             NA
## mean          17.83865000 -9.256013e+01          NA          NA             NA
## SE.mean        0.15006970  2.595921e-01          NA          NA             NA
## CI.mean.0.95   0.38576645  6.673029e-01          NA          NA             NA
## var            0.13512549  4.043285e-01          NA          NA             NA
## std.dev        0.36759419  6.358683e-01          NA          NA             NA
## coef.var       0.02060662 -6.869786e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4   5.000000          NA
## nbr.null                 NA      NA         NA        4   4.000000          NA
## nbr.na                   NA      NA         NA        2   1.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  18.000000          NA
## range                    NA      NA         NA        0  18.000000          NA
## sum                      NA      NA         NA        0  18.000000          NA
## median                   NA      NA         NA        0   0.000000          NA
## mean                     NA      NA         NA        0   3.600000          NA
## SE.mean                  NA      NA         NA        0   3.600000          NA
## CI.mean.0.95             NA      NA         NA        0   9.995202          NA
## var                      NA      NA         NA        0  64.800000          NA
## std.dev                  NA      NA         NA        0   8.049845          NA
## coef.var                 NA      NA         NA      NaN   2.236068          NA
##              source_link        prop        ypos
## nbr.val               NA   6.0000000   6.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   5.4415091  19.2009191
## max                   NA  38.4018381  94.7136108
## range                 NA  32.9603290  75.5126918
## sum                   NA 100.0000000 382.9492292
## median                NA  11.5247265  69.2386521
## mean                  NA  16.6666667  63.8248715
## SE.mean               NA   4.9113884  11.6575332
## CI.mean.0.95          NA  12.6251258  29.9666431
## var                   NA 144.7304161 815.3884811
## std.dev               NA  12.0303955  28.5550080
## coef.var              NA   0.7218237   0.4473962
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para The Federal District (Mexico)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, keep only the rows for Mexico and
# preview the first ones.
names(df)[c(7, 9)] <- c("state", "city")
df_MX <- df %>%
  subset(country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Restrict to Mexico AND the state of interest. Filtering `df` on `state`
# alone ignores the country, so a same-named state in another country would
# be mixed into the result.
df_MX <- subset(
  df,
  country_name == "Mexico" & state == "The Federal District"
)
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
975 1/22/09 NA NA Mexico MX The Federal District 12294193 Mexico City 0.15208 NA 19.4271 -99.1276 (19.427099999999999, -99.127600000000001) Landslide Landslide Medium Downpour NA NA 2 NA NA
1167 9/15/09 NA NA Mexico MX The Federal District 228927 Magdalena Contreras 3.49173 NA 19.3161 -99.2398 (19.316099999999999, -99.239800000000002) Landslide Mudslide Medium Downpour NA NA 3 NA http://edition.cnn.com/2009/WORLD/americas/09/16/mexico.mudslide.deaths/
2191 8/2/10 NA NA Mexico MX The Federal District 12294193 Mexico City 0.15208 NA 19.4270 -99.1276 (19.427, -99.127600000000001) Landslide Mudslide Medium Construction NA NA 2 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on the y axis, one bar per record, coloured by
# city and grouped by state.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distances piled on top of each other per state, coloured by
# city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar whose y axis is mapped to the angle.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Share of the total distance contributed by each record (prop, in %) and the
# midpoint of its slice along the stacked axis (ypos), used to place labels.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops immediately if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only produced an
  # "Unknown palette" warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object (frequency 12, starting in 2008) and
# preview its first values.
data <- ts(df_MX[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.15208
0.15208
3.49173
# Line plot of the time series with a title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only sets the title of a
# colour legend; it does not draw the line in green -- confirm the intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city, drawn as a Pareto chart with the
# cumulative-percentage axis marked every 10 %.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                      
## Pareto chart analysis for distance
##                        Frequency  Cum.Freq. Percentage Cum.Percent.
##   Magdalena Contreras   3.491730   3.491730  91.987123    91.987123
##   Mexico City           0.152080   3.643810   4.006439    95.993561
##   Mexico City           0.152080   3.795890   4.006439   100.000000
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 22
##   1 | 
##   2 | 
##   3 | 5
head(df_MX)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1   975 1/22/09 <NA>  <NA>           Mexico       MX           The F~   12294193
## 2  2191 8/2/10  <NA>  <NA>           Mexico       MX           The F~   12294193
## 3  1167 9/15/09 <NA>  <NA>           Mexico       MX           The F~     228927
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
975 1/22/09 NA NA Mexico MX The Federal District 12294193 Mexico City 0.15208 NA 19.4271 -99.1276 (19.427099999999999, -99.127600000000001) Landslide Landslide Medium Downpour NA NA 2 NA NA 4.006438 2.003219
2191 8/2/10 NA NA Mexico MX The Federal District 12294193 Mexico City 0.15208 NA 19.4270 -99.1276 (19.427, -99.127600000000001) Landslide Mudslide Medium Construction NA NA 2 NA NA 4.006438 6.009658
1167 9/15/09 NA NA Mexico MX The Federal District 228927 Magdalena Contreras 3.49173 NA 19.3161 -99.2398 (19.316099999999999, -99.239800000000002) Landslide Mudslide Medium Downpour NA NA 3 NA http://edition.cnn.com/2009/WORLD/americas/09/16/mexico.mudslide.deaths/ 91.987123 54.006439
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 22
##   1 | 
##   2 | 
##   3 | 5
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 22
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distances, sorted by decreasing count, with
# cumulative percentages and a trailing "Total" row.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.15208 2 66.7 66.7 66.7 66.7
3.49173 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  2 1 3
##  $ %      : num  66.7 33.3 100
##  $ val%   : num  66.7 33.3 100
##  $ %cum   : num  66.7 100 100
##  $ val%cum: num  66.7 100 100
# Build a plain two-column data frame (value, count) from the questionr
# frequency table so it can be plotted.
x <- row.names(table)
y <- table$n
# freq(..., total = TRUE) appends a "Total" row; drop it with head(, -1).
# Unlike x[1:(length(x) - 1)], this gives an empty vector (rather than
# re-selecting the first element) when the total row is the only row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.15208 2
3.49173 1
library(ggplot2)
# Bar chart of the ungrouped frequency table built above (one bar per row).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n). When the ceiling of
# that value is even, the floor is used instead.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Integer class width covering the range, then the break points starting at
# the smallest observation.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.15208 2.15208 4.15208
# include.lowest = TRUE closes the first interval on the left. bins starts at
# min(distance), so with cut()'s default (a, b] intervals the smallest
# observations fell outside every class and were silently dropped.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.152,2.15] 2 0.6666667 2
(2.15,4.15] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "[0.152,2.15]",..: 1 2
##  $ Freq    : int  2 1
##  $ Rel_Freq: num  0.667 0.333
##  $ Cum_Freq: int  2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.152,2.15] 2
(2.15,4.15] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX. Non-numeric columns are
# reported as NA, and a numeric column whose values are all missing triggers
# the min()/max() warnings printed below.
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          9.750000e+02   NA   NA             NA           NA           NA
## max          2.191000e+03   NA   NA             NA           NA           NA
## range        1.216000e+03   NA   NA             NA           NA           NA
## sum          4.333000e+03   NA   NA             NA           NA           NA
## median       1.167000e+03   NA   NA             NA           NA           NA
## mean         1.444333e+03   NA   NA             NA           NA           NA
## SE.mean      3.774252e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.623930e+03   NA   NA             NA           NA           NA
## var          4.273493e+05   NA   NA             NA           NA           NA
## std.dev      6.537196e+02   NA   NA             NA           NA           NA
## coef.var     4.526099e-01   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 3.000000e+00   NA 3.000000                   NA  3.000000000
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## min             NA 2.289270e+05   NA 0.152080                   NA 19.316100000
## max             NA 1.229419e+07   NA 3.491730                   NA 19.427100000
## range           NA 1.206527e+07   NA 3.339650                   NA  0.111000000
## sum             NA 2.481731e+07   NA 3.795890                   NA 58.170200000
## median          NA 1.229419e+07   NA 0.152080                   NA 19.427000000
## mean            NA 8.272438e+06   NA 1.265297                   NA 19.390066667
## SE.mean         NA 4.021755e+06   NA 1.113217                   NA  0.036983345
## CI.mean.0.95    NA 1.730422e+07   NA 4.789785                   NA  0.159126489
## var             NA 4.852355e+13   NA 3.717754                   NA  0.004103303
## std.dev         NA 6.965885e+06   NA 1.928148                   NA  0.064057032
## coef.var        NA 8.420595e-01   NA 1.523870                   NA  0.003303600
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.923980e+01          NA          NA             NA
## max          -9.912760e+01          NA          NA             NA
## range         1.122000e-01          NA          NA             NA
## sum          -2.974950e+02          NA          NA             NA
## median       -9.912760e+01          NA          NA             NA
## mean         -9.916500e+01          NA          NA             NA
## SE.mean       3.740000e-02          NA          NA             NA
## CI.mean.0.95  1.609192e-01          NA          NA             NA
## var           4.196280e-03          NA          NA             NA
## std.dev       6.477870e-02          NA          NA             NA
## coef.var     -6.532416e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  3.0000000          NA
## nbr.null                 NA      NA         NA        0  0.0000000          NA
## nbr.na                   NA      NA         NA        3  0.0000000          NA
## min                      NA      NA         NA      Inf  2.0000000          NA
## max                      NA      NA         NA     -Inf  3.0000000          NA
## range                    NA      NA         NA     -Inf  1.0000000          NA
## sum                      NA      NA         NA        0  7.0000000          NA
## median                   NA      NA         NA       NA  2.0000000          NA
## mean                     NA      NA         NA      NaN  2.3333333          NA
## SE.mean                  NA      NA         NA       NA  0.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  1.4342176          NA
## var                      NA      NA         NA       NA  0.3333333          NA
## std.dev                  NA      NA         NA       NA  0.5773503          NA
## coef.var                 NA      NA         NA       NA  0.2474358          NA
##              source_link        prop       ypos
## nbr.val               NA    3.000000   3.000000
## nbr.null              NA    0.000000   0.000000
## nbr.na                NA    0.000000   0.000000
## min                   NA    4.006439   2.003219
## max                   NA   91.987123  54.006439
## range                 NA   87.980684  52.003219
## sum                   NA  100.000000  62.019316
## median                NA    4.006439   6.009658
## mean                  NA   33.333333  20.673105
## SE.mean               NA   29.326895  16.706747
## CI.mean.0.95          NA  126.183444  71.883332
## var                   NA 2580.200274 837.346221
## std.dev               NA   50.795672  28.936935
## coef.var              NA    1.523870   1.399738
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Veracruz (Mexico)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, keep only the rows for Mexico and
# preview the first ones.
names(df)[c(7, 9)] <- c("state", "city")
df_MX <- df %>%
  subset(country_name == "Mexico")
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Restrict to Mexico AND the state of interest. Filtering `df` on `state`
# alone ignores the country, so a same-named state in another country would
# be mixed into the result.
df_MX <- subset(
  df,
  country_name == "Mexico" & state == "Veracruz"
)
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6688 1/5/14 Night NA Mexico MX Veracruz 9277 Chocaman 1.93516 Unknown 19.0294 -97.0355 (19.029399999999999, -97.035499999999999) Landslide Landslide Medium Rain NA 0 0 Calor http://www.alcalorpolitico.com/informacion/saldo-blanco-en-deslave-de-chocaman-familias-salieron-a-tiempo-maza-limon-139202.html#.VLfpVPnF98E
7445 9/17/15 NA NA Mexico MX Veracruz 1005 Cruz Verde 2.47800 Deforested slope 19.1331 -97.1317 (19.133099999999999, -97.131699999999995) Landslide Landslide Medium Rain NA 0 6 El Diario http://www.eldiariony.com/2015/09/17/deslizamiento-de-cerro-en-veracruz-deja-seis-muertos/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on the y axis, one bar per record, coloured by
# city and grouped by state.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distances piled on top of each other per state, coloured by
# city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar whose y axis is mapped to the angle.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Share of the total distance contributed by each record (prop, in %) and the
# midpoint of its slice along the stacked axis (ypos), used to place labels.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops immediately if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only produced an
  # "Unknown palette" warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object (frequency 12, starting in 2008) and
# preview its first values.
data <- ts(df_MX[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
2.47800
1.93516
# Line plot of the time series with a title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only sets the title of a
# colour legend; it does not draw the line in green -- confirm the intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city, drawn as a Pareto chart with the
# cumulative-percentage axis marked every 10 %.
distance <- setNames(df_MX$distance, df_MX$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Cruz Verde   2.47800   2.47800   56.15024     56.15024
##   Chocaman     1.93516   4.41316   43.84976    100.00000
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   18 | 4
##   20 | 
##   22 | 
##   24 | 8
head(df_MX)
## # A tibble: 2 x 25
##      id date    time  continent_code country_name country_code state    population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1  7445 9/17/15 <NA>  <NA>           Mexico       MX           Veracruz       1005
## 2  6688 1/5/14  Night <NA>           Mexico       MX           Veracruz       9277
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7445 9/17/15 NA NA Mexico MX Veracruz 1005 Cruz Verde 2.47800 Deforested slope 19.1331 -97.1317 (19.133099999999999, -97.131699999999995) Landslide Landslide Medium Rain NA 0 6 El Diario http://www.eldiariony.com/2015/09/17/deslizamiento-de-cerro-en-veracruz-deja-seis-muertos/ 56.15024 28.07512
6688 1/5/14 Night NA Mexico MX Veracruz 9277 Chocaman 1.93516 Unknown 19.0294 -97.0355 (19.029399999999999, -97.035499999999999) Landslide Landslide Medium Rain NA 0 0 Calor http://www.alcalorpolitico.com/informacion/saldo-blanco-en-deslave-de-chocaman-familias-salieron-a-tiempo-maza-limon-139202.html#.VLfpVPnF98E 43.84976 78.07512
stem(df_MX$"distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   18 | 4
##   20 | 
##   22 | 
##   24 | 8
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   19 | 4
##   20 | 
##   21 | 
##   22 | 
##   23 | 
##   24 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distances, sorted by decreasing count, with
# cumulative percentages and a trailing "Total" row.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
1.93516 1 50 50 50 50
2.478 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build a plain two-column data frame (value, count) from the questionr
# frequency table so it can be plotted.
x <- row.names(table)
y <- table$n
# freq(..., total = TRUE) appends a "Total" row; drop it with head(, -1).
# Unlike x[1:(length(x) - 1)], this gives an empty vector (rather than
# re-selecting the first element) when the total row is the only row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.93516 1
2.478 1
library(ggplot2)
# Bar chart of the ungrouped frequency table built above (one bar per row).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n). When the ceiling of
# that value is even, the floor is used instead.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Integer class width covering the range, then the break points starting at
# the smallest observation.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 1.93516 2.93516
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break equals min(distance), so without it
# the smallest observation matches no (a, b] interval and is dropped as NA.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.94,2.94] 1 1 1
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(1.94,2.94]": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
# Plotting table: class interval (x) against its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.94,2.94] 1
library(ggplot2)

# Bar chart of the grouped frequencies: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX; the non-numeric columns
# come out as NA in the printed result.
stat.desc(df_MX)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.688000e+03   NA   NA             NA           NA           NA
## max          7.445000e+03   NA   NA             NA           NA           NA
## range        7.570000e+02   NA   NA             NA           NA           NA
## sum          1.413300e+04   NA   NA             NA           NA           NA
## median       7.066500e+03   NA   NA             NA           NA           NA
## mean         7.066500e+03   NA   NA             NA           NA           NA
## SE.mean      3.785000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.809298e+03   NA   NA             NA           NA           NA
## var          2.865245e+05   NA   NA             NA           NA           NA
## std.dev      5.352798e+02   NA   NA             NA           NA           NA
## coef.var     7.574893e-02   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 2.000000e+00   NA 2.0000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA
## min             NA 1.005000e+03   NA 1.9351600                   NA
## max             NA 9.277000e+03   NA 2.4780000                   NA
## range           NA 8.272000e+03   NA 0.5428400                   NA
## sum             NA 1.028200e+04   NA 4.4131600                   NA
## median          NA 5.141000e+03   NA 2.2065800                   NA
## mean            NA 5.141000e+03   NA 2.2065800                   NA
## SE.mean         NA 4.136000e+03   NA 0.2714200                   NA
## CI.mean.0.95    NA 5.255286e+04   NA 3.4487181                   NA
## var             NA 3.421299e+07   NA 0.1473376                   NA
## std.dev         NA 5.849187e+03   NA 0.3838458                   NA
## coef.var        NA 1.137753e+00   NA 0.1739551                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          19.029400000 -9.713170e+01          NA          NA             NA
## max          19.133100000 -9.703550e+01          NA          NA             NA
## range         0.103700000  9.620000e-02          NA          NA             NA
## sum          38.162500000 -1.941672e+02          NA          NA             NA
## median       19.081250000 -9.708360e+01          NA          NA             NA
## mean         19.081250000 -9.708360e+01          NA          NA             NA
## SE.mean       0.051850000  4.810000e-02          NA          NA             NA
## CI.mean.0.95  0.658816716  6.111684e-01          NA          NA             NA
## var           0.005376845  4.627220e-03          NA          NA             NA
## std.dev       0.073326973  6.802367e-02          NA          NA             NA
## coef.var      0.003842881 -7.006711e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2   2.000000          NA
## nbr.null                 NA      NA         NA        2   1.000000          NA
## nbr.na                   NA      NA         NA        0   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   6.000000          NA
## range                    NA      NA         NA        0   6.000000          NA
## sum                      NA      NA         NA        0   6.000000          NA
## median                   NA      NA         NA        0   3.000000          NA
## mean                     NA      NA         NA        0   3.000000          NA
## SE.mean                  NA      NA         NA        0   3.000000          NA
## CI.mean.0.95             NA      NA         NA        0  38.118614          NA
## var                      NA      NA         NA        0  18.000000          NA
## std.dev                  NA      NA         NA        0   4.242641          NA
## coef.var                 NA      NA         NA      NaN   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA   2.0000000    2.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  43.8497584   28.0751208
## max                   NA  56.1502416   78.0751208
## range                 NA  12.3004831   50.0000000
## sum                   NA 100.0000000  106.1502416
## median                NA  50.0000000   53.0751208
## mean                  NA  50.0000000   53.0751208
## SE.mean               NA   6.1502416   25.0000000
## CI.mean.0.95          NA  78.1462283  317.6551184
## var                   NA  75.6509423 1250.0000000
## std.dev               NA   8.6977550   35.3553391
## coef.var              NA   0.1739551    0.6661377
# Horizontal green box plot of `data`.
# NOTE(review): `data` is presumably the distance time series built earlier in
# this section -- confirm against the preceding code.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Veracruz-Llave (Mexico)

library(readr)
library(knitr)
# Reload the full catalogue from GitHub into df; the name df was reused for
# small plotting tables in the code above.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city" used further down.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is Mexico and preview the first ones.
df_MX <- df[which(df$country_name == "Mexico"), ]
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
201 8/23/07 NA NA Mexico MX Puebla 1414 Xochitlaxco (San Baltazar) 1.68294 NA 19.9333 -97.8500 (19.933299999999999, -97.85) Landslide Mudslide Medium Tropical cyclone Hurricane Dean NA 6 Inquirer.com http://newsinfo.inquirer.net/breakingnews/world/view_article.php?article_id=84452
225 9/2/07 NA NA Mexico MX Sinaloa 3191 El Limón de los Ramos 10.88351 NA 24.9531 -107.6220 (24.953099999999999, -107.622) Landslide Complex Medium Tropical cyclone Tropical Storm Henrietta NA 3 NA NA
284 9/28/07 NA NA Mexico MX Puebla 3761 Xaltepuxtla 7.93258 NA 20.2000 -97.9000 (20.2, -97.9) Landslide Mudslide Medium Tropical cyclone Hurricane Lorenzo NA 1 PressTV.ir http://www.presstv.ir/detail.aspx?id=25037&sectionid=3510207
342 10/31/07 NA NA Mexico MX Tabasco 4468 Buenavista 4.19108 NA 17.9493 -92.5534 (17.949300000000001, -92.553399999999996) Landslide Landslide Medium Rain NA NA 18 CapeTimes http://www.capetimes.co.za/?fArticleId=4109453
346 11/4/07 NA NA Mexico MX Chiapas 3183 Ostuacán 3.74149 NA 17.3900 -93.3060 (17.39, -93.305999999999997) Landslide Mudslide Very_large Rain NA NA 5 Reuters - AlertNet.org http://news.monstersandcritics.com/americas/news/article_1371436.php/Mudslide_is_latest_disaster_for_hard-hit_Mexico__2nd_Roundup_
library(dplyr)
# Replace df_MX with the rows of df whose state is "Veracruz-Llave" and
# preview the first ones.
df_MX <- df[which(df$state == "Veracruz-Llave"), ]
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin
2438 9/17/10 NA NA Mexico MX Veracruz-Llave 1324 El Hatito 8.28739 NA 19.2818 -96.3149 (19.2818, -96.314899999999994) Landslide Landslide Medium Tropical cyclone Hurricane Karl NA 2 NA http://www.lfpress.com/news/world/2010/09/17/15382976-reuters.html
3684 7/1/11 NA NA Mexico MX Veracruz-Llave 425148 Xalapa de Enríquez 1.27837 NA 19.5426 -96.9137 (19.5426, -96.913700000000006) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arlene NA 0 NA http://edition.cnn.com/2011/WORLD/americas/07/04/mexico.arlene.deaths/
5403 8/26/13 20:20:00 NA Mexico MX Veracruz-Llave 30607 Coatzintla 1.52983 NA 20.5004 -97.4647 (20.500399999999999, -97.464699999999993) Landslide Landslide Medium Tropical cyclone Fernand NA 3 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491
5405 8/26/13 NA NA Mexico MX Veracruz-Llave 15800 Altotonga 2.85382 NA 19.7906 -97.2428 (19.790600000000001, -97.242800000000003) Landslide Landslide Medium Tropical cyclone Fernand NA 1 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491
5406 8/26/13 NA NA Mexico MX Veracruz-Llave 3198 Yecuatla 3.73160 NA 19.8413 -96.8005 (19.8413, -96.8005) Landslide Landslide Medium Downpour NA NA 9 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per state, one side-by-side bar per city.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance per state, city contributions piled on each other.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_MX, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_MX each event's share of the total distance (prop, in percent)
# and the mid-point of that share on the cumulative scale (ypos), which is
# where the slice label is drawn. Rows are ordered by city, descending.
df_MX <- df_MX %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; require() would only return
# FALSE and let percent() fail later.
library(scales)
# Labelled pie chart without axes or legend. The fill palette is "Set3":
# the former "Set4" is not a ColorBrewer palette and only triggered an
# "Unknown palette" warning with a fallback palette.
ggplot(df_MX, aes(x = state, y = prop, fill = city)) +
  geom_col(width = 1, colour = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    colour = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# starting in 2008, then preview its first values.
# NOTE(review): the values are taken in the current row order of df_MX; the
# date column is not used -- confirm this is the intended time axis.
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
3.73160
1.27837
4.51820
9.51003
0.09971
8.28739
# Line plot of the series with a title, axis labels and the black-and-white
# theme.
# NOTE(review): colour = "green" is passed to labs(), which sets labels; it
# probably does not colour the line -- confirm the intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with the city of each event.
distance <- setNames(df_MX$distance, df_MX$city)
# Pareto chart of those distances with a cumulative-percentage axis marked
# every 10 points.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                                
## Pareto chart analysis for distance
##                                   Frequency   Cum.Freq.  Percentage
##   Laguna Chica (Pueblo Nuevo)     9.5100300   9.5100300  29.4329486
##   El Hatito                       8.2873900  17.7974200  25.6489542
##   Tetlaxco                        4.5182000  22.3156200  13.9835467
##   Yecuatla                        3.7316000  26.0472200  11.5490688
##   Altotonga                       2.8538200  28.9010400   8.8323946
##   Coatzintla                      1.5298300  30.4308700   4.7347283
##   Xalapa de Enríquez              1.2783700  31.7092400   3.9564753
##   Altotonga                       0.5018800  32.2111200   1.5532872
##   Heroica Coscomatepec de Bravo   0.0997100  32.3108300   0.3085962
##                                
## Pareto chart analysis for distance
##                                 Cum.Percent.
##   Laguna Chica (Pueblo Nuevo)     29.4329486
##   El Hatito                       55.0819029
##   Tetlaxco                        69.0654496
##   Yecuatla                        80.6145184
##   Altotonga                       89.4469130
##   Coatzintla                      94.1816413
##   Xalapa de Enríquez              98.1381165
##   Altotonga                       99.6914038
##   Heroica Coscomatepec de Bravo  100.0000000
# Stem-and-leaf display of the distance column of df_MX.
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 1535
##   2 | 97
##   4 | 5
##   6 | 
##   8 | 35
# Print the first rows of df_MX, which now also carries prop and ypos.
head(df_MX)
## # A tibble: 6 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  5406 8/26/13 <NA>  <NA>           Mexico       MX           Verac~       3198
## 2  3684 7/1/11  <NA>  <NA>           Mexico       MX           Verac~     425148
## 3  5542 9/16/13 <NA>  <NA>           Mexico       MX           Verac~       1543
## 4   115 7/4/07  <NA>  <NA>           Mexico       MX           Verac~       1947
## 5  5486 9/9/13  Night <NA>           Mexico       MX           Verac~      12920
## 6  2438 9/17/10 <NA>  <NA>           Mexico       MX           Verac~       1324
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_MX as a formatted table.
knitr::kable(head(df_MX))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
5406 8/26/13 NA NA Mexico MX Veracruz-Llave 3198 Yecuatla 3.73160 NA 19.8413 -96.8005 (19.8413, -96.8005) Landslide Landslide Medium Downpour NA NA 9 www.cbc.ca http://www.cbc.ca/news/world/mexico-storm-related-landslides-kill-13-1.1370491 11.5490688 5.774534
3684 7/1/11 NA NA Mexico MX Veracruz-Llave 425148 Xalapa de Enríquez 1.27837 NA 19.5426 -96.9137 (19.5426, -96.913700000000006) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arlene NA 0 NA http://edition.cnn.com/2011/WORLD/americas/07/04/mexico.arlene.deaths/ 3.9564753 13.527306
5542 9/16/13 NA NA Mexico MX Veracruz-Llave 1543 Tetlaxco 4.51820 NA 19.0603 -97.1009 (19.060300000000002, -97.100899999999996) Landslide Landslide Medium Tropical cyclone Manuel NA 13 www.raymondvillechroniclenews.com http://www.raymondvillechroniclenews.com/news/2013-09-18/News/Mexican_village_buries_13_victims_of_mudslide.html 13.9835467 22.497318
115 7/4/07 NA NA Mexico MX Veracruz-Llave 1947 Laguna Chica (Pueblo Nuevo) 9.51003 NA 18.5369 -96.8229 (18.536899999999999, -96.822900000000004) Landslide Landslide Medium Rain NA NA 7 nytimes.com http://www.nytimes.com/2007/07/04/world/americas/04cnd-mexico.html?_r=1&hp&oref=slogin 29.4329486 44.205565
5486 9/9/13 Night NA Mexico MX Veracruz-Llave 12920 Heroica Coscomatepec de Bravo 0.09971 NA 19.0719 -97.0474 (19.071899999999999, -97.047399999999996) Landslide Landslide Medium Downpour NA 2 14 infosurhoy.com http://infosurhoy.com/en_GB/articles/saii/newsbriefs/2013/09/12/newsbrief-03 0.3085962 59.076338
2438 9/17/10 NA NA Mexico MX Veracruz-Llave 1324 El Hatito 8.28739 NA 19.2818 -96.3149 (19.2818, -96.314899999999994) Landslide Landslide Medium Tropical cyclone Hurricane Karl NA 2 NA http://www.lfpress.com/news/world/2010/09/17/15382976-reuters.html 25.6489542 72.055113
# Stem-and-leaf display of the distance column of df_MX (default scale).
stem(df_MX$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 1535
##   2 | 97
##   4 | 5
##   6 | 
##   8 | 35
# Same stem-and-leaf display with scale = 2, which spreads it over more stems.
stem(df_MX$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 15
##   1 | 35
##   2 | 9
##   3 | 7
##   4 | 5
##   5 | 
##   6 | 
##   7 | 
##   8 | 3
##   9 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the distinct distance values with cumulative columns
# (cum = TRUE) and a final Total row (total = TRUE); sort = "dec" requests
# decreasing-frequency order.
# NOTE(review): the result is stored as `table`, the same name as base::table.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.09971 1 11.1 11.1 11.1 11.1
0.50188 1 11.1 11.1 22.2 22.2
1.27837 1 11.1 11.1 33.3 33.3
1.52983 1 11.1 11.1 44.4 44.4
2.85382 1 11.1 11.1 55.6 55.6
3.7316 1 11.1 11.1 66.7 66.7
4.5182 1 11.1 11.1 77.8 77.8
8.28739 1 11.1 11.1 88.9 88.9
9.51003 1 11.1 11.1 100.0 100.0
Total 9 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table (classes, columns, types).
str(table)
## Classes 'freqtab' and 'data.frame':  10 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 9
##  $ %      : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ val%   : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ %cum   : num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
##  $ val%cum: num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
# Build a two-column data frame (value, count) for plotting from the
# frequency table: row names hold the distance values, column n the counts.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line and must not be plotted. head(., -1) drops
# it and, unlike 1:(length(x) - 1), yields nothing when only that row exists.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.09971 1
0.50188 1
1.27837 1
1.52983 1
2.85382 1
3.7316 1
4.5182 1
8.28739 1
9.51003 1
library(ggplot2)
# Bar chart with one bar per distinct distance value; bar height is its count.
# Axis tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n), plus its two roundings.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even; otherwise
# take the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split over n_clases, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and advance by w; the upper limit is padded by
# w so that the last break is not below the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.09971  2.09971  4.09971  6.09971  8.09971 10.09971
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break equals min(distance), so without it
# the smallest observation matches no (a, b] interval and is dropped as NA.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0997,2.1] 3 0.375 3
(2.1,4.1] 2 0.250 5
(4.1,6.1] 1 0.125 6
(6.1,8.1] 0 0.000 6
(8.1,10.1] 2 0.250 8
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.0997,2.1]",..: 1 2 3 4 5
##  $ Freq    : int  3 2 1 0 2
##  $ Rel_Freq: num  0.375 0.25 0.125 0 0.25
##  $ Cum_Freq: int  3 5 6 6 8
# Plotting table: class interval (x) against its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.0997,2.1] 3
(2.1,4.1] 2
(4.1,6.1] 1
(6.1,8.1] 0
(8.1,10.1] 2
library(ggplot2)

# Bar chart of the grouped frequencies: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_MX; the non-numeric columns
# come out as NA in the printed result.
stat.desc(df_MX)
##                        id date time continent_code country_name country_code
## nbr.val      9.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.150000e+02   NA   NA             NA           NA           NA
## max          5.542000e+03   NA   NA             NA           NA           NA
## range        5.427000e+03   NA   NA             NA           NA           NA
## sum          3.901100e+04   NA   NA             NA           NA           NA
## median       5.405000e+03   NA   NA             NA           NA           NA
## mean         4.334556e+03   NA   NA             NA           NA           NA
## SE.mean      6.398549e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.475508e+03   NA   NA             NA           NA           NA
## var          3.684729e+06   NA   NA             NA           NA           NA
## std.dev      1.919565e+03   NA   NA             NA           NA           NA
## coef.var     4.428516e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 9.000000e+00   NA  9.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.324000e+03   NA  0.0997100                   NA
## max             NA 4.251480e+05   NA  9.5100300                   NA
## range           NA 4.238240e+05   NA  9.4103200                   NA
## sum             NA 5.082870e+05   NA 32.3108300                   NA
## median          NA 1.292000e+04   NA  2.8538200                   NA
## mean            NA 5.647633e+04   NA  3.5900922                   NA
## SE.mean         NA 4.619791e+04   NA  1.1170543                   NA
## CI.mean.0.95    NA 1.065326e+05   NA  2.5759318                   NA
## var             NA 1.920822e+10   NA 11.2302922                   NA
## std.dev         NA 1.385937e+05   NA  3.3511628                   NA
## coef.var        NA 2.454014e+00   NA  0.9334476                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        9.00000000  9.000000e+00          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           18.53690000 -9.746470e+01          NA          NA             NA
## max           20.50040000 -9.631490e+01          NA          NA             NA
## range          1.96350000  1.149800e+00          NA          NA             NA
## sum          175.38730000 -8.729500e+02          NA          NA             NA
## median        19.54260000 -9.704740e+01          NA          NA             NA
## mean          19.48747778 -9.699444e+01          NA          NA             NA
## SE.mean        0.19084238  1.114103e-01          NA          NA             NA
## CI.mean.0.95   0.44008331  2.569127e-01          NA          NA             NA
## var            0.32778731  1.117104e-01          NA          NA             NA
## std.dev        0.57252713  3.342310e-01          NA          NA             NA
## coef.var       0.02937923 -3.445878e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA  2.0000000  9.0000000
## nbr.null                 NA      NA         NA  0.0000000  1.0000000
## nbr.na                   NA      NA         NA  7.0000000  0.0000000
## min                      NA      NA         NA  2.0000000  0.0000000
## max                      NA      NA         NA  8.0000000 14.0000000
## range                    NA      NA         NA  6.0000000 14.0000000
## sum                      NA      NA         NA 10.0000000 61.0000000
## median                   NA      NA         NA  5.0000000  7.0000000
## mean                     NA      NA         NA  5.0000000  6.7777778
## SE.mean                  NA      NA         NA  3.0000000  1.8240506
## CI.mean.0.95             NA      NA         NA 38.1186142  4.2062682
## var                      NA      NA         NA 18.0000000 29.9444444
## std.dev                  NA      NA         NA  4.2426407  5.4721517
## coef.var                 NA      NA         NA  0.8485281  0.8073666
##              source_name source_link        prop         ypos
## nbr.val               NA          NA   9.0000000    9.0000000
## nbr.null              NA          NA   0.0000000    0.0000000
## nbr.na                NA          NA   0.0000000    0.0000000
## min                   NA          NA   0.3085962    5.7745344
## max                   NA          NA  29.4329486   99.2233564
## range                 NA          NA  29.1243524   93.4488220
## sum                   NA          NA 100.0000000  497.6369997
## median                NA          NA   8.8323946   59.0763376
## mean                  NA          NA  11.1111111   55.2930000
## SE.mean               NA          NA   3.4572132   11.8683450
## CI.mean.0.95          NA          NA   7.9723479   27.3684527
## var                   NA          NA 107.5709066 1267.7185196
## std.dev               NA          NA  10.3716395   35.6050350
## coef.var              NA          NA   0.9334476    0.6439339
# Horizontal green box plot of `data`, the ts object built from
# df_MX$distance in the time-series step above.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para El Salvador

library(readr)
library(knitr)
# Reload the full catalogue from GitHub into df; the name df was reused for
# small plotting tables in the code above.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city" used further down.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is El Salvador and preview the first ones.
df_ES <- df[which(df$country_name == "El Salvador"), ]
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per country, one side-by-side bar per state.
ggplot(df_ES, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance per country, state contributions piled on each other.
ggplot(df_ES, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_ES, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES each event's share of the total distance (prop, in percent)
# and the mid-point of that share on the cumulative scale (ypos), which is
# where the slice label is drawn. Rows are ordered by state, descending.
df_ES <- df_ES %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; require() would only return
# FALSE and let percent() fail later.
library(scales)
# Labelled pie chart without axes or legend. The fill palette is "Set3":
# the former "Set4" is not a ColorBrewer palette and only triggered an
# "Unknown palette" warning with a fallback palette.
ggplot(df_ES, aes(x = country_name, y = prop, fill = state)) +
  geom_col(width = 1, colour = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    colour = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# starting in 2008, then preview its first values.
# NOTE(review): the values are taken in the current row order of df_ES; the
# date column is not used -- confirm this is the intended time axis.
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
4.23875
3.22235
0.49346
8.83210
1.15810
7.60946
# Line plot of the series with a title, axis labels and the black-and-white
# theme.
# NOTE(review): colour = "green" is passed to labs(), which sets labels; it
# probably does not colour the line -- confirm the intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with the state of each event.
distance <- setNames(df_ES$distance, df_ES$state)
# Pareto chart of those distances with a cumulative-percentage axis marked
# every 10 points.
pareto.chart(
  distance,
  main = "Grafico de Pareto por estados",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                   Frequency    Cum.Freq.   Percentage Cum.Percent.
##   San Miguel   1.006695e+01 1.006695e+01 8.974011e+00 8.974011e+00
##   San Miguel   9.972270e+00 2.003922e+01 8.889610e+00 1.786362e+01
##   La Libertad  9.875530e+00 2.991475e+01 8.803373e+00 2.666699e+01
##   Santa Ana    8.832100e+00 3.874685e+01 7.873225e+00 3.454022e+01
##   Cabañas      8.825250e+00 4.757210e+01 7.867118e+00 4.240734e+01
##   San Vicente  7.609460e+00 5.518156e+01 6.783323e+00 4.919066e+01
##   San Miguel   6.945360e+00 6.212692e+01 6.191323e+00 5.538198e+01
##   San Vicente  5.907260e+00 6.803418e+01 5.265926e+00 6.064791e+01
##   Ahuachapán   5.299010e+00 7.333319e+01 4.723712e+00 6.537162e+01
##   La Libertad  4.964160e+00 7.829735e+01 4.425216e+00 6.979684e+01
##   La Libertad  4.862190e+00 8.315954e+01 4.334316e+00 7.413115e+01
##   La Libertad  4.677220e+00 8.783676e+01 4.169428e+00 7.830058e+01
##   La Libertad  4.606550e+00 9.244331e+01 4.106430e+00 8.240701e+01
##   Sonsonate    4.238750e+00 9.668206e+01 3.778561e+00 8.618557e+01
##   San Vicente  4.031250e+00 1.007133e+02 3.593589e+00 8.977916e+01
##   San Salvador 3.252270e+00 1.039656e+02 2.899181e+00 9.267834e+01
##   Sonsonate    3.222350e+00 1.071879e+02 2.872509e+00 9.555085e+01
##   San Salvador 3.017390e+00 1.102053e+02 2.689801e+00 9.824065e+01
##   Santa Ana    1.158100e+00 1.113634e+02 1.032368e+00 9.927302e+01
##   Sonsonate    4.934600e-01 1.118569e+02 4.398865e-01 9.971291e+01
##   La Paz       3.193300e-01 1.121762e+02 2.846613e-01 9.999757e+01
##   Ahuachapán   2.730000e-03 1.121789e+02 2.433612e-03 1.000000e+02
# Stem-and-leaf display of the distance column of df_ES.
stem(df_ES$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000133344
##   0 | 5555567899
##   1 | 000
# Print the first rows of df_ES, which now also carries prop and ypos.
head(df_ES)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6681 10/15/14 <NA>  <NA>           El Salvador  SV           Sons~       7358
## 2  6682 10/15/14 <NA>  <NA>           El Salvador  SV           Sons~      15446
## 3  7442 10/19/15 <NA>  <NA>           El Salvador  SV           Sons~       9936
## 4  6685 10/12/14 <NA>  <NA>           El Salvador  SV           Sant~       5773
## 5  7438 7/18/15  <NA>  <NA>           El Salvador  SV           Sant~      10095
## 6  1285 11/8/09  <NA>  <NA>           El Salvador  SV           San ~      41504
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_ES as a formatted table.
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6681 10/15/14 NA NA El Salvador SV Sonsonate 7358 Nahuizalco 4.23875 Unknown 13.7895 -89.7739 (13.7895, -89.773899999999998) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 3.7785613 1.889281
6682 10/15/14 NA NA El Salvador SV Sonsonate 15446 Sonzacate 3.22235 Unknown 13.7135 -89.6938 (13.7135, -89.693799999999996) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 2.8725089 5.214816
7442 10/19/15 NA NA El Salvador SV Sonsonate 9936 Juayúa 0.49346 Unknown 13.8457 -89.7445 (13.845700000000001, -89.744500000000002) Landslide Landslide Medium Downpour NA 1 0 El Heraldo http://www.elheraldo.hn/mundo/892045-217/cuatro-muertos-por-persistentes-lluvias-en-el-salvador 0.4398865 6.871013
6685 10/12/14 NA NA El Salvador SV Santa Ana 5773 Coatepeque 8.83210 Unknown 14.0007 -89.4691 (14.0007, -89.469099999999997) Landslide Landslide Medium Rain NA 2 3 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm 7.8732247 11.027569
7438 7/18/15 NA NA El Salvador SV Santa Ana 10095 Ciudad Arce 1.15810 Above road 13.8464 -89.4502 (13.846399999999999, -89.450199999999995) Landslide Landslide Small Rain NA 0 0 La Prensa http://www.laprensagrafica.com/2015/07/18/lluvias-causan-derrumbe-en-carretera-a-santa-ana 1.0323685 15.480366
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0 6.7833231 19.388211
# Stem-and-leaf display of the distance column of df_ES.
stem(df_ES[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000133344
##   0 | 5555567899
##   1 | 000
# Same stem-and-leaf display, stretched to twice the default length.
stem(df_ES[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 0352
##    2 | 023
##    4 | 02679039
##    6 | 96
##    8 | 889
##   10 | 01

Tablas de frecuencia

library(questionr)
# Frequency table of `distance` with cumulative columns and a final "Total"
# row; sort = "dec" requests decreasing-frequency order.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.00273 1 4.5 4.5 4.5 4.5
0.31933 1 4.5 4.5 9.1 9.1
0.49346 1 4.5 4.5 13.6 13.6
1.1581 1 4.5 4.5 18.2 18.2
3.01739 1 4.5 4.5 22.7 22.7
3.22235 1 4.5 4.5 27.3 27.3
3.25227 1 4.5 4.5 31.8 31.8
4.03125 1 4.5 4.5 36.4 36.4
4.23875 1 4.5 4.5 40.9 40.9
4.60655 1 4.5 4.5 45.5 45.5
4.67722 1 4.5 4.5 50.0 50.0
4.86219 1 4.5 4.5 54.5 54.5
4.96416 1 4.5 4.5 59.1 59.1
5.29901 1 4.5 4.5 63.6 63.6
5.90726 1 4.5 4.5 68.2 68.2
6.94536 1 4.5 4.5 72.7 72.7
7.60946 1 4.5 4.5 77.3 77.3
8.82525 1 4.5 4.5 81.8 81.8
8.8321 1 4.5 4.5 86.4 86.4
9.87553 1 4.5 4.5 90.9 90.9
9.97227 1 4.5 4.5 95.5 95.5
10.06695 1 4.5 4.5 100.0 100.0
Total 22 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table built above.
str(table)
## Classes 'freqtab' and 'data.frame':  23 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
##  $ val%   : num  4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
##  $ %cum   : num  4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
##  $ val%cum: num  4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
# Split the frequency table into category labels and counts, leaving out the
# trailing "Total" row that freq(total = TRUE) appends.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike 1:(length(v) - 1), never
# counts backwards (1:0) when the table holds only the "Total" row.
names <- head(x, -1)
freqs <- head(y, -1)
# Note: this reuses the name `df`, replacing whatever `df` held before.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.00273 1
0.31933 1
0.49346 1
1.1581 1
3.01739 1
3.22235 1
3.25227 1
4.03125 1
4.23875 1
4.60655 1
4.67722 1
4.86219 1
4.96416 1
5.29901 1
5.90726 1
6.94536 1
7.60946 1
8.82525 1
8.8321 1
9.87553 1
9.97227 1
10.06695 1
library(ggplot2)

# One bar per distinct distance value; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even; otherwise
# take the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w until they pass the
# maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.00273  3.00273  6.00273  9.00273 12.00273
# Assign each distance to a class. include.lowest = TRUE closes the first
# interval on the left, so the minimum value (which is exactly the first
# break) is counted instead of becoming NA.
# Note: this replaces the numeric vector `distance` with a factor.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.00273,3] 3 0.1428571 3
(3,6] 11 0.5238095 14
(6,9] 4 0.1904762 18
(9,12] 3 0.1428571 21
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.00273,3]",..: 1 2 3 4
##  $ Freq    : int  3 11 4 3
##  $ Rel_Freq: num  0.143 0.524 0.19 0.143
##  $ Cum_Freq: int  3 14 18 21
# Two-column frame for plotting: class interval (x) and its count (y).
# Note: this reuses the name `df`, replacing whatever `df` held before.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.00273,3] 3
(3,6] 11
(6,9] 4
(9,12] 3
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_ES; in the printed result the
# character columns come back as NA.
stat.desc(df_ES)
##                        id date time continent_code country_name country_code
## nbr.val      2.200000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.300000e+02   NA   NA             NA           NA           NA
## max          7.442000e+03   NA   NA             NA           NA           NA
## range        7.212000e+03   NA   NA             NA           NA           NA
## sum          1.086050e+05   NA   NA             NA           NA           NA
## median       6.680000e+03   NA   NA             NA           NA           NA
## mean         4.936591e+03   NA   NA             NA           NA           NA
## SE.mean      5.803080e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.206817e+03   NA   NA             NA           NA           NA
## var          7.408663e+06   NA   NA             NA           NA           NA
## std.dev      2.721886e+03   NA   NA             NA           NA           NA
## coef.var     5.513696e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 2.200000e+01   NA  22.0000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 2.654000e+03   NA   0.0027300                   NA
## max             NA 1.246940e+05   NA  10.0669500                   NA
## range           NA 1.220400e+05   NA  10.0642200                   NA
## sum             NA 9.383360e+05   NA 112.1789400                   NA
## median          NA 2.643100e+04   NA   4.7697050                   NA
## mean            NA 4.265164e+04   NA   5.0990427                   NA
## SE.mean         NA 9.849090e+03   NA   0.6720603                   NA
## CI.mean.0.95    NA 2.048230e+04   NA   1.3976259                   NA
## var             NA 2.134100e+09   NA   9.9366315                   NA
## std.dev         NA 4.619633e+04   NA   3.1522423                   NA
## coef.var        NA 1.083108e+00   NA   0.6182028                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       22.00000000  2.200000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           13.28170000 -8.990440e+01          NA          NA             NA
## max           14.00070000 -8.808430e+01          NA          NA             NA
## range          0.71900000  1.820100e+00          NA          NA             NA
## sum          301.25530000 -1.961688e+03          NA          NA             NA
## median        13.71515000 -8.926030e+01          NA          NA             NA
## mean          13.69342273 -8.916765e+01          NA          NA             NA
## SE.mean        0.03809807  1.112458e-01          NA          NA             NA
## CI.mean.0.95   0.07922927  2.313484e-01          NA          NA             NA
## var            0.03193218  2.722640e-01          NA          NA             NA
## std.dev        0.17869578  5.217892e-01          NA          NA             NA
## coef.var       0.01304975 -5.851777e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 14.0000000  18.000000
## nbr.null                 NA      NA         NA 10.0000000  13.000000
## nbr.na                   NA      NA         NA  8.0000000   4.000000
## min                      NA      NA         NA  0.0000000   0.000000
## max                      NA      NA         NA  2.0000000  32.000000
## range                    NA      NA         NA  2.0000000  32.000000
## sum                      NA      NA         NA  6.0000000  63.000000
## median                   NA      NA         NA  0.0000000   0.000000
## mean                     NA      NA         NA  0.4285714   3.500000
## SE.mean                  NA      NA         NA  0.2020305   2.107022
## CI.mean.0.95             NA      NA         NA  0.4364604   4.445428
## var                      NA      NA         NA  0.5714286  79.911765
## std.dev                  NA      NA         NA  0.7559289   8.939338
## coef.var                 NA      NA         NA  1.7638342   2.554097
##              source_name source_link         prop         ypos
## nbr.val               NA          NA 2.200000e+01   22.0000000
## nbr.null              NA          NA 0.000000e+00    0.0000000
## nbr.na                NA          NA 0.000000e+00    0.0000000
## min                   NA          NA 2.433612e-03    1.8892806
## max                   NA          NA 8.974011e+00   97.6381440
## range                 NA          NA 8.971577e+00   95.7488634
## sum                   NA          NA 1.000000e+02 1038.4834890
## median                NA          NA 4.251872e+00   44.1153638
## mean                  NA          NA 4.545455e+00   47.2037950
## SE.mean               NA          NA 5.990967e-01    6.6246536
## CI.mean.0.95          NA          NA 1.245890e+00   13.7767213
## var                   NA          NA 7.896171e+00  965.4927696
## std.dev               NA          NA 2.810013e+00   31.0723795
## coef.var              NA          NA 6.182028e-01    0.6582602
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para La Libertad (El Salvador)

library(readr)
library(knitr)
# Read the catalogue CSV directly from its remote URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city".
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the El Salvador records and preview the first rows.
df_ES <- subset(df, subset = country_name == "El Salvador")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Restrict to the department of La Libertad in El Salvador. Matching on
# `state` alone also pulls in the Peruvian region with the same name.
df_ES <- subset(df, country_name == "El Salvador" & state == "La Libertad")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1004 4/14/09 NA SA Peru PE La Libertad 0 Parcoy 9.64894 NA -7.9589 -77.5239 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide Medium Downpour NA NA 12 NA http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
1005 4/14/09 NA SA Peru PE La Libertad 0 Aricapampa 0.00442 NA -7.8058 -77.7172 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide Medium Downpour NA NA 0 NA http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
6686 10/12/14 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.60655 Above road 13.6905 -89.3200 (13.6905, -89.32) Landslide Landslide Medium Rain NA 0 0 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm
7440 11/3/15 1:00 NA El Salvador SV La Libertad 124694 Santa Tecla 4.67722 Unknown 13.7178 -89.2685 (13.7178, -89.268500000000003) Landslide Mudslide Medium Rain NA 0 0 El Salvador Noticias http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance per state, one bar per record, coloured by city.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distances piled up per state, coloured by city.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates on y.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)

# Share of the total distance per record (in percent) and the midpoint of
# each share on the cumulative scale, used to position the labels.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart without axes or legend, each slice labelled with its percentage.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set3" is a valid ColorBrewer qualitative palette; "Set4" does not exist.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly series (12 observations per unit of time)
# starting at 2008.
# NOTE(review): values are taken in the current row order of df_ES, which is
# not sorted by date here; confirm this is the intended ordering.
data <- ts(df_ES$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
4.96416
4.60655
4.67722
9.87553
9.64894
0.00442
# Line plot of the series. Note: `colour` inside labs() only supplies a title
# for the colour legend; it does not paint the line green.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city; pareto.chart() uses the names as bar labels.
distance <- setNames(df_ES$distance, df_ES$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                    
## Pareto chart analysis for distance
##                        Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Santa Tecla         9.87553000   9.87553000  25.55844469  25.55844469
##   Parcoy              9.64894000  19.52447000  24.97201662  50.53046131
##   Santa Tecla         4.96416000  24.48863000  12.84753414  63.37799545
##   Antiguo Cuscatlán   4.86219000  29.35082000  12.58362986  75.96162531
##   Santa Tecla         4.67722000  34.02804000  12.10491677  88.06654208
##   Santa Tecla         4.60655000  38.63459000  11.92201871  99.98856078
##   Aricapampa          0.00442000  38.63901000   0.01143922 100.00000000
# Stem-and-leaf display of the distance column of df_ES.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 0
##   2 | 
##   4 | 6790
##   6 | 
##   8 | 69
# Print the first rows of df_ES.
head(df_ES)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   564 6/2/08   <NA>  <NA>           El Salvador  SV           La L~     124694
## 2  6686 10/12/14 <NA>  <NA>           El Salvador  SV           La L~     124694
## 3  7440 11/3/15  1:00  <NA>           El Salvador  SV           La L~     124694
## 4  7441 11/4/15  <NA>  <NA>           El Salvador  SV           La L~     124694
## 5  1004 4/14/09  <NA>  SA             Peru         PE           La L~          0
## 6  1005 4/14/09  <NA>  SA             Peru         PE           La L~          0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_ES as a formatted table.
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm 12.8475341 6.423767
6686 10/12/14 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.60655 Above road 13.6905 -89.3200 (13.6905, -89.32) Landslide Landslide Medium Rain NA 0 0 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm 11.9220187 18.808543
7440 11/3/15 1:00 NA El Salvador SV La Libertad 124694 Santa Tecla 4.67722 Unknown 13.7178 -89.2685 (13.7178, -89.268500000000003) Landslide Mudslide Medium Rain NA 0 0 El Salvador Noticias http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/ 12.1049168 30.822011
7441 11/4/15 NA NA El Salvador SV La Libertad 124694 Santa Tecla 9.87553 Retaining wall 13.7147 -89.3625 (13.714700000000001, -89.362499999999997) Landslide Other Small Rain NA 1 0 Tele El Salvador http://www.teleelsalvador.com/noticias/deslaves-en-colonia-escalon-dejan-carros-danados-y-calles-cerradas/ 25.5584447 49.653692
1004 4/14/09 NA SA Peru PE La Libertad 0 Parcoy 9.64894 NA -7.9589 -77.5239 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide Medium Downpour NA NA 12 NA http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html 24.9720166 74.918923
1005 4/14/09 NA SA Peru PE La Libertad 0 Aricapampa 0.00442 NA -7.8058 -77.7172 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide Medium Downpour NA NA 0 NA http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html 0.0114392 87.410651
# Stem-and-leaf display of the distance column of df_ES.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 0
##   2 | 
##   4 | 6790
##   6 | 
##   8 | 69
# Same stem-and-leaf display, stretched to twice the default length.
stem(df_ES[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 0
##   1 | 
##   2 | 
##   3 | 
##   4 | 679
##   5 | 0
##   6 | 
##   7 | 
##   8 | 
##   9 | 69

Tablas de frecuencia

library(questionr)
# Frequency table of `distance` with cumulative columns and a final "Total"
# row; sort = "dec" requests decreasing-frequency order.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.00442 1 14.3 14.3 14.3 14.3
4.60655 1 14.3 14.3 28.6 28.6
4.67722 1 14.3 14.3 42.9 42.9
4.86219 1 14.3 14.3 57.1 57.1
4.96416 1 14.3 14.3 71.4 71.4
9.64894 1 14.3 14.3 85.7 85.7
9.87553 1 14.3 14.3 100.0 100.0
Total 7 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table built above.
str(table)
## Classes 'freqtab' and 'data.frame':  8 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 7
##  $ %      : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ val%   : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ %cum   : num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
##  $ val%cum: num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
# Split the frequency table into category labels and counts, leaving out the
# trailing "Total" row that freq(total = TRUE) appends.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike 1:(length(v) - 1), never
# counts backwards (1:0) when the table holds only the "Total" row.
names <- head(x, -1)
freqs <- head(y, -1)
# Note: this reuses the name `df`, replacing whatever `df` held before.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.00442 1
4.60655 1
4.67722 1
4.86219 1
4.96416 1
9.64894 1
9.87553 1
library(ggplot2)
# One bar per distinct distance value; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even; otherwise
# take the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w until they pass the
# maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.00442  4.00442  8.00442 12.00442
# Assign each distance to a class. include.lowest = TRUE closes the first
# interval on the left, so the minimum value (which is exactly the first
# break) is counted instead of becoming NA.
# Note: this replaces the numeric vector `distance` with a factor.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.00442,4] 0 0.0000000 0
(4,8] 4 0.6666667 4
(8,12] 2 0.3333333 6
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.00442,4]",..: 1 2 3
##  $ Freq    : int  0 4 2
##  $ Rel_Freq: num  0 0.667 0.333
##  $ Cum_Freq: int  0 4 6
# Two-column frame for plotting: class interval (x) and its count (y).
# Note: this reuses the name `df`, replacing whatever `df` held before.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.00442,4] 0
(4,8] 4
(8,12] 2
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_ES; in the printed result the
# character columns come back as NA.
stat.desc(df_ES)
##                        id date time continent_code country_name country_code
## nbr.val      7.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          5.640000e+02   NA   NA             NA           NA           NA
## max          7.441000e+03   NA   NA             NA           NA           NA
## range        6.877000e+03   NA   NA             NA           NA           NA
## sum          2.542600e+04   NA   NA             NA           NA           NA
## median       1.286000e+03   NA   NA             NA           NA           NA
## mean         3.632286e+03   NA   NA             NA           NA           NA
## SE.mean      1.263594e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.091903e+03   NA   NA             NA           NA           NA
## var          1.117669e+07   NA   NA             NA           NA           NA
## std.dev      3.343155e+03   NA   NA             NA           NA           NA
## coef.var     9.203998e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description   latitude
## nbr.val         NA 7.000000e+00   NA  7.0000000                   NA   7.000000
## nbr.null        NA 2.000000e+00   NA  0.0000000                   NA   0.000000
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA   0.000000
## min             NA 0.000000e+00   NA  0.0044200                   NA  -7.958900
## max             NA 1.246940e+05   NA  9.8755300                   NA  13.720500
## range           NA 1.246940e+05   NA  9.8711100                   NA  21.679400
## sum             NA 5.325430e+05   NA 38.6390100                   NA  52.794400
## median          NA 1.246940e+05   NA  4.8621900                   NA  13.714700
## mean            NA 7.607757e+04   NA  5.5198586                   NA   7.542057
## SE.mean         NA 2.330952e+04   NA  1.2791994                   NA   3.982602
## CI.mean.0.95    NA 5.703633e+04   NA  3.1300883                   NA   9.745075
## var             NA 3.803334e+09   NA 11.4544585                   NA 111.027808
## std.dev         NA 6.167118e+04   NA  3.3844436                   NA  10.536973
## coef.var        NA 8.106355e-01   NA  0.6131395                   NA   1.397095
##                  longitude geolocation hazard_type landslide_type
## nbr.val         7.00000000          NA          NA             NA
## nbr.null        0.00000000          NA          NA             NA
## nbr.na          0.00000000          NA          NA             NA
## min           -89.36250000          NA          NA             NA
## max           -77.52390000          NA          NA             NA
## range          11.83860000          NA          NA             NA
## sum          -601.71290000          NA          NA             NA
## median        -89.26850000          NA          NA             NA
## mean          -85.95898571          NA          NA             NA
## SE.mean         2.15312466          NA          NA             NA
## CI.mean.0.95    5.26850626          NA          NA             NA
## var            32.45162074          NA          NA             NA
## std.dev         5.69663240          NA          NA             NA
## coef.var       -0.06627152          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 3.0000000   6.000000          NA
## nbr.null                 NA      NA         NA 2.0000000   4.000000          NA
## nbr.na                   NA      NA         NA 4.0000000   1.000000          NA
## min                      NA      NA         NA 0.0000000   0.000000          NA
## max                      NA      NA         NA 1.0000000  12.000000          NA
## range                    NA      NA         NA 1.0000000  12.000000          NA
## sum                      NA      NA         NA 1.0000000  16.000000          NA
## median                   NA      NA         NA 0.0000000   0.000000          NA
## mean                     NA      NA         NA 0.3333333   2.666667          NA
## SE.mean                  NA      NA         NA 0.3333333   1.977653          NA
## CI.mean.0.95             NA      NA         NA 1.4342176   5.083719          NA
## var                      NA      NA         NA 0.3333333  23.466667          NA
## std.dev                  NA      NA         NA 0.5773503   4.844241          NA
## coef.var                 NA      NA         NA 1.7320508   1.816590          NA
##              source_link         prop         ypos
## nbr.val               NA   7.00000000    7.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA   0.01143922    6.4237671
## max                   NA  25.55844469   93.7081851
## range                 NA  25.54700547   87.2844180
## sum                   NA 100.00000000  361.7457720
## median                NA  12.58362986   49.6536920
## mean                  NA  14.28571429   51.6779674
## SE.mean               NA   3.31064238   13.0483680
## CI.mean.0.95          NA   8.10085008   31.9282063
## var                   NA  76.72247089 1191.8193525
## std.dev               NA   8.75913642   34.5227367
## coef.var              NA   0.61313955    0.6680359
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para La Paz (El Salvador)

library(readr)
library(knitr)
# Read the catalogue CSV directly from its remote URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city".
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the El Salvador records and preview the first rows.
df_ES <- subset(df, subset = country_name == "El Salvador")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Restrict to the department of La Paz *within El Salvador*. State names are
# not unique across countries in the catalog (Honduras also has a "La Paz"),
# so filtering on `state` alone mixes in events from another country.
df_ES <- subset(df, country_name == "El Salvador" & state == "La Paz")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6683 10/15/14 NA NA El Salvador SV La Paz 2654 San Pedro Masahuat 0.31933 Above river 13.5461 -89.0401 (13.546099999999999, -89.040099999999995) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos
7460 9/25/15 NA NA Honduras HN La Paz 1463 San José 4.69133 Unknown 14.2801 -87.9369 (14.280099999999999, -87.936899999999994) Landslide Landslide Medium Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city, with bar height taken directly from
# `distance`.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities in a state are piled on one bar.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# angle mapped to `distance`.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES the share of the total distance contributed by each row
# (`prop`, in percent) and the mid-point of each slice (`ypos`), which is
# where the percentage label is drawn.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the plot fail later at percent().
library(scales)
# "Set4" is not a ColorBrewer palette (it triggered an "Unknown palette"
# warning); "Set3" is a valid qualitative palette.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build a time-series object from the distances in the current row order of
# df_ES. `start = 2008` and `frequency = 12` label the values as consecutive
# monthly observations beginning in 2008; the `date` column is not used.
data<- ts(df_ES$distance, frequency=12, start=2008)
# Preview the first values of the series as a table.
knitr::kable(head(data))
x
0.31933
4.69133
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per event, labelled with its city.
distance <- setNames(df_ES$distance, df_ES$city)
# Pareto chart of the distances with a cumulative-percentage axis in 10% steps.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                     
## Pareto chart analysis for distance
##                       Frequency  Cum.Freq. Percentage Cum.Percent.
##   San José             4.691330   4.691330  93.626987    93.626987
##   San Pedro Masahuat   0.319330   5.010660   6.373013   100.000000
stem(df_ES$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 
##   2 | 
##   3 | 
##   4 | 7
head(df_ES)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state  population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  6683 10/15/14 <NA>  <NA>           El Salvador  SV           La Paz       2654
## 2  7460 9/25/15  <NA>  <NA>           Honduras     HN           La Paz       1463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6683 10/15/14 NA NA El Salvador SV La Paz 2654 San Pedro Masahuat 0.31933 Above river 13.5461 -89.0401 (13.546099999999999, -89.040099999999995) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 6.373013 3.186506
7460 9/25/15 NA NA Honduras HN La Paz 1463 San José 4.69133 Unknown 14.2801 -87.9369 (14.280099999999999, -87.936899999999994) Landslide Landslide Medium Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/ 93.626987 53.186506
stem(df_ES$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 
##   2 | 
##   3 | 
##   4 | 7
stem(df_ES$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 
##   4 | 
##   4 | 7

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.31933 1 50 50 50 50
4.69133 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# `table` is the questionr::freq() result; with `total = TRUE` its last row is
# the "Total" summary, which must not be plotted as a category. head(x, -1)
# drops that row even when it is the only one, whereas 1:(length(x) - 1)
# would become c(1, 0) and keep it. The local names also avoid masking
# base::names().
categories <- head(row.names(table), -1)
counts <- head(table$n, -1)
# NOTE: this overwrites `df`, which previously held the full catalog.
df <- data.frame(x = categories, y = counts)
knitr::kable(df)
x y
0.31933 1
4.69133 1
library(ggplot2)
# Bar chart of how many landslides were recorded at each distinct distance.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n), evaluated on the number of observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down value when the rounded-up one is even; otherwise use
# the rounded-up value.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: the data range split into n_clases parts, rounded up to a whole
# number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w; the upper limit is
# padded by one width so the last break lies above the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.31933 3.31933 6.31933
# Assign each distance to its class. cut() builds right-closed intervals
# (a, b], and the first break equals min(distance), so without
# `include.lowest = TRUE` the smallest observation falls outside every class
# and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class. The call `table()`
# still resolves to base::table(): R skips the non-function object named
# `table` created earlier when it looks up a function.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.319,3.32] 0 0 0
(3.32,6.32] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.319,3.32]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(0.319,3.32] 0
(3.32,6.32] 1
library(ggplot2)

# Bar chart of the number of observations falling in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_ES)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.683000e+03   NA   NA             NA           NA           NA
## max          7.460000e+03   NA   NA             NA           NA           NA
## range        7.770000e+02   NA   NA             NA           NA           NA
## sum          1.414300e+04   NA   NA             NA           NA           NA
## median       7.071500e+03   NA   NA             NA           NA           NA
## mean         7.071500e+03   NA   NA             NA           NA           NA
## SE.mean      3.885000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.936361e+03   NA   NA             NA           NA           NA
## var          3.018645e+05   NA   NA             NA           NA           NA
## std.dev      5.494220e+02   NA   NA             NA           NA           NA
## coef.var     7.769525e-02   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 1.463000e+03   NA  0.319330                   NA 13.54610000
## max             NA 2.654000e+03   NA  4.691330                   NA 14.28010000
## range           NA 1.191000e+03   NA  4.372000                   NA  0.73400000
## sum             NA 4.117000e+03   NA  5.010660                   NA 27.82620000
## median          NA 2.058500e+03   NA  2.505330                   NA 13.91310000
## mean            NA 2.058500e+03   NA  2.505330                   NA 13.91310000
## SE.mean         NA 5.955000e+02   NA  2.186000                   NA  0.36700000
## CI.mean.0.95    NA 7.566545e+03   NA 27.775764                   NA  4.66317714
## var             NA 7.092405e+05   NA  9.557192                   NA  0.26937800
## std.dev         NA 8.421642e+02   NA  3.091471                   NA  0.51901638
## coef.var        NA 4.091155e-01   NA  1.233958                   NA  0.03730415
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.904010e+01          NA          NA             NA
## max          -8.793690e+01          NA          NA             NA
## range         1.103200e+00          NA          NA             NA
## sum          -1.769770e+02          NA          NA             NA
## median       -8.848850e+01          NA          NA             NA
## mean         -8.848850e+01          NA          NA             NA
## SE.mean       5.516000e-01          NA          NA             NA
## CI.mean.0.95  7.008743e+00          NA          NA             NA
## var           6.085251e-01          NA          NA             NA
## std.dev       7.800802e-01          NA          NA             NA
## coef.var     -8.815611e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2          2          NA
## nbr.null                 NA      NA         NA        2          2          NA
## nbr.na                   NA      NA         NA        0          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    6.373013    3.186506
## max                   NA   93.626987   53.186506
## range                 NA   87.253975   50.000000
## sum                   NA  100.000000   56.373013
## median                NA   50.000000   28.186506
## mean                  NA   50.000000   28.186506
## SE.mean               NA   43.626987   25.000000
## CI.mean.0.95          NA  554.333432  317.655118
## var                   NA 3806.628035 1250.000000
## std.dev               NA   61.697877   35.355339
## coef.var              NA    1.233958    1.254336
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San Miguel (El Salvador)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names that are easy to reference.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the events recorded in El Salvador and preview the first rows.
df_ES <- subset(df, subset = country_name == "El Salvador")
df_ES %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Restrict to the department of San Miguel within El Salvador. The country
# condition is explicit because state names can repeat across countries in
# the catalog, and `state` alone would then pull in foreign events.
df_ES <- subset(df, country_name == "El Salvador" & state == "San Miguel")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6251 10/13/14 Night NA El Salvador SV San Miguel 6393 Chirilagua 6.94536 Above road 13.2817 -88.1253 (13.281700000000001, -88.125299999999996) Landslide Rockfall Small Earthquake NA 0 0 elsalvador.com http://www.elsalvador.com/mwedh/nota/nota_completa.asp?idCat=47859&idArt=9167407
6675 5/22/14 Night NA El Salvador SV San Miguel 19095 San Rafael Oriente 10.06695 Unknown 13.4379 -88.2756 (13.437900000000001, -88.275599999999997) Landslide Debris flow Medium Rain NA 0 0 Mundo http://elmundo.com.sv/comunidades-aledanas-al-chaparrastique-afectadas-por-escombros-tras-lluvias
6684 10/15/14 NA NA El Salvador SV San Miguel 6393 Chirilagua 9.97227 Other 13.2930 -88.0843 (13.292999999999999, -88.084299999999999) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city, with bar height taken directly from
# `distance`.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities in a state are piled on one bar.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# angle mapped to `distance`.
ggplot(df_ES, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES the share of the total distance contributed by each row
# (`prop`, in percent) and the mid-point of each slice (`ypos`), which is
# where the percentage label is drawn.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the plot fail later at percent().
library(scales)
# "Set4" is not a ColorBrewer palette (it triggered an "Unknown palette"
# warning); "Set3" is a valid qualitative palette.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build a time-series object from the distances in the current row order of
# df_ES. `start = 2008` and `frequency = 12` label the values as consecutive
# monthly observations beginning in 2008; the `date` column is not used.
data<- ts(df_ES$distance, frequency=12, start=2008)
# Preview the first values of the series as a table.
knitr::kable(head(data))
x
10.06695
6.94536
9.97227
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per event, labelled with its city.
distance <- setNames(df_ES$distance, df_ES$city)
# Pareto chart of the distances with a cumulative-percentage axis in 10% steps.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                     
## Pareto chart analysis for distance
##                      Frequency Cum.Freq. Percentage Cum.Percent.
##   San Rafael Oriente  10.06695  10.06695   37.30631     37.30631
##   Chirilagua           9.97227  20.03922   36.95544     74.26175
##   Chirilagua           6.94536  26.98458   25.73825    100.00000
stem(df_ES$"distance")
## 
##   The decimal point is at the |
## 
##    6 | 9
##    7 | 
##    8 | 
##    9 | 
##   10 | 01
head(df_ES)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state      population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>           <dbl>
## 1  6675 5/22/14  Night <NA>           El Salvador  SV           San Miguel      19095
## 2  6251 10/13/14 Night <NA>           El Salvador  SV           San Miguel       6393
## 3  6684 10/15/14 <NA>  <NA>           El Salvador  SV           San Miguel       6393
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6675 5/22/14 Night NA El Salvador SV San Miguel 19095 San Rafael Oriente 10.06695 Unknown 13.4379 -88.2756 (13.437900000000001, -88.275599999999997) Landslide Debris flow Medium Rain NA 0 0 Mundo http://elmundo.com.sv/comunidades-aledanas-al-chaparrastique-afectadas-por-escombros-tras-lluvias 37.30631 18.65315
6251 10/13/14 Night NA El Salvador SV San Miguel 6393 Chirilagua 6.94536 Above road 13.2817 -88.1253 (13.281700000000001, -88.125299999999996) Landslide Rockfall Small Earthquake NA 0 0 elsalvador.com http://www.elsalvador.com/mwedh/nota/nota_completa.asp?idCat=47859&idArt=9167407 25.73825 50.17543
6684 10/15/14 NA NA El Salvador SV San Miguel 6393 Chirilagua 9.97227 Other 13.2930 -88.0843 (13.292999999999999, -88.084299999999999) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 36.95544 81.52228
stem(df_ES$"distance")
## 
##   The decimal point is at the |
## 
##    6 | 9
##    7 | 
##    8 | 
##    9 | 
##   10 | 01
stem(df_ES$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    6 | 9
##    7 | 
##    7 | 
##    8 | 
##    8 | 
##    9 | 
##    9 | 
##   10 | 01

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
6.94536 1 33.3 33.3 33.3 33.3
9.97227 1 33.3 33.3 66.7 66.7
10.06695 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# `table` is the questionr::freq() result; with `total = TRUE` its last row is
# the "Total" summary, which must not be plotted as a category. head(x, -1)
# drops that row even when it is the only one, whereas 1:(length(x) - 1)
# would become c(1, 0) and keep it. The local names also avoid masking
# base::names().
categories <- head(row.names(table), -1)
counts <- head(table$n, -1)
# NOTE: this overwrites `df`, which previously held the full catalog.
df <- data.frame(x = categories, y = counts)
knitr::kable(df)
x y
6.94536 1
9.97227 1
10.06695 1
library(ggplot2)
# Bar chart of how many landslides were recorded at each distinct distance.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n), evaluated on the number of observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down value when the rounded-up one is even; otherwise use
# the rounded-up value.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: the data range split into n_clases parts, rounded up to a whole
# number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w; the upper limit is
# padded by one width so the last break lies above the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  6.94536  8.94536 10.94536
# Assign each distance to its class. cut() builds right-closed intervals
# (a, b], and the first break equals min(distance), so without
# `include.lowest = TRUE` the smallest observation falls outside every class
# and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class. The call `table()`
# still resolves to base::table(): R skips the non-function object named
# `table` created earlier when it looks up a function.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(6.95,8.95] 0 0 0
(8.95,10.9] 2 1 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(6.95,8.95]",..: 1 2
##  $ Freq    : int  0 2
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(6.95,8.95] 0
(8.95,10.9] 2
library(ggplot2)

# Bar chart of the number of observations falling in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_ES)
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.251000e+03   NA   NA             NA           NA           NA
## max          6.684000e+03   NA   NA             NA           NA           NA
## range        4.330000e+02   NA   NA             NA           NA           NA
## sum          1.961000e+04   NA   NA             NA           NA           NA
## median       6.675000e+03   NA   NA             NA           NA           NA
## mean         6.536667e+03   NA   NA             NA           NA           NA
## SE.mean      1.428570e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 6.146639e+02   NA   NA             NA           NA           NA
## var          6.122433e+04   NA   NA             NA           NA           NA
## std.dev      2.474355e+02   NA   NA             NA           NA           NA
## coef.var     3.785347e-02   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 6.393000e+03   NA  6.9453600                   NA
## max             NA 1.909500e+04   NA 10.0669500                   NA
## range           NA 1.270200e+04   NA  3.1215900                   NA
## sum             NA 3.188100e+04   NA 26.9845800                   NA
## median          NA 6.393000e+03   NA  9.9722700                   NA
## mean            NA 1.062700e+04   NA  8.9948600                   NA
## SE.mean         NA 4.234000e+03   NA  1.0251144                   NA
## CI.mean.0.95    NA 1.821743e+04   NA  4.4107114                   NA
## var             NA 5.378027e+07   NA  3.1525788                   NA
## std.dev         NA 7.333503e+03   NA  1.7755503                   NA
## coef.var        NA 6.900822e-01   NA  0.1973961                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.000000000  3.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          13.281700000 -8.827560e+01          NA          NA             NA
## max          13.437900000 -8.808430e+01          NA          NA             NA
## range         0.156200000  1.913000e-01          NA          NA             NA
## sum          40.012600000 -2.644852e+02          NA          NA             NA
## median       13.293000000 -8.812530e+01          NA          NA             NA
## mean         13.337533333 -8.816173e+01          NA          NA             NA
## SE.mean       0.050289241  5.815056e-02          NA          NA             NA
## CI.mean.0.95  0.216377141  2.502017e-01          NA          NA             NA
## var           0.007587023  1.014446e-02          NA          NA             NA
## std.dev       0.087103521  1.007197e-01          NA          NA             NA
## coef.var      0.006530707 -1.142443e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        3          3          NA
## nbr.null                 NA      NA         NA        3          3          NA
## nbr.na                   NA      NA         NA        0          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA   3.0000000   3.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA  25.7382550  18.6531530
## max                   NA  37.3063060  81.5222805
## range                 NA  11.5680511  62.8691275
## sum                   NA 100.0000000 150.3508671
## median                NA  36.9554390  50.1754335
## mean                  NA  33.3333333  50.1169557
## SE.mean               NA   3.7988897  18.1487774
## CI.mean.0.95          NA  16.3453031  78.0878866
## var                   NA  43.2946886 988.1343623
## std.dev               NA   6.5798699  31.4346045
## coef.var              NA   0.1973961   0.6272249
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San Salvador (El Salvador)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names that are easy to reference.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the events recorded in El Salvador and preview the first rows.
df_ES <- subset(df, subset = country_name == "El Salvador")
df_ES %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Restrict to the department of San Salvador within El Salvador. The country
# condition is explicit because state names can repeat across countries in
# the catalog, and `state` alone would then pull in foreign events.
df_ES <- subset(df, country_name == "El Salvador" & state == "San Salvador")
knitr::kable(head(df_ES))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
4008 10/10/11 NA NA El Salvador SV San Salvador 112158 Apopa 3.01739 NA 13.7874 -89.1600 (13.7874, -89.16) Landslide Landslide Very_large Downpour NA NA 32 NA http://www.utsandiego.com/news/2011/oct/17/heavy-rains-kill-at-least-84-in-central-america/
6687 10/12/14 NA NA El Salvador SV San Salvador 33767 Antiguo Cuscatlán 3.25227 Retaining wall 13.6891 -89.2361 (13.6891, -89.236099999999993) Landslide Other Small Rain NA 0 0 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city, bar height is the distance value.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities are piled into one bar per state.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single stacked bar bent into a circle by mapping y to the angle.
pie_base <- ggplot(df_ES, aes(x = state, y = distance, fill = city))
pie_base +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES each row's share of the total distance (prop, in percent) and
# the mid-point of its slice (ypos), which is where the label is drawn.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops immediately if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
# "Set4" is not a ColorBrewer palette (it produced an "Unknown palette Set4"
# warning); "Set3" is a valid qualitative palette.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Build the series in chronological order of the event date (m/d/yy strings)
# instead of relying on whatever row order df_ES currently has.
# NOTE: the time index is synthetic (monthly steps starting in 2008); it is
# not derived from the event dates.
chronological <- order(as.Date(df_ES$date, format = "%m/%d/%y"))
data <- ts(df_ES$distance[chronological], frequency = 12, start = 2008)
knitr::kable(head(data))
x
3.01739
3.25227
# Plot the series. The former `colour = "green"` entry in labs() was dropped:
# labs() only sets titles, so it named a colour legend instead of colouring
# the line.
autoplot(data) +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city name, then drawn as a Pareto chart with
# cumulative-percentage ticks every 10 %.
distance <- setNames(df_ES$distance, df_ES$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                    
## Pareto chart analysis for distance
##                     Frequency Cum.Freq. Percentage Cum.Percent.
##   Antiguo Cuscatlán   3.25227   3.25227   51.87315     51.87315
##   Apopa               3.01739   6.26966   48.12685    100.00000
# Stem-and-leaf display of the distance column.
stem(df_ES[["distance"]])
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   30 | 2
##   30 | 
##   31 | 
##   31 | 
##   32 | 
##   32 | 5
# Print the first rows of df_ES (now including the prop and ypos columns).
head(df_ES, n = 6L)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  4008 10/10/11 <NA>  <NA>           El Salvador  SV           San ~     112158
## 2  6687 10/12/14 <NA>  <NA>           El Salvador  SV           San ~      33767
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4008 10/10/11 NA NA El Salvador SV San Salvador 112158 Apopa 3.01739 NA 13.7874 -89.1600 (13.7874, -89.16) Landslide Landslide Very_large Downpour NA NA 32 NA http://www.utsandiego.com/news/2011/oct/17/heavy-rains-kill-at-least-84-in-central-america/ 48.12685 24.06343
6687 10/12/14 NA NA El Salvador SV San Salvador 33767 Antiguo Cuscatlán 3.25227 Retaining wall 13.6891 -89.2361 (13.6891, -89.236099999999993) Landslide Other Small Rain NA 0 0 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm 51.87315 74.06343
# Stem-and-leaf display of the distance column at the default scale.
stem(df_ES[["distance"]])
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   30 | 2
##   30 | 
##   31 | 
##   31 | 
##   32 | 
##   32 | 5
# Stem-and-leaf display of the distance column with scale = 2.
stem(df_ES[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   30 | 2
##   30 | 
##   31 | 
##   31 | 
##   32 | 
##   32 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values: sorted in decreasing order, with
# cumulative columns and a final "Total" row.
# NOTE: the result is stored under the name `table`, the same name as the base
# function table().
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
3.01739 1 50 50 50 50
3.25227 1 50 50 100 100
Total 2 100 100 100 100
# Show the structure (classes and column types) of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Split the frequency table into labels (row names) and counts, leaving out the
# trailing "Total" row. head(, -1) replaces 1:(length - 1), which would count
# backwards (1, 0) and keep the total if it were the only row.
# NOTE: `df` is reassigned here, replacing the catalogue read earlier.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
3.01739 1
3.25227 1
library(ggplot2)
# Bar chart of the frequency of each distance value, with vertical x labels.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: suggested number of classes for length(distance) observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up to a
# whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w, up to at most max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 3.01739 4.01739
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum distance, which is exactly the
# first break, belongs to no class and is silently dropped as NA.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(3.02,4.02] 1 1 1
# Show the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(3.02,4.02]": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
# Two-column plotting data: class label (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(3.02,4.02] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_ES (basic counts plus
# location/dispersion measures, 95 % confidence level, no normality tests).
stat.desc(df_ES, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          4.008000e+03   NA   NA             NA           NA           NA
## max          6.687000e+03   NA   NA             NA           NA           NA
## range        2.679000e+03   NA   NA             NA           NA           NA
## sum          1.069500e+04   NA   NA             NA           NA           NA
## median       5.347500e+03   NA   NA             NA           NA           NA
## mean         5.347500e+03   NA   NA             NA           NA           NA
## SE.mean      1.339500e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 1.701996e+04   NA   NA             NA           NA           NA
## var          3.588521e+06   NA   NA             NA           NA           NA
## std.dev      1.894339e+03   NA   NA             NA           NA           NA
## coef.var     3.542476e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 2.000000e+00   NA 2.00000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.00000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.00000000                   NA
## min             NA 3.376700e+04   NA 3.01739000                   NA
## max             NA 1.121580e+05   NA 3.25227000                   NA
## range           NA 7.839100e+04   NA 0.23488000                   NA
## sum             NA 1.459250e+05   NA 6.26966000                   NA
## median          NA 7.296250e+04   NA 3.13483000                   NA
## mean            NA 7.296250e+04   NA 3.13483000                   NA
## SE.mean         NA 3.919550e+04   NA 0.11744000                   NA
## CI.mean.0.95    NA 4.980260e+05   NA 1.49221668                   NA
## var             NA 3.072574e+09   NA 0.02758431                   NA
## std.dev         NA 5.543081e+04   NA 0.16608524                   NA
## coef.var        NA 7.597164e-01   NA 0.05298062                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          13.689100000 -8.923610e+01          NA          NA             NA
## max          13.787400000 -8.916000e+01          NA          NA             NA
## range         0.098300000  7.610000e-02          NA          NA             NA
## sum          27.476500000 -1.783961e+02          NA          NA             NA
## median       13.738250000 -8.919805e+01          NA          NA             NA
## mean         13.738250000 -8.919805e+01          NA          NA             NA
## SE.mean       0.049150000  3.805000e-02          NA          NA             NA
## CI.mean.0.95  0.624509963  4.834711e-01          NA          NA             NA
## var           0.004831445  2.895605e-03          NA          NA             NA
## std.dev       0.069508597  5.381083e-02          NA          NA             NA
## coef.var      0.005059494 -6.032736e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   2.000000          NA
## nbr.null                 NA      NA         NA        1   1.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  32.000000          NA
## range                    NA      NA         NA        0  32.000000          NA
## sum                      NA      NA         NA        0  32.000000          NA
## median                   NA      NA         NA        0  16.000000          NA
## mean                     NA      NA         NA        0  16.000000          NA
## SE.mean                  NA      NA         NA       NA  16.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN 203.299276          NA
## var                      NA      NA         NA       NA 512.000000          NA
## std.dev                  NA      NA         NA       NA  22.627417          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link         prop         ypos
## nbr.val               NA   2.00000000    2.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA  48.12685217   24.0634261
## max                   NA  51.87314783   74.0634261
## range                 NA   3.74629565   50.0000000
## sum                   NA 100.00000000   98.1268522
## median                NA  50.00000000   49.0634261
## mean                  NA  50.00000000   49.0634261
## SE.mean               NA   1.87314783   25.0000000
## CI.mean.0.95          NA  23.80059978  317.6551184
## var                   NA   7.01736556 1250.0000000
## std.dev               NA   2.64903106   35.3553391
## coef.var              NA   0.05298062    0.7206048
# Horizontal green box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Santa Ana (El Salvador)

library(readr)
library(knitr)
# Load the catalogue CSV from the course repository into df.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the
# El Salvador rows and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Keep only the rows whose state is "Santa Ana" and preview them as a table.
in_santa_ana <- df$state == "Santa Ana"
df_ES <- subset(df, in_santa_ana)
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6685 10/12/14 NA NA El Salvador SV Santa Ana 5773 Coatepeque 8.8321 Unknown 14.0007 -89.4691 (14.0007, -89.469099999999997) Landslide Landslide Medium Rain NA 2 3 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm
7438 7/18/15 NA NA El Salvador SV Santa Ana 10095 Ciudad Arce 1.1581 Above road 13.8464 -89.4502 (13.846399999999999, -89.450199999999995) Landslide Landslide Small Rain NA 0 0 La Prensa http://www.laprensagrafica.com/2015/07/18/lluvias-causan-derrumbe-en-carretera-a-santa-ana

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city, bar height is the distance value.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities are piled into one bar per state.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single stacked bar bent into a circle by mapping y to the angle.
pie_base <- ggplot(df_ES, aes(x = state, y = distance, fill = city))
pie_base +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES each row's share of the total distance (prop, in percent) and
# the mid-point of its slice (ypos), which is where the label is drawn.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops immediately if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
# "Set4" is not a ColorBrewer palette (it produced an "Unknown palette Set4"
# warning); "Set3" is a valid qualitative palette.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Build the series in chronological order of the event date (m/d/yy strings)
# instead of relying on whatever row order df_ES currently has.
# NOTE: the time index is synthetic (monthly steps starting in 2008); it is
# not derived from the event dates.
chronological <- order(as.Date(df_ES$date, format = "%m/%d/%y"))
data <- ts(df_ES$distance[chronological], frequency = 12, start = 2008)
knitr::kable(head(data))
x
8.8321
1.1581
# Plot the series. The former `colour = "green"` entry in labs() was dropped:
# labs() only sets titles, so it named a colour legend instead of colouring
# the line.
autoplot(data) +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city name, then drawn as a Pareto chart with
# cumulative-percentage ticks every 10 %.
distance <- setNames(df_ES$distance, df_ES$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##              
## Pareto chart analysis for distance
##               Frequency Cum.Freq. Percentage Cum.Percent.
##   Coatepeque    8.83210   8.83210   88.40764     88.40764
##   Ciudad Arce   1.15810   9.99020   11.59236    100.00000
# Stem-and-leaf display of the distance column.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 2
##   2 | 
##   4 | 
##   6 | 
##   8 | 8
# Print the first rows of df_ES (now including the prop and ypos columns).
head(df_ES, n = 6L)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  6685 10/12/14 <NA>  <NA>           El Salvador  SV           Santa Ana       5773
## 2  7438 7/18/15  <NA>  <NA>           El Salvador  SV           Santa Ana      10095
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6685 10/12/14 NA NA El Salvador SV Santa Ana 5773 Coatepeque 8.8321 Unknown 14.0007 -89.4691 (14.0007, -89.469099999999997) Landslide Landslide Medium Rain NA 2 3 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm 88.40764 44.20382
7438 7/18/15 NA NA El Salvador SV Santa Ana 10095 Ciudad Arce 1.1581 Above road 13.8464 -89.4502 (13.846399999999999, -89.450199999999995) Landslide Landslide Small Rain NA 0 0 La Prensa http://www.laprensagrafica.com/2015/07/18/lluvias-causan-derrumbe-en-carretera-a-santa-ana 11.59236 94.20382
# Stem-and-leaf display of the distance column at the default scale.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 2
##   2 | 
##   4 | 
##   6 | 
##   8 | 8
# Stem-and-leaf display of the distance column with scale = 2.
stem(df_ES[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 2
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values: sorted in decreasing order, with
# cumulative columns and a final "Total" row.
# NOTE: the result is stored under the name `table`, the same name as the base
# function table().
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
1.1581 1 50 50 50 50
8.8321 1 50 50 100 100
Total 2 100 100 100 100
# Show the structure (classes and column types) of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Split the frequency table into labels (row names) and counts, leaving out the
# trailing "Total" row. head(, -1) replaces 1:(length - 1), which would count
# backwards (1, 0) and keep the total if it were the only row.
# NOTE: `df` is reassigned here, replacing the catalogue read earlier.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.1581 1
8.8321 1
library(ggplot2)
# Bar chart of the frequency of each distance value, with vertical x labels.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: suggested number of classes for length(distance) observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up to a
# whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w, up to at most max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 1.1581 5.1581 9.1581
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum distance, which is exactly the
# first break, belongs to no class and is silently dropped as NA.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.16,5.16] 0 0 0
(5.16,9.16] 1 1 1
# Show the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(1.16,5.16]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column plotting data: class label (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.16,5.16] 0
(5.16,9.16] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_ES (basic counts plus
# location/dispersion measures, 95 % confidence level, no normality tests).
stat.desc(df_ES, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.685000e+03   NA   NA             NA           NA           NA
## max          7.438000e+03   NA   NA             NA           NA           NA
## range        7.530000e+02   NA   NA             NA           NA           NA
## sum          1.412300e+04   NA   NA             NA           NA           NA
## median       7.061500e+03   NA   NA             NA           NA           NA
## mean         7.061500e+03   NA   NA             NA           NA           NA
## SE.mean      3.765000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.783886e+03   NA   NA             NA           NA           NA
## var          2.835045e+05   NA   NA             NA           NA           NA
## std.dev      5.324514e+02   NA   NA             NA           NA           NA
## coef.var     7.540203e-02   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 5.773000e+03   NA  1.158100                   NA
## max             NA 1.009500e+04   NA  8.832100                   NA
## range           NA 4.322000e+03   NA  7.674000                   NA
## sum             NA 1.586800e+04   NA  9.990200                   NA
## median          NA 7.934000e+03   NA  4.995100                   NA
## mean            NA 7.934000e+03   NA  4.995100                   NA
## SE.mean         NA 2.161000e+03   NA  3.837000                   NA
## CI.mean.0.95    NA 2.745811e+04   NA 48.753708                   NA
## var             NA 9.339842e+06   NA 29.445138                   NA
## std.dev         NA 3.056116e+03   NA  5.426337                   NA
## coef.var        NA 3.851923e-01   NA  1.086332                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          13.846400000 -8.946910e+01          NA          NA             NA
## max          14.000700000 -8.945020e+01          NA          NA             NA
## range         0.154300000  1.890000e-02          NA          NA             NA
## sum          27.847100000 -1.789193e+02          NA          NA             NA
## median       13.923550000 -8.945965e+01          NA          NA             NA
## mean         13.923550000 -8.945965e+01          NA          NA             NA
## SE.mean       0.077150000  9.450000e-03          NA          NA             NA
## CI.mean.0.95  0.980283695  1.200736e-01          NA          NA             NA
## var           0.011904245  1.786050e-04          NA          NA             NA
## std.dev       0.109106576  1.336432e-02          NA          NA             NA
## coef.var      0.007836118 -1.493893e-04          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA  2.000000   2.000000          NA
## nbr.null                 NA      NA         NA  1.000000   1.000000          NA
## nbr.na                   NA      NA         NA  0.000000   0.000000          NA
## min                      NA      NA         NA  0.000000   0.000000          NA
## max                      NA      NA         NA  2.000000   3.000000          NA
## range                    NA      NA         NA  2.000000   3.000000          NA
## sum                      NA      NA         NA  2.000000   3.000000          NA
## median                   NA      NA         NA  1.000000   1.500000          NA
## mean                     NA      NA         NA  1.000000   1.500000          NA
## SE.mean                  NA      NA         NA  1.000000   1.500000          NA
## CI.mean.0.95             NA      NA         NA 12.706205  19.059307          NA
## var                      NA      NA         NA  2.000000   4.500000          NA
## std.dev                  NA      NA         NA  1.414214   2.121320          NA
## coef.var                 NA      NA         NA  1.414214   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA    2.000000    2.0000000
## nbr.null              NA    0.000000    0.0000000
## nbr.na                NA    0.000000    0.0000000
## min                   NA   11.592361   44.2038197
## max                   NA   88.407639   94.2038197
## range                 NA   76.815279   50.0000000
## sum                   NA  100.000000  138.4076395
## median                NA   50.000000   69.2038197
## mean                  NA   50.000000   69.2038197
## SE.mean               NA   38.407639   25.0000000
## CI.mean.0.95          NA  488.015331  317.6551184
## var                   NA 2950.293542 1250.0000000
## std.dev               NA   54.316605   35.3553391
## coef.var              NA    1.086332    0.5108871
# Horizontal green box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Sonsonate (El Salvador)

library(readr)
library(knitr)
# Load the catalogue CSV from the course repository into df.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the
# El Salvador rows and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952
library(dplyr)
# Keep only the rows whose state is "Sonsonate" and preview them as a table.
in_sonsonate <- df$state == "Sonsonate"
df_ES <- subset(df, in_sonsonate)
knitr::kable(head(df_ES, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6681 10/15/14 NA NA El Salvador SV Sonsonate 7358 Nahuizalco 4.23875 Unknown 13.7895 -89.7739 (13.7895, -89.773899999999998) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos
6682 10/15/14 NA NA El Salvador SV Sonsonate 15446 Sonzacate 3.22235 Unknown 13.7135 -89.6938 (13.7135, -89.693799999999996) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos
7442 10/19/15 NA NA El Salvador SV Sonsonate 9936 Juayúa 0.49346 Unknown 13.8457 -89.7445 (13.845700000000001, -89.744500000000002) Landslide Landslide Medium Downpour NA 1 0 El Heraldo http://www.elheraldo.hn/mundo/892045-217/cuatro-muertos-por-persistentes-lluvias-en-el-salvador

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city, bar height is the distance value.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities are piled into one bar per state.
ggplot(data = df_ES, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single stacked bar bent into a circle by mapping y to the angle.
pie_base <- ggplot(df_ES, aes(x = state, y = distance, fill = city))
pie_base +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to df_ES each row's share of the total distance (prop, in percent) and
# the mid-point of its slice (ypos), which is where the label is drawn.
df_ES <- df_ES %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops immediately if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
# "Set4" is not a ColorBrewer palette (it produced an "Unknown palette Set4"
# warning); "Set3" is a valid qualitative palette.
ggplot(df_ES, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Build the series in chronological order of the event date (m/d/yy strings)
# instead of relying on whatever row order df_ES currently has.
# NOTE: the time index is synthetic (monthly steps starting in 2008); it is
# not derived from the event dates.
chronological <- order(as.Date(df_ES$date, format = "%m/%d/%y"))
data <- ts(df_ES$distance[chronological], frequency = 12, start = 2008)
knitr::kable(head(data))
x
3.22235
4.23875
0.49346
# Plot the series. The former `colour = "green"` entry in labs() was dropped:
# labs() only sets titles, so it named a colour legend instead of colouring
# the line.
autoplot(data) +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city name, then drawn as a Pareto chart with
# cumulative-percentage ticks every 10 %.
distance <- setNames(df_ES$distance, df_ES$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##               Frequency  Cum.Freq. Percentage Cum.Percent.
##   Nahuizalco   4.238750   4.238750  53.287045    53.287045
##   Sonzacate    3.222350   7.461100  40.509469    93.796514
##   Juayúa       0.493460   7.954560   6.203486   100.000000
# Stem-and-leaf display of the distance column.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 
##   2 | 
##   3 | 2
##   4 | 2
# Print the first rows (up to six) of df_ES.
head(df_ES, n = 6L)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  6682 10/15/14 <NA>  <NA>           El Salvador  SV           Sonsonate      15446
## 2  6681 10/15/14 <NA>  <NA>           El Salvador  SV           Sonsonate       7358
## 3  7442 10/19/15 <NA>  <NA>           El Salvador  SV           Sonsonate       9936
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a knitr table.
df_ES %>% head() %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6682 10/15/14 NA NA El Salvador SV Sonsonate 15446 Sonzacate 3.22235 Unknown 13.7135 -89.6938 (13.7135, -89.693799999999996) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 40.509469 20.25473
6681 10/15/14 NA NA El Salvador SV Sonsonate 7358 Nahuizalco 4.23875 Unknown 13.7895 -89.7739 (13.7895, -89.773899999999998) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 53.287045 67.15299
7442 10/19/15 NA NA El Salvador SV Sonsonate 9936 Juayúa 0.49346 Unknown 13.8457 -89.7445 (13.845700000000001, -89.744500000000002) Landslide Landslide Medium Downpour NA 1 0 El Heraldo http://www.elheraldo.hn/mundo/892045-217/cuatro-muertos-por-persistentes-lluvias-en-el-salvador 6.203486 96.89826
# Stem-and-leaf display of the distance column at the default scale.
stem(df_ES[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 
##   2 | 
##   3 | 2
##   4 | 2
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_ES[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 
##   0 | 5
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 2
##   3 | 
##   4 | 2

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a total row.
table <- questionr::freq(distance, total = TRUE, cum = TRUE, sort = "dec")
knitr::kable(table)
n % val% %cum val%cum
0.49346 1 33.3 33.3 33.3 33.3
3.22235 1 33.3 33.3 66.7 66.7
4.23875 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(object = table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Build a two-column data frame (value, count) from the frequency table,
# leaving out its last row (the "Total" row).
# head(, -1) is used instead of 1:(length(x) - 1), which would index
# c(1, 0) and keep the "Total" row if the table had a single row.
values <- head(row.names(table), -1)
counts <- head(table$n, -1)
df <- data.frame(x = values, y = counts)
knitr::kable(df)
x y
0.49346 1
3.22235 1
4.23875 1
library(ggplot2)
# One bar per distinct distance value; x labels rotated to stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n). log2() is exact for powers of two, whereas
# log(n) / log(2) can be off by one ulp and push ceiling() up a class.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up class count unless it is even; in that case use the
# rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and advance by w, never exceeding max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.49346 2.49346 4.49346
# include.lowest = TRUE closes the first interval on the left. Without it the
# smallest distance, which equals the first break, becomes NA and is dropped
# from the frequency table.
distance <- cut(distance, breaks = bins, include.lowest = TRUE)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.493,2.49] 1 0.3333333 1
(2.49,4.49] 2 0.6666667 3
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "[0.493,2.49]",..: 1 2
##  $ Freq    : int  1 2
##  $ Rel_Freq: num  0.333 0.667
##  $ Cum_Freq: int  1 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.493,2.49] 1
(2.49,4.49] 2
library(ggplot2)

# Bar chart of the number of events falling in each distance interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_ES; the character columns
# come out as NA.
stat.desc(x = df_ES)
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.681000e+03   NA   NA             NA           NA           NA
## max          7.442000e+03   NA   NA             NA           NA           NA
## range        7.610000e+02   NA   NA             NA           NA           NA
## sum          2.080500e+04   NA   NA             NA           NA           NA
## median       6.682000e+03   NA   NA             NA           NA           NA
## mean         6.935000e+03   NA   NA             NA           NA           NA
## SE.mean      2.535002e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.090723e+03   NA   NA             NA           NA           NA
## var          1.927870e+05   NA   NA             NA           NA           NA
## std.dev      4.390752e+02   NA   NA             NA           NA           NA
## coef.var     6.331293e-02   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 3.000000e+00   NA 3.000000                   NA  3.000000000
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## min             NA 7.358000e+03   NA 0.493460                   NA 13.713500000
## max             NA 1.544600e+04   NA 4.238750                   NA 13.845700000
## range           NA 8.088000e+03   NA 3.745290                   NA  0.132200000
## sum             NA 3.274000e+04   NA 7.954560                   NA 41.348700000
## median          NA 9.936000e+03   NA 3.222350                   NA 13.789500000
## mean            NA 1.091333e+04   NA 2.651520                   NA 13.782900000
## SE.mean         NA 2.385395e+03   NA 1.118211                   NA  0.038305265
## CI.mean.0.95    NA 1.026352e+04   NA 4.811272                   NA  0.164814253
## var             NA 1.707032e+07   NA 3.751184                   NA  0.004401880
## std.dev         NA 4.131625e+03   NA 1.936797                   NA  0.066346665
## coef.var        NA 3.785850e-01   NA 0.730448                   NA  0.004813694
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.977390e+01          NA          NA             NA
## max          -8.969380e+01          NA          NA             NA
## range         8.010000e-02          NA          NA             NA
## sum          -2.692122e+02          NA          NA             NA
## median       -8.974450e+01          NA          NA             NA
## mean         -8.973740e+01          NA          NA             NA
## SE.mean       2.339380e-02          NA          NA             NA
## CI.mean.0.95  1.006554e-01          NA          NA             NA
## var           1.641810e-03          NA          NA             NA
## std.dev       4.051925e-02          NA          NA             NA
## coef.var     -4.515314e-04          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 3.0000000          3          NA
## nbr.null                 NA      NA         NA 2.0000000          3          NA
## nbr.na                   NA      NA         NA 0.0000000          0          NA
## min                      NA      NA         NA 0.0000000          0          NA
## max                      NA      NA         NA 1.0000000          0          NA
## range                    NA      NA         NA 1.0000000          0          NA
## sum                      NA      NA         NA 1.0000000          0          NA
## median                   NA      NA         NA 0.0000000          0          NA
## mean                     NA      NA         NA 0.3333333          0          NA
## SE.mean                  NA      NA         NA 0.3333333          0          NA
## CI.mean.0.95             NA      NA         NA 1.4342176          0          NA
## var                      NA      NA         NA 0.3333333          0          NA
## std.dev                  NA      NA         NA 0.5773503          0          NA
## coef.var                 NA      NA         NA 1.7320508        NaN          NA
##              source_link       prop         ypos
## nbr.val               NA   3.000000    3.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA   6.203486   20.2547344
## max                   NA  53.287045   96.8982571
## range                 NA  47.083560   76.6435227
## sum                   NA 100.000000  184.3059830
## median                NA  40.509469   67.1529915
## mean                  NA  33.333333   61.4353277
## SE.mean               NA  14.057478   22.3090128
## CI.mean.0.95          NA  60.484447   95.9879347
## var                   NA 592.838087 1493.0761531
## std.dev               NA  24.348267   38.6403436
## coef.var              NA   0.730448    0.6289597
# Horizontal box plot of the distance series.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Guatemala

library(readr)
library(knitr)
# Reload the full catalog into `df`, which the frequency-table code above
# had overwritten with a small data frame.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city".
colnames(df)[c(7, 9)] <- c("state", "city")
# NOTE(review): despite the `_GT` suffix, this keeps the rows whose
# country_name is "El Salvador" — confirm which country is intended.
df_GT <- subset(x = df, subset = country_name == "El Salvador")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
230 9/5/07 NA NA El Salvador SV Ahuachapán 7797 Concepción de Ataco 0.00273 NA 13.8703 -89.8486 (13.8703, -89.848600000000005) Landslide Mudslide Medium Tropical cyclone Hurricane Felix NA NA Azcentral.com http://www.azcentral.com/news/articles/1108sr-fhsistercity1109-ON.html
564 6/2/08 NA NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 NA 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1286 11/8/09 NA NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 NA 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1287 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 5.90726 NA 13.6094 -88.8488 (13.609400000000001, -88.848799999999997) Landslide Rockfall Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.bbc.co.uk/2/hi/in_depth/8349333.stm
1288 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 4.03125 NA 13.6466 -88.8347 (13.646599999999999, -88.834699999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA NA NA http://news.yahoo.com/s/afp/20091109/wl_afp/salvadorweatherstorm_20091109100952

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per country, one side-by-side bar group per state.
ggplot(data = df_GT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the states' distances are piled on top of each other.
ggplot(data = df_GT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Full-width stacked bar wrapped around the y axis, which renders it as a pie.
ggplot(data = df_GT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each event (prop) and the
# midpoint of its slice (ypos), used to place the percentage labels.
df_GT <- df_GT %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# "Set4" is not a ColorBrewer palette and only produced an
# "Unknown palette" warning; "Set3" is a valid qualitative palette.
ggplot(df_GT, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as 12 observations per unit of
# time, starting at 2008.
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
4.23875
3.22235
0.49346
8.83210
1.15810
7.60946
# `colour` inside labs() would only name a colour legend; it is passed to
# autoplot() instead so that the series line itself is drawn in green.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per event, labelled with its state.
# A state with several events appears several times, so the chart draws one
# bar per event rather than one per state.
distance <- setNames(df_GT$distance, df_GT$state)
pareto.chart(
  distance,
  main = "Grafico de Pareto por estados",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                   Frequency    Cum.Freq.   Percentage Cum.Percent.
##   San Miguel   1.006695e+01 1.006695e+01 8.974011e+00 8.974011e+00
##   San Miguel   9.972270e+00 2.003922e+01 8.889610e+00 1.786362e+01
##   La Libertad  9.875530e+00 2.991475e+01 8.803373e+00 2.666699e+01
##   Santa Ana    8.832100e+00 3.874685e+01 7.873225e+00 3.454022e+01
##   Cabañas      8.825250e+00 4.757210e+01 7.867118e+00 4.240734e+01
##   San Vicente  7.609460e+00 5.518156e+01 6.783323e+00 4.919066e+01
##   San Miguel   6.945360e+00 6.212692e+01 6.191323e+00 5.538198e+01
##   San Vicente  5.907260e+00 6.803418e+01 5.265926e+00 6.064791e+01
##   Ahuachapán   5.299010e+00 7.333319e+01 4.723712e+00 6.537162e+01
##   La Libertad  4.964160e+00 7.829735e+01 4.425216e+00 6.979684e+01
##   La Libertad  4.862190e+00 8.315954e+01 4.334316e+00 7.413115e+01
##   La Libertad  4.677220e+00 8.783676e+01 4.169428e+00 7.830058e+01
##   La Libertad  4.606550e+00 9.244331e+01 4.106430e+00 8.240701e+01
##   Sonsonate    4.238750e+00 9.668206e+01 3.778561e+00 8.618557e+01
##   San Vicente  4.031250e+00 1.007133e+02 3.593589e+00 8.977916e+01
##   San Salvador 3.252270e+00 1.039656e+02 2.899181e+00 9.267834e+01
##   Sonsonate    3.222350e+00 1.071879e+02 2.872509e+00 9.555085e+01
##   San Salvador 3.017390e+00 1.102053e+02 2.689801e+00 9.824065e+01
##   Santa Ana    1.158100e+00 1.113634e+02 1.032368e+00 9.927302e+01
##   Sonsonate    4.934600e-01 1.118569e+02 4.398865e-01 9.971291e+01
##   La Paz       3.193300e-01 1.121762e+02 2.846613e-01 9.999757e+01
##   Ahuachapán   2.730000e-03 1.121789e+02 2.433612e-03 1.000000e+02
# Stem-and-leaf display of the distance column.
stem(df_GT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000133344
##   0 | 5555567899
##   1 | 000
# Print the first six rows of df_GT.
head(df_GT, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6681 10/15/14 <NA>  <NA>           El Salvador  SV           Sons~       7358
## 2  6682 10/15/14 <NA>  <NA>           El Salvador  SV           Sons~      15446
## 3  7442 10/19/15 <NA>  <NA>           El Salvador  SV           Sons~       9936
## 4  6685 10/12/14 <NA>  <NA>           El Salvador  SV           Sant~       5773
## 5  7438 7/18/15  <NA>  <NA>           El Salvador  SV           Sant~      10095
## 6  1285 11/8/09  <NA>  <NA>           El Salvador  SV           San ~      41504
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a knitr table.
df_GT %>% head() %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6681 10/15/14 NA NA El Salvador SV Sonsonate 7358 Nahuizalco 4.23875 Unknown 13.7895 -89.7739 (13.7895, -89.773899999999998) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 3.7785613 1.889281
6682 10/15/14 NA NA El Salvador SV Sonsonate 15446 Sonzacate 3.22235 Unknown 13.7135 -89.6938 (13.7135, -89.693799999999996) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 2.8725089 5.214816
7442 10/19/15 NA NA El Salvador SV Sonsonate 9936 Juayúa 0.49346 Unknown 13.8457 -89.7445 (13.845700000000001, -89.744500000000002) Landslide Landslide Medium Downpour NA 1 0 El Heraldo http://www.elheraldo.hn/mundo/892045-217/cuatro-muertos-por-persistentes-lluvias-en-el-salvador 0.4398865 6.871013
6685 10/12/14 NA NA El Salvador SV Santa Ana 5773 Coatepeque 8.83210 Unknown 14.0007 -89.4691 (14.0007, -89.469099999999997) Landslide Landslide Medium Rain NA 2 3 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm 7.8732247 11.027569
7438 7/18/15 NA NA El Salvador SV Santa Ana 10095 Ciudad Arce 1.15810 Above road 13.8464 -89.4502 (13.846399999999999, -89.450199999999995) Landslide Landslide Small Rain NA 0 0 La Prensa http://www.laprensagrafica.com/2015/07/18/lluvias-causan-derrumbe-en-carretera-a-santa-ana 1.0323685 15.480366
1285 11/8/09 NA NA El Salvador SV San Vicente 41504 San Vicente 7.60946 NA 13.6409 -88.8699 (13.6409, -88.869900000000001) Landslide Complex Very_large Tropical cyclone Tropical Cyclone Ida NA 23 NA http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0 6.7833231 19.388211
# Stem-and-leaf display of the distance column at the default scale.
stem(df_GT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000133344
##   0 | 5555567899
##   1 | 000
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_GT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 0352
##    2 | 023
##    4 | 02679039
##    6 | 96
##    8 | 889
##   10 | 01

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a total row.
table <- questionr::freq(distance, total = TRUE, cum = TRUE, sort = "dec")
knitr::kable(table)
n % val% %cum val%cum
0.00273 1 4.5 4.5 4.5 4.5
0.31933 1 4.5 4.5 9.1 9.1
0.49346 1 4.5 4.5 13.6 13.6
1.1581 1 4.5 4.5 18.2 18.2
3.01739 1 4.5 4.5 22.7 22.7
3.22235 1 4.5 4.5 27.3 27.3
3.25227 1 4.5 4.5 31.8 31.8
4.03125 1 4.5 4.5 36.4 36.4
4.23875 1 4.5 4.5 40.9 40.9
4.60655 1 4.5 4.5 45.5 45.5
4.67722 1 4.5 4.5 50.0 50.0
4.86219 1 4.5 4.5 54.5 54.5
4.96416 1 4.5 4.5 59.1 59.1
5.29901 1 4.5 4.5 63.6 63.6
5.90726 1 4.5 4.5 68.2 68.2
6.94536 1 4.5 4.5 72.7 72.7
7.60946 1 4.5 4.5 77.3 77.3
8.82525 1 4.5 4.5 81.8 81.8
8.8321 1 4.5 4.5 86.4 86.4
9.87553 1 4.5 4.5 90.9 90.9
9.97227 1 4.5 4.5 95.5 95.5
10.06695 1 4.5 4.5 100.0 100.0
Total 22 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(object = table)
## Classes 'freqtab' and 'data.frame':  23 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
##  $ val%   : num  4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
##  $ %cum   : num  4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
##  $ val%cum: num  4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
# Build a two-column data frame (value, count) from the frequency table,
# leaving out its last row (the "Total" row).
# head(, -1) is used instead of 1:(length(x) - 1), which would index
# c(1, 0) and keep the "Total" row if the table had a single row.
values <- head(row.names(table), -1)
counts <- head(table$n, -1)
df <- data.frame(x = values, y = counts)
knitr::kable(df)
x y
0.00273 1
0.31933 1
0.49346 1
1.1581 1
3.01739 1
3.22235 1
3.25227 1
4.03125 1
4.23875 1
4.60655 1
4.67722 1
4.86219 1
4.96416 1
5.29901 1
5.90726 1
6.94536 1
7.60946 1
8.82525 1
8.8321 1
9.87553 1
9.97227 1
10.06695 1
library(ggplot2)

# One bar per distinct distance value; x labels rotated to stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n). log2() is exact for powers of two, whereas
# log(n) / log(2) can be off by one ulp and push ceiling() up a class.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up class count unless it is even; in that case use the
# rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and advance by w, never exceeding max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.00273  3.00273  6.00273  9.00273 12.00273
# include.lowest = TRUE closes the first interval on the left. Without it the
# smallest distance, which equals the first break, becomes NA and is dropped
# from the frequency table.
distance <- cut(distance, breaks = bins, include.lowest = TRUE)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.00273,3] 4 0.1818182 4
(3,6] 11 0.5000000 15
(6,9] 4 0.1818182 19
(9,12] 3 0.1363636 22
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "[0.00273,3]",..: 1 2 3 4
##  $ Freq    : int  4 11 4 3
##  $ Rel_Freq: num  0.182 0.5 0.182 0.136
##  $ Cum_Freq: int  4 15 19 22
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.00273,3] 4
(3,6] 11
(6,9] 4
(9,12] 3
library(ggplot2)

# Bar chart of the number of events falling in each distance interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_GT; the character columns
# come out as NA.
stat.desc(x = df_GT)
##                        id date time continent_code country_name country_code
## nbr.val      2.200000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.300000e+02   NA   NA             NA           NA           NA
## max          7.442000e+03   NA   NA             NA           NA           NA
## range        7.212000e+03   NA   NA             NA           NA           NA
## sum          1.086050e+05   NA   NA             NA           NA           NA
## median       6.680000e+03   NA   NA             NA           NA           NA
## mean         4.936591e+03   NA   NA             NA           NA           NA
## SE.mean      5.803080e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.206817e+03   NA   NA             NA           NA           NA
## var          7.408663e+06   NA   NA             NA           NA           NA
## std.dev      2.721886e+03   NA   NA             NA           NA           NA
## coef.var     5.513696e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 2.200000e+01   NA  22.0000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 2.654000e+03   NA   0.0027300                   NA
## max             NA 1.246940e+05   NA  10.0669500                   NA
## range           NA 1.220400e+05   NA  10.0642200                   NA
## sum             NA 9.383360e+05   NA 112.1789400                   NA
## median          NA 2.643100e+04   NA   4.7697050                   NA
## mean            NA 4.265164e+04   NA   5.0990427                   NA
## SE.mean         NA 9.849090e+03   NA   0.6720603                   NA
## CI.mean.0.95    NA 2.048230e+04   NA   1.3976259                   NA
## var             NA 2.134100e+09   NA   9.9366315                   NA
## std.dev         NA 4.619633e+04   NA   3.1522423                   NA
## coef.var        NA 1.083108e+00   NA   0.6182028                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       22.00000000  2.200000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           13.28170000 -8.990440e+01          NA          NA             NA
## max           14.00070000 -8.808430e+01          NA          NA             NA
## range          0.71900000  1.820100e+00          NA          NA             NA
## sum          301.25530000 -1.961688e+03          NA          NA             NA
## median        13.71515000 -8.926030e+01          NA          NA             NA
## mean          13.69342273 -8.916765e+01          NA          NA             NA
## SE.mean        0.03809807  1.112458e-01          NA          NA             NA
## CI.mean.0.95   0.07922927  2.313484e-01          NA          NA             NA
## var            0.03193218  2.722640e-01          NA          NA             NA
## std.dev        0.17869578  5.217892e-01          NA          NA             NA
## coef.var       0.01304975 -5.851777e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 14.0000000  18.000000
## nbr.null                 NA      NA         NA 10.0000000  13.000000
## nbr.na                   NA      NA         NA  8.0000000   4.000000
## min                      NA      NA         NA  0.0000000   0.000000
## max                      NA      NA         NA  2.0000000  32.000000
## range                    NA      NA         NA  2.0000000  32.000000
## sum                      NA      NA         NA  6.0000000  63.000000
## median                   NA      NA         NA  0.0000000   0.000000
## mean                     NA      NA         NA  0.4285714   3.500000
## SE.mean                  NA      NA         NA  0.2020305   2.107022
## CI.mean.0.95             NA      NA         NA  0.4364604   4.445428
## var                      NA      NA         NA  0.5714286  79.911765
## std.dev                  NA      NA         NA  0.7559289   8.939338
## coef.var                 NA      NA         NA  1.7638342   2.554097
##              source_name source_link         prop         ypos
## nbr.val               NA          NA 2.200000e+01   22.0000000
## nbr.null              NA          NA 0.000000e+00    0.0000000
## nbr.na                NA          NA 0.000000e+00    0.0000000
## min                   NA          NA 2.433612e-03    1.8892806
## max                   NA          NA 8.974011e+00   97.6381440
## range                 NA          NA 8.971577e+00   95.7488634
## sum                   NA          NA 1.000000e+02 1038.4834890
## median                NA          NA 4.251872e+00   44.1153638
## mean                  NA          NA 4.545455e+00   47.2037950
## SE.mean               NA          NA 5.990967e-01    6.6246536
## CI.mean.0.95          NA          NA 1.245890e+00   13.7767213
## var                   NA          NA 7.896171e+00  965.4927696
## std.dev               NA          NA 2.810013e+00   31.0723795
## coef.var              NA          NA 6.182028e-01    0.6582602
# Horizontal box plot of the distance series.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Alta Verapaz (Guatemala)

library(readr)
library(knitr)
# Reload the full catalog into `df`, which the frequency-table code above
# had overwritten with a small data frame.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city".
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep the rows whose country_name is "Guatemala" and preview them.
df_GT <- subset(x = df, subset = country_name == "Guatemala")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Replace df_GT with the rows of the full catalog whose state is
# "Alta Verapaz".
df_GT <- subset(x = df, subset = state == "Alta Verapaz")
df_GT %>% head() %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
2354 8/28/10 NA NA Guatemala GT Alta Verapaz 5633 Senahú 2.30104 NA 15.4168 -89.8207 (15.4168, -89.820700000000002) Landslide Mudslide Medium Downpour NA NA 2 NA NA
7433 10/15/15 NA NA Guatemala GT Alta Verapaz 4671 Cahabón 5.14479 Unknown 15.5779 -89.7321 (15.5779, -89.732100000000003) Landslide Landslide Medium Rain NA 0 1 El Periodico http://elperiodico.com.gt/2015/10/20/pais/para-insivumeh-lluvia-es-fuerte-pero-diferente-a-la-del-mitch/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per row, coloured by city and placed side by side
# within the state; bar height is the raw `distance` value.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the rows of the state are piled into a single column, one
# segment per row, coloured by city.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar whose y axis is bent into a
# circle by the polar coordinate system.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# For every row: its share of the total distance (in percent) and the
# mid-point of its slice on the cumulative scale, where the label is drawn.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# percent() comes from scales; library() stops with an error if the package
# is missing, whereas require() would only return FALSE.
library(scales)
# Pie chart with one percentage label per slice and no legend.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it triggered an "Unknown palette"
  # warning); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a `ts` object with 12 observations per year whose
# first observation is labelled 2008, then show its first values.
# NOTE(review): the time index is positional (row order of df_GT); it is not
# derived from the `date` column -- confirm this is intended.
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
2.30104
13.39817
5.14479
# Line plot of the series. The line colour must be passed to autoplot()
# itself: inside labs(), `colour = "green"` only sets the title of a colour
# legend and does not colour the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so each bar of the chart is
# identified by its city.
distance <- setNames(df_GT$distance, df_GT$city)
# Pareto chart: bars by distance plus the cumulative-percentage curve,
# with the secondary axis ticked every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##          
## Pareto chart analysis for distance
##           Frequency Cum.Freq. Percentage Cum.Percent.
##   Lanquín  13.39817  13.39817   64.27831     64.27831
##   Cahabón   5.14479  18.54296   24.68235     88.96066
##   Senahú    2.30104  20.84400   11.03934    100.00000
# Stem-and-leaf display of the distances.
stem(df_GT$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 5
##   1 | 3
# First rows of the subset as printed by the console ...
head(df_GT, n = 6L)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2354 8/28/10  <NA>  <NA>           Guatemala    GT           Alta~       5633
## 2   198 8/21/07  <NA>  <NA>           Guatemala    GT           Alta~       2006
## 3  7433 10/15/15 <NA>  <NA>           Guatemala    GT           Alta~       4671
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# ... and the same rows rendered as a formatted table.
knitr::kable(x = head(df_GT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2354 8/28/10 NA NA Guatemala GT Alta Verapaz 5633 Senahú 2.30104 NA 15.4168 -89.8207 (15.4168, -89.820700000000002) Landslide Mudslide Medium Downpour NA NA 2 NA NA 11.03934 5.51967
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument 64.27831 43.17849
7433 10/15/15 NA NA Guatemala GT Alta Verapaz 4671 Cahabón 5.14479 Unknown 15.5779 -89.7321 (15.5779, -89.732100000000003) Landslide Landslide Medium Rain NA 0 1 El Periodico http://elperiodico.com.gt/2015/10/20/pais/para-insivumeh-lluvia-es-fuerte-pero-diferente-a-la-del-mitch/ 24.68235 87.65882
# Stem-and-leaf display with the default scale ...
stem(df_GT$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 5
##   1 | 3
# ... and again with scale = 2, which produces a longer display.
stem(x = df_GT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##    2 | 3
##    4 | 1
##    6 | 
##    8 | 
##   10 | 
##   12 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
table <- questionr::freq(x = distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(x = table)
n % val% %cum val%cum
2.30104 1 33.3 33.3 33.3 33.3
5.14479 1 33.3 33.3 66.7 66.7
13.39817 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Labels (row names) and counts of the table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(., -1) is used instead of
# 1:(length(.) - 1), which counts backwards (1, 0) when only one row exists
# and would then keep the row it is meant to remove.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column data frame (label, count) used for plotting.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.30104 1
5.14479 1
13.39817 1
library(ggplot2)
# Bar chart of the frequency table; the x tick labels are rotated 90 degrees.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes suggested by Sturges' rule, 1 + log2(n), and its two
# integer roundings.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down value when the rounded-up one is even, otherwise the
# rounded-up value.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: range of the data divided by the number of classes, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points: start at the minimum and advance by `w`, up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  2.30104  6.30104 10.30104 14.30104
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break IS the minimum, so with the default
# (a, b] intervals the smallest observation falls outside every class and is
# silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[2.3,6.3] 2 0.6666667 2
(6.3,10.3] 0 0.0000000 2
(10.3,14.3] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[2.3,6.3]","(6.3,10.3]",..: 1 2 3
##  $ Freq    : int  2 0 1
##  $ Rel_Freq: num  0.667 0 0.333
##  $ Cum_Freq: int  2 2 3
# Two-column data frame (class, count) used for plotting.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[2.3,6.3] 2
(6.3,10.3] 0
(10.3,14.3] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset; the printed result
# shows NA for the character columns.
stat.desc(x = df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.980000e+02   NA   NA             NA           NA           NA
## max          7.433000e+03   NA   NA             NA           NA           NA
## range        7.235000e+03   NA   NA             NA           NA           NA
## sum          9.985000e+03   NA   NA             NA           NA           NA
## median       2.354000e+03   NA   NA             NA           NA           NA
## mean         3.328333e+03   NA   NA             NA           NA           NA
## SE.mean      2.144629e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 9.227594e+03   NA   NA             NA           NA           NA
## var          1.379830e+07   NA   NA             NA           NA           NA
## std.dev      3.714606e+03   NA   NA             NA           NA           NA
## coef.var     1.116056e+00   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 2.006000e+03   NA  2.3010400                   NA
## max             NA 5.633000e+03   NA 13.3981700                   NA
## range           NA 3.627000e+03   NA 11.0971300                   NA
## sum             NA 1.231000e+04   NA 20.8440000                   NA
## median          NA 4.671000e+03   NA  5.1447900                   NA
## mean            NA 4.103333e+03   NA  6.9480000                   NA
## SE.mean         NA 1.084814e+03   NA  3.3279247                   NA
## CI.mean.0.95    NA 4.667579e+03   NA 14.3189043                   NA
## var             NA 3.530466e+06   NA 33.2252483                   NA
## std.dev         NA 1.878954e+03   NA  5.7641347                   NA
## coef.var        NA 4.579091e-01   NA  0.8296106                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.000000000  3.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          15.416800000 -9.008530e+01          NA          NA             NA
## max          15.604600000 -8.973210e+01          NA          NA             NA
## range         0.187800000  3.532000e-01          NA          NA             NA
## sum          46.599300000 -2.696381e+02          NA          NA             NA
## median       15.577900000 -8.982070e+01          NA          NA             NA
## mean         15.533100000 -8.987937e+01          NA          NA             NA
## SE.mean       0.058658588  1.060957e-01          NA          NA             NA
## CI.mean.0.95  0.252387536  4.564930e-01          NA          NA             NA
## var           0.010322490  3.376889e-02          NA          NA             NA
## std.dev       0.101599656  1.837631e-01          NA          NA             NA
## coef.var      0.006540849 -2.044553e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  2.0000000          NA
## nbr.null                 NA      NA         NA        1  0.0000000          NA
## nbr.na                   NA      NA         NA        2  1.0000000          NA
## min                      NA      NA         NA        0  1.0000000          NA
## max                      NA      NA         NA        0  2.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  3.0000000          NA
## median                   NA      NA         NA        0  1.5000000          NA
## mean                     NA      NA         NA        0  1.5000000          NA
## SE.mean                  NA      NA         NA       NA  0.5000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  6.3531024          NA
## var                      NA      NA         NA       NA  0.5000000          NA
## std.dev                  NA      NA         NA       NA  0.7071068          NA
## coef.var                 NA      NA         NA       NA  0.4714045          NA
##              source_link        prop         ypos
## nbr.val               NA   3.0000000    3.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  11.0393399    5.5196699
## max                   NA  64.2783055   87.6588227
## range                 NA  53.2389656   82.1391528
## sum                   NA 100.0000000  136.3569852
## median                NA  24.6823546   43.1784926
## mean                  NA  33.3333333   45.4523284
## SE.mean               NA  15.9658640   23.7387717
## CI.mean.0.95          NA  68.6955683  102.1396910
## var                   NA 764.7264402 1690.5878507
## std.dev               NA  27.6536876   41.1167588
## coef.var              NA   0.8296106    0.9046128
# Horizontal box plot of the distance series.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Chimaltenango (Guatemala)

library(readr)
library(knitr)
# Read the catalog CSV straight from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used throughout the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the Guatemalan records and show the first rows as a table.
df_GT <- subset(df, subset = country_name == "Guatemala")
knitr::kable(x = head(df_GT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Keep only the Chimaltenango records. The country is part of the condition:
# filtering on `state` alone would also match a same-named state/province
# belonging to any other country in the catalog.
df_GT <- subset(df, country_name == "Guatemala" & state == "Chimaltenango")
# Render the first rows of the subset as a table.
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
852 10/18/08 NA NA Guatemala GT Chimaltenango 5987 San José Poaquil 5.31511 NA 14.8667 -90.9167 (14.8667, -90.916700000000006) Landslide Landslide Medium Tropical cyclone Tropical Depression 16 NA NA NA http://www.reliefweb.int/rw/rwb.nsf/db900SID/KSAI-7KM5GH?OpenDocument
1921 5/30/10 NA NA Guatemala GT Chimaltenango 2396 Santa Apolonia 0.99952 NA 14.7833 -90.9667 (14.783300000000001, -90.966700000000003) Landslide Landslide Medium Tropical cyclone Tropical Cyclone Agatha NA 11 NA http://www.cbc.ca/world/story/2010/06/01/central-america-agatha.html
2388 9/4/10 NA NA Guatemala GT Chimaltenango 82370 Chimaltenango 1.36473 NA 14.6510 -90.8267 (14.651, -90.826700000000002) Landslide Landslide Medium Downpour NA NA 10 NA http://www.google.com/hostednews/afp/article/ALeqM5hL3jdiAHX4wFkaLtnQWfIwxDhHBQ
7413 9/27/15 NA NA Guatemala GT Chimaltenango 16494 Patzicía 5.52205 Above road 14.6614 -90.9681 (14.6614, -90.968100000000007) Landslide Landslide Medium Rain NA 0 0 CONRED http://www.redhum.org/documento_detail/conred-boletin-informativo-no-4038-lluvias-del-domingo-generaron-22-incidentes-en-6-departamentos

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per row, coloured by city and placed side by side
# within the state; bar height is the raw `distance` value.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the rows of the state are piled into a single column, one
# segment per row, coloured by city.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar whose y axis is bent into a
# circle by the polar coordinate system.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# For every row: its share of the total distance (in percent) and the
# mid-point of its slice on the cumulative scale, where the label is drawn.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# percent() comes from scales; library() stops with an error if the package
# is missing, whereas require() would only return FALSE.
library(scales)
# Pie chart with one percentage label per slice and no legend.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it triggered an "Unknown palette"
  # warning); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a `ts` object with 12 observations per year whose
# first observation is labelled 2008, then show its first values.
# NOTE(review): the time index is positional (row order of df_GT); it is not
# derived from the `date` column -- confirm this is intended.
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.99952
5.31511
5.52205
1.36473
# Line plot of the series. The line colour must be passed to autoplot()
# itself: inside labs(), `colour = "green"` only sets the title of a colour
# legend and does not colour the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so each bar of the chart is
# identified by its city.
distance <- setNames(df_GT$distance, df_GT$city)
# Pareto chart: bars by distance plus the cumulative-percentage curve,
# with the secondary axis ticked every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                     Frequency  Cum.Freq. Percentage Cum.Percent.
##   Patzicía           5.522050   5.522050  41.829244    41.829244
##   San José Poaquil   5.315110  10.837160  40.261684    82.090928
##   Chimaltenango      1.364730  12.201890  10.337759    92.428688
##   Santa Apolonia     0.999520  13.201410   7.571312   100.000000
# Stem-and-leaf display of the distances.
stem(df_GT$distance)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 04
##   2 | 
##   3 | 
##   4 | 
##   5 | 35
# First rows of the subset as printed by the console ...
head(df_GT, n = 6L)
## # A tibble: 4 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  1921 5/30/10  <NA>  <NA>           Guatemala    GT           Chim~       2396
## 2   852 10/18/08 <NA>  <NA>           Guatemala    GT           Chim~       5987
## 3  7413 9/27/15  <NA>  <NA>           Guatemala    GT           Chim~      16494
## 4  2388 9/4/10   <NA>  <NA>           Guatemala    GT           Chim~      82370
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# ... and the same rows rendered as a formatted table.
knitr::kable(x = head(df_GT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1921 5/30/10 NA NA Guatemala GT Chimaltenango 2396 Santa Apolonia 0.99952 NA 14.7833 -90.9667 (14.783300000000001, -90.966700000000003) Landslide Landslide Medium Tropical cyclone Tropical Cyclone Agatha NA 11 NA http://www.cbc.ca/world/story/2010/06/01/central-america-agatha.html 7.571313 3.785656
852 10/18/08 NA NA Guatemala GT Chimaltenango 5987 San José Poaquil 5.31511 NA 14.8667 -90.9167 (14.8667, -90.916700000000006) Landslide Landslide Medium Tropical cyclone Tropical Depression 16 NA NA NA http://www.reliefweb.int/rw/rwb.nsf/db900SID/KSAI-7KM5GH?OpenDocument 40.261684 27.702154
7413 9/27/15 NA NA Guatemala GT Chimaltenango 16494 Patzicía 5.52205 Above road 14.6614 -90.9681 (14.6614, -90.968100000000007) Landslide Landslide Medium Rain NA 0 0 CONRED http://www.redhum.org/documento_detail/conred-boletin-informativo-no-4038-lluvias-del-domingo-generaron-22-incidentes-en-6-departamentos 41.829244 68.747619
2388 9/4/10 NA NA Guatemala GT Chimaltenango 82370 Chimaltenango 1.36473 NA 14.6510 -90.8267 (14.651, -90.826700000000002) Landslide Landslide Medium Downpour NA NA 10 NA http://www.google.com/hostednews/afp/article/ALeqM5hL3jdiAHX4wFkaLtnQWfIwxDhHBQ 10.337759 94.831120
# Stem-and-leaf display with the default scale ...
stem(df_GT$distance)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 04
##   2 | 
##   3 | 
##   4 | 
##   5 | 35
# ... and again with scale = 2, which produces a longer display.
stem(x = df_GT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 04
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 
##   4 | 
##   4 | 
##   5 | 3
##   5 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
table <- questionr::freq(x = distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(x = table)
n % val% %cum val%cum
0.99952 1 25 25 25 25
1.36473 1 25 25 50 50
5.31511 1 25 25 75 75
5.52205 1 25 25 100 100
Total 4 100 100 100 100
# Inspect the structure of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Labels (row names) and counts of the table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(., -1) is used instead of
# 1:(length(.) - 1), which counts backwards (1, 0) when only one row exists
# and would then keep the row it is meant to remove.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column data frame (label, count) used for plotting.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.99952 1
1.36473 1
5.31511 1
5.52205 1
library(ggplot2)
# Bar chart of the frequency table; the x tick labels are rotated 90 degrees.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes suggested by Sturges' rule, 1 + log2(n), and its two
# integer roundings.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down value when the rounded-up one is even, otherwise the
# rounded-up value.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: range of the data divided by the number of classes, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points: start at the minimum and advance by `w`, up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.99952 2.99952 4.99952 6.99952
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break IS the minimum, so with the default
# (a, b] intervals the smallest observation falls outside every class and is
# silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[1,3] 2 0.5 2
(3,5] 0 0.0 2
(5,7] 2 0.5 4
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[1,3]","(3,5]",..: 1 2 3
##  $ Freq    : int  2 0 2
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  2 2 4
# Two-column data frame (class, count) used for plotting.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[1,3] 2
(3,5] 0
(5,7] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(color = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset; the printed result
# shows NA for the character columns.
stat.desc(x = df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.520000e+02   NA   NA             NA           NA           NA
## max          7.413000e+03   NA   NA             NA           NA           NA
## range        6.561000e+03   NA   NA             NA           NA           NA
## sum          1.257400e+04   NA   NA             NA           NA           NA
## median       2.154500e+03   NA   NA             NA           NA           NA
## mean         3.143500e+03   NA   NA             NA           NA           NA
## SE.mean      1.459020e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 4.643254e+03   NA   NA             NA           NA           NA
## var          8.514963e+06   NA   NA             NA           NA           NA
## std.dev      2.918041e+03   NA   NA             NA           NA           NA
## coef.var     9.282777e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 4.000000e+00   NA  4.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 2.396000e+03   NA  0.999520                   NA
## max             NA 8.237000e+04   NA  5.522050                   NA
## range           NA 7.997400e+04   NA  4.522530                   NA
## sum             NA 1.072470e+05   NA 13.201410                   NA
## median          NA 1.124050e+04   NA  3.339920                   NA
## mean            NA 2.681175e+04   NA  3.300352                   NA
## SE.mean         NA 1.875938e+04   NA  1.225957                   NA
## CI.mean.0.95    NA 5.970073e+04   NA  3.901543                   NA
## var             NA 1.407658e+09   NA  6.011884                   NA
## std.dev         NA 3.751877e+04   NA  2.451914                   NA
## coef.var        NA 1.399341e+00   NA  0.742925                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.000000000  4.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          14.651000000 -9.096810e+01          NA          NA             NA
## max          14.866700000 -9.082670e+01          NA          NA             NA
## range         0.215700000  1.414000e-01          NA          NA             NA
## sum          58.962400000 -3.636782e+02          NA          NA             NA
## median       14.722350000 -9.094170e+01          NA          NA             NA
## mean         14.740600000 -9.091955e+01          NA          NA             NA
## SE.mean       0.051660188  3.317814e-02          NA          NA             NA
## CI.mean.0.95  0.164405774  1.055877e-01          NA          NA             NA
## var           0.010675100  4.403157e-03          NA          NA             NA
## std.dev       0.103320376  6.635629e-02          NA          NA             NA
## coef.var      0.007009238 -7.298352e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  3.0000000          NA
## nbr.null                 NA      NA         NA        1  1.0000000          NA
## nbr.na                   NA      NA         NA        3  1.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0 11.0000000          NA
## range                    NA      NA         NA        0 11.0000000          NA
## sum                      NA      NA         NA        0 21.0000000          NA
## median                   NA      NA         NA        0 10.0000000          NA
## mean                     NA      NA         NA        0  7.0000000          NA
## SE.mean                  NA      NA         NA       NA  3.5118846          NA
## CI.mean.0.95             NA      NA         NA      NaN 15.1104198          NA
## var                      NA      NA         NA       NA 37.0000000          NA
## std.dev                  NA      NA         NA       NA  6.0827625          NA
## coef.var                 NA      NA         NA       NA  0.8689661          NA
##              source_link       prop         ypos
## nbr.val               NA   4.000000    4.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA   7.571312    3.7856562
## max                   NA  41.829244   94.8311203
## range                 NA  34.257932   91.0454641
## sum                   NA 100.000000  195.0665497
## median                NA  25.299722   48.2248866
## mean                  NA  25.000000   48.7666374
## SE.mean               NA   9.286562   20.3882673
## CI.mean.0.95          NA  29.553986   64.8845658
## var                   NA 344.960969 1662.7257673
## std.dev               NA  18.573125   40.7765345
## coef.var              NA   0.742925    0.8361564
# Horizontal box plot of the distance series.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Escuintla (Guatemala)

library(readr)
library(knitr)
# Read the catalog CSV straight from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used throughout the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the Guatemalan records and show the first rows as a table.
df_GT <- subset(df, subset = country_name == "Guatemala")
knitr::kable(x = head(df_GT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
# Keep the landslide records whose state is Escuintla and preview them.
library(dplyr)
df_GT <- subset(df, subset = state == "Escuintla")
head(df_GT) %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
1914 5/29/10 NA NA Guatemala GT Escuintla 31329 Palín 0.18542 NA 14.4039 -90.6986 (14.4039, -90.698599999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 0 NA http://www.odt.co.nz/news/dunedin/108264/landslide-shunts-camp-building
7434 5/4/15 13:32 NA Guatemala GT Escuintla 11121 Nueva Concepción 8.25465 Above road 14.2202 -91.2264 (14.2202, -91.226399999999998) Landslide Landslide Medium Unknown NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/escuintla/deslizamiento-afecto-servicio-de-energia-electrica

Gráfico de barras agrupadas

# Side-by-side (dodged) bars of distance per state, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

# Stacked bars of distance per state, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

# Pie chart labelled with each record's share of the total distance.
library(ggplot2)
library(dplyr)
library(scales)

# prop: percentage of the summed distance contributed by each row.
# ypos: midpoint of each slice along the stacked axis, used to place labels.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; Set3 is the largest qualitative set (12 colours).
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

# Wrap the distance column in a ts object with 12 observations per unit of
# time, starting at 2008, and preview its first values.
# NOTE(review): ts() assumes evenly spaced observations, and df_GT was sorted
# by city upstream rather than by date -- confirm this ordering is intended.
library(forecast)
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
head(data) %>%
  knitr::kable()
x
3.10150
0.18542
8.25465
# Line plot of the series. The colour is passed to autoplot(), which forwards
# extra arguments to the line layer; inside labs(), `colour = "green"` only
# names a colour legend and does not colour the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

# Pareto chart of the distances, each bar labelled with the record's city.
library(qcc)
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                     Frequency  Cum.Freq. Percentage Cum.Percent.
##   Nueva Concepción   8.254650   8.254650  71.521032    71.521032
##   Palín              3.101500  11.356150  26.872427    98.393459
##   Palín              0.185420  11.541570   1.606541   100.000000
# Stem-and-leaf display of the distances.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 2
##   2 | 1
##   4 | 
##   6 | 
##   8 | 3
# Print the first six rows (now including the prop and ypos columns).
head(df_GT, n = 6L)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name country_code state     population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1   563 6/1/08  <NA>  <NA>           Guatemala    GT           Escuintla      31329
## 2  1914 5/29/10 <NA>  <NA>           Guatemala    GT           Escuintla      31329
## 3  7434 5/4/15  13:32 <NA>           Guatemala    GT           Escuintla      11121
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows as a formatted table.
head(df_GT) %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c 26.87243 13.43621
1914 5/29/10 NA NA Guatemala GT Escuintla 31329 Palín 0.18542 NA 14.4039 -90.6986 (14.4039, -90.698599999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 0 NA http://www.odt.co.nz/news/dunedin/108264/landslide-shunts-camp-building 1.60654 27.67570
7434 5/4/15 13:32 NA Guatemala GT Escuintla 11121 Nueva Concepción 8.25465 Above road 14.2202 -91.2264 (14.2202, -91.226399999999998) Landslide Landslide Medium Unknown NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/escuintla/deslizamiento-afecto-servicio-de-energia-electrica 71.52103 64.23948
# Stem-and-leaf display at the default scale, for comparison with scale = 2.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 2
##   2 | 1
##   4 | 
##   6 | 
##   8 | 3
# Same display with the plot length doubled (scale = 2).
stem(df_GT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 2
##   1 | 
##   2 | 
##   3 | 1
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 3

Tablas de frecuencia

# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# The result is stored as `table` (the same name as base::table); later
# chunks read it under this name.
library(questionr)
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.18542 1 33.3 33.3 33.3 33.3
3.1015 1 33.3 33.3 66.7 66.7
8.25465 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Build a two-column data frame (value label, absolute frequency) from the
# frequency table, dropping its trailing "Total" row.
# head(v, -1) drops the last element even when it is the only one, whereas
# v[1:(length(v) - 1)] would index with 1:0 and keep the "Total" row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.18542 1
3.1015 1
8.25465 1
# Bar chart of the absolute frequency of each distinct distance value.
library(ggplot2)
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points starting at the minimum, in steps of w, up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.18542 3.18542 6.18542 9.18542
# Grouped frequency table: absolute, relative and cumulative frequencies.
# include.lowest = TRUE closes the first interval on the left. The first break
# is min(distance), so with cut()'s default (a, b] intervals the smallest
# observation matched no class, became NA and was missing from the counts.
# Re-knit to refresh the rendered tables.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.185,3.19] 1 0.5 1
(3.19,6.19] 0 0.0 1
(6.19,9.19] 1 0.5 2
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.185,3.19]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Plotting frame: class interval (x) and absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.185,3.19] 1
(3.19,6.19] 0
(6.19,9.19] 1
# Bar chart of the absolute frequency per distance class.
library(ggplot2)

ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

# Descriptive statistics for every column of df_GT; non-numeric columns come
# back as NA in the rendered output.
library(pastecs)
pastecs::stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          5.630000e+02   NA   NA             NA           NA           NA
## max          7.434000e+03   NA   NA             NA           NA           NA
## range        6.871000e+03   NA   NA             NA           NA           NA
## sum          9.911000e+03   NA   NA             NA           NA           NA
## median       1.914000e+03   NA   NA             NA           NA           NA
## mean         3.303667e+03   NA   NA             NA           NA           NA
## SE.mean      2.101669e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 9.042753e+03   NA   NA             NA           NA           NA
## var          1.325104e+07   NA   NA             NA           NA           NA
## std.dev      3.640198e+03   NA   NA             NA           NA           NA
## coef.var     1.101866e+00   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 3.000000e+00   NA  3.000000                   NA  3.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 1.112100e+04   NA  0.185420                   NA 14.22020000
## max             NA 3.132900e+04   NA  8.254650                   NA 14.42260000
## range           NA 2.020800e+04   NA  8.069230                   NA  0.20240000
## sum             NA 7.377900e+04   NA 11.541570                   NA 43.04670000
## median          NA 3.132900e+04   NA  3.101500                   NA 14.40390000
## mean            NA 2.459300e+04   NA  3.847190                   NA 14.34890000
## SE.mean         NA 6.736000e+03   NA  2.359036                   NA  0.06457603
## CI.mean.0.95    NA 2.898267e+04   NA 10.150114                   NA  0.27784822
## var             NA 1.361211e+08   NA 16.695158                   NA  0.01251019
## std.dev         NA 1.166709e+04   NA  4.085971                   NA  0.11184896
## coef.var        NA 4.744071e-01   NA  1.062066                   NA  0.00779495
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.122640e+01          NA          NA             NA
## max          -9.067550e+01          NA          NA             NA
## range         5.509000e-01          NA          NA             NA
## sum          -2.726005e+02          NA          NA             NA
## median       -9.069860e+01          NA          NA             NA
## mean         -9.086683e+01          NA          NA             NA
## SE.mean       1.799070e-01          NA          NA             NA
## CI.mean.0.95  7.740772e-01          NA          NA             NA
## var           9.709954e-02          NA          NA             NA
## std.dev       3.116080e-01          NA          NA             NA
## coef.var     -3.429282e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  3.0000000          NA
## nbr.null                 NA      NA         NA        1  2.0000000          NA
## nbr.na                   NA      NA         NA        2  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  1.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.3333333          NA
## SE.mean                  NA      NA         NA       NA  0.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  1.4342176          NA
## var                      NA      NA         NA       NA  0.3333333          NA
## std.dev                  NA      NA         NA       NA  0.5773503          NA
## coef.var                 NA      NA         NA       NA  1.7320508          NA
##              source_link        prop        ypos
## nbr.val               NA    3.000000   3.0000000
## nbr.null              NA    0.000000   0.0000000
## nbr.na                NA    0.000000   0.0000000
## min                   NA    1.606541  13.4362136
## max                   NA   71.521032  64.2394839
## range                 NA   69.914492  50.8032703
## sum                   NA  100.000000 105.3513950
## median                NA   26.872427  27.6756975
## mean                  NA   33.333333  35.1171317
## SE.mean               NA   20.439476  15.1302597
## CI.mean.0.95          NA   87.943967  65.1002531
## var                   NA 1253.316518 686.7742743
## std.dev               NA   35.402211  26.2063785
## coef.var              NA    1.062066   0.7462562
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Guatemala (Guatemala)

library(readr)
library(knitr)
# Reload the landslide catalogue from the course repository.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give columns 7 and 9 the short names "state" and "city", then keep the
# records for Guatemala and preview them.
library(dplyr)
names(df)[c(7, 9)] <- c("state", "city")
df_GT <- subset(df, subset = country_name == "Guatemala")
head(df_GT) %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
# Keep the records whose state is Guatemala (taken from the full catalogue)
# and preview them.
library(dplyr)
df_GT <- subset(df, subset = state == "Guatemala")
head(df_GT) %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
1915 5/29/10 NA NA Guatemala GT Guatemala 71836 Amatitlán 2.02891 NA 14.4789 -90.6319 (14.478899999999999, -90.631900000000002) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 0 NA http://news.xinhuanet.com/english2010/china/2010-05/30/c_13323403.htm
1916 5/29/10 NA NA Guatemala GT Guatemala 97172 Chinautla 0.44764 Urban area 14.6969 -90.4440 (14.696899999999999, -90.444000000000003) Landslide Landslide Medium Tropical cyclone Tropical Cyclone Agatha NA 11 Boston Globe http://www.boston.com/bigpicture/2010/06/a_rough_week_for_guatemala.html#photo17
1919 5/30/10 NA NA Guatemala GT Guatemala 994938 Guatemala City 4.07930 NA 14.6066 -90.5276 (14.6066, -90.527600000000007) Landslide Landslide Large Tropical cyclone Tropical Cyclone Agatha NA 17 NA http://worldnews.hometips4u.com/central-america-storm-kills-86-more-victims-feared

Gráfico de barras agrupadas

# Side-by-side (dodged) bars of distance per state, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

# Stacked bars of distance per state, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
library(ggplot2)
library(dplyr)
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

# Pie chart labelled with each record's share of the total distance.
library(ggplot2)
library(dplyr)
library(scales)

# prop: percentage of the summed distance contributed by each row.
# ypos: midpoint of each slice along the stacked axis, used to place labels.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; Set3 is the largest qualitative set (12 colours).
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

# Wrap the distance column in a ts object with 12 observations per unit of
# time, starting at 2008, and preview its first values.
# NOTE(review): ts() assumes evenly spaced observations, and df_GT was sorted
# by city upstream rather than by date -- confirm this ordering is intended.
library(forecast)
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
head(data) %>%
  knitr::kable()
x
0.65744
2.81128
2.70053
0.94245
3.96161
4.74385
# Line plot of the series. The colour is passed to autoplot(), which forwards
# extra arguments to the line layer; inside labs(), `colour = "green"` only
# names a colour legend and does not colour the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

# Pareto chart of the distances, each bar labelled with the record's city.
library(qcc)
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                        
## Pareto chart analysis for distance
##                           Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Fraijanes               6.1921800   6.1921800   7.1121567    7.1121567
##   Chinautla               5.9453500  12.1375300   6.8286550   13.9408116
##   San José Pinula         4.7438500  16.8813800   5.4486472   19.3894589
##   Petapa                  4.2072600  21.0886400   4.8323357   24.2217945
##   Guatemala City          4.0793000  25.1679400   4.6853646   28.9071591
##   Santa Catarina Pinula   3.9616100  29.1295500   4.5501893   33.4573483
##   Chinautla               3.8564800  32.9860300   4.4294400   37.8867884
##   Guatemala City          3.8031200  36.7891500   4.3681523   42.2549407
##   Mixco                   3.6474900  40.4366400   4.1894002   46.4443409
##   Guatemala City          3.4934100  43.9300500   4.0124285   50.4567693
##   Guatemala City          3.2567500  47.1868000   3.7406077   54.1973771
##   Guatemala City          3.1261400  50.3129400   3.5905929   57.7879699
##   Guatemala City          3.0031400  53.3160800   3.4493187   61.2372886
##   Santa Catarina Pinula   2.8112800  56.1273600   3.2289539   64.4662425
##   Guatemala City          2.7911300  58.9184900   3.2058102   67.6720527
##   Santa Catarina Pinula   2.7005300  61.6190200   3.1017497   70.7738024
##   Chinautla               2.6635800  64.2826000   3.0593100   73.8331124
##   Guatemala City          2.5962000  66.8788000   2.9819193   76.8150318
##   Chinautla               2.3637600  69.2425600   2.7149455   79.5299773
##   Mixco                   2.1041800  71.3467400   2.4167995   81.9467768
##   Guatemala City          2.0842500  73.4309900   2.3939085   84.3406854
##   Amatitlán               2.0289100  75.4599000   2.3303466   86.6710320
##   Mixco                   1.8700900  77.3299900   2.1479306   88.8189626
##   Guatemala City          1.8386300  79.1686200   2.1117966   90.9307592
##   Guatemala City          1.7014700  80.8700900   1.9542586   92.8850178
##   Guatemala City          1.6829000  82.5529900   1.9329297   94.8179475
##   Guatemala City          1.5738100  84.1268000   1.8076321   96.6255796
##   Santa Catarina Pinula   0.9424500  85.0692500   1.0824705   97.7080501
##   San José Pinula         0.8904000  85.9596500   1.0226874   98.7307375
##   Villa Canales           0.6574400  86.6170900   0.7551163   99.4858538
##   Chinautla               0.4476400  87.0647300   0.5141462  100.0000000
# Stem-and-leaf display of the distances.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 4799
##   1 | 67789
##   2 | 011467788
##   3 | 0135689
##   4 | 0127
##   5 | 9
##   6 | 2
# Print the first six rows (now including the prop and ypos columns).
head(df_GT, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  7435 11/24/15 <NA>  <NA>           Guatemala    GT           Guatemala     122194
## 2  2350 8/28/10  <NA>  <NA>           Guatemala    GT           Guatemala      67994
## 3  6661 9/23/14  14:30 <NA>           Guatemala    GT           Guatemala      67994
## 4  7343 10/1/15  21:30 <NA>           Guatemala    GT           Guatemala      67994
## 5  7345 10/19/15 <NA>  <NA>           Guatemala    GT           Guatemala      67994
## 6   165 8/9/07   <NA>  <NA>           Guatemala    GT           Guatemala      47247
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows as a formatted table.
head(df_GT) %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7435 11/24/15 NA NA Guatemala GT Guatemala 122194 Villa Canales 0.65744 Retaining wall 14.4758 -90.5312 (14.4758, -90.531199999999998) Landslide Landslide Small Rain NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/comunitario/madre-alcanzo-a-empujar-a-su-hija-para-salvarla-de-deslizamiento 0.7551163 0.3775582
2350 8/28/10 NA NA Guatemala GT Guatemala 67994 Santa Catarina Pinula 2.81128 NA 14.5624 -90.5205 (14.5624, -90.520499999999998) Landslide Landslide Medium Downpour NA NA 0 NA NA 3.2289539 2.3695933
6661 9/23/14 14:30 NA Guatemala GT Guatemala 67994 Santa Catarina Pinula 2.70053 Unknown 14.5932 -90.4979 (14.5932, -90.497900000000001) Landslide Landslide Medium Rain NA 0 3 Reliefweb http://reliefweb.int/report/guatemala/deslizamiento-mata-esposos-e-hijo-de-3-os 3.1017497 5.5349451
7343 10/1/15 21:30 NA Guatemala GT Guatemala 67994 Santa Catarina Pinula 0.94245 Urban area 14.5755 -90.4959 (14.5755, -90.495900000000006) Landslide Mudslide Medium Rain NA 0 280 CNN http://www.cnn.com/2015/10/04/americas/guatemala-landslide-dead-missing/ 1.0824705 7.6270552
7345 10/19/15 NA NA Guatemala GT Guatemala 67994 Santa Catarina Pinula 3.96161 Above road 14.5572 -90.5300 (14.5572, -90.53) Landslide Mudslide Small Tropical cyclone NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/comunitario/lluvia-causa-estragos-en-las-principales-rutas-del-pais 4.5501893 10.4433851
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php 5.4486472 15.4428033
# Stem-and-leaf display at the default scale, for comparison with scale = 2.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 4799
##   1 | 67789
##   2 | 011467788
##   3 | 0135689
##   4 | 0127
##   5 | 9
##   6 | 2
# Same display with the plot length doubled (scale = 2).
stem(df_GT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 4
##   0 | 799
##   1 | 
##   1 | 67789
##   2 | 0114
##   2 | 67788
##   3 | 013
##   3 | 5689
##   4 | 012
##   4 | 7
##   5 | 
##   5 | 9
##   6 | 2

Tablas de frecuencia

# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# The result is stored as `table` (the same name as base::table); later
# chunks read it under this name.
library(questionr)
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.44764 1 3.2 3.2 3.2 3.2
0.65744 1 3.2 3.2 6.5 6.5
0.8904 1 3.2 3.2 9.7 9.7
0.94245 1 3.2 3.2 12.9 12.9
1.57381 1 3.2 3.2 16.1 16.1
1.6829 1 3.2 3.2 19.4 19.4
1.70147 1 3.2 3.2 22.6 22.6
1.83863 1 3.2 3.2 25.8 25.8
1.87009 1 3.2 3.2 29.0 29.0
2.02891 1 3.2 3.2 32.3 32.3
2.08425 1 3.2 3.2 35.5 35.5
2.10418 1 3.2 3.2 38.7 38.7
2.36376 1 3.2 3.2 41.9 41.9
2.5962 1 3.2 3.2 45.2 45.2
2.66358 1 3.2 3.2 48.4 48.4
2.70053 1 3.2 3.2 51.6 51.6
2.79113 1 3.2 3.2 54.8 54.8
2.81128 1 3.2 3.2 58.1 58.1
3.00314 1 3.2 3.2 61.3 61.3
3.12614 1 3.2 3.2 64.5 64.5
3.25675 1 3.2 3.2 67.7 67.7
3.49341 1 3.2 3.2 71.0 71.0
3.64749 1 3.2 3.2 74.2 74.2
3.80312 1 3.2 3.2 77.4 77.4
3.85648 1 3.2 3.2 80.6 80.6
3.96161 1 3.2 3.2 83.9 83.9
4.0793 1 3.2 3.2 87.1 87.1
4.20726 1 3.2 3.2 90.3 90.3
4.74385 1 3.2 3.2 93.5 93.5
5.94535 1 3.2 3.2 96.8 96.8
6.19218 1 3.2 3.2 100.0 100.0
Total 31 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  32 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 ...
##  $ val%   : num  3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 ...
##  $ %cum   : num  3.2 6.5 9.7 12.9 16.1 19.4 22.6 25.8 29 32.3 ...
##  $ val%cum: num  3.2 6.5 9.7 12.9 16.1 19.4 22.6 25.8 29 32.3 ...
# Build a two-column data frame (value label, absolute frequency) from the
# frequency table, dropping its trailing "Total" row.
# head(v, -1) drops the last element even when it is the only one, whereas
# v[1:(length(v) - 1)] would index with 1:0 and keep the "Total" row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.44764 1
0.65744 1
0.8904 1
0.94245 1
1.57381 1
1.6829 1
1.70147 1
1.83863 1
1.87009 1
2.02891 1
2.08425 1
2.10418 1
2.36376 1
2.5962 1
2.66358 1
2.70053 1
2.79113 1
2.81128 1
3.00314 1
3.12614 1
3.25675 1
3.49341 1
3.64749 1
3.80312 1
3.85648 1
3.96161 1
4.0793 1
4.20726 1
4.74385 1
5.94535 1
6.19218 1
# Bar chart of the absolute frequency of each distinct distance value.
library(ggplot2)
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points starting at the minimum, in steps of w, up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.44764 2.44764 4.44764 6.44764
# Grouped frequency table: absolute, relative and cumulative frequencies.
# include.lowest = TRUE closes the first interval on the left. The first break
# is min(distance), so with cut()'s default (a, b] intervals the smallest
# observation matched no class, became NA and was missing from the counts.
# Re-knit to refresh the rendered tables.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.448,2.45] 12 0.4 12
(2.45,4.45] 15 0.5 27
(4.45,6.45] 3 0.1 30
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.448,2.45]",..: 1 2 3
##  $ Freq    : int  12 15 3
##  $ Rel_Freq: num  0.4 0.5 0.1
##  $ Cum_Freq: int  12 27 30
# Plotting frame: class interval (x) and absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.448,2.45] 12
(2.45,4.45] 15
(4.45,6.45] 3
# Bar chart of the absolute frequency per distance class.
library(ggplot2)

ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

# Descriptive statistics for every column of df_GT; non-numeric columns come
# back as NA in the rendered output.
library(pastecs)
pastecs::stat.desc(df_GT)
##                        id date time continent_code country_name country_code
## nbr.val      3.100000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.650000e+02   NA   NA             NA           NA           NA
## max          7.435000e+03   NA   NA             NA           NA           NA
## range        7.270000e+03   NA   NA             NA           NA           NA
## sum          1.638690e+05   NA   NA             NA           NA           NA
## median       7.345000e+03   NA   NA             NA           NA           NA
## mean         5.286097e+03   NA   NA             NA           NA           NA
## SE.mean      5.051486e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.031651e+03   NA   NA             NA           NA           NA
## var          7.910428e+06   NA   NA             NA           NA           NA
## std.dev      2.812548e+03   NA   NA             NA           NA           NA
## coef.var     5.320652e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.100000e+01   NA 31.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 2.849200e+04   NA  0.4476400                   NA
## max             NA 9.949380e+05   NA  6.1921800                   NA
## range           NA 9.664460e+05   NA  5.7445400                   NA
## sum             NA 1.556974e+07   NA 87.0647300                   NA
## median          NA 4.730800e+05   NA  2.7005300                   NA
## mean            NA 5.022497e+05   NA  2.8085397                   NA
## SE.mean         NA 7.918482e+04   NA  0.2511060                   NA
## CI.mean.0.95    NA 1.617170e+05   NA  0.5128270                   NA
## var             NA 1.943773e+11   NA  1.9546816                   NA
## std.dev         NA 4.408824e+05   NA  1.3980993                   NA
## coef.var        NA 8.778152e-01   NA  0.4978029                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      3.100000e+01  3.100000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.442360e+01 -9.063190e+01          NA          NA             NA
## max          1.469690e+01 -9.041910e+01          NA          NA             NA
## range        2.733000e-01  2.128000e-01          NA          NA             NA
## sum          4.528442e+02 -2.805914e+03          NA          NA             NA
## median       1.462290e+01 -9.050650e+01          NA          NA             NA
## mean         1.460788e+01 -9.051336e+01          NA          NA             NA
## SE.mean      1.173315e-02  8.643431e-03          NA          NA             NA
## CI.mean.0.95 2.396228e-02  1.765224e-02          NA          NA             NA
## var          4.267668e-03  2.315976e-03          NA          NA             NA
## std.dev      6.532739e-02  4.812459e-02          NA          NA             NA
## coef.var     4.472066e-03 -5.316849e-04          NA          NA             NA
##              landslide_size trigger storm_name   injuries  fatalities
## nbr.val                  NA      NA         NA 20.0000000   31.000000
## nbr.null                 NA      NA         NA 18.0000000   21.000000
## nbr.na                   NA      NA         NA 11.0000000    0.000000
## min                      NA      NA         NA  0.0000000    0.000000
## max                      NA      NA         NA  6.0000000  280.000000
## range                    NA      NA         NA  6.0000000  280.000000
## sum                      NA      NA         NA  9.0000000  337.000000
## median                   NA      NA         NA  0.0000000    0.000000
## mean                     NA      NA         NA  0.4500000   10.870968
## SE.mean                  NA      NA         NA  0.3282730    8.997938
## CI.mean.0.95             NA      NA         NA  0.6870833   18.376241
## var                      NA      NA         NA  2.1552632 2509.849462
## std.dev                  NA      NA         NA  1.4680815   50.098398
## coef.var                 NA      NA         NA  3.2624032    4.608458
##              source_name source_link        prop         ypos
## nbr.val               NA          NA  31.0000000   31.0000000
## nbr.null              NA          NA   0.0000000    0.0000000
## nbr.na                NA          NA   0.0000000    0.0000000
## min                   NA          NA   0.5141462    0.3775582
## max                   NA          NA   7.1121567   98.8348267
## range                 NA          NA   6.5980105   98.4572685
## sum                   NA          NA 100.0000000 1491.4174144
## median                NA          NA   3.1017497   49.4240435
## mean                  NA          NA   3.2258065   48.1102392
## SE.mean               NA          NA   0.2884130    5.3801852
## CI.mean.0.95          NA          NA   0.5890180   10.9878040
## var                   NA          NA   2.5786447  897.3381627
## std.dev               NA          NA   1.6058159   29.9556032
## coef.var              NA          NA   0.4978029    0.6226451
# Horizontal box plot of `data`, filled in green.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Huehuetenango (Guatemala)

library(readr)
library(knitr)
# Read the landslide catalog CSV from GitHub into `df`.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", keep only the rows
# whose country is Guatemala, and preview the first rows.
names(df)[c(7, 9)] <- c("state", "city")
df_GT <- dplyr::filter(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Keep only the catalog rows whose state is Huehuetenango and preview them.
df_GT <- dplyr::filter(df, state == "Huehuetenango")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
867 10/28/08 NA NA Guatemala GT Huehuetenango 1311 San Sebastián Huehuetenango 1.58358 NA 15.3760 -91.5960 (15.375999999999999, -91.596000000000004) Landslide Mudslide Medium Downpour NA NA NA NA http://www.yourmileagemayvary.ca/mudslide-impeeds-our-progress-in-guatemala
2352 8/28/10 NA NA Guatemala GT Huehuetenango 2121 Malacatancito 0.03280 NA 15.2164 -91.5168 (15.2164, -91.516800000000003) Landslide Landslide Medium Downpour NA NA 0 NA NA
3981 9/20/11 15:00 NA Guatemala GT Huehuetenango 14100 Barillas 0.45507 NA 15.8076 -91.3148 (15.807600000000001, -91.314800000000005) Landslide Landslide Large Rain NA NA 15 BNO News http://wireupdate.com/news/three-children-found-dead-ten-others-missing-after-landslide-in-guatemala.html
6040 5/30/14 18:00 NA Guatemala GT Huehuetenango 3554 San Pedro Necta 0.91108 Natural slope 15.4918 -91.7671 (15.4918, -91.767099999999999) Landslide Mudslide Small Downpour NA 7 6 Reuters UK http://uk.reuters.com/article/2014/05/31/uk-guatemala-landslide-idUKKBN0EB0X320140531
6666 9/27/14 NA NA Guatemala GT Huehuetenango 1713 Cuilco 3.30989 Below road 15.3975 -91.9955 (15.397500000000001, -91.995500000000007) Landslide Landslide Medium Downpour NA 0 0 La Noticia http://lanoticiaenguatemala.com/tectitan-declarado-en-alerta-por-deslizamiento/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, placed side by side within the state.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped into polar coordinates on y.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Labelled pie chart. Rows are ordered by city (descending); `prop` is each
# row's share of the total distance in percent and `ypos` is the mid-point of
# its slice, used to place the percentage label.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; require() would only return FALSE.
library(scales)
# "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
# warning; "Set3" is a valid qualitative palette.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object with 12 observations per unit of time,
# starting at 2008, and preview its first values.
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
1.58358
0.91108
0.03280
3.30989
0.45507
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets a label for the
# colour aesthetic; presumably a green line was intended -- confirm.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (names are the cities) that feeds the Pareto chart.
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                              
## Pareto chart analysis for distance
##                                 Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Cuilco                        3.3098900   3.3098900  52.6012250   52.6012250
##   San Sebastián Huehuetenango   1.5835800   4.8934700  25.1664701   77.7676951
##   San Pedro Necta               0.9110800   5.8045500  14.4790081   92.2467032
##   Barillas                      0.4550700   6.2596200   7.2320347   99.4787379
##   Malacatancito                 0.0328000   6.2924200   0.5212621  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 059
##   1 | 6
##   2 | 
##   3 | 3
# Show the first rows (at most six) of the filtered data.
head(x = df_GT, n = 6L)
## # A tibble: 5 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   867 10/28/08 <NA>  <NA>           Guatemala    GT           Hueh~       1311
## 2  6040 5/30/14  18:00 <NA>           Guatemala    GT           Hueh~       3554
## 3  2352 8/28/10  <NA>  <NA>           Guatemala    GT           Hueh~       2121
## 4  6666 9/27/14  <NA>  <NA>           Guatemala    GT           Hueh~       1713
## 5  3981 9/20/11  15:00 <NA>           Guatemala    GT           Hueh~      14100
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
867 10/28/08 NA NA Guatemala GT Huehuetenango 1311 San Sebastián Huehuetenango 1.58358 NA 15.3760 -91.5960 (15.375999999999999, -91.596000000000004) Landslide Mudslide Medium Downpour NA NA NA NA http://www.yourmileagemayvary.ca/mudslide-impeeds-our-progress-in-guatemala 25.1664701 12.58324
6040 5/30/14 18:00 NA Guatemala GT Huehuetenango 3554 San Pedro Necta 0.91108 Natural slope 15.4918 -91.7671 (15.4918, -91.767099999999999) Landslide Mudslide Small Downpour NA 7 6 Reuters UK http://uk.reuters.com/article/2014/05/31/uk-guatemala-landslide-idUKKBN0EB0X320140531 14.4790081 32.40597
2352 8/28/10 NA NA Guatemala GT Huehuetenango 2121 Malacatancito 0.03280 NA 15.2164 -91.5168 (15.2164, -91.516800000000003) Landslide Landslide Medium Downpour NA NA 0 NA NA 0.5212621 39.90611
6666 9/27/14 NA NA Guatemala GT Huehuetenango 1713 Cuilco 3.30989 Below road 15.3975 -91.9955 (15.397500000000001, -91.995500000000007) Landslide Landslide Medium Downpour NA 0 0 La Noticia http://lanoticiaenguatemala.com/tectitan-declarado-en-alerta-por-deslizamiento/ 52.6012250 66.46735
3981 9/20/11 15:00 NA Guatemala GT Huehuetenango 14100 Barillas 0.45507 NA 15.8076 -91.3148 (15.807600000000001, -91.314800000000005) Landslide Landslide Large Rain NA NA 15 BNO News http://wireupdate.com/news/three-children-found-dead-ten-others-missing-after-landslide-in-guatemala.html 7.2320347 96.38398
# Stem-and-leaf display of the distance column at the default scale.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 059
##   1 | 6
##   2 | 
##   3 | 3
# Same display with scale = 2, which spreads the values over more stems.
stem(df_GT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 0
##   0 | 59
##   1 | 
##   1 | 6
##   2 | 
##   2 | 
##   3 | 3

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values with cumulative columns and a
# "Total" row.
# NOTE(review): the result is stored under the name `table`, the same name as
# base::table().
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.0328 1 20 20 20 20
0.45507 1 20 20 40 40
0.91108 1 20 20 60 60
1.58358 1 20 20 80 80
3.30989 1 20 20 100 100
Total 5 100 100 100 100
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
# Build a two-column frame (value, count) for plotting, dropping the trailing
# "Total" row of the frequency table.
# head(v, -1L) removes the last element safely; the previous 1:(length - 1)
# indexing misbehaves when only one row exists, and its helper variable
# `names` shadowed base::names().
x <- row.names(table)
y <- table$n
df <- data.frame(x = head(x, -1L), y = head(y, -1L))
knitr::kable(df)
x y
0.0328 1
0.45507 1
0.91108 1
1.58358 1
3.30989 1
library(ggplot2)
# Bar chart of the count for each distinct distance value; x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n), plus its rounded-up and
# rounded-down versions.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even; otherwise
# take the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Break points start at the minimum and advance by that width.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.0328 2.0328 4.0328
# Bin the distances into the class intervals. include.lowest = TRUE closes the
# first interval on the left, so the minimum (which equals the first break) is
# counted instead of becoming NA and being dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.0328,2.03] 4 0.8 4
(2.03,4.03] 1 0.2 5
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "[0.0328,2.03]",..: 1 2
##  $ Freq    : int  4 1
##  $ Rel_Freq: num  0.8 0.2
##  $ Cum_Freq: int  4 5
# Two-column frame (class label, count) used by the bar chart.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.0328,2.03] 4
(2.03,4.03] 1
library(ggplot2)

# Bar chart of the count in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the filtered data; the
# non-numeric columns come out as NA.
pastecs::stat.desc(df_GT)
##                        id date time continent_code country_name country_code
## nbr.val      5.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.670000e+02   NA   NA             NA           NA           NA
## max          6.666000e+03   NA   NA             NA           NA           NA
## range        5.799000e+03   NA   NA             NA           NA           NA
## sum          1.990600e+04   NA   NA             NA           NA           NA
## median       3.981000e+03   NA   NA             NA           NA           NA
## mean         3.981200e+03   NA   NA             NA           NA           NA
## SE.mean      1.090856e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.028702e+03   NA   NA             NA           NA           NA
## var          5.949836e+06   NA   NA             NA           NA           NA
## std.dev      2.439229e+03   NA   NA             NA           NA           NA
## coef.var     6.126868e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 5.000000e+00   NA 5.0000000                   NA  5.00000000
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## min             NA 1.311000e+03   NA 0.0328000                   NA 15.21640000
## max             NA 1.410000e+04   NA 3.3098900                   NA 15.80760000
## range           NA 1.278900e+04   NA 3.2770900                   NA  0.59120000
## sum             NA 2.279900e+04   NA 6.2924200                   NA 77.28930000
## median          NA 2.121000e+03   NA 0.9110800                   NA 15.39750000
## mean            NA 4.559800e+03   NA 1.2584840                   NA 15.45786000
## SE.mean         NA 2.414889e+03   NA 0.5736906                   NA  0.09800952
## CI.mean.0.95    NA 6.704806e+03   NA 1.5928205                   NA  0.27211805
## var             NA 2.915844e+07   NA 1.6456047                   NA  0.04802933
## std.dev         NA 5.399856e+03   NA 1.2828113                   NA  0.21915594
## coef.var        NA 1.184231e+00   NA 1.0193306                   NA  0.01417764
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.199550e+01          NA          NA             NA
## max          -9.131480e+01          NA          NA             NA
## range         6.807000e-01          NA          NA             NA
## sum          -4.581902e+02          NA          NA             NA
## median       -9.159600e+01          NA          NA             NA
## mean         -9.163804e+01          NA          NA             NA
## SE.mean       1.151922e-01          NA          NA             NA
## CI.mean.0.95  3.198247e-01          NA          NA             NA
## var           6.634618e-02          NA          NA             NA
## std.dev       2.575775e-01          NA          NA             NA
## coef.var     -2.810814e-03          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA  2.000000   4.000000          NA
## nbr.null                 NA      NA         NA  1.000000   2.000000          NA
## nbr.na                   NA      NA         NA  3.000000   1.000000          NA
## min                      NA      NA         NA  0.000000   0.000000          NA
## max                      NA      NA         NA  7.000000  15.000000          NA
## range                    NA      NA         NA  7.000000  15.000000          NA
## sum                      NA      NA         NA  7.000000  21.000000          NA
## median                   NA      NA         NA  3.500000   3.000000          NA
## mean                     NA      NA         NA  3.500000   5.250000          NA
## SE.mean                  NA      NA         NA  3.500000   3.544362          NA
## CI.mean.0.95             NA      NA         NA 44.471717  11.279741          NA
## var                      NA      NA         NA 24.500000  50.250000          NA
## std.dev                  NA      NA         NA  4.949747   7.088723          NA
## coef.var                 NA      NA         NA  1.414214   1.350233          NA
##              source_link        prop         ypos
## nbr.val               NA   5.0000000    5.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.5212621   12.5832351
## max                   NA  52.6012250   96.3839826
## range                 NA  52.0799629   83.8007476
## sum                   NA 100.0000000  247.7466539
## median                NA  14.4790081   39.9061093
## mean                  NA  20.0000000   49.5493308
## SE.mean               NA   9.1171701   14.5483379
## CI.mean.0.95          NA  25.3133222   40.3926615
## var                   NA 415.6139506 1058.2706780
## std.dev               NA  20.3866120   32.5310725
## coef.var              NA   1.0193306    0.6565391
# Horizontal box plot of the `data` series, filled in green.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Quetzaltenango (Guatemala)

library(readr)
library(knitr)
# Read the landslide catalog CSV from GitHub into `df`.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", keep only the rows
# whose country is Guatemala, and preview the first rows.
names(df)[c(7, 9)] <- c("state", "city")
df_GT <- dplyr::filter(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Keep only the catalog rows whose state is Quetzaltenango and preview them.
df_GT <- dplyr::filter(df, state == "Quetzaltenango")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1904 5/26/10 NA NA Guatemala GT Quetzaltenango 11913 Almolonga 0.77254 NA 14.8123 -91.4944 (14.8123, -91.494399999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 4 NA http://www.mb.com.ph/articles/259475/50-families-lose-homes-la-union-landslide
2385 9/4/10 NA NA Guatemala GT Quetzaltenango 19115 Colomba 0.92729 NA 14.7085 -91.7314 (14.708500000000001, -91.731399999999994) Landslide Landslide Medium Downpour NA NA 4 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/
7439 10/6/15 NA NA Guatemala GT Quetzaltenango 45654 Coatepeque 1.81216 Above road 14.7159 -91.8708 (14.7159, -91.870800000000003) Landslide Landslide Small Rain NA 0 0 Canal Antigua https://canalantigua.tv/derrumbe-en-coatepeque/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, placed side by side within the state.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped into polar coordinates on y.
ggplot(data = df_GT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Labelled pie chart. Rows are ordered by city (descending); `prop` is each
# row's share of the total distance in percent and `ypos` is the mid-point of
# its slice, used to place the percentage label.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; require() would only return FALSE.
library(scales)
# "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
# warning; "Set3" is a valid qualitative palette.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object with 12 observations per unit of time,
# starting at 2008, and preview its first values.
data <- ts(data = df_GT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.92729
1.81216
0.77254
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets a label for the
# colour aesthetic; presumably a green line was intended -- confirm.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (names are the cities) that feeds the Pareto chart.
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Coatepeque   1.81216   1.81216   51.59924     51.59924
##   Colomba      0.92729   2.73945   26.40355     78.00278
##   Almolonga    0.77254   3.51199   21.99722    100.00000
# Stem-and-leaf display of the distance column.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 89
##   1 | 
##   1 | 8
# Show the first rows (at most six) of the filtered data.
head(x = df_GT, n = 6L)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  2385 9/4/10  <NA>  <NA>           Guatemala    GT           Quetz~      19115
## 2  7439 10/6/15 <NA>  <NA>           Guatemala    GT           Quetz~      45654
## 3  1904 5/26/10 <NA>  <NA>           Guatemala    GT           Quetz~      11913
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2385 9/4/10 NA NA Guatemala GT Quetzaltenango 19115 Colomba 0.92729 NA 14.7085 -91.7314 (14.708500000000001, -91.731399999999994) Landslide Landslide Medium Downpour NA NA 4 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/ 26.40355 13.20177
7439 10/6/15 NA NA Guatemala GT Quetzaltenango 45654 Coatepeque 1.81216 Above road 14.7159 -91.8708 (14.7159, -91.870800000000003) Landslide Landslide Small Rain NA 0 0 Canal Antigua https://canalantigua.tv/derrumbe-en-coatepeque/ 51.59924 52.20317
1904 5/26/10 NA NA Guatemala GT Quetzaltenango 11913 Almolonga 0.77254 NA 14.8123 -91.4944 (14.8123, -91.494399999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 4 NA http://www.mb.com.ph/articles/259475/50-families-lose-homes-la-union-landslide 21.99722 89.00139
# Stem-and-leaf display of the distance column at the default scale.
stem(df_GT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 89
##   1 | 
##   1 | 8
# Same display with scale = 2, which spreads the values over more stems.
stem(df_GT[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    6 | 7
##    8 | 3
##   10 | 
##   12 | 
##   14 | 
##   16 | 
##   18 | 1

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values with cumulative columns and a
# "Total" row.
# NOTE(review): the result is stored under the name `table`, the same name as
# base::table().
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.77254 1 33.3 33.3 33.3 33.3
0.92729 1 33.3 33.3 66.7 66.7
1.81216 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Build a two-column frame (value, count) for plotting, dropping the trailing
# "Total" row of the frequency table.
# head(v, -1L) removes the last element safely; the previous 1:(length - 1)
# indexing misbehaves when only one row exists, and its helper variable
# `names` shadowed base::names().
x <- row.names(table)
y <- table$n
df <- data.frame(x = head(x, -1L), y = head(y, -1L))
knitr::kable(df)
x y
0.77254 1
0.92729 1
1.81216 1
library(ggplot2)
# Bar chart of the count for each distinct distance value; x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n), plus its rounded-up and
# rounded-down versions.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even; otherwise
# take the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Break points start at the minimum and advance by that width.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.77254 1.77254 2.77254
# Bin the distances into the class intervals. include.lowest = TRUE closes the
# first interval on the left, so the minimum (which equals the first break) is
# counted instead of becoming NA and being dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.773,1.77] 2 0.6666667 2
(1.77,2.77] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "[0.773,1.77]",..: 1 2
##  $ Freq    : int  2 1
##  $ Rel_Freq: num  0.667 0.333
##  $ Cum_Freq: int  2 3
# Two-column frame (class label, count) used by the bar chart.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.773,1.77] 2
(1.77,2.77] 1
library(ggplot2)

# Bar chart of the count in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the filtered data; the
# non-numeric columns come out as NA.
pastecs::stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.904000e+03   NA   NA             NA           NA           NA
## max          7.439000e+03   NA   NA             NA           NA           NA
## range        5.535000e+03   NA   NA             NA           NA           NA
## sum          1.172800e+04   NA   NA             NA           NA           NA
## median       2.385000e+03   NA   NA             NA           NA           NA
## mean         3.909333e+03   NA   NA             NA           NA           NA
## SE.mean      1.770287e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 7.616931e+03   NA   NA             NA           NA           NA
## var          9.401750e+06   NA   NA             NA           NA           NA
## std.dev      3.066227e+03   NA   NA             NA           NA           NA
## coef.var     7.843351e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 3.000000e+00   NA 3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA
## min             NA 1.191300e+04   NA 0.7725400                   NA
## max             NA 4.565400e+04   NA 1.8121600                   NA
## range           NA 3.374100e+04   NA 1.0396200                   NA
## sum             NA 7.668200e+04   NA 3.5119900                   NA
## median          NA 1.911500e+04   NA 0.9272900                   NA
## mean            NA 2.556067e+04   NA 1.1706633                   NA
## SE.mean         NA 1.025953e+04   NA 0.3238443                   NA
## CI.mean.0.95    NA 4.414319e+04   NA 1.3933895                   NA
## var             NA 3.157737e+08   NA 0.3146254                   NA
## std.dev         NA 1.777002e+04   NA 0.5609148                   NA
## coef.var        NA 6.952097e-01   NA 0.4791427                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.000000000  3.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          14.708500000 -9.187080e+01          NA          NA             NA
## max          14.812300000 -9.149440e+01          NA          NA             NA
## range         0.103800000  3.764000e-01          NA          NA             NA
## sum          44.236700000 -2.750966e+02          NA          NA             NA
## median       14.715900000 -9.173140e+01          NA          NA             NA
## mean         14.745566667 -9.169887e+01          NA          NA             NA
## SE.mean       0.033434978  1.098682e-01          NA          NA             NA
## CI.mean.0.95  0.143859101  4.727246e-01          NA          NA             NA
## var           0.003353693  3.621305e-02          NA          NA             NA
## std.dev       0.057911081  1.902973e-01          NA          NA             NA
## coef.var      0.003927355 -2.075241e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  3.0000000          NA
## nbr.null                 NA      NA         NA        1  1.0000000          NA
## nbr.na                   NA      NA         NA        2  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  4.0000000          NA
## range                    NA      NA         NA        0  4.0000000          NA
## sum                      NA      NA         NA        0  8.0000000          NA
## median                   NA      NA         NA        0  4.0000000          NA
## mean                     NA      NA         NA        0  2.6666667          NA
## SE.mean                  NA      NA         NA       NA  1.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  5.7368703          NA
## var                      NA      NA         NA       NA  5.3333333          NA
## std.dev                  NA      NA         NA       NA  2.3094011          NA
## coef.var                 NA      NA         NA       NA  0.8660254          NA
##              source_link        prop         ypos
## nbr.val               NA   3.0000000    3.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  21.9972153   13.2017745
## max                   NA  51.5992358   89.0013924
## range                 NA  29.6020205   75.7996179
## sum                   NA 100.0000000  154.4063337
## median                NA  26.4035490   52.2031669
## mean                  NA  33.3333333   51.4687779
## SE.mean               NA   9.2211051   21.8845456
## CI.mean.0.95          NA  39.6752130   94.1616000
## var                   NA 255.0863379 1436.8000131
## std.dev               NA  15.9714225   37.9051449
## coef.var              NA   0.4791427    0.7364687
# Horizontal box plot of the `data` object, filled in green.
boxplot(
  data,
  col = "green",
  horizontal = TRUE
)

Gráfico para Quiché (Guatemala)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the columns at positions 7 and 9 the short names "state" and "city".
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records whose country is Guatemala and preview the first rows.
df_GT <- dplyr::filter(df, country_name == "Guatemala")
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Restrict the catalogue to the records whose state is Quiché and preview them.
df_GT <- dplyr::filter(df, state == "Quiché")
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1924 5/30/10 NA NA Guatemala GT Quiché 11046 San Juan Cotzal 8.93658 NA 15.4446 -90.9516 (15.444599999999999, -90.951599999999999) Landslide Landslide Medium Tropical cyclone Tropical Cyclone Agatha NA 3 NA http://calamities.gaeatimes.com/2010/05/30/tropical-storm-agathas-torrential-rains-landslides-kill-44-in-central-america-27140/
2386 9/4/10 NA NA Guatemala GT Quiché 2090 Chicamán 21.83272 NA 15.5800 -90.8500 (15.58, -90.85) Landslide Landslide Medium Downpour NA NA 1 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/
2408 9/10/10 NA NA Guatemala GT Quiché 12088 Sacapulas 0.35171 NA 15.2892 -91.0892 (15.289199999999999, -91.089200000000005) Landslide Landslide Medium Downpour NA NA 5 NA http://www.tmcnet.com/usubmit/2010/09/11/5001935.htm
4361 5/15/12 NA NA Guatemala GT Quiché 11657 Chajul 7.39906 NA 15.5238 -90.9778 (15.5238, -90.977800000000002) Landslide Landslide Medium Downpour NA NA 1 NA http://latino.foxnews.com/latino/news/2012/05/16/boy-dies-in-rains-in-guatemala/
6664 3/12/14 Night NA Guatemala GT Quiché 7850 San Luis Ixcán 22.56101 Deforested slope 15.7318 -90.8924 (15.7318, -90.892399999999995) Landslide Landslide Medium Downpour NA 0 0 Hoy http://www.hoy.com.ni/2014/03/13/deslizamiento-destruye-5-casas-en-poblado-indigena-guatemalteco/
6668 6/24/14 NA NA Guatemala GT Quiché 2693 Chinique 3.98185 Unknown 15.0771 -91.0096 (15.0771, -91.009600000000006) Landslide Landslide Medium Rain NA 0 2 Noticias de Bomberos http://noticiasdebomberosgua.blogspot.com/2014/06/muertos-por-deslizamiento-de-tierra-en.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance for each city, grouped under its state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities piled into one bar per state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar bent into a circle along the y axis.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add two helper columns to df_GT for the labelled pie chart:
#   prop - share of the total distance contributed by each record, in percent
#   ypos - mid-point of each slice on the cumulative scale (label position)
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let the chunk fail later at percent().
library(scales)
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; "Set3" is the qualitative palette with up to 12 colours.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object with 12 observations per time unit,
# starting at 2008. The values are taken in the current row order of df_GT;
# the `date` column is not used to order or index the series.
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
22.56101
8.93658
0.35171
3.98185
2.27725
21.83272
# Line plot of the series with a title, axis labels and the black-and-white theme.
# NOTE(review): `colour = "green"` is given to labs(), which sets labels for
# aesthetics rather than drawing colours — confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_GT$distance, df_GT$city)
# Pareto chart of the distances with a cumulative-percentage axis marked
# every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                      Frequency   Cum.Freq.  Percentage Cum.Percent.
##   San Luis Ixcán    22.5610100  22.5610100  33.5030438   33.5030438
##   Chicamán          21.8327200  44.3937300  32.4215350   65.9245788
##   San Juan Cotzal    8.9365800  53.3303100  13.2707991   79.1953779
##   Chajul             7.3990600  60.7293700  10.9875857   90.1829636
##   Chinique           3.9818500  64.7112200   5.9130374   96.0960009
##   Chichicastenango   2.2772500  66.9884700   3.3817106   99.4777115
##   Sacapulas          0.3517100  67.3401800   0.5222885  100.0000000
stem(df_GT$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 024
##   0 | 79
##   1 | 
##   1 | 
##   2 | 23
head(df_GT)
## # A tibble: 6 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  6664 3/12/14 Night <NA>           Guatemala    GT           Quiché       7850
## 2  1924 5/30/10 <NA>  <NA>           Guatemala    GT           Quiché      11046
## 3  2408 9/10/10 <NA>  <NA>           Guatemala    GT           Quiché      12088
## 4  6668 6/24/14 <NA>  <NA>           Guatemala    GT           Quiché       2693
## 5  7427 9/27/15 <NA>  <NA>           Guatemala    GT           Quiché      79759
## 6  2386 9/4/10  <NA>  <NA>           Guatemala    GT           Quiché       2090
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6664 3/12/14 Night NA Guatemala GT Quiché 7850 San Luis Ixcán 22.56101 Deforested slope 15.7318 -90.8924 (15.7318, -90.892399999999995) Landslide Landslide Medium Downpour NA 0 0 Hoy http://www.hoy.com.ni/2014/03/13/deslizamiento-destruye-5-casas-en-poblado-indigena-guatemalteco/ 33.5030438 16.75152
1924 5/30/10 NA NA Guatemala GT Quiché 11046 San Juan Cotzal 8.93658 NA 15.4446 -90.9516 (15.444599999999999, -90.951599999999999) Landslide Landslide Medium Tropical cyclone Tropical Cyclone Agatha NA 3 NA http://calamities.gaeatimes.com/2010/05/30/tropical-storm-agathas-torrential-rains-landslides-kill-44-in-central-america-27140/ 13.2707991 40.13844
2408 9/10/10 NA NA Guatemala GT Quiché 12088 Sacapulas 0.35171 NA 15.2892 -91.0892 (15.289199999999999, -91.089200000000005) Landslide Landslide Medium Downpour NA NA 5 NA http://www.tmcnet.com/usubmit/2010/09/11/5001935.htm 0.5222885 47.03499
6668 6/24/14 NA NA Guatemala GT Quiché 2693 Chinique 3.98185 Unknown 15.0771 -91.0096 (15.0771, -91.009600000000006) Landslide Landslide Medium Rain NA 0 2 Noticias de Bomberos http://noticiasdebomberosgua.blogspot.com/2014/06/muertos-por-deslizamiento-de-tierra-en.html 5.9130374 50.25265
7427 9/27/15 NA NA Guatemala GT Quiché 79759 Chichicastenango 2.27725 Unknown 14.9233 -91.1352 (14.923299999999999, -91.135199999999998) Landslide Landslide Medium Rain NA 1 2 CONRED http://www.redhum.org/documento_download/17080 3.3817106 54.90002
2386 9/4/10 NA NA Guatemala GT Quiché 2090 Chicamán 21.83272 NA 15.5800 -90.8500 (15.58, -90.85) Landslide Landslide Medium Downpour NA NA 1 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/ 32.4215350 72.80165
stem(df_GT$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 024
##   0 | 79
##   1 | 
##   1 | 
##   2 | 23
stem(df_GT$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 024
##   0 | 79
##   1 | 
##   1 | 
##   2 | 23

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values; cum = TRUE adds the
# cumulative percentage columns and total = TRUE appends a "Total" row.
# NOTE(review): the result is stored under the name `table`, the same name as
# base R's table() function — consider a distinct name to avoid confusion.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.35171 1 14.3 14.3 14.3 14.3
2.27725 1 14.3 14.3 28.6 28.6
3.98185 1 14.3 14.3 42.9 42.9
7.39906 1 14.3 14.3 57.1 57.1
8.93658 1 14.3 14.3 71.4 71.4
21.83272 1 14.3 14.3 85.7 85.7
22.56101 1 14.3 14.3 100.0 100.0
Total 7 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  8 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 7
##  $ %      : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ val%   : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ %cum   : num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
##  $ val%cum: num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
# Split the frequency table into value labels (row names) and counts (n),
# dropping the trailing "Total" row so that only observed values are plotted.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element. Unlike v[1:(length(v) - 1)] it cannot
# index backwards (1:0) when the table holds a single row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.35171 1
2.27725 1
3.98185 1
7.39906 1
8.93658 1
21.83272 1
22.56101 1
library(ggplot2)
# One bar per distinct distance value; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule: k = 1 + log2(n).
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up class count unless it is even; in that case fall back to
# the rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc
# Class width: the data range split into n_clases parts, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.35171  8.35171 16.35171 24.35171
# The first break equals min(distance) and cut() builds (a, b] intervals, so
# without include.lowest = TRUE the smallest observation is turned into NA and
# silently dropped from the counts (the table then covers n - 1 records).
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.352,8.35] 4 0.5714286 4
(8.35,16.4] 1 0.1428571 5
(16.4,24.4] 2 0.2857143 7
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[0.352,8.35]",..: 1 2 3
##  $ Freq    : int  4 1 2
##  $ Rel_Freq: num  0.571 0.143 0.286
##  $ Cum_Freq: int  4 5 7
# Two-column frame (class label, absolute frequency) used by the bar chart.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.352,8.35] 4
(8.35,16.4] 1
(16.4,24.4] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_GT, including the prop and
# ypos helper columns. In the printed result below, the non-numeric columns
# are reported as NA.
stat.desc(df_GT)
##                        id date time continent_code country_name country_code
## nbr.val      7.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.924000e+03   NA   NA             NA           NA           NA
## max          7.427000e+03   NA   NA             NA           NA           NA
## range        5.503000e+03   NA   NA             NA           NA           NA
## sum          3.183800e+04   NA   NA             NA           NA           NA
## median       4.361000e+03   NA   NA             NA           NA           NA
## mean         4.548286e+03   NA   NA             NA           NA           NA
## SE.mean      8.922242e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.183194e+03   NA   NA             NA           NA           NA
## var          5.572448e+06   NA   NA             NA           NA           NA
## std.dev      2.360603e+03   NA   NA             NA           NA           NA
## coef.var     5.190094e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 7.000000e+00   NA  7.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 2.090000e+03   NA  0.3517100                   NA
## max             NA 7.975900e+04   NA 22.5610100                   NA
## range           NA 7.766900e+04   NA 22.2093000                   NA
## sum             NA 1.271830e+05   NA 67.3401800                   NA
## median          NA 1.104600e+04   NA  7.3990600                   NA
## mean            NA 1.816900e+04   NA  9.6200257                   NA
## SE.mean         NA 1.038310e+04   NA  3.4282642                   NA
## CI.mean.0.95    NA 2.540654e+04   NA  8.3886604                   NA
## var             NA 7.546621e+08   NA 82.2709699                   NA
## std.dev         NA 2.747111e+04   NA  9.0703346                   NA
## coef.var        NA 1.511977e+00   NA  0.9428597                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        7.00000000  7.000000e+00          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           14.92330000 -9.113520e+01          NA          NA             NA
## max           15.73180000 -9.085000e+01          NA          NA             NA
## range          0.80850000  2.852000e-01          NA          NA             NA
## sum          107.56980000 -6.369058e+02          NA          NA             NA
## median        15.44460000 -9.097780e+01          NA          NA             NA
## mean          15.36711429 -9.098654e+01          NA          NA             NA
## SE.mean        0.10867388  3.841296e-02          NA          NA             NA
## CI.mean.0.95   0.26591540  9.399313e-02          NA          NA             NA
## var            0.08267008  1.032889e-02          NA          NA             NA
## std.dev        0.28752405  1.016311e-01          NA          NA             NA
## coef.var       0.01871035 -1.116991e-03          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 3.0000000  7.0000000          NA
## nbr.null                 NA      NA         NA 2.0000000  1.0000000          NA
## nbr.na                   NA      NA         NA 4.0000000  0.0000000          NA
## min                      NA      NA         NA 0.0000000  0.0000000          NA
## max                      NA      NA         NA 1.0000000  5.0000000          NA
## range                    NA      NA         NA 1.0000000  5.0000000          NA
## sum                      NA      NA         NA 1.0000000 14.0000000          NA
## median                   NA      NA         NA 0.0000000  2.0000000          NA
## mean                     NA      NA         NA 0.3333333  2.0000000          NA
## SE.mean                  NA      NA         NA 0.3333333  0.6172134          NA
## CI.mean.0.95             NA      NA         NA 1.4342176  1.5102668          NA
## var                      NA      NA         NA 0.3333333  2.6666667          NA
## std.dev                  NA      NA         NA 0.5773503  1.6329932          NA
## coef.var                 NA      NA         NA 1.7320508  0.8164966          NA
##              source_link        prop        ypos
## nbr.val               NA   7.0000000   7.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.5222885  16.7515219
## max                   NA  33.5030438  94.5062071
## range                 NA  32.9807553  77.7546852
## sum                   NA 100.0000000 376.3854804
## median                NA  10.9875857  50.2526501
## mean                  NA  14.2857143  53.7693543
## SE.mean               NA   5.0909639   9.3054494
## CI.mean.0.95          NA  12.4571399  22.7696144
## var                   NA 181.4253926 606.1397184
## std.dev               NA  13.4694244  24.6199049
## coef.var              NA   0.9428597   0.4578799
# Horizontal box plot of the `data` series, filled in green.
boxplot(
  data,
  col = "green",
  horizontal = TRUE
)

Gráfico para Sacatepéquez (Guatemala)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the columns at positions 7 and 9 the short names "state" and "city".
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records whose country is Guatemala and preview the first rows.
df_GT <- dplyr::filter(df, country_name == "Guatemala")
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Restrict the catalogue to the records whose state is Sacatepéquez and
# preview them.
df_GT <- dplyr::filter(df, state == "Sacatepéquez")
df_GT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1917 5/29/10 NA NA Guatemala GT Sacatepéquez 15529 Santa María De Jesús 6.13527 NA 14.4651 -90.7426 (14.4651, -90.742599999999996) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 0 NA http://www.boston.com/bigpicture/2010/06/a_rough_week_for_guatemala.html#photo17
2387 9/4/10 NA NA Guatemala GT Sacatepéquez 17918 Jocotenango 0.63089 NA 14.5832 -90.7379 (14.5832, -90.737899999999996) Landslide Landslide Medium Downpour NA NA 1 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/
7432 9/23/15 NA NA Guatemala GT Sacatepéquez 3214 Santa Catarina Barahona 1.45200 Unknown 14.5558 -90.7998 (14.5558, -90.799800000000005) Landslide Landslide Medium Continuous rain NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/comunitario/constante-lluvia-provoca-primeras-desastres-en-la-provincia

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance for each city, grouped under its state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of all cities piled into one bar per state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar bent into a circle along the y axis.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add two helper columns to df_GT for the labelled pie chart:
#   prop - share of the total distance contributed by each record, in percent
#   ypos - mid-point of each slice on the cumulative scale (label position)
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let the chunk fail later at percent().
library(scales)
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; "Set3" is the qualitative palette with up to 12 colours.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object with 12 observations per time unit,
# starting at 2008. The values are taken in the current row order of df_GT;
# the `date` column is not used to order or index the series.
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
6.13527
1.45200
0.63089
# Line plot of the series with a title, axis labels and the black-and-white theme.
# NOTE(review): `colour = "green"` is given to labs(), which sets labels for
# aesthetics rather than drawing colours — confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_GT$distance, df_GT$city)
# Pareto chart of the distances with a cumulative-percentage axis marked
# every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                          
## Pareto chart analysis for distance
##                            Frequency  Cum.Freq. Percentage Cum.Percent.
##   Santa María De Jesús      6.135270   6.135270  74.655032    74.655032
##   Santa Catarina Barahona   1.452000   7.587270  17.668188    92.323221
##   Jocotenango               0.630890   8.218160   7.676779   100.000000
stem(df_GT$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 65
##   2 | 
##   4 | 
##   6 | 1
head(df_GT)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  1917 5/29/10 <NA>  <NA>           Guatemala    GT           Sacat~      15529
## 2  7432 9/23/15 <NA>  <NA>           Guatemala    GT           Sacat~       3214
## 3  2387 9/4/10  <NA>  <NA>           Guatemala    GT           Sacat~      17918
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1917 5/29/10 NA NA Guatemala GT Sacatepéquez 15529 Santa María De Jesús 6.13527 NA 14.4651 -90.7426 (14.4651, -90.742599999999996) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 0 NA http://www.boston.com/bigpicture/2010/06/a_rough_week_for_guatemala.html#photo17 74.655032 37.32752
7432 9/23/15 NA NA Guatemala GT Sacatepéquez 3214 Santa Catarina Barahona 1.45200 Unknown 14.5558 -90.7998 (14.5558, -90.799800000000005) Landslide Landslide Medium Continuous rain NA 0 0 Prensa Libre http://www.prensalibre.com/guatemala/comunitario/constante-lluvia-provoca-primeras-desastres-en-la-provincia 17.668188 83.48913
2387 9/4/10 NA NA Guatemala GT Sacatepéquez 17918 Jocotenango 0.63089 NA 14.5832 -90.7379 (14.5832, -90.737899999999996) Landslide Landslide Medium Downpour NA NA 1 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/ 7.676779 96.16161
stem(df_GT$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 65
##   2 | 
##   4 | 
##   6 | 1
stem(df_GT$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 6
##   1 | 5
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 1

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values; cum = TRUE adds the
# cumulative percentage columns and total = TRUE appends a "Total" row.
# NOTE(review): the result is stored under the name `table`, the same name as
# base R's table() function — consider a distinct name to avoid confusion.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.63089 1 33.3 33.3 33.3 33.3
1.452 1 33.3 33.3 66.7 66.7
6.13527 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Split the frequency table into value labels (row names) and counts (n),
# dropping the trailing "Total" row so that only observed values are plotted.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element. Unlike v[1:(length(v) - 1)] it cannot
# index backwards (1:0) when the table holds a single row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.63089 1
1.452 1
6.13527 1
library(ggplot2)
# One bar per distinct distance value; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept as log(n) / log(2) so rounding is unchanged.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
# NOTE(review): presumably meant to favour an odd class count; when
# n_sturges is already an even integer the result stays even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width is the range divided by the class count, rounded UP to a whole
# number.
# NOTE(review): for ranges well below n_clases this yields w = 1 and fewer
# classes than n_clases; confirm the integer width is intended.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.63089 2.63089 4.63089 6.63089
# Assign each distance to its class and tabulate absolute, relative and
# cumulative frequencies.
# include.lowest = TRUE closes the first interval on the left. The first
# break equals min(distance), so with the default (a, b] intervals that
# observation matched no class and was dropped as NA.
# Re-knit after this change so the printed tables include that observation.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.631,2.63] 1 0.5 1
(2.63,4.63] 0 0.0 1
(4.63,6.63] 1 0.5 2
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.631,2.63]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Two-column data frame for plotting: class label (x) and class count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.631,2.63] 1
(2.63,4.63] 0
(4.63,6.63] 1
library(ggplot2)

# Bar chart of the number of observations per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset.
# NOTE(review): the whole data frame is passed in, so the character columns
# come back as all-NA columns in the printed result; consider selecting the
# numeric columns first.
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.917000e+03   NA   NA             NA           NA           NA
## max          7.432000e+03   NA   NA             NA           NA           NA
## range        5.515000e+03   NA   NA             NA           NA           NA
## sum          1.173600e+04   NA   NA             NA           NA           NA
## median       2.387000e+03   NA   NA             NA           NA           NA
## mean         3.912000e+03   NA   NA             NA           NA           NA
## SE.mean      1.765222e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 7.595137e+03   NA   NA             NA           NA           NA
## var          9.348025e+06   NA   NA             NA           NA           NA
## std.dev      3.057454e+03   NA   NA             NA           NA           NA
## coef.var     7.815578e-01   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 3.000000e+00   NA 3.000000                   NA  3.000000000
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## min             NA 3.214000e+03   NA 0.630890                   NA 14.465100000
## max             NA 1.791800e+04   NA 6.135270                   NA 14.583200000
## range           NA 1.470400e+04   NA 5.504380                   NA  0.118100000
## sum             NA 3.666100e+04   NA 8.218160                   NA 43.604100000
## median          NA 1.552900e+04   NA 1.452000                   NA 14.555800000
## mean            NA 1.222033e+04   NA 2.739387                   NA 14.534700000
## SE.mean         NA 4.555669e+03   NA 1.714407                   NA  0.035687580
## CI.mean.0.95    NA 1.960146e+04   NA 7.376498                   NA  0.153551262
## var             NA 6.226236e+07   NA 8.817573                   NA  0.003820810
## std.dev         NA 7.890650e+03   NA 2.969440                   NA  0.061812701
## coef.var        NA 6.456984e-01   NA 1.083980                   NA  0.004252768
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -9.079980e+01          NA          NA             NA
## max          -9.073790e+01          NA          NA             NA
## range         6.190000e-02          NA          NA             NA
## sum          -2.722803e+02          NA          NA             NA
## median       -9.074260e+01          NA          NA             NA
## mean         -9.076010e+01          NA          NA             NA
## SE.mean       1.989631e-02          NA          NA             NA
## CI.mean.0.95  8.560693e-02          NA          NA             NA
## var           1.187590e-03          NA          NA             NA
## std.dev       3.446143e-02          NA          NA             NA
## coef.var     -3.796980e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  3.0000000          NA
## nbr.null                 NA      NA         NA        1  2.0000000          NA
## nbr.na                   NA      NA         NA        2  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  1.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.3333333          NA
## SE.mean                  NA      NA         NA       NA  0.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  1.4342176          NA
## var                      NA      NA         NA       NA  0.3333333          NA
## std.dev                  NA      NA         NA       NA  0.5773503          NA
## coef.var                 NA      NA         NA       NA  1.7320508          NA
##              source_link        prop        ypos
## nbr.val               NA    3.000000   3.0000000
## nbr.null              NA    0.000000   0.0000000
## nbr.na                NA    0.000000   0.0000000
## min                   NA    7.676779  37.3275161
## max                   NA   74.655032  96.1616104
## range                 NA   66.978253  58.8340942
## sum                   NA  100.000000 216.9782530
## median                NA   17.668188  83.4891265
## mean                  NA   33.333333  72.3260843
## SE.mean               NA   20.861201  17.8775725
## CI.mean.0.95          NA   89.758505  76.9209861
## var                   NA 1305.569167 958.8227945
## std.dev               NA   36.132661  30.9648639
## coef.var              NA    1.083980   0.4281286
# Horizontal box plot of `data` (the series built from df_GT$distance above).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para San Marcos (Guatemala)

library(readr)
library(knitr)
# Reload the full landslide catalog from the course repository; `df` was
# reassigned above to a small plotting data frame, so it has to be read again.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep the rows whose country is
# Guatemala (rows with a missing country are excluded) and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_GT <- df[which(df$country_name == "Guatemala"), ]
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Keep only the Guatemalan events recorded in the state "San Marcos".
# The filter reads from the full catalog `df`, so the country condition is
# stated explicitly; filtering on `state` alone would also pick up a
# same-named state from any other country in the catalog.
df_GT <- subset(df, country_name == "Guatemala" & state == "San Marcos")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
5475 9/7/13 0:13:00 NA Guatemala GT San Marcos 8164 Pajapita 0.96647 NA 14.7254 -92.0335 (14.7254, -92.033500000000004) Landslide Landslide Large Earthquake NA NA 0 www.chinapost.com.tw http://www.chinapost.com.tw/international/americas/2013/09/09/388474/Power-outages.htm
6669 9/27/14 NA NA Guatemala GT San Marcos 6438 Tacaná 0.75729 Unknown 15.2422 -92.0738 (15.2422, -92.073800000000006) Landslide Landslide Medium Rain NA 0 2 Univision http://www.kint.com/2014/09/28/lluvias-dejan-tres-muertos-y-cuatro-desaparecidos-en-guatemala/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per row, coloured by city, height = distance.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distances of all cities piled into one bar per state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() fails loudly if scales is missing; require() would only return
# FALSE and let percent() fail later.
library(scales)

# Share of the total distance per row (in percent) and the mid-point of each
# slice along the stacked axis, used to position the labels.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" replaces "Set4", which is not a ColorBrewer
# palette name and made ggplot2 warn and substitute a default palette.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly time series (12 per year) starting in 2008.
# NOTE(review): values are taken in the current row order of df_GT (sorted
# by city above), not by the `date` column, and the 2008 start is a literal,
# not read from the data. Confirm this index is intended.
data <- ts(df_GT[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.75729
0.96647
# Line plot of the series with custom title and axis labels.
# NOTE(review): colour = "green" inside labs() only sets a legend title for
# a colour aesthetic; it presumably was meant to colour the line. Verify.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so the Pareto chart labels each bar with its city.
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##           
## Pareto chart analysis for distance
##            Frequency Cum.Freq. Percentage Cum.Percent.
##   Pajapita   0.96647   0.96647   56.06755     56.06755
##   Tacaná     0.75729   1.72376   43.93245    100.00000
# Stem-and-leaf display of the landslide distances in the current subset.
stem(df_GT$distance)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   7 | 6
##   8 | 
##   8 | 
##   9 | 
##   9 | 7
# Preview the subset, which now also carries the `prop` and `ypos` columns.
head(df_GT)
## # A tibble: 2 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6669 9/27/14 <NA>    <NA>           Guatemala    GT           San ~       6438
## 2  5475 9/7/13  0:13:00 <NA>           Guatemala    GT           San ~       8164
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6669 9/27/14 NA NA Guatemala GT San Marcos 6438 Tacaná 0.75729 Unknown 15.2422 -92.0738 (15.2422, -92.073800000000006) Landslide Landslide Medium Rain NA 0 2 Univision http://www.kint.com/2014/09/28/lluvias-dejan-tres-muertos-y-cuatro-desaparecidos-en-guatemala/ 43.93245 21.96623
5475 9/7/13 0:13:00 NA Guatemala GT San Marcos 8164 Pajapita 0.96647 NA 14.7254 -92.0335 (14.7254, -92.033500000000004) Landslide Landslide Large Earthquake NA NA 0 www.chinapost.com.tw http://www.chinapost.com.tw/international/americas/2013/09/09/388474/Power-outages.htm 56.06755 71.96622
# Stem-and-leaf display of the distances (default scale).
stem(df_GT$distance)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   7 | 6
##   8 | 
##   8 | 
##   9 | 
##   9 | 7
# Same display with the plot length doubled (scale = 2).
stem(df_GT$distance, scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   7 | 6
##   8 | 
##   8 | 
##   9 | 
##   9 | 7

Tablas de frecuencia

library(questionr)
# One-way frequency table of `distance` with cumulative percentages and a
# trailing "Total" row.
# NOTE(review): the result is stored in a variable called `table`, which
# masks base::table as a value (function calls to table() still resolve).
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.75729 1 50 50 50 50
0.96647 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure (classes and columns) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build the plotting data frame from the frequency table: row names are the
# observed values, column `n` holds the counts.
x <- row.names(table)
y <- table$n
# Drop the last row (the "Total" line). head(., -1) is used instead of
# 1:(length(.) - 1), which would expand to c(1, 0) if only one row existed.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.75729 1
0.96647 1
library(ggplot2)
# Bar chart of the count of each observed distance value; x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept as log(n) / log(2) so rounding is unchanged.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
# NOTE(review): presumably meant to favour an odd class count; when
# n_sturges is already an even integer the result stays even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width is the range divided by the class count, rounded UP to a whole
# number.
# NOTE(review): for ranges well below n_clases this yields w = 1 and fewer
# classes than n_clases; confirm the integer width is intended.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.75729 1.75729
# Assign each distance to its class and tabulate absolute, relative and
# cumulative frequencies.
# include.lowest = TRUE closes the first interval on the left. The first
# break equals min(distance), so with the default (a, b] intervals that
# observation matched no class and was dropped as NA.
# Re-knit after this change so the printed tables include that observation.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.757,1.76] 1 1 1
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(0.757,1.76]": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
# Two-column data frame for plotting: class label (x) and class count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.757,1.76] 1
library(ggplot2)

# Bar chart of the number of observations per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset.
# NOTE(review): the whole data frame is passed in, so the character columns
# come back as all-NA columns in the printed result; consider selecting the
# numeric columns first.
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          5.475000e+03   NA   NA             NA           NA           NA
## max          6.669000e+03   NA   NA             NA           NA           NA
## range        1.194000e+03   NA   NA             NA           NA           NA
## sum          1.214400e+04   NA   NA             NA           NA           NA
## median       6.072000e+03   NA   NA             NA           NA           NA
## mean         6.072000e+03   NA   NA             NA           NA           NA
## SE.mean      5.970000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 7.585604e+03   NA   NA             NA           NA           NA
## var          7.128180e+05   NA   NA             NA           NA           NA
## std.dev      8.442855e+02   NA   NA             NA           NA           NA
## coef.var     1.390457e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 2.000000e+00   NA 2.00000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.00000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.00000000                   NA
## min             NA 6.438000e+03   NA 0.75729000                   NA
## max             NA 8.164000e+03   NA 0.96647000                   NA
## range           NA 1.726000e+03   NA 0.20918000                   NA
## sum             NA 1.460200e+04   NA 1.72376000                   NA
## median          NA 7.301000e+03   NA 0.86188000                   NA
## mean            NA 7.301000e+03   NA 0.86188000                   NA
## SE.mean         NA 8.630000e+02   NA 0.10459000                   NA
## CI.mean.0.95    NA 1.096545e+04   NA 1.32894195                   NA
## var             NA 1.489538e+06   NA 0.02187814                   NA
## std.dev         NA 1.220466e+03   NA 0.14791260                   NA
## coef.var        NA 1.671643e-01   NA 0.17161623                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.00000000  2.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          14.72540000 -9.207380e+01          NA          NA             NA
## max          15.24220000 -9.203350e+01          NA          NA             NA
## range         0.51680000  4.030000e-02          NA          NA             NA
## sum          29.96760000 -1.841073e+02          NA          NA             NA
## median       14.98380000 -9.205365e+01          NA          NA             NA
## mean         14.98380000 -9.205365e+01          NA          NA             NA
## SE.mean       0.25840000  2.015000e-02          NA          NA             NA
## CI.mean.0.95  3.28328330  2.560300e-01          NA          NA             NA
## var           0.13354112  8.120450e-04          NA          NA             NA
## std.dev       0.36543278  2.849640e-02          NA          NA             NA
## coef.var      0.02438853 -3.095630e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   2.000000          NA
## nbr.null                 NA      NA         NA        1   1.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   2.000000          NA
## range                    NA      NA         NA        0   2.000000          NA
## sum                      NA      NA         NA        0   2.000000          NA
## median                   NA      NA         NA        0   1.000000          NA
## mean                     NA      NA         NA        0   1.000000          NA
## SE.mean                  NA      NA         NA       NA   1.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  12.706205          NA
## var                      NA      NA         NA       NA   2.000000          NA
## std.dev                  NA      NA         NA       NA   1.414214          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA   2.0000000    2.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  43.9324500   21.9662250
## max                   NA  56.0675500   71.9662250
## range                 NA  12.1351000   50.0000000
## sum                   NA 100.0000000   93.9324500
## median                NA  50.0000000   46.9662250
## mean                  NA  50.0000000   46.9662250
## SE.mean               NA   6.0675500   25.0000000
## CI.mean.0.95          NA  77.0955326  317.6551184
## var                   NA  73.6303262 1250.0000000
## std.dev               NA   8.5808115   35.3553391
## coef.var              NA   0.1716162    0.7527822
# Horizontal box plot of `data` (the series built from df_GT$distance above).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Sololá (Guatemala)

library(readr)
library(knitr)
# Reload the full landslide catalog from the course repository; `df` was
# reassigned above to a small plotting data frame, so it has to be read again.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep the rows whose country is
# Guatemala (rows with a missing country are excluded) and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_GT <- df[which(df$country_name == "Guatemala"), ]
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
165 8/9/07 NA NA Guatemala GT Guatemala 47247 San José Pinula 4.74385 NA 14.5667 -90.4500 (14.566700000000001, -90.45) Landslide Mudslide Medium Rain NA NA 5 International Herald http://www.iht.com/articles/ap/2007/08/09/america/LA-GEN-Guatemala-Deadly-Mudslide.php
198 8/21/07 NA NA Guatemala GT Alta Verapaz 2006 Lanquín 13.39817 NA 15.6046 -90.0853 (15.6046, -90.085300000000004) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
199 8/21/07 NA NA Guatemala GT Izabal 18994 Morales 12.55184 NA 15.5163 -88.9286 (15.516299999999999, -88.928600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Dean NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/RWB.NSF/db900SID/EDIS-76BSG6?OpenDocument
277 9/22/07 NA NA Guatemala GT Guatemala 994938 Guatemala City 2.79113 NA 14.6229 -90.5316 (14.6229, -90.531599999999997) Landslide Mudslide Medium Rain NA NA 3 Fox News http://www.foxnews.com/story/0,2933,297714,00.html
563 6/1/08 NA NA Guatemala GT Escuintla 31329 Palín 3.10150 NA 14.4226 -90.6755 (14.422599999999999, -90.6755) Landslide Mudslide Medium Tropical cyclone Tropical Storm Arthur NA 1 NA http://209.85.215.104/search?q=cache:QU_lPxNfk78J:www.plenglish.com/article.asp?ID=%7B1D4A74F7-CDCA-49D0-ABD4-D2E0FD9D2130%7D&language=EN+Colom+said+the+declaration+came+after+a+death+in+Palin+and+40+houses+partially&hl=en&ct=clnk&cd=1&gl=us&c
591 6/18/08 NA NA Guatemala GT Guatemala 994938 Guatemala City 3.12614 NA 14.6510 -90.5403 (14.651, -90.540300000000002) Landslide Complex Medium Rain NA NA 8 NA http://cnnwire.blogs.cnn.com/2008/06/20/8-dead-in-rough-weather-in-guatemala/
library(dplyr)
# Keep only the Guatemalan events recorded in the state "Sololá".
# The filter reads from the full catalog `df`, so the country condition is
# stated explicitly; filtering on `state` alone would also pick up a
# same-named state from any other country in the catalog.
df_GT <- subset(df, country_name == "Guatemala" & state == "Sololá")
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1920 5/30/10 NA NA Guatemala GT Sololá 1422 Santa Cruz La Laguna 6.00513 Urban area 14.6918 -91.1168 (14.691800000000001, -91.116799999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 14 Mayan Trip http://mayantrip.com/lake-atitlan-guatemala-agatha-update-san-antonio-polopo
1922 5/29/10 Night NA Guatemala GT Sololá 9681 San Pedro La Laguna 0.50611 Natural slope 14.6685 -91.2697 (14.6685, -91.2697) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 1 by Melissa Joy http://melissajoy.wordpress.com/2010/06/01/mudslide/
2021 6/27/10 NA NA Guatemala GT Sololá 6186 San Pablo La Laguna 3.85753 Unknown 14.7801 -91.2764 (14.780099999999999, -91.276399999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Alex NA 2 Latin American Herald Tribune http://www.laht.com/article.asp?ArticleId=359405&CategoryId=23558
2353 8/28/10 NA NA Guatemala GT Sololá 2605 San Andrés Semetabaj 0.00359 NA 14.7598 -91.1287 (14.7598, -91.128699999999995) Landslide Landslide Medium Downpour NA NA 0 NA NA
2384 9/4/10 NA NA Guatemala GT Sololá 27690 Nahualá 3.04642 NA 14.8231 -91.3227 (14.8231, -91.322699999999998) Landslide Landslide Large Downpour NA NA 25 NA http://www.upi.com/Top_News/World-News/2010/09/05/Rain-blamed-for-21-deaths-in-Guatemala/UPI-36581283666884/
6586 10/20/14 NA NA Guatemala GT Sololá 9986 Zunil 7.03115 Natural slope 14.7836 -91.1836 (14.7836, -91.183599999999998) Landslide Landslide Small Rain NA 0 6 El Aragueno http://elaragueno.com.ve/deslizamiento-en-guatemala-deja-tres-muertos-y-tres-desaparecidos/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per row, coloured by city, height = distance.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distances of all cities piled into one bar per state.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_GT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() fails loudly if scales is missing; require() would only return
# FALSE and let percent() fail later.
library(scales)

# Share of the total distance per row (in percent) and the mid-point of each
# slice along the stacked axis, used to position the labels.
df_GT <- df_GT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" replaces "Set4", which is not a ColorBrewer
# palette name and made ggplot2 warn and substitute a default palette.
ggplot(df_GT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly time series (12 per year) starting in 2008.
# NOTE(review): values are taken in the current row order of df_GT (sorted
# by city above), not by the `date` column, and the 2008 start is a literal,
# not read from the data. Confirm this index is intended.
data <- ts(df_GT[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
7.03115
6.00513
0.50611
3.85753
3.18658
0.00359
# Line plot of the series with custom title and axis labels.
# NOTE(review): colour = "green" inside labs() only sets a legend title for
# a colour aesthetic; it presumably was meant to colour the line. Verify.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so the Pareto chart labels each bar with its city.
distance <- setNames(df_GT$distance, df_GT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                       
## Pareto chart analysis for distance
##                           Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Zunil                  7.03115000   7.03115000  28.05305349  28.05305349
##   Santa Cruz La Laguna   6.00513000  13.03628000  23.95941391  52.01246740
##   San Pablo La Laguna    3.85753000  16.89381000  15.39086713  67.40333454
##   San Antonio Palopó     3.18658000  20.08039000  12.71389448  80.11722902
##   Nahualá                3.04642000  23.12681000  12.15468070  92.27190972
##   San Andrés Semetabaj   0.75685000  23.88366000   3.01969856  95.29160828
##   San Andrés Semetabaj   0.67040000  24.55406000   2.67477825  97.96638653
##   San Pedro La Laguna    0.50611000  25.06017000   2.01929000  99.98567653
##   San Andrés Semetabaj   0.00359000  25.06376000   0.01432347 100.00000000
# Stem-and-leaf display of the landslide distances in the current subset.
stem(df_GT$distance)
## 
##   The decimal point is at the |
## 
##   0 | 0578
##   2 | 029
##   4 | 
##   6 | 00
# Preview the subset, which now also carries the `prop` and `ypos` columns.
head(df_GT)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state  population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  6586 10/20/14 <NA>  <NA>           Guatemala    GT           Sololá       9986
## 2  1920 5/30/10  <NA>  <NA>           Guatemala    GT           Sololá       1422
## 3  1922 5/29/10  Night <NA>           Guatemala    GT           Sololá       9681
## 4  2021 6/27/10  <NA>  <NA>           Guatemala    GT           Sololá       6186
## 5  7416 9/27/15  <NA>  <NA>           Guatemala    GT           Sololá       3588
## 6  2353 8/28/10  <NA>  <NA>           Guatemala    GT           Sololá       2605
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_GT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6586 10/20/14 NA NA Guatemala GT Sololá 9986 Zunil 7.03115 Natural slope 14.7836 -91.1836 (14.7836, -91.183599999999998) Landslide Landslide Small Rain NA 0 6 El Aragueno http://elaragueno.com.ve/deslizamiento-en-guatemala-deja-tres-muertos-y-tres-desaparecidos/ 28.0530535 14.02653
1920 5/30/10 NA NA Guatemala GT Sololá 1422 Santa Cruz La Laguna 6.00513 Urban area 14.6918 -91.1168 (14.691800000000001, -91.116799999999998) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 14 Mayan Trip http://mayantrip.com/lake-atitlan-guatemala-agatha-update-san-antonio-polopo 23.9594139 40.03276
1922 5/29/10 Night NA Guatemala GT Sololá 9681 San Pedro La Laguna 0.50611 Natural slope 14.6685 -91.2697 (14.6685, -91.2697) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Agatha NA 1 by Melissa Joy http://melissajoy.wordpress.com/2010/06/01/mudslide/ 2.0192900 53.02211
2021 6/27/10 NA NA Guatemala GT Sololá 6186 San Pablo La Laguna 3.85753 Unknown 14.7801 -91.2764 (14.780099999999999, -91.276399999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Alex NA 2 Latin American Herald Tribune http://www.laht.com/article.asp?ArticleId=359405&CategoryId=23558 15.3908671 61.72719
7416 9/27/15 NA NA Guatemala GT Sololá 3588 San Antonio Palopó 3.18658 Above road 14.7038 -91.0873 (14.703799999999999, -91.087299999999999) Landslide Landslide Medium Rain NA 0 0 CONRED http://www.redhum.org/documento_detail/conred-boletin-informativo-no-4038-lluvias-del-domingo-generaron-22-incidentes-en-6-departamentos 12.7138945 75.77957
2353 8/28/10 NA NA Guatemala GT Sololá 2605 San Andrés Semetabaj 0.00359 NA 14.7598 -91.1287 (14.7598, -91.128699999999995) Landslide Landslide Medium Downpour NA NA 0 NA NA 0.0143235 82.14368
# Stem-and-leaf display of the distances (default scale).
stem(df_GT$distance)
## 
##   The decimal point is at the |
## 
##   0 | 0578
##   2 | 029
##   4 | 
##   6 | 00
# Same display with scale = 2, which stretches the plot over more stems.
stem(df_GT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 0578
##   1 | 
##   2 | 
##   3 | 029
##   4 | 
##   5 | 
##   6 | 0
##   7 | 0

Tablas de frecuencia

library(questionr)
# One-way frequency table of the raw distances with cumulative percentages
# and a final "Total" row. NOTE: the name `table` shadows base::table(); it is
# kept because later statements refer to it.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.00359 1 11.1 11.1 11.1 11.1
0.50611 1 11.1 11.1 22.2 22.2
0.6704 1 11.1 11.1 33.3 33.3
0.75685 1 11.1 11.1 44.4 44.4
3.04642 1 11.1 11.1 55.6 55.6
3.18658 1 11.1 11.1 66.7 66.7
3.85753 1 11.1 11.1 77.8 77.8
6.00513 1 11.1 11.1 88.9 88.9
7.03115 1 11.1 11.1 100.0 100.0
Total 9 100.0 100.0 100.0 100.0
# Show the class and column types of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  10 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 9
##  $ %      : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ val%   : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ %cum   : num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
##  $ val%cum: num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
# Build a two-column data frame (value, count) from the frequency table,
# leaving out its final "Total" row.
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)],
# returns an empty vector (not the first element) when v has length 1.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.00359 1
0.50611 1
0.6704 1
0.75685 1
3.04642 1
3.18658 1
3.85753 1
6.00513 1
7.03115 1
library(ggplot2)
# One bar per distinct distance value; the x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n). When the rounded-up
# value is even, the rounded-down value is used instead.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the number of classes, rounded up
# to a whole number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and run one width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.00359 2.00359 4.00359 6.00359 8.00359
# Assign every distance to its class. cut() builds right-closed intervals
# (a, b] and the first break equals min(distance), so without
# include.lowest = TRUE the smallest observation matches no interval, becomes
# NA and is silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.00359,2] 3 0.375 3
(2,4] 3 0.375 6
(4,6] 0 0.000 6
(6,8] 2 0.250 8
# Show the column types of the grouped frequency table.
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.00359,2]",..: 1 2 3 4
##  $ Freq    : int  3 3 0 2
##  $ Rel_Freq: num  0.375 0.375 0 0.25
##  $ Cum_Freq: int  3 6 6 8
# Two-column copy of the grouped table (class label, count) for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.00359,2] 3
(2,4] 3
(4,6] 0
(6,8] 2
library(ggplot2)

# Bar chart of the count in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_GT.
stat.desc(x = df_GT)
##                        id date time continent_code country_name country_code
## nbr.val      9.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.920000e+03   NA   NA             NA           NA           NA
## max          7.436000e+03   NA   NA             NA           NA           NA
## range        5.516000e+03   NA   NA             NA           NA           NA
## sum          3.945500e+04   NA   NA             NA           NA           NA
## median       2.384000e+03   NA   NA             NA           NA           NA
## mean         4.383889e+03   NA   NA             NA           NA           NA
## SE.mean      9.005946e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.076775e+03   NA   NA             NA           NA           NA
## var          7.299636e+06   NA   NA             NA           NA           NA
## std.dev      2.701784e+03   NA   NA             NA           NA           NA
## coef.var     6.162984e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 9.000000e+00   NA  9.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.422000e+03   NA  0.0035900                   NA
## max             NA 2.769000e+04   NA  7.0311500                   NA
## range           NA 2.626800e+04   NA  7.0275600                   NA
## sum             NA 6.636800e+04   NA 25.0637600                   NA
## median          NA 3.588000e+03   NA  3.0464200                   NA
## mean            NA 7.374222e+03   NA  2.7848622                   NA
## SE.mean         NA 2.749460e+03   NA  0.8440434                   NA
## CI.mean.0.95    NA 6.340266e+03   NA  1.9463675                   NA
## var             NA 6.803577e+07   NA  6.4116826                   NA
## std.dev         NA 8.248380e+03   NA  2.5321301                   NA
## coef.var        NA 1.118542e+00   NA  0.9092479                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      9.000000e+00  9.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.466850e+01 -9.132270e+01          NA          NA             NA
## max          1.482310e+01 -9.108730e+01          NA          NA             NA
## range        1.546000e-01  2.354000e-01          NA          NA             NA
## sum          1.327115e+02 -8.206534e+02          NA          NA             NA
## median       1.475680e+01 -9.113440e+01          NA          NA             NA
## mean         1.474572e+01 -9.118371e+01          NA          NA             NA
## SE.mean      1.648067e-02  2.814259e-02          NA          NA             NA
## CI.mean.0.95 3.800449e-02  6.489692e-02          NA          NA             NA
## var          2.444512e-03  7.128046e-03          NA          NA             NA
## std.dev      4.944201e-02  8.442776e-02          NA          NA             NA
## coef.var     3.352973e-03 -9.259083e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4   9.000000          NA
## nbr.null                 NA      NA         NA        4   4.000000          NA
## nbr.na                   NA      NA         NA        5   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  25.000000          NA
## range                    NA      NA         NA        0  25.000000          NA
## sum                      NA      NA         NA        0  48.000000          NA
## median                   NA      NA         NA        0   1.000000          NA
## mean                     NA      NA         NA        0   5.333333          NA
## SE.mean                  NA      NA         NA        0   2.901149          NA
## CI.mean.0.95             NA      NA         NA        0   6.690062          NA
## var                      NA      NA         NA        0  75.750000          NA
## std.dev                  NA      NA         NA        0   8.703448          NA
## coef.var                 NA      NA         NA      NaN   1.631896          NA
##              source_link         prop        ypos
## nbr.val               NA   9.00000000   9.0000000
## nbr.null              NA   0.00000000   0.0000000
## nbr.na                NA   0.00000000   0.0000000
## min                   NA   0.01432347  14.0265267
## max                   NA  28.05305349  93.9226596
## range                 NA  28.03873002  79.8961329
## sum                   NA 100.00000000 590.4782044
## median                NA  12.15468070  75.7795718
## mean                  NA  11.11111111  65.6086894
## SE.mean               NA   3.36758473   8.6731921
## CI.mean.0.95          NA   7.76566431  20.0004168
## var                   NA 102.06564205 677.0183450
## std.dev               NA  10.10275418  26.0195762
## coef.var              NA   0.90924788   0.3965873
# Horizontal box plot of `data` (defined earlier in the document).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Jamaica

library(readr)
library(knitr)
# Load the catalogue from its GitHub URL.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep only the rows for Jamaica.
colnames(df)[c(7, 9)] <- c("state", "city")
df_JA <- df[which(df$country_name == "Jamaica"), ]
knitr::kable(head(df_JA, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Bars placed side by side, coloured by state.
ggplot(df_JA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Bars stacked on top of each other, coloured by state.
ggplot(df_JA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar drawn in polar coordinates, i.e. a pie chart.
ggplot(df_JA, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each record (in percent) and the
# mid-point of its slice, used to position the label.
df_JA <- df_JA %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with a percentage label per slice.
# "Set4" is not a ColorBrewer palette, so scale_fill_brewer() warned
# "Unknown palette Set4"; "Set3" is an existing qualitative palette.
ggplot(df_JA, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a series with
# 12 observations per period starting at 2008.
data <- ts(data = df_JA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
4.57363
0.21825
6.51940
1.71217
4.53632
1.24727
# Line plot of the series.
# NOTE(review): `colour = "green"` inside labs() only supplies a label for a
# colour legend; it does not colour the line. Confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances (names = state) used as input for the chart.
distance <- setNames(df_JA$distance, df_JA$state)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance)),
  main = "Grafico de Pareto por estados"
)

##                  
## Pareto chart analysis for distance
##                     Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Saint Andrew     10.8446700  10.8446700   7.3695199    7.3695199
##   Saint Andrew      9.8305100  20.6751800   6.6803451   14.0498650
##   Clarendon         9.5395300  30.2147100   6.4826090   20.5324740
##   Saint Mary        8.7463500  38.9610600   5.9436018   26.4760758
##   Portland          7.8126900  46.7737500   5.3091310   31.7852069
##   Portland          7.7902700  54.5640200   5.2938955   37.0791023
##   Portland          7.5231700  62.0871900   5.1123871   42.1914894
##   Saint Catherine   6.7126900  68.7998800   4.5616236   46.7531130
##   Saint Thomas      6.5194000  75.3192800   4.4302729   51.1833859
##   Saint Andrew      5.9873100  81.3065900   4.0686900   55.2520758
##   Saint Catherine   5.8653000  87.1718900   3.9857778   59.2378536
##   Saint Catherine   5.4638100  92.6357000   3.7129444   62.9507980
##   Portland          5.0235100  97.6592100   3.4137375   66.3645355
##   Portland          4.6873200 102.3465300   3.1852788   69.5498143
##   St. Elizabeth     4.5736300 106.9201600   3.1080205   72.6578348
##   Saint Thomas      4.5363200 111.4564800   3.0826664   75.7405013
##   Portland          4.4694200 115.9259000   3.0372044   78.7777057
##   Portland          4.1345400 120.0604400   2.8096359   81.5873416
##   Saint Ann         3.9653300 124.0257700   2.6946489   84.2819905
##   Saint Mary        3.7758000 127.8015700   2.5658534   86.8478439
##   Portland          3.6079900 131.4095600   2.4518177   89.2996616
##   Portland          3.1793000 134.5888600   2.1605005   91.4601621
##   Portland          2.7888500 137.3777100   1.8951693   93.3553313
##   Saint Andrew      2.1387800 139.5164900   1.4534128   94.8087441
##   Saint Thomas      1.7121700 141.2286600   1.1635090   95.9722531
##   Saint Mary        1.2472700 142.4759300   0.8475851   96.8198382
##   Saint Ann         1.0819600 143.5578900   0.7352483   97.5550865
##   Saint Andrew      0.9651400 144.5230300   0.6558631   98.2109496
##   Portland          0.7177300 145.2407600   0.4877350   98.6986846
##   Portland          0.5959900 145.8367500   0.4050063   99.1036910
##   Clarendon         0.5618600 146.3986100   0.3818132   99.4855042
##   Saint Ann         0.5388600 146.9374700   0.3661835   99.8516877
##   Saint Thomas      0.2182500 147.1557200   0.1483123  100.0000000
# Stem-and-leaf display of the distances (default scale).
stem(df_JA$distance)
## 
##   The decimal point is at the |
## 
##    0 | 256670127
##    2 | 18268
##    4 | 015567059
##    6 | 057588
##    8 | 758
##   10 | 8
# Print the first six rows; df_JA now also carries the prop and ypos columns.
head(df_JA, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   304 10/12/07 <NA>  <NA>           Jamaica      JM           St. ~       1371
## 2   314 10/17/07 <NA>  <NA>           Jamaica      JM           Sain~       2382
## 3   339 10/31/07 <NA>  <NA>           Jamaica      JM           Sain~       2634
## 4   774 9/4/08   <NA>  <NA>           Jamaica      JM           Sain~       2382
## 5  1760 4/18/10  <NA>  <NA>           Jamaica      JM           Sain~       2634
## 6  2517 9/29/10  <NA>  <NA>           Jamaica      JM           Sain~       2046
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same six rows rendered as a formatted table.
knitr::kable(head(df_JA, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/ 3.1080205 1.554010
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/ 0.1483123 3.182177
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/ 4.4302729 5.471469
774 9/4/08 NA NA Jamaica JM Saint Thomas 2382 Bath 1.71217 NA 17.9480 -76.3330 (17.948, -76.332999999999998) Landslide Complex Medium Tropical cyclone Tropical Storm Gustav NA NA NA http://www.jamaica-gleaner.com/gleaner/20080905/lead/lead4.html 1.1635090 8.268360
1760 4/18/10 NA NA Jamaica JM Saint Thomas 2634 Easington 4.53632 NA 17.9647 -76.5835 (17.964700000000001, -76.583500000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html 3.0826664 10.391448
2517 9/29/10 NA NA Jamaica JM Saint Mary 2046 Richmond 1.24727 NA 18.2544 -76.8865 (18.2544, -76.886499999999998) Landslide Landslide Medium Downpour NA NA 0 NA http://go-jamaica.com/news/read_article.php?id=22964 0.8475851 12.356574
# Stem-and-leaf display of the distances (default scale).
stem(df_JA$distance)
## 
##   The decimal point is at the |
## 
##    0 | 256670127
##    2 | 18268
##    4 | 015567059
##    6 | 057588
##    8 | 758
##   10 | 8
# Same display with scale = 2, which stretches the plot over more stems.
stem(df_JA$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 25667
##    1 | 0127
##    2 | 18
##    3 | 268
##    4 | 015567
##    5 | 059
##    6 | 057
##    7 | 588
##    8 | 7
##    9 | 58
##   10 | 8

Tablas de frecuencia

library(questionr)
# One-way frequency table of the raw distances with cumulative percentages
# and a final "Total" row. NOTE: the name `table` shadows base::table(); it is
# kept because later statements refer to it.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.21825 1 3 3 3.0 3.0
0.53886 1 3 3 6.1 6.1
0.56186 1 3 3 9.1 9.1
0.59599 1 3 3 12.1 12.1
0.71773 1 3 3 15.2 15.2
0.96514 1 3 3 18.2 18.2
1.08196 1 3 3 21.2 21.2
1.24727 1 3 3 24.2 24.2
1.71217 1 3 3 27.3 27.3
2.13878 1 3 3 30.3 30.3
2.78885 1 3 3 33.3 33.3
3.1793 1 3 3 36.4 36.4
3.60799 1 3 3 39.4 39.4
3.7758 1 3 3 42.4 42.4
3.96533 1 3 3 45.5 45.5
4.13454 1 3 3 48.5 48.5
4.46942 1 3 3 51.5 51.5
4.53632 1 3 3 54.5 54.5
4.57363 1 3 3 57.6 57.6
4.68732 1 3 3 60.6 60.6
5.02351 1 3 3 63.6 63.6
5.46381 1 3 3 66.7 66.7
5.8653 1 3 3 69.7 69.7
5.98731 1 3 3 72.7 72.7
6.5194 1 3 3 75.8 75.8
6.71269 1 3 3 78.8 78.8
7.52317 1 3 3 81.8 81.8
7.79027 1 3 3 84.8 84.8
7.81269 1 3 3 87.9 87.9
8.74635 1 3 3 90.9 90.9
9.53953 1 3 3 93.9 93.9
9.83051 1 3 3 97.0 97.0
10.84467 1 3 3 100.0 100.0
Total 33 100 100 100.0 100.0
# Show the class and column types of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  34 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ val%   : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ %cum   : num  3 6.1 9.1 12.1 15.2 18.2 21.2 24.2 27.3 30.3 ...
##  $ val%cum: num  3 6.1 9.1 12.1 15.2 18.2 21.2 24.2 27.3 30.3 ...
# Build a two-column data frame (value, count) from the frequency table,
# leaving out its final "Total" row.
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)],
# returns an empty vector (not the first element) when v has length 1.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.21825 1
0.53886 1
0.56186 1
0.59599 1
0.71773 1
0.96514 1
1.08196 1
1.24727 1
1.71217 1
2.13878 1
2.78885 1
3.1793 1
3.60799 1
3.7758 1
3.96533 1
4.13454 1
4.46942 1
4.53632 1
4.57363 1
4.68732 1
5.02351 1
5.46381 1
5.8653 1
5.98731 1
6.5194 1
6.71269 1
7.52317 1
7.79027 1
7.81269 1
8.74635 1
9.53953 1
9.83051 1
10.84467 1
library(ggplot2)

# One bar per distinct distance value; the x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n). When the rounded-up
# value is even, the rounded-down value is used instead.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the number of classes, rounded up
# to a whole number.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and run one width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.21825  2.21825  4.21825  6.21825  8.21825 10.21825 12.21825
# Assign every distance to its class. cut() builds right-closed intervals
# (a, b] and the first break equals min(distance), so without
# include.lowest = TRUE the smallest observation matches no interval, becomes
# NA and is silently left out of the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.218,2.22] 9 0.28125 9
(2.22,4.22] 6 0.18750 15
(4.22,6.22] 8 0.25000 23
(6.22,8.22] 5 0.15625 28
(8.22,10.2] 3 0.09375 31
(10.2,12.2] 1 0.03125 32
# Show the column types of the grouped frequency table.
str(Freq_table)
## 'data.frame':    6 obs. of  4 variables:
##  $ distance: Factor w/ 6 levels "(0.218,2.22]",..: 1 2 3 4 5 6
##  $ Freq    : int  9 6 8 5 3 1
##  $ Rel_Freq: num  0.2812 0.1875 0.25 0.1562 0.0938 ...
##  $ Cum_Freq: int  9 15 23 28 31 32
# Two-column copy of the grouped table (class label, count) for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.218,2.22] 9
(2.22,4.22] 6
(4.22,6.22] 8
(6.22,8.22] 5
(8.22,10.2] 3
(10.2,12.2] 1
library(ggplot2)

# Bar chart of the count in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_JA.
stat.desc(x = df_JA)
##                        id date time continent_code country_name country_code
## nbr.val      3.300000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.740000e+02   NA   NA             NA           NA           NA
## max          7.474000e+03   NA   NA             NA           NA           NA
## range        7.300000e+03   NA   NA             NA           NA           NA
## sum          8.120500e+04   NA   NA             NA           NA           NA
## median       2.233000e+03   NA   NA             NA           NA           NA
## mean         2.460758e+03   NA   NA             NA           NA           NA
## SE.mean      3.719434e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 7.576240e+02   NA   NA             NA           NA           NA
## var          4.565283e+06   NA   NA             NA           NA           NA
## std.dev      2.136652e+03   NA   NA             NA           NA           NA
## coef.var     8.682905e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 3.300000e+01   NA  33.0000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 1.088000e+03   NA   0.2182500                   NA
## max             NA 4.475500e+04   NA  10.8446700                   NA
## range           NA 4.366700e+04   NA  10.6264200                   NA
## sum             NA 2.342270e+05   NA 147.1557200                   NA
## median          NA 2.757000e+03   NA   4.4694200                   NA
## mean            NA 7.097788e+03   NA   4.4592642                   NA
## SE.mean         NA 1.474547e+03   NA   0.5248336                   NA
## CI.mean.0.95    NA 3.003553e+03   NA   1.0690510                   NA
## var             NA 7.175151e+07   NA   9.0898595                   NA
## std.dev         NA 8.470626e+03   NA   3.0149394                   NA
## coef.var        NA 1.193418e+00   NA   0.6761069                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      3.300000e+01  3.300000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.793840e+01 -7.774050e+01          NA          NA             NA
## max          1.843130e+01 -7.627810e+01          NA          NA             NA
## range        4.929000e-01  1.462400e+00          NA          NA             NA
## sum          5.982112e+02 -2.532567e+03          NA          NA             NA
## median       1.812570e+01 -7.668520e+01          NA          NA             NA
## mean         1.812761e+01 -7.674444e+01          NA          NA             NA
## SE.mean      2.204383e-02  6.043166e-02          NA          NA             NA
## CI.mean.0.95 4.490182e-02  1.230953e-01          NA          NA             NA
## var          1.603571e-02  1.205155e-01          NA          NA             NA
## std.dev      1.266322e-01  3.471534e-01          NA          NA             NA
## coef.var     6.985596e-03 -4.523499e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4 23.0000000          NA
## nbr.null                 NA      NA         NA        4 20.0000000          NA
## nbr.na                   NA      NA         NA       29 10.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  7.0000000          NA
## range                    NA      NA         NA        0  7.0000000          NA
## sum                      NA      NA         NA        0 10.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.4347826          NA
## SE.mean                  NA      NA         NA        0  0.3132520          NA
## CI.mean.0.95             NA      NA         NA        0  0.6496449          NA
## var                      NA      NA         NA        0  2.2569170          NA
## std.dev                  NA      NA         NA        0  1.5023039          NA
## coef.var                 NA      NA         NA      NaN  3.4552990          NA
##              source_link        prop         ypos
## nbr.val               NA  33.0000000   33.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.1483123    1.5540103
## max                   NA   7.3695199   99.8090934
## range                 NA   7.2212076   98.2550831
## sum                   NA 100.0000000 1613.0748027
## median                NA   3.0372044   46.7168011
## mean                  NA   3.0303030   48.8810546
## SE.mean               NA   0.3566518    5.3099807
## CI.mean.0.95          NA   0.7264760   10.8160768
## var                   NA   4.1976176  930.4645430
## std.dev               NA   2.0488088   30.5035169
## coef.var              NA   0.6761069    0.6240356
# Horizontal box plot of the `data` series built from df_JA$distance.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Clarendon (Jamaica)

library(readr)
library(knitr)
# Load the catalogue from its GitHub URL.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep only the rows for Jamaica.
colnames(df)[c(7, 9)] <- c("state", "city")
df_JA <- df[which(df$country_name == "Jamaica"), ]
knitr::kable(head(df_JA, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# Narrow the Jamaican subset to the state of Clarendon. Filtering df_JA rather
# than the full catalogue keeps the country restriction, so a state with the
# same name in another country cannot be picked up.
df_JA <- subset(df_JA, state == "Clarendon")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1761 4/18/10 NA NA Jamaica JM Clarendon 4514 Chapelton 9.53953 NA 18.1437 -77.3310 (18.143699999999999, -77.331000000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html
1762 4/18/10 NA NA Jamaica JM Clarendon 44755 May Pen 0.56186 NA 17.9693 -77.2433 (17.9693, -77.243300000000005) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Bars placed side by side, coloured by city.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Bars stacked on top of each other, coloured by city.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar drawn in polar coordinates, i.e. a pie chart.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each record (in percent) and the
# mid-point of its slice, used to position the label.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with a percentage label per slice.
# "Set4" is not a ColorBrewer palette, so scale_fill_brewer() warned
# "Unknown palette Set4"; "Set3" is an existing qualitative palette.
ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column as a series with 12 observations per time unit,
# labelled as starting in 2008, and show its first values.
# NOTE(review): df_JA was last sorted by city rather than by date, so the
# series order is presumably not chronological; confirm this is intended.
data <- ts(data = df_JA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.56186
9.53953
# Line plot of the series with a custom title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only sets the label of the
# colour aesthetic; it presumably does not draw the line in green. Confirm intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so each Pareto bar is labelled with its city.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##            
## Pareto chart analysis for distance
##              Frequency  Cum.Freq. Percentage Cum.Percent.
##   Chapelton   9.539530   9.539530  94.437795    94.437795
##   May Pen     0.561860  10.101390   5.562205   100.000000
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 6
##   2 | 
##   4 | 
##   6 | 
##   8 | 5
head(df_JA)
## # A tibble: 2 x 25
##      id date    time  continent_code country_name country_code state     population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  1762 4/18/10 <NA>  <NA>           Jamaica      JM           Clarendon      44755
## 2  1761 4/18/10 <NA>  <NA>           Jamaica      JM           Clarendon       4514
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1762 4/18/10 NA NA Jamaica JM Clarendon 44755 May Pen 0.56186 NA 17.9693 -77.2433 (17.9693, -77.243300000000005) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html 5.562205 2.781102
1761 4/18/10 NA NA Jamaica JM Clarendon 4514 Chapelton 9.53953 NA 18.1437 -77.3310 (18.143699999999999, -77.331000000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html 94.437795 52.781102
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 6
##   2 | 
##   4 | 
##   6 | 
##   8 | 5
stem(df_JA$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 6
##   1 | 
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 
##   9 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the distinct distance values with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in a variable called `table`, which
# masks the name of base::table(); later chunks rely on this variable name.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.56186 1 50 50 50 50
9.53953 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build the plotting frame from the frequency table, dropping its last row
# (the "Total" line). head(., -1) is used instead of 1:(length(x) - 1), which
# would mis-index when only the total row is present, and no global called
# `names` is created.
freq_labels <- head(row.names(table), -1)
freq_counts <- head(table$n, -1)
df <- data.frame(x = freq_labels, y = freq_counts)
knitr::kable(df)
x y
0.56186 1
9.53953 1
library(ggplot2)
# One bar per distinct distance value; x labels rotated to stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and run up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.56186  5.56186 10.56186
# include.lowest = TRUE closes the first class on the left. Without it the
# minimum distance (which equals the first break) lies outside every (a, b]
# class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.562,5.56] 0 0 0
(5.56,10.6] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.562,5.56]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column frame (class label, count) used by the bar plot.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.562,5.56] 0
(5.56,10.6] 1
library(ggplot2)

# Bar chart of the class counts.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_JA; the non-numeric columns
# come out as NA in the printed result.
stat.desc(df_JA)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.761000e+03   NA   NA             NA           NA           NA
## max          1.762000e+03   NA   NA             NA           NA           NA
## range        1.000000e+00   NA   NA             NA           NA           NA
## sum          3.523000e+03   NA   NA             NA           NA           NA
## median       1.761500e+03   NA   NA             NA           NA           NA
## mean         1.761500e+03   NA   NA             NA           NA           NA
## SE.mean      5.000000e-01   NA   NA             NA           NA           NA
## CI.mean.0.95 6.353102e+00   NA   NA             NA           NA           NA
## var          5.000000e-01   NA   NA             NA           NA           NA
## std.dev      7.071068e-01   NA   NA             NA           NA           NA
## coef.var     4.014231e-04   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 4.514000e+03   NA  0.561860                   NA
## max             NA 4.475500e+04   NA  9.539530                   NA
## range           NA 4.024100e+04   NA  8.977670                   NA
## sum             NA 4.926900e+04   NA 10.101390                   NA
## median          NA 2.463450e+04   NA  5.050695                   NA
## mean            NA 2.463450e+04   NA  5.050695                   NA
## SE.mean         NA 2.012050e+04   NA  4.488835                   NA
## CI.mean.0.95    NA 2.556552e+05   NA 57.036057                   NA
## var             NA 8.096690e+08   NA 40.299279                   NA
## std.dev         NA 2.845468e+04   NA  6.348171                   NA
## coef.var        NA 1.155075e+00   NA  1.256891                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          17.969300000 -7.733100e+01          NA          NA             NA
## max          18.143700000 -7.724330e+01          NA          NA             NA
## range         0.174400000  8.770000e-02          NA          NA             NA
## sum          36.113000000 -1.545743e+02          NA          NA             NA
## median       18.056500000 -7.728715e+01          NA          NA             NA
## mean         18.056500000 -7.728715e+01          NA          NA             NA
## SE.mean       0.087200000  4.385000e-02          NA          NA             NA
## CI.mean.0.95  1.107981053  5.571671e-01          NA          NA             NA
## var           0.015207680  3.845645e-03          NA          NA             NA
## std.dev       0.123319423  6.201326e-02          NA          NA             NA
## coef.var      0.006829642 -8.023748e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          2          NA
## nbr.null                 NA      NA         NA        0          2          NA
## nbr.na                   NA      NA         NA        2          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    5.562205    2.781102
## max                   NA   94.437795   52.781102
## range                 NA   88.875590   50.000000
## sum                   NA  100.000000   55.562205
## median                NA   50.000000   27.781102
## mean                  NA   50.000000   27.781102
## SE.mean               NA   44.437795   25.000000
## CI.mean.0.95          NA  564.635724  317.655118
## var                   NA 3949.435284 1250.000000
## std.dev               NA   62.844533   35.355339
## coef.var              NA    1.256891    1.272640
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Portland (Jamaica)

library(readr)
library(knitr)
# Download the landslide catalog.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the script.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the Jamaican records and preview them.
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# Filter on country as well as parish: `state` names are not unique across
# countries in this catalog (e.g. "Saint Andrew" exists in both Jamaica and
# Dominica), so matching on `state` alone can pull in foreign rows.
df_JA <- subset(df, country_name == "Jamaica" & state == "Portland")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
1391 1/10/10 NA NA Jamaica JM Portland 1119 Moore Town 5.02351 NA 18.1167 -76.4500 (18.116700000000002, -76.45) Landslide Landslide Medium Downpour NA NA 0 NA http://go-jamaica.com/news/read_article.php?id=15895
1763 4/18/10 NA NA Jamaica JM Portland 2757 Buff Bay 2.78885 NA 18.2106 -76.6735 (18.210599999999999, -76.673500000000004) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html
1764 4/18/10 NA NA Jamaica JM Portland 14400 Port Antonio 4.46942 NA 18.1865 -76.5029 (18.186499999999999, -76.502899999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://allafrica.com/stories/201004200719.html
2418 9/13/10 Night NA Jamaica JM Portland 1119 Moore Town 0.59599 NA 18.0723 -76.4254 (18.072299999999998, -76.425399999999996) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100916/news/news4.html
2511 9/28/10 NA NA Jamaica JM Portland 1119 Moore Town 7.81269 NA 18.0083 -76.4394 (18.008299999999998, -76.439400000000006) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaicaobserver.com/news/More-floods-in-St-Thomas

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged (side-by-side) bars of distance per state, filled by city.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of distance per state, one coloured segment per city.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped into polar coordinates on y.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Percentage of the total distance contributed by each row (`prop`) and the
# mid-point of its slice (`ypos`), used to position the percentage label.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and raised "Unknown palette Set4";
  # "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column as a series with 12 observations per time unit,
# labelled as starting in 2008, and show its first values.
# NOTE(review): df_JA was last sorted by city rather than by date, so the
# series order is presumably not chronological; confirm this is intended.
data <- ts(data = df_JA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
7.79027
4.46942
3.60799
4.68732
5.02351
0.59599
# Line plot of the series with a custom title and axis labels.
# NOTE(review): `colour = "green"` inside labs() only sets the label of the
# colour aesthetic; it presumably does not draw the line in green. Confirm intent.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so each Pareto bar is labelled with its city.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                 Frequency  Cum.Freq. Percentage Cum.Percent.
##   Moore Town     7.812690   7.812690  14.929435    14.929435
##   Port Antonio   7.790270  15.602960  14.886593    29.816028
##   Buff Bay       7.523170  23.126130  14.376185    44.192213
##   Moore Town     5.023510  28.149640   9.599532    53.791746
##   Port Antonio   4.687320  32.836960   8.957099    62.748845
##   Port Antonio   4.469420  37.306380   8.540710    71.289555
##   Manchioneal    4.134540  41.440920   7.900780    79.190335
##   Port Antonio   3.607990  45.048910   6.894585    86.084920
##   Moore Town     3.179300  48.228210   6.075392    92.160312
##   Buff Bay       2.788850  51.017060   5.329273    97.489585
##   Manchioneal    0.717730  51.734790   1.371526    98.861110
##   Moore Town     0.595990  52.330780   1.138890   100.000000
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 826
##   4 | 1570
##   6 | 588
head(df_JA)
## # A tibble: 6 x 25
##      id date    time  continent_code country_name country_code state    population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1   174 8/11/07 <NA>  <NA>           Jamaica      JM           Portland      14400
## 2  1764 4/18/10 <NA>  <NA>           Jamaica      JM           Portland      14400
## 3  6345 9/8/14  0:15  <NA>           Jamaica      JM           Portland      14400
## 4  7474 12/1/15 <NA>  <NA>           Jamaica      JM           Portland      14400
## 5  1391 1/10/10 <NA>  <NA>           Jamaica      JM           Portland       1119
## 6  2418 9/13/10 Night <NA>           Jamaica      JM           Portland       1119
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html 14.886593 7.443296
1764 4/18/10 NA NA Jamaica JM Portland 14400 Port Antonio 4.46942 NA 18.1865 -76.5029 (18.186499999999999, -76.502899999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://allafrica.com/stories/201004200719.html 8.540710 19.156947
6345 9/8/14 0:15 NA Jamaica JM Portland 14400 Port Antonio 3.60799 Unknown 18.1444 -76.4587 (18.144400000000001, -76.458699999999993) Landslide Mudslide Small Downpour NA 0 0 Gleaner http://jamaica-gleaner.com/gleaner/20140909/lead/lead23.html 6.894585 26.874595
7474 12/1/15 NA NA Jamaica JM Portland 14400 Port Antonio 4.68732 Above road 18.1340 -76.4551 (18.134, -76.455100000000002) Landslide Landslide Medium Rain NA 0 0 Restoration Village Farm http://www.restorationvillagefarm.com/tag/port-antonio/ 8.957099 34.800437
1391 1/10/10 NA NA Jamaica JM Portland 1119 Moore Town 5.02351 NA 18.1167 -76.4500 (18.116700000000002, -76.45) Landslide Landslide Medium Downpour NA NA 0 NA http://go-jamaica.com/news/read_article.php?id=15895 9.599532 44.078753
2418 9/13/10 Night NA Jamaica JM Portland 1119 Moore Town 0.59599 NA 18.0723 -76.4254 (18.072299999999998, -76.425399999999996) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100916/news/news4.html 1.138890 49.447964
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 826
##   4 | 1570
##   6 | 588
stem(df_JA$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 67
##   1 | 
##   2 | 8
##   3 | 26
##   4 | 157
##   5 | 0
##   6 | 
##   7 | 588

Tablas de frecuencia

library(questionr)
# Frequency table of the distinct distance values with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in a variable called `table`, which
# masks the name of base::table(); later chunks rely on this variable name.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.59599 1 8.3 8.3 8.3 8.3
0.71773 1 8.3 8.3 16.7 16.7
2.78885 1 8.3 8.3 25.0 25.0
3.1793 1 8.3 8.3 33.3 33.3
3.60799 1 8.3 8.3 41.7 41.7
4.13454 1 8.3 8.3 50.0 50.0
4.46942 1 8.3 8.3 58.3 58.3
4.68732 1 8.3 8.3 66.7 66.7
5.02351 1 8.3 8.3 75.0 75.0
7.52317 1 8.3 8.3 83.3 83.3
7.79027 1 8.3 8.3 91.7 91.7
7.81269 1 8.3 8.3 100.0 100.0
Total 12 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  13 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ val%   : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ %cum   : num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
##  $ val%cum: num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
# Build the plotting frame from the frequency table, dropping its last row
# (the "Total" line). head(., -1) is used instead of 1:(length(x) - 1), which
# would mis-index when only the total row is present, and no global called
# `names` is created.
freq_labels <- head(row.names(table), -1)
freq_counts <- head(table$n, -1)
df <- data.frame(x = freq_labels, y = freq_counts)
knitr::kable(df)
x y
0.59599 1
0.71773 1
2.78885 1
3.1793 1
3.60799 1
4.13454 1
4.46942 1
4.68732 1
5.02351 1
7.52317 1
7.79027 1
7.81269 1
library(ggplot2)
# One bar per distinct distance value; x labels rotated to stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and run up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.59599 2.59599 4.59599 6.59599 8.59599
# include.lowest = TRUE closes the first class on the left. Without it the
# minimum distance (which equals the first break) lies outside every (a, b]
# class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.596,2.6] 1 0.0909091 1
(2.6,4.6] 5 0.4545455 6
(4.6,6.6] 2 0.1818182 8
(6.6,8.6] 3 0.2727273 11
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.596,2.6]",..: 1 2 3 4
##  $ Freq    : int  1 5 2 3
##  $ Rel_Freq: num  0.0909 0.4545 0.1818 0.2727
##  $ Cum_Freq: int  1 6 8 11
# Two-column frame (class label, count) used by the bar plot.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.596,2.6] 1
(2.6,4.6] 5
(4.6,6.6] 2
(6.6,8.6] 3
library(ggplot2)

# Bar chart of the class counts.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_JA; the non-numeric columns
# come out as NA in the printed result.
stat.desc(df_JA)
##                        id date time continent_code country_name country_code
## nbr.val      1.200000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.740000e+02   NA   NA             NA           NA           NA
## max          7.474000e+03   NA   NA             NA           NA           NA
## range        7.300000e+03   NA   NA             NA           NA           NA
## sum          3.885800e+04   NA   NA             NA           NA           NA
## median       2.623000e+03   NA   NA             NA           NA           NA
## mean         3.238167e+03   NA   NA             NA           NA           NA
## SE.mean      6.142102e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.351868e+03   NA   NA             NA           NA           NA
## var          4.527050e+06   NA   NA             NA           NA           NA
## std.dev      2.127687e+03   NA   NA             NA           NA           NA
## coef.var     6.570652e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.200000e+01   NA 12.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.119000e+03   NA  0.5959900                   NA
## max             NA 1.440000e+04   NA  7.8126900                   NA
## range           NA 1.328100e+04   NA  7.2167000                   NA
## sum             NA 7.223200e+04   NA 52.3307800                   NA
## median          NA 2.539000e+03   NA  4.3019800                   NA
## mean            NA 6.019333e+03   NA  4.3608983                   NA
## SE.mean         NA 1.795694e+03   NA  0.7079935                   NA
## CI.mean.0.95    NA 3.952296e+03   NA  1.5582832                   NA
## var             NA 3.869421e+07   NA  6.0150573                   NA
## std.dev         NA 6.220467e+03   NA  2.4525614                   NA
## coef.var        NA 1.033415e+00   NA  0.5623982                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      1.200000e+01  1.200000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.800830e+01 -7.668520e+01          NA          NA             NA
## max          1.821060e+01 -7.627810e+01          NA          NA             NA
## range        2.023000e-01  4.071000e-01          NA          NA             NA
## sum          2.173922e+02 -9.175819e+02          NA          NA             NA
## median       1.812125e+01 -7.645255e+01          NA          NA             NA
## mean         1.811602e+01 -7.646516e+01          NA          NA             NA
## SE.mean      1.703378e-02  3.541084e-02          NA          NA             NA
## CI.mean.0.95 3.749110e-02  7.793873e-02          NA          NA             NA
## var          3.481796e-03  1.504713e-02          NA          NA             NA
## std.dev      5.900675e-02  1.226667e-01          NA          NA             NA
## coef.var     3.257159e-03 -1.604217e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2          9          NA
## nbr.null                 NA      NA         NA        2          9          NA
## nbr.na                   NA      NA         NA       10          3          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA  12.0000000  12.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   1.1388900   7.4432963
## max                   NA  14.9294354  92.8119073
## range                 NA  13.7905454  85.3686110
## sum                   NA 100.0000000 631.0918928
## median                NA   8.2207450  53.4650449
## mean                  NA   8.3333333  52.5909911
## SE.mean               NA   1.3529198   7.7590755
## CI.mean.0.95          NA   2.9777564  17.0776100
## var                   NA  21.9647038 722.4390327
## std.dev               NA   4.6866517  26.8782260
## coef.var              NA   0.5623982   0.5110804
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Saint Andrew (Jamaica)

library(readr)
library(knitr)
# Download the landslide catalog.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the script.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the Jamaican records and preview them.
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# Filter on country as well as parish: Dominica also has a "Saint Andrew"
# in this catalog, so matching on `state` alone mixed a Dominican record
# into the Jamaican analysis.
df_JA <- subset(df, country_name == "Jamaica" & state == "Saint Andrew")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
756 8/28/08 NA NA Jamaica JM Saint Andrew 1821 Mavis Bank 9.83051 NA 18.0388 -76.5740 (18.038799999999998, -76.573999999999998) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 7 NA http://www.reliefweb.int/rw/rwb.nsf/db900SID/EDIS-7HYLCK?OpenDocument
2233 8/11/10 NA NA Jamaica JM Saint Andrew 8551 Stony Hill 5.98731 NA 18.1260 -76.8110 (18.126000000000001, -76.811000000000007) Landslide Complex Medium Downpour NA NA 0 NA NA
2518 9/29/10 NA NA Jamaica JM Saint Andrew 8551 Stony Hill 10.84467 NA 18.1686 -76.8226 (18.168600000000001, -76.822599999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Nicole NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100930/lead/lead7.html
2523 9/30/10 Early morning NA Jamaica JM Saint Andrew 8551 Stony Hill 2.13878 NA 18.0622 -76.7763 (18.062200000000001, -76.776300000000006) Landslide Mudslide Medium Tropical cyclone Tropical Storm Nicole NA 2 NA http://www.todayonline.com/BreakingNews/EDC101001-0000003/Officials--Death-toll-from-Tropical-Storm-Nicole-at-5,-expected-to-rise
2547 10/5/10 NA NA Dominica DM Saint Andrew 1020 Calibishie 2.64873 NA 15.5918 -61.3731 (15.591799999999999, -61.373100000000001) Landslide Landslide Medium Rain NA NA 0 NA http://dominicanewsonline.com/dno/news-photo-landslide-in-blenhim/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Distance by state, with the bars of each city drawn side by side.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Distance by state, with the bars of each city stacked on top of each other.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar of distances drawn in polar coordinates
# with the y axis mapped to the angle.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance per record (in percent) and the mid-point of
# each slice along the stacked axis, used to position the percentage labels.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette; the
# former name "Set4" does not exist and only produced an "Unknown palette"
# warning with a fallback palette.
ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Series with 12 observations per time unit starting at 2008, built from the
# distances in the current row order of df_JA.
# NOTE(review): the rows are not sorted by date here - confirm this is intended.
data <- ts(df_JA[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
5.98731
10.84467
2.13878
9.83051
0.96514
2.64873
# Draw the series in green. `colour` has to be given to autoplot(), which
# forwards it to the line layer; inside labs() it would only set the title of
# a colour legend and leave the line unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so the Pareto chart labels each bar with its city.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##              
## Pareto chart analysis for distance
##                Frequency  Cum.Freq. Percentage Cum.Percent.
##   Stony Hill   10.844670  10.844670  33.455570    33.455570
##   Mavis Bank    9.830510  20.675180  30.326909    63.782479
##   Stony Hill    5.987310  26.662490  18.470721    82.253200
##   Calibishie    2.648730  29.311220   8.171274    90.424474
##   Stony Hill    2.138780  31.450000   6.598090    97.022564
##   Gordon Town   0.965140  32.415140   2.977436   100.000000
# Stem-and-leaf display of the distances.
stem(df_JA[["distance"]])
## 
##   The decimal point is at the |
## 
##    0 | 0
##    2 | 16
##    4 | 
##    6 | 0
##    8 | 8
##   10 | 8
head(df_JA)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2233 8/11/10  <NA>  <NA>           Jamaica      JM           Sain~       8551
## 2  2518 9/29/10  <NA>  <NA>           Jamaica      JM           Sain~       8551
## 3  2523 9/30/10  Earl~ <NA>           Jamaica      JM           Sain~       8551
## 4   756 8/28/08  <NA>  <NA>           Jamaica      JM           Sain~       1821
## 5   341 10/31/07 <NA>  <NA>           Jamaica      JM           Sain~       1088
## 6  2547 10/5/10  <NA>  <NA>           Dominica     DM           Sain~       1020
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2233 8/11/10 NA NA Jamaica JM Saint Andrew 8551 Stony Hill 5.98731 NA 18.1260 -76.8110 (18.126000000000001, -76.811000000000007) Landslide Complex Medium Downpour NA NA 0 NA NA 18.470721 9.23536
2518 9/29/10 NA NA Jamaica JM Saint Andrew 8551 Stony Hill 10.84467 NA 18.1686 -76.8226 (18.168600000000001, -76.822599999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Nicole NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100930/lead/lead7.html 33.455571 35.19851
2523 9/30/10 Early morning NA Jamaica JM Saint Andrew 8551 Stony Hill 2.13878 NA 18.0622 -76.7763 (18.062200000000001, -76.776300000000006) Landslide Mudslide Medium Tropical cyclone Tropical Storm Nicole NA 2 NA http://www.todayonline.com/BreakingNews/EDC101001-0000003/Officials--Death-toll-from-Tropical-Storm-Nicole-at-5,-expected-to-rise 6.598090 55.22534
756 8/28/08 NA NA Jamaica JM Saint Andrew 1821 Mavis Bank 9.83051 NA 18.0388 -76.5740 (18.038799999999998, -76.573999999999998) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 7 NA http://www.reliefweb.int/rw/rwb.nsf/db900SID/EDIS-7HYLCK?OpenDocument 30.326909 73.68784
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp 2.977436 90.34001
2547 10/5/10 NA NA Dominica DM Saint Andrew 1020 Calibishie 2.64873 NA 15.5918 -61.3731 (15.591799999999999, -61.373100000000001) Landslide Landslide Medium Rain NA NA 0 NA http://dominicanewsonline.com/dno/news-photo-landslide-in-blenhim/ 8.171274 95.91436
# Stem-and-leaf display of the distances (default scale).
stem(df_JA[["distance"]])
## 
##   The decimal point is at the |
## 
##    0 | 0
##    2 | 16
##    4 | 
##    6 | 0
##    8 | 8
##   10 | 8
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_JA[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 
##    1 | 0
##    2 | 16
##    3 | 
##    4 | 
##    5 | 
##    6 | 0
##    7 | 
##    8 | 
##    9 | 8
##   10 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the distances, sorted by decreasing frequency, with
# cumulative percentages and a final total row.
table <- questionr::freq(x = distance, sort = "dec", total = TRUE, cum = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.96514 1 16.7 16.7 16.7 16.7
2.13878 1 16.7 16.7 33.3 33.3
2.64873 1 16.7 16.7 50.0 50.0
5.98731 1 16.7 16.7 66.7 66.7
9.83051 1 16.7 16.7 83.3 83.3
10.84467 1 16.7 16.7 100.0 100.0
Total 6 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  7 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 6
##  $ %      : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ val%   : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ %cum   : num  16.7 33.3 50 66.7 83.3 100 100
##  $ val%cum: num  16.7 33.3 50 66.7 83.3 100 100
# Values (row names) and counts of the frequency table.
x <- row.names(table)
y <- table$n
# Drop the last entry (the "Total" row). head(., -1) also behaves correctly
# for a length-1 vector, where 1:(length(x) - 1) would count down to 0.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.96514 1
2.13878 1
2.64873 1
5.98731 1
9.83051 1
10.84467 1
library(ggplot2)
# df$x stores the distances as text, so order the bars by their numeric value;
# otherwise the axis is sorted alphabetically (e.g. "10.8" before "2.1").
ggplot(data = df, aes(x = reorder(x, as.numeric(x)), y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept as a ratio of natural logs.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Range of the data and class width rounded up to a whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points from the minimum up to at most max + w, in steps of w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
print(bins)
## [1]  0.96514  4.96514  8.96514 12.96514
# Bin the distances. The first break equals min(distance), and cut() builds
# left-open intervals by default, so the minimum would become NA and vanish
# from the table; include.lowest = TRUE closes the first interval on the left.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.965,4.97] 2 0.4 2
(4.97,8.97] 1 0.2 3
(8.97,13] 2 0.4 5
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.965,4.97]",..: 1 2 3
##  $ Freq    : int  2 1 2
##  $ Rel_Freq: num  0.4 0.2 0.4
##  $ Cum_Freq: int  2 3 5
# Two-column data frame (class, frequency) used for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.965,4.97] 2
(4.97,8.97] 1
(8.97,13] 2
library(ggplot2)

# Bar chart of the frequency of each distance class.
ggplot(df, mapping = aes(x = x, y = y)) +
  geom_col(fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics are only meaningful for numeric columns; restricting
# the input removes the all-NA columns reported for character variables.
stat.desc(df_JA[vapply(df_JA, is.numeric, logical(1))])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      6.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.410000e+02   NA   NA             NA           NA           NA
## max          2.547000e+03   NA   NA             NA           NA           NA
## range        2.206000e+03   NA   NA             NA           NA           NA
## sum          1.091800e+04   NA   NA             NA           NA           NA
## median       2.375500e+03   NA   NA             NA           NA           NA
## mean         1.819667e+03   NA   NA             NA           NA           NA
## SE.mean      4.082502e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.049441e+03   NA   NA             NA           NA           NA
## var          1.000009e+06   NA   NA             NA           NA           NA
## std.dev      1.000005e+03   NA   NA             NA           NA           NA
## coef.var     5.495538e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 6.000000e+00   NA  6.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 1.020000e+03   NA  0.965140                   NA
## max             NA 8.551000e+03   NA 10.844670                   NA
## range           NA 7.531000e+03   NA  9.879530                   NA
## sum             NA 2.958200e+04   NA 32.415140                   NA
## median          NA 5.186000e+03   NA  4.318020                   NA
## mean            NA 4.930333e+03   NA  5.402523                   NA
## SE.mean         NA 1.623267e+03   NA  1.707745                   NA
## CI.mean.0.95    NA 4.172741e+03   NA  4.389898                   NA
## var             NA 1.580997e+07   NA 17.498354                   NA
## std.dev         NA 3.976176e+03   NA  4.183103                   NA
## coef.var        NA 8.064720e-01   NA  0.774287                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val        6.00000000    6.00000000          NA          NA             NA
## nbr.null       0.00000000    0.00000000          NA          NA             NA
## nbr.na         0.00000000    0.00000000          NA          NA             NA
## min           15.59180000  -76.82260000          NA          NA             NA
## max           18.16860000  -61.37310000          NA          NA             NA
## range          2.57680000   15.44950000          NA          NA             NA
## sum          106.02440000 -445.06580000          NA          NA             NA
## median        18.05050000  -76.74255000          NA          NA             NA
## mean          17.67073333  -74.17763333          NA          NA             NA
## SE.mean        0.41633213    2.56117804          NA          NA             NA
## CI.mean.0.95   1.07021581    6.58371775          NA          NA             NA
## var            1.03999465   39.35779778          NA          NA             NA
## std.dev        1.01980128    6.27357934          NA          NA             NA
## coef.var       0.05771132   -0.08457508          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   5.000000          NA
## nbr.null                 NA      NA         NA        0   3.000000          NA
## nbr.na                   NA      NA         NA        6   1.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf   7.000000          NA
## range                    NA      NA         NA     -Inf   7.000000          NA
## sum                      NA      NA         NA        0   9.000000          NA
## median                   NA      NA         NA       NA   0.000000          NA
## mean                     NA      NA         NA      NaN   1.800000          NA
## SE.mean                  NA      NA         NA       NA   1.356466          NA
## CI.mean.0.95             NA      NA         NA      NaN   3.766153          NA
## var                      NA      NA         NA       NA   9.200000          NA
## std.dev                  NA      NA         NA       NA   3.033150          NA
## coef.var                 NA      NA         NA       NA   1.685083          NA
##              source_link       prop         ypos
## nbr.val               NA   6.000000    6.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA   2.977436    9.2353604
## max                   NA  33.455570   95.9143629
## range                 NA  30.478135   86.6790025
## sum                   NA 100.000000  359.6014085
## median                NA  13.320998   64.4565857
## mean                  NA  16.666667   59.9335681
## SE.mean               NA   5.268356   13.6780214
## CI.mean.0.95          NA  13.542739   35.1604732
## var                   NA 166.533419 1122.5296089
## std.dev               NA  12.904783   33.5041730
## coef.var              NA   0.774287    0.5590218
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Saint Ann (Jamaica)

library(readr)
library(knitr)
# Load the catalogue CSV. show_col_types = FALSE silences the column
# specification message, as readr itself suggests.
df <- read_csv(
  "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv",
  show_col_types = FALSE
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
colnames(df)[c(7, 9)] <- c("state", "city")
# Records whose country is Jamaica.
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# Keep only the Jamaican records for the state Saint Ann. The country
# condition guards against states with the same name in other countries.
df_JA <- subset(df, country_name == "Jamaica" & state == "Saint Ann")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
354 11/12/07 NA NA Jamaica JM Saint Ann 13671 Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 3.96533 NA 18.4000 -77.2000 (18.399999999999999, -77.2) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20071113/news/news1.html
501 4/16/08 NA NA Jamaica JM Saint Ann 9450 Ocho Rios 0.53886 NA 18.4054 -77.1007 (18.4054, -77.100700000000003) Landslide Mudslide Medium Rain NA NA NA NA http://www.nwa.gov.jm/content/newsread.aspx?newsId=268
7473 12/1/15 NA NA Jamaica JM Saint Ann 13671 Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 1.08196 Above road 18.4313 -77.1919 (18.4313, -77.191900000000004) Landslide Landslide Medium Rain NA 0 0 First Look http://go-jamaica.com/pressrelease/item.php?id=5755

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Distance by state, with the bars of each city drawn side by side.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Distance by state, with the bars of each city stacked on top of each other.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar of distances drawn in polar coordinates
# with the y axis mapped to the angle.
ggplot(data = df_JA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance per record (in percent) and the mid-point of
# each slice along the stacked axis, used to position the percentage labels.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette; the
# former name "Set4" does not exist and only produced an "Unknown palette"
# warning with a fallback palette.
ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Series with 12 observations per time unit starting at 2008, built from the
# distances in the current row order of df_JA.
# NOTE(review): the rows are not sorted by date here - confirm this is intended.
data <- ts(df_JA[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
3.96533
1.08196
0.53886
# Draw the series in green. `colour` has to be given to autoplot(), which
# forwards it to the line layer; inside labs() it would only set the title of
# a colour legend and leave the line unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances named by city, so the Pareto chart labels each bar with its city.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                         
## Pareto chart analysis for distance
##                           Frequency  Cum.Freq. Percentage Cum.Percent.
##   Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay   3.965330   3.965330  70.985025    70.985025
##   Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay   1.081960   5.047290  19.368617    90.353642
##   Ocho Rios                0.538860   5.586150   9.646358   100.000000
# Stem-and-leaf display of the distances.
stem(df_JA[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 1
##   2 | 
##   3 | 
##   4 | 0
head(df_JA)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1   354 11/12/07 <NA>  <NA>           Jamaica      JM           Saint Ann      13671
## 2  7473 12/1/15  <NA>  <NA>           Jamaica      JM           Saint Ann      13671
## 3   501 4/16/08  <NA>  <NA>           Jamaica      JM           Saint Ann       9450
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
354 11/12/07 NA NA Jamaica JM Saint Ann 13671 Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 3.96533 NA 18.4000 -77.2000 (18.399999999999999, -77.2) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20071113/news/news1.html 70.985026 35.49251
7473 12/1/15 NA NA Jamaica JM Saint Ann 13671 Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 1.08196 Above road 18.4313 -77.1919 (18.4313, -77.191900000000004) Landslide Landslide Medium Rain NA 0 0 First Look http://go-jamaica.com/pressrelease/item.php?id=5755 19.368617 80.66933
501 4/16/08 NA NA Jamaica JM Saint Ann 9450 Ocho Rios 0.53886 NA 18.4054 -77.1007 (18.4054, -77.100700000000003) Landslide Mudslide Medium Rain NA NA NA NA http://www.nwa.gov.jm/content/newsread.aspx?newsId=268 9.646358 95.17682
# Stem-and-leaf display of the distances (default scale).
stem(df_JA[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 1
##   2 | 
##   3 | 
##   4 | 0
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_JA[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 1
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 
##   4 | 0

Tablas de frecuencia

library(questionr)
# Frequency table of the distances, sorted by decreasing frequency, with
# cumulative percentages and a final total row.
table <- questionr::freq(x = distance, sort = "dec", total = TRUE, cum = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.53886 1 33.3 33.3 33.3 33.3
1.08196 1 33.3 33.3 66.7 66.7
3.96533 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Values (row names) and counts of the frequency table.
x <- row.names(table)
y <- table$n
# Drop the last entry (the "Total" row). head(., -1) also behaves correctly
# for a length-1 vector, where 1:(length(x) - 1) would count down to 0.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.53886 1
1.08196 1
3.96533 1
library(ggplot2)
# df$x stores the distances as text, so order the bars by their numeric value
# instead of relying on the alphabetical order of the labels.
ggplot(data = df, aes(x = reorder(x, as.numeric(x)), y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept as a ratio of natural logs.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Range of the data and class width rounded up to a whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points from the minimum up to at most max + w, in steps of w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
print(bins)
## [1] 0.53886 2.53886 4.53886
# Bin the distances. The first break equals min(distance), and cut() builds
# left-open intervals by default, so the minimum would become NA and vanish
# from the table; include.lowest = TRUE closes the first interval on the left.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.539,2.54] 1 0.5 1
(2.54,4.54] 1 0.5 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.539,2.54]",..: 1 2
##  $ Freq    : int  1 1
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  1 2
# Two-column data frame (class, frequency) used for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.539,2.54] 1
(2.54,4.54] 1
library(ggplot2)

# Bar chart of the frequency of each distance class.
ggplot(df, mapping = aes(x = x, y = y)) +
  geom_col(fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics are only meaningful for numeric columns; restricting
# the input removes the all-NA columns reported for character variables.
stat.desc(df_JA[vapply(df_JA, is.numeric, logical(1))])
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.540000e+02   NA   NA             NA           NA           NA
## max          7.473000e+03   NA   NA             NA           NA           NA
## range        7.119000e+03   NA   NA             NA           NA           NA
## sum          8.328000e+03   NA   NA             NA           NA           NA
## median       5.010000e+02   NA   NA             NA           NA           NA
## mean         2.776000e+03   NA   NA             NA           NA           NA
## SE.mean      2.348883e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 1.010643e+04   NA   NA             NA           NA           NA
## var          1.655176e+07   NA   NA             NA           NA           NA
## std.dev      4.068385e+03   NA   NA             NA           NA           NA
## coef.var     1.465557e+00   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 3.000000e+00   NA 3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA
## min             NA 9.450000e+03   NA 0.5388600                   NA
## max             NA 1.367100e+04   NA 3.9653300                   NA
## range           NA 4.221000e+03   NA 3.4264700                   NA
## sum             NA 3.679200e+04   NA 5.5861500                   NA
## median          NA 1.367100e+04   NA 1.0819600                   NA
## mean            NA 1.226400e+04   NA 1.8620500                   NA
## SE.mean         NA 1.407000e+03   NA 1.0632622                   NA
## CI.mean.0.95    NA 6.053832e+03   NA 4.5748480                   NA
## var             NA 5.938947e+06   NA 3.3915795                   NA
## std.dev         NA 2.436995e+03   NA 1.8416241                   NA
## coef.var        NA 1.987113e-01   NA 0.9890304                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      3.000000e+00  3.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.840000e+01 -7.720000e+01          NA          NA             NA
## max          1.843130e+01 -7.710070e+01          NA          NA             NA
## range        3.130000e-02  9.930000e-02          NA          NA             NA
## sum          5.523670e+01 -2.314926e+02          NA          NA             NA
## median       1.840540e+01 -7.719190e+01          NA          NA             NA
## mean         1.841223e+01 -7.716420e+01          NA          NA             NA
## SE.mean      9.659940e-03  3.183599e-02          NA          NA             NA
## CI.mean.0.95 4.156337e-02  1.369792e-01          NA          NA             NA
## var          2.799433e-04  3.040590e-03          NA          NA             NA
## std.dev      1.673151e-02  5.514155e-02          NA          NA             NA
## coef.var     9.087169e-04 -7.146001e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1          1          NA
## nbr.null                 NA      NA         NA        1          1          NA
## nbr.na                   NA      NA         NA        2          2          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA       NA         NA          NA
## CI.mean.0.95             NA      NA         NA      NaN        NaN          NA
## var                      NA      NA         NA       NA         NA          NA
## std.dev                  NA      NA         NA       NA         NA          NA
## coef.var                 NA      NA         NA       NA         NA          NA
##              source_link         prop        ypos
## nbr.val               NA    3.0000000   3.0000000
## nbr.null              NA    0.0000000   0.0000000
## nbr.na                NA    0.0000000   0.0000000
## min                   NA    9.6463575  35.4925127
## max                   NA   70.9850255  95.1768212
## range                 NA   61.3386680  59.6843085
## sum                   NA  100.0000000 211.3386680
## median                NA   19.3686170  80.6693340
## mean                  NA   33.3333333  70.4462227
## SE.mean               NA   19.0338998  17.9716274
## CI.mean.0.95          NA   81.8962609  77.3256717
## var                   NA 1086.8680228 968.9381746
## std.dev               NA   32.9676815  31.1277718
## coef.var              NA    0.9890304   0.4418657
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Saint Catherine (Jamaica)

library(readr)
library(knitr)
# Load the catalogue CSV. show_col_types = FALSE silences the column
# specification message, as readr itself suggests.
df <- read_csv(
  "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv",
  show_col_types = FALSE
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
colnames(df)[c(7, 9)] <- c("state", "city")
# Records whose country is Jamaica.
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# State names are not unique across countries in the catalog, so filter on the
# country as well as the state to keep only Jamaican records.
df_JA <- subset(df, country_name == "Jamaica" & state == "Saint Catherine")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
2519 9/29/10 NA NA Jamaica JM Saint Catherine 12873 Bog Walk 5.86530 NA 18.0677 -77.0476 (18.067699999999999, -77.047600000000003) Landslide Landslide Medium Tropical cyclone Tropical Storm Nicole NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/KHII-89T7Y2-full_report.pdf/$File/full_report.pdf
7472 12/1/15 NA NA Jamaica JM Saint Catherine 12873 Bog Walk 6.71269 Above road 18.0918 -76.9429 (18.091799999999999, -76.942899999999995) Landslide Landslide Small Rain NA 0 0 Loop http://www.loopjamaica.com/content/landslide-waugh-hill-main-road-blocks-thoroughfare

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of distance, coloured by city, for the selected state.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of distance, coloured by city, for the selected state.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar mapped onto polar coordinates.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: mid-point of each slice along the stacked axis, used to place labels.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels and no legend.
ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it triggered "Unknown palette Set4");
  # "Set2" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set2")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series starting in 2008.
# NOTE(review): the rows are not ordered by `date` and start/frequency are hard-coded; confirm this is intended.
data <- ts(df_JA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
5.46381
5.86530
6.71269
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets a legend title; it does not colour the line.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city, used as input for the Pareto chart.
# NOTE(review): a city with several records shows up as several bars with the same label.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Bog Walk     6.71269   6.71269   37.20632     37.20632
##   Bog Walk     5.86530  12.57799   32.50951     69.71583
##   Riversdale   5.46381  18.04180   30.28417    100.00000
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   5 | 
##   5 | 59
##   6 | 
##   6 | 7
head(df_JA)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   340 10/31/07 <NA>  <NA>           Jamaica      JM           Sain~       4085
## 2  2519 9/29/10  <NA>  <NA>           Jamaica      JM           Sain~      12873
## 3  7472 12/1/15  <NA>  <NA>           Jamaica      JM           Sain~      12873
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/ 30.28417 15.14209
2519 9/29/10 NA NA Jamaica JM Saint Catherine 12873 Bog Walk 5.86530 NA 18.0677 -77.0476 (18.067699999999999, -77.047600000000003) Landslide Landslide Medium Tropical cyclone Tropical Storm Nicole NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/KHII-89T7Y2-full_report.pdf/$File/full_report.pdf 32.50951 46.53893
7472 12/1/15 NA NA Jamaica JM Saint Catherine 12873 Bog Walk 6.71269 Above road 18.0918 -76.9429 (18.091799999999999, -76.942899999999995) Landslide Landslide Small Rain NA 0 0 Loop http://www.loopjamaica.com/content/landslide-waugh-hill-main-road-blocks-thoroughfare 37.20632 81.39684
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   5 | 
##   5 | 59
##   6 | 
##   6 | 7
stem(df_JA$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   54 | 6
##   56 | 
##   58 | 7
##   60 | 
##   62 | 
##   64 | 
##   66 | 1

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values with cumulative percentages and a "Total" row.
# NOTE(review): the variable name `table` shadows base::table(); it is kept because later code reads it.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
5.46381 1 33.3 33.3 33.3 33.3
5.8653 1 33.3 33.3 66.7 66.7
6.71269 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Drop the trailing "Total" row so that only the per-value counts remain.
# head(v, -1) replaces v[1:(length(v) - 1)], which would index with c(1, 0)
# and keep the first element if the table had a single row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
5.46381 1
5.8653 1
6.71269 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc
# Class width: the range divided by the number of classes, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points from the minimum up to one class width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 5.46381 6.46381 7.46381
# Assign each distance to its class. The first break equals min(distance), and
# cut() builds right-closed intervals by default, so without
# include.lowest = TRUE the smallest observation falls outside every class
# and is dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(5.46,6.46] 1 0.5 1
(6.46,7.46] 1 0.5 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(5.46,6.46]",..: 1 2
##  $ Freq    : int  1 1
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(5.46,6.46] 1
(6.46,7.46] 1
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Summarise only the numeric columns; character columns passed to stat.desc()
# just produce all-NA columns in the result.
stat.desc(df_JA[vapply(df_JA, is.numeric, logical(1))])
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.400000e+02   NA   NA             NA           NA           NA
## max          7.472000e+03   NA   NA             NA           NA           NA
## range        7.132000e+03   NA   NA             NA           NA           NA
## sum          1.033100e+04   NA   NA             NA           NA           NA
## median       2.519000e+03   NA   NA             NA           NA           NA
## mean         3.443667e+03   NA   NA             NA           NA           NA
## SE.mean      2.110104e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 9.079043e+03   NA   NA             NA           NA           NA
## var          1.335761e+07   NA   NA             NA           NA           NA
## std.dev      3.654807e+03   NA   NA             NA           NA           NA
## coef.var     1.061313e+00   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 4.085000e+03   NA  5.4638100                   NA
## max             NA 1.287300e+04   NA  6.7126900                   NA
## range           NA 8.788000e+03   NA  1.2488800                   NA
## sum             NA 2.983100e+04   NA 18.0418000                   NA
## median          NA 1.287300e+04   NA  5.8653000                   NA
## mean            NA 9.943667e+03   NA  6.0139333                   NA
## SE.mean         NA 2.929333e+03   NA  0.3681006                   NA
## CI.mean.0.95    NA 1.260390e+04   NA  1.5838092                   NA
## var             NA 2.574298e+07   NA  0.4064942                   NA
## std.dev         NA 5.073754e+03   NA  0.6375690                   NA
## coef.var        NA 5.102498e-01   NA  0.1060153                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.000000000  3.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          18.067700000 -7.704760e+01          NA          NA             NA
## max          18.215200000 -7.694290e+01          NA          NA             NA
## range         0.147500000  1.047000e-01          NA          NA             NA
## sum          54.374700000 -2.309564e+02          NA          NA             NA
## median       18.091800000 -7.696590e+01          NA          NA             NA
## mean         18.124900000 -7.698547e+01          NA          NA             NA
## SE.mean       0.045682856  3.176824e-02          NA          NA             NA
## CI.mean.0.95  0.196557465  1.366877e-01          NA          NA             NA
## var           0.006260770  3.027663e-03          NA          NA             NA
## std.dev       0.079125028  5.502421e-02          NA          NA             NA
## coef.var      0.004365543 -7.147350e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1          2          NA
## nbr.null                 NA      NA         NA        1          2          NA
## nbr.na                   NA      NA         NA        2          1          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop         ypos
## nbr.val               NA   3.0000000    3.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  30.2841734   15.1420867
## max                   NA  37.2063209   81.3968396
## range                 NA   6.9221475   66.2547529
## sum                   NA 100.0000000  143.0778525
## median                NA  32.5095057   46.5389263
## mean                  NA  33.3333333   47.6926175
## SE.mean               NA   2.0402655   19.1347966
## CI.mean.0.95          NA   8.7785541   82.3303848
## var                   NA  12.4880503 1098.4213215
## std.dev               NA   3.5338436   33.1424399
## coef.var              NA   0.1060153    0.6949176
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Thomas (Jamaica)

library(readr)
library(knitr)
# Download the catalog CSV into a tibble; readr guesses the column types.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the Jamaican records (rows with a missing country are dropped) and preview them.
df_JA <- df[which(df$country_name == "Jamaica"), ]
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
174 8/11/07 NA NA Jamaica JM Portland 14400 Port Antonio 7.79027 NA 18.1258 -76.5082 (18.125800000000002, -76.508200000000002) Landslide Landslide Medium Rain NA NA NA Jamaica Gleaner http://www.jamaica-gleaner.com/gleaner/20070812/lead/lead2.html
304 10/12/07 NA NA Jamaica JM St. Elizabeth 1371 Maggotty 4.57363 NA 18.1257 -77.7405 (18.125699999999998, -77.740499999999997) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2039/26/
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
340 10/31/07 NA NA Jamaica JM Saint Catherine 4085 Riversdale 5.46381 NA 18.2152 -76.9659 (18.215199999999999, -76.965900000000005) Landslide Complex Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2933/26/
341 10/31/07 NA NA Jamaica JM Saint Andrew 1088 Gordon Town 0.96514 NA 18.0370 -76.7088 (18.036999999999999, -76.708799999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA NA Jamaica Observer http://www.jamaicaobserver.com/news/html/20071101T010000-0500_128916_OBS_DEADLY_RAIN.asp
library(dplyr)
# State names are not unique across countries in the catalog, so filter on the
# country as well as the state to keep only Jamaican records.
df_JA <- subset(df, country_name == "Jamaica" & state == "Saint Thomas")
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/
774 9/4/08 NA NA Jamaica JM Saint Thomas 2382 Bath 1.71217 NA 17.9480 -76.3330 (17.948, -76.332999999999998) Landslide Complex Medium Tropical cyclone Tropical Storm Gustav NA NA NA http://www.jamaica-gleaner.com/gleaner/20080905/lead/lead4.html
1760 4/18/10 NA NA Jamaica JM Saint Thomas 2634 Easington 4.53632 NA 17.9647 -76.5835 (17.964700000000001, -76.583500000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of distance, coloured by city, for the selected state.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of distance, coloured by city, for the selected state.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar mapped onto polar coordinates.
ggplot(df_JA, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: mid-point of each slice along the stacked axis, used to place labels.
df_JA <- df_JA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels and no legend.
ggplot(df_JA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it triggered "Unknown palette Set4");
  # "Set2" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set2")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series starting in 2008.
# NOTE(review): the rows are not ordered by `date` and start/frequency are hard-coded; confirm this is intended.
data <- ts(df_JA$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
6.51940
4.53632
0.21825
1.71217
# Line plot of the series with a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() only sets a legend title; it does not colour the line.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city, used as input for the Pareto chart.
# NOTE(review): a city with several records shows up as several bars with the same label.
distance <- setNames(df_JA$distance, df_JA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##            
## Pareto chart analysis for distance
##              Frequency  Cum.Freq. Percentage Cum.Percent.
##   Easington   6.519400   6.519400  50.202755    50.202755
##   Easington   4.536320  11.055720  34.932012    85.134767
##   Bath        1.712170  12.767890  13.184595    98.319362
##   Bath        0.218250  12.986140   1.680638   100.000000
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 27
##   2 | 
##   4 | 5
##   6 | 5
head(df_JA)
## # A tibble: 4 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   339 10/31/07 <NA>  <NA>           Jamaica      JM           Sain~       2634
## 2  1760 4/18/10  <NA>  <NA>           Jamaica      JM           Sain~       2634
## 3   314 10/17/07 <NA>  <NA>           Jamaica      JM           Sain~       2382
## 4   774 9/4/08   <NA>  <NA>           Jamaica      JM           Sain~       2382
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_JA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
339 10/31/07 NA NA Jamaica JM Saint Thomas 2634 Easington 6.51940 NA 17.9384 -76.6479 (17.938400000000001, -76.647900000000007) Landslide Landslide Medium Tropical cyclone Tropical Storm Noel NA 1 RadioJamaica http://www.radiojamaica.com/content/view/2583/26/ 50.202755 25.10138
1760 4/18/10 NA NA Jamaica JM Saint Thomas 2634 Easington 4.53632 NA 17.9647 -76.5835 (17.964700000000001, -76.583500000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.jamaica-gleaner.com/gleaner/20100420/lead/lead4.html 34.932012 67.66876
314 10/17/07 NA NA Jamaica JM Saint Thomas 2382 Bath 0.21825 NA 17.9500 -76.3500 (17.95, -76.349999999999994) Landslide Mudslide Medium Rain NA NA NA RadioJamaica http://www.radiojamaica.com/content/view/2193/26/ 1.680638 85.97509
774 9/4/08 NA NA Jamaica JM Saint Thomas 2382 Bath 1.71217 NA 17.9480 -76.3330 (17.948, -76.332999999999998) Landslide Complex Medium Tropical cyclone Tropical Storm Gustav NA NA NA http://www.jamaica-gleaner.com/gleaner/20080905/lead/lead4.html 13.184595 93.40770
stem(df_JA$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 27
##   2 | 
##   4 | 5
##   6 | 5
stem(df_JA$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 2
##   1 | 7
##   2 | 
##   3 | 
##   4 | 5
##   5 | 
##   6 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values with cumulative percentages and a "Total" row.
# NOTE(review): the variable name `table` shadows base::table(); it is kept because later code reads it.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.21825 1 25 25 25 25
1.71217 1 25 25 50 50
4.53632 1 25 25 75 75
6.5194 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Drop the trailing "Total" row so that only the per-value counts remain.
# head(v, -1) replaces v[1:(length(v) - 1)], which would index with c(1, 0)
# and keep the first element if the table had a single row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.21825 1
1.71217 1
4.53632 1
6.5194 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; x labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc
# Class width: the range divided by the number of classes, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points from the minimum up to one class width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.21825 3.21825 6.21825 9.21825
# Assign each distance to its class. The first break equals min(distance), and
# cut() builds right-closed intervals by default, so without
# include.lowest = TRUE the smallest observation falls outside every class
# and is dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.218,3.22] 1 0.3333333 1
(3.22,6.22] 1 0.3333333 2
(6.22,9.22] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.218,3.22]",..: 1 2 3
##  $ Freq    : int  1 1 1
##  $ Rel_Freq: num  0.333 0.333 0.333
##  $ Cum_Freq: int  1 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(0.218,3.22] 1
(3.22,6.22] 1
(6.22,9.22] 1
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Summarise only the numeric columns; character columns passed to stat.desc()
# just produce all-NA columns in the result.
stat.desc(df_JA[vapply(df_JA, is.numeric, logical(1))])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.140000e+02   NA   NA             NA           NA           NA
## max          1.760000e+03   NA   NA             NA           NA           NA
## range        1.446000e+03   NA   NA             NA           NA           NA
## sum          3.187000e+03   NA   NA             NA           NA           NA
## median       5.565000e+02   NA   NA             NA           NA           NA
## mean         7.967500e+02   NA   NA             NA           NA           NA
## SE.mean      3.380028e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.075676e+03   NA   NA             NA           NA           NA
## var          4.569836e+05   NA   NA             NA           NA           NA
## std.dev      6.760056e+02   NA   NA             NA           NA           NA
## coef.var     8.484539e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 2.382000e+03   NA  0.2182500                   NA
## max             NA 2.634000e+03   NA  6.5194000                   NA
## range           NA 2.520000e+02   NA  6.3011500                   NA
## sum             NA 1.003200e+04   NA 12.9861400                   NA
## median          NA 2.508000e+03   NA  3.1242450                   NA
## mean            NA 2.508000e+03   NA  3.2465350                   NA
## SE.mean         NA 7.274613e+01   NA  1.4112635                   NA
## CI.mean.0.95    NA 2.315107e+02   NA  4.4912704                   NA
## var             NA 2.116800e+04   NA  7.9666589                   NA
## std.dev         NA 1.454923e+02   NA  2.8225270                   NA
## coef.var        NA 5.801127e-02   NA  0.8693968                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      4.000000e+00  4.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.793840e+01 -7.664790e+01          NA          NA             NA
## max          1.796470e+01 -7.633300e+01          NA          NA             NA
## range        2.630000e-02  3.149000e-01          NA          NA             NA
## sum          7.180110e+01 -3.059144e+02          NA          NA             NA
## median       1.794900e+01 -7.646675e+01          NA          NA             NA
## mean         1.795028e+01 -7.647860e+01          NA          NA             NA
## SE.mean      5.434055e-03  8.031387e-02          NA          NA             NA
## CI.mean.0.95 1.729359e-02  2.555946e-01          NA          NA             NA
## var          1.181158e-04  2.580127e-02          NA          NA             NA
## std.dev      1.086811e-02  1.606277e-01          NA          NA             NA
## coef.var     6.054565e-04 -2.100297e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  2.0000000          NA
## nbr.null                 NA      NA         NA        0  1.0000000          NA
## nbr.na                   NA      NA         NA        4  2.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  1.0000000          NA
## range                    NA      NA         NA     -Inf  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA       NA  0.5000000          NA
## mean                     NA      NA         NA      NaN  0.5000000          NA
## SE.mean                  NA      NA         NA       NA  0.5000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  6.3531024          NA
## var                      NA      NA         NA       NA  0.5000000          NA
## std.dev                  NA      NA         NA       NA  0.7071068          NA
## coef.var                 NA      NA         NA       NA  1.4142136          NA
##              source_link        prop        ypos
## nbr.val               NA   4.0000000   4.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   1.6806380  25.1013773
## max                   NA  50.2027546  93.4077024
## range                 NA  48.5221167  68.3063251
## sum                   NA 100.0000000 272.1529261
## median                NA  24.0583037  76.8219232
## mean                  NA  25.0000000  68.0382315
## SE.mean               NA  10.8674596  15.2999203
## CI.mean.0.95          NA  34.5851067  48.6911750
## var                   NA 472.4067134 936.3502494
## std.dev               NA  21.7349192  30.5998407
## coef.var              NA   0.8693968   0.4497448
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Joseph (Barbados)

library(readr)
library(knitr)
# Download the catalog CSV into a tibble; readr guesses the column types.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the Barbados records (rows with a missing country are dropped) and preview them.
df_BA <- df[which(df$country_name == "Barbados"), ]
knitr::kable(head(df_BA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
857 10/22/08 NA NA Barbados BB Saint Joseph 1765 Bathsheba 2.87363 NA 13.229 -59.54 (13.228999999999999, -59.54) Landslide Mudslide Medium Downpour NA NA NA NA http://www.nationnews.com/story/326456269849259.php
library(dplyr)
# "Saint Joseph" also exists in Dominica, so filtering the whole catalog by
# state alone mixes countries; restrict to Barbados as well.
df_BA <- subset(df, country_name == "Barbados" & state == "Saint Joseph")
knitr::kable(head(df_BA))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
857 10/22/08 NA NA Barbados BB Saint Joseph 1765 Bathsheba 2.87363 NA 13.229 -59.5400 (13.228999999999999, -59.54) Landslide Mudslide Medium Downpour NA NA NA NA http://www.nationnews.com/story/326456269849259.php
5754 1/7/14 Morning NA Dominica DM Saint Joseph 2184 Saint Joseph 2.38605 Above road 15.421 -61.4285 (15.420999999999999, -61.4285) Landslide Landslide Medium unknown NA 0 0 DaVibes The Caribbean News Portal http://dominicavibes.dm/colihaut-men-escape-landslide/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on y, state on x, one fill colour per city.
ggplot(data = df_BA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: same mapping as the grouped chart, segments piled per state.
ggplot(data = df_BA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar wrapped into polar coordinates on y.
ggplot(data = df_BA, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Percentage share of each record's distance, plus the mid-point of its slice
# on the cumulative scale (used as the label position on the pie chart).
df_BA <- df_BA %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() only returns FALSE.
library(scales)
# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette; the
# former name "Set4" does not exist and triggered an "Unknown palette Set4" warning.
ggplot(df_BA, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# df_BA was re-sorted by city for the pie chart, so restore chronological
# order before building the series (dates are stored as m/d/yy text).
chrono <- order(as.Date(df_BA$date, format = "%m/%d/%y"))
# NOTE(review): frequency = 12 / start = 2008 place the events on a nominal
# monthly grid; they are not derived from the actual event dates.
# NOTE: the rendered preview below predates this reordering; re-knit to refresh.
data <- ts(df_BA$distance[chrono], frequency = 12, start = 2008)
knitr::kable(head(data))
x
2.38605
2.87363
# Line plot of the series with title/axis labels and the black-and-white theme.
# NOTE(review): `colour` inside labs() presumably only names a colour legend
# and does not colour the line - confirm intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with their city, drawn as a Pareto chart with a
# cumulative-percentage axis ticked every 10 %.
distance <- setNames(df_BA$distance, df_BA$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                Frequency Cum.Freq. Percentage Cum.Percent.
##   Bathsheba      2.87363   2.87363   54.63507     54.63507
##   Saint Joseph   2.38605   5.25968   45.36493    100.00000
# Stem-and-leaf display of the distances.
stem(df_BA$distance)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   23 | 9
##   24 | 
##   25 | 
##   26 | 
##   27 | 
##   28 | 7
# Print the first rows of the subset (now carrying the prop/ypos columns).
head(df_BA, n = 6L)
## # A tibble: 2 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  5754 1/7/14   Morning <NA>           Dominica     DM           Sain~       2184
## 2   857 10/22/08 <NA>    <NA>           Barbados     BB           Sain~       1765
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_BA, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
5754 1/7/14 Morning NA Dominica DM Saint Joseph 2184 Saint Joseph 2.38605 Above road 15.421 -61.4285 (15.420999999999999, -61.4285) Landslide Landslide Medium unknown NA 0 0 DaVibes The Caribbean News Portal http://dominicavibes.dm/colihaut-men-escape-landslide/ 45.36493 22.68246
857 10/22/08 NA NA Barbados BB Saint Joseph 1765 Bathsheba 2.87363 NA 13.229 -59.5400 (13.228999999999999, -59.54) Landslide Mudslide Medium Downpour NA NA NA NA http://www.nationnews.com/story/326456269849259.php 54.63507 72.68246
# Stem-and-leaf display at the default scale (repeats the one shown earlier).
stem(df_BA[["distance"]])
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   23 | 9
##   24 | 
##   25 | 
##   26 | 
##   27 | 
##   28 | 7
# Stem-and-leaf display with scale = 2, i.e. a longer plot with more stems.
stem(df_BA$distance, scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   23 | 9
##   24 | 
##   24 | 
##   25 | 
##   25 | 
##   26 | 
##   26 | 
##   27 | 
##   27 | 
##   28 | 
##   28 | 7

Tablas de frecuencia

library(questionr)
# One-way frequency table of the distance values with cumulative columns and a
# Total row.
# NOTE(review): the result is stored in a variable called `table`, the same
# name as base::table().
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(x = table)
n % val% %cum val%cum
2.38605 1 50 50 50 50
2.87363 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build a plain value/frequency data frame from the freq() result, dropping
# its final "Total" row.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element and, unlike v[1:(length(v) - 1)],
# returns an empty vector (not element 1) when only the "Total" row exists.
names <- head(x, -1)
freqs <- head(y, -1)
# NOTE(review): this overwrites the catalog previously held in `df`.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.38605 1
2.87363 1
library(ggplot2)
# Bar chart of the ungrouped frequency table; x tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n). log2() is exact for
# powers of two, whereas log(n) / log(2) can land a hair above the integer and
# push ceiling() one class too high.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split over the classes, rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Identical (or single) observations give a zero width, which would leave
# seq() with a single break; fall back to one unit-wide class instead.
if (w == 0) {
  w <- 1
}
# Break points starting at the minimum and extending up to max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 2.38605 3.38605
# Assign each distance to its class. Intervals are right-closed, so
# include.lowest = TRUE is required to keep the minimum (which equals the first
# break) in the first class; without it that observation becomes NA and is
# missing from the table.
# NOTE: the rendered tables below predate this fix and are one observation
# short; re-knit to refresh them.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(2.39,3.39] 1 1 1
# Inspect the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(2.39,3.39]": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
# Two-column copy of the grouped table (class label, count) used for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(2.39,3.39] 1
library(ggplot2)

# Bar chart of the grouped frequency table, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column; non-numeric columns come back as NA.
pastecs::stat.desc(df_BA)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.570000e+02   NA   NA             NA           NA           NA
## max          5.754000e+03   NA   NA             NA           NA           NA
## range        4.897000e+03   NA   NA             NA           NA           NA
## sum          6.611000e+03   NA   NA             NA           NA           NA
## median       3.305500e+03   NA   NA             NA           NA           NA
## mean         3.305500e+03   NA   NA             NA           NA           NA
## SE.mean      2.448500e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.111114e+04   NA   NA             NA           NA           NA
## var          1.199030e+07   NA   NA             NA           NA           NA
## std.dev      3.462702e+03   NA   NA             NA           NA           NA
## coef.var     1.047558e+00   NA   NA             NA           NA           NA
##              state   population city  distance location_description   latitude
## nbr.val         NA     2.000000   NA 2.0000000                   NA  2.0000000
## nbr.null        NA     0.000000   NA 0.0000000                   NA  0.0000000
## nbr.na          NA     0.000000   NA 0.0000000                   NA  0.0000000
## min             NA  1765.000000   NA 2.3860500                   NA 13.2290000
## max             NA  2184.000000   NA 2.8736300                   NA 15.4210000
## range           NA   419.000000   NA 0.4875800                   NA  2.1920000
## sum             NA  3949.000000   NA 5.2596800                   NA 28.6500000
## median          NA  1974.500000   NA 2.6298400                   NA 14.3250000
## mean            NA  1974.500000   NA 2.6298400                   NA 14.3250000
## SE.mean         NA   209.500000   NA 0.2437900                   NA  1.0960000
## CI.mean.0.95    NA  2661.949892   NA 3.0976457                   NA 13.9260004
## var             NA 87780.500000   NA 0.1188671                   NA  2.4024320
## std.dev         NA   296.277741   NA 0.3447711                   NA  1.5499781
## coef.var        NA     0.150052   NA 0.1310997                   NA  0.1082009
##                longitude geolocation hazard_type landslide_type landslide_size
## nbr.val         2.000000          NA          NA             NA             NA
## nbr.null        0.000000          NA          NA             NA             NA
## nbr.na          0.000000          NA          NA             NA             NA
## min           -61.428500          NA          NA             NA             NA
## max           -59.540000          NA          NA             NA             NA
## range           1.888500          NA          NA             NA             NA
## sum          -120.968500          NA          NA             NA             NA
## median        -60.484250          NA          NA             NA             NA
## mean          -60.484250          NA          NA             NA             NA
## SE.mean         0.944250          NA          NA             NA             NA
## CI.mean.0.95   11.997834          NA          NA             NA             NA
## var             1.783216          NA          NA             NA             NA
## std.dev         1.335371          NA          NA             NA             NA
## coef.var       -0.022078          NA          NA             NA             NA
##              trigger storm_name injuries fatalities source_name source_link
## nbr.val           NA         NA        1          1          NA          NA
## nbr.null          NA         NA        1          1          NA          NA
## nbr.na            NA         NA        1          1          NA          NA
## min               NA         NA        0          0          NA          NA
## max               NA         NA        0          0          NA          NA
## range             NA         NA        0          0          NA          NA
## sum               NA         NA        0          0          NA          NA
## median            NA         NA        0          0          NA          NA
## mean              NA         NA        0          0          NA          NA
## SE.mean           NA         NA       NA         NA          NA          NA
## CI.mean.0.95      NA         NA      NaN        NaN          NA          NA
## var               NA         NA       NA         NA          NA          NA
## std.dev           NA         NA       NA         NA          NA          NA
## coef.var          NA         NA       NA         NA          NA          NA
##                     prop         ypos
## nbr.val        2.0000000    2.0000000
## nbr.null       0.0000000    0.0000000
## nbr.na         0.0000000    0.0000000
## min           45.3649271   22.6824636
## max           54.6350729   72.6824636
## range          9.2701457   50.0000000
## sum          100.0000000   95.3649271
## median        50.0000000   47.6824636
## mean          50.0000000   47.6824636
## SE.mean        4.6350729   25.0000000
## CI.mean.0.95  58.8941847  317.6551184
## var           42.9678008 1250.0000000
## std.dev        6.5549829   35.3553391
## coef.var       0.1310997    0.7414747
# Horizontal box-and-whisker plot of the distance series, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Haití

library(readr)
library(knitr)
# Re-download the landslide catalog CSV into `df` (it was overwritten by the
# plotting data frames of the previous section).
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the Haiti records and preview the first rows.
df_HT <- subset(df, subset = country_name == "Haiti")
knitr::kable(head(df_HT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html
747 8/26/08 NA NA Haiti HT Sud-Est 137966 Jacmel 4.41574 NA 18.2640 -72.5070 (18.263999999999999, -72.507000000000005) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 25 NA http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD92RGO9O1
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance on y, country on x, one fill colour per state.
ggplot(data = df_HT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: same mapping as the grouped chart, segments piled per country.
ggplot(data = df_HT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar wrapped into polar coordinates on y.
ggplot(data = df_HT, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Percentage share of each record's distance, plus the mid-point of its slice
# on the cumulative scale (used as the label position on the pie chart).
df_HT <- df_HT %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() only returns FALSE.
library(scales)
# Labelled pie chart. "Set3" is a valid ColorBrewer qualitative palette; the
# former name "Set4" does not exist and triggered an "Unknown palette Set4" warning.
ggplot(df_HT, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# df_HT was re-sorted by state for the pie chart, so restore chronological
# order before building the series (dates are stored as m/d/yy text).
chrono <- order(as.Date(df_HT$date, format = "%m/%d/%y"))
# NOTE(review): frequency = 12 / start = 2008 place the events on a nominal
# monthly grid; they are not derived from the actual event dates (the catalog
# preview shows Haiti records from 2007).
# NOTE: the rendered preview below predates this reordering; re-knit to refresh.
data <- ts(df_HT$distance[chrono], frequency = 12, start = 2008)
knitr::kable(head(data))
x
4.41574
0.19079
0.51272
2.72168
1.80063
3.50201
# Line plot of the series with title/axis labels and the black-and-white theme.
# NOTE(review): `colour` inside labs() presumably only names a colour legend
# and does not colour the line - confirm intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Per-event distances labelled with their state; `distance` keeps this
# per-event form because the frequency tables further down reuse it.
distance <- df_HT$distance
names(distance) <- df_HT$state
# The chart is titled "por estados", so total the distances per state first;
# passing the raw vector draws one bar per event and repeats the state labels.
# NOTE: the rendered analysis table below predates this aggregation; re-knit
# to refresh it.
distance_por_estado <- vapply(split(distance, names(distance)), sum, numeric(1))
pareto.chart(distance_por_estado,
             ylab = "distance",
             col = heat.colors(length(distance_por_estado)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por estados"
)

##             
## Pareto chart analysis for distance
##                 Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Artibonite  17.29836000  17.29836000  20.57571552  20.57571552
##   Ouest       12.13199000  29.43035000  14.43052260  35.00623813
##   Artibonite   8.70343000  38.13378000  10.35238599  45.35862412
##   Centre       7.86436000  45.99814000   9.35434539  54.71296951
##   Ouest        7.67473000  53.67287000   9.12878800  63.84175751
##   Nord         5.23459000  58.90746000   6.22633791  70.06809542
##   Artibonite   4.72379000  63.63125000   5.61876150  75.68685693
##   Sud-Est      4.41574000  68.04699000   5.25234820  80.93920513
##   Ouest        3.50201000  71.54900000   4.16550248  85.10470761
##   Ouest        2.72168000  74.27068000   3.23733079  88.34203840
##   Ouest        2.63565000  76.90633000   3.13500150  91.47703990
##   Ouest        1.80063000  78.70696000   2.14177822  93.61881812
##   Nord         1.58489000  80.29185000   1.88516401  95.50398214
##   Ouest        1.33931000  81.63116000   1.59305631  97.09703845
##   Ouest        1.31659000  82.94775000   1.56603177  98.66307021
##   Ouest        0.51272000  83.46047000   0.60986018  99.27293039
##   Nord         0.27505000  83.73552000   0.32716110  99.60009149
##   Sud-Est      0.19079000  83.92631000   0.22693716  99.82702866
##   Ouest        0.11071000  84.03702000   0.13168517  99.95871383
##   Nord         0.03471000  84.07173000   0.04128617 100.00000000
# Stem-and-leaf display of the distances.
stem(df_HT$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0000111223344
##   0 | 55889
##   1 | 2
##   1 | 7
# Print the first rows of the subset (now carrying the prop/ypos columns).
head(df_HT, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state   population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1   747 8/26/08  <NA>  <NA>           Haiti        HT           Sud-Est     137966
## 2  3563 6/2/11   <NA>  <NA>           Haiti        HT           Sud-Est     137966
## 3   303 10/12/07 <NA>  <NA>           Haiti        HT           Ouest         3951
## 4   334 10/29/07 <NA>  <NA>           Haiti        HT           Ouest      1234742
## 5   506 4/20/08  <NA>  <NA>           Haiti        HT           Ouest      1234742
## 6   748 8/26/08  <NA>  <NA>           Haiti        HT           Ouest      1234742
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_HT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
747 8/26/08 NA NA Haiti HT Sud-Est 137966 Jacmel 4.41574 NA 18.2640 -72.5070 (18.263999999999999, -72.507000000000005) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 25 NA http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD92RGO9O1 5.2523482 2.626174
3563 6/2/11 NA NA Haiti HT Sud-Est 137966 Jacmel 0.19079 NA 18.2348 -72.5364 (18.2348, -72.5364) Landslide Landslide Small Downpour NA NA 0 NA http://www.haitilibre.com/en/news-3095-haiti-climate-the-situation-by-department.html 0.2269372 5.365817
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1 0.6098602 5.784216
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131 3.2373308 7.707811
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html 2.1417782 10.397365
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0 4.1655025 13.551006
# Stem-and-leaf display at the default scale (repeats the one shown earlier).
stem(df_HT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0000111223344
##   0 | 55889
##   1 | 2
##   1 | 7
# Stem-and-leaf display with scale = 2, i.e. a longer plot with more stems.
stem(df_HT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 012353368
##    2 | 675
##    4 | 472
##    6 | 79
##    8 | 7
##   10 | 
##   12 | 1
##   14 | 
##   16 | 3

Tablas de frecuencia

library(questionr)
# One-way frequency table of the distance values with cumulative columns and a
# Total row.
# NOTE(review): the result is stored in a variable called `table`, the same
# name as base::table().
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(x = table)
n % val% %cum val%cum
0.03471 1 5 5 5 5
0.11071 1 5 5 10 10
0.19079 1 5 5 15 15
0.27505 1 5 5 20 20
0.51272 1 5 5 25 25
1.31659 1 5 5 30 30
1.33931 1 5 5 35 35
1.58489 1 5 5 40 40
1.80063 1 5 5 45 45
2.63565 1 5 5 50 50
2.72168 1 5 5 55 55
3.50201 1 5 5 60 60
4.41574 1 5 5 65 65
4.72379 1 5 5 70 70
5.23459 1 5 5 75 75
7.67473 1 5 5 80 80
7.86436 1 5 5 85 85
8.70343 1 5 5 90 90
12.13199 1 5 5 95 95
17.29836 1 5 5 100 100
Total 20 100 100 100 100
# Inspect the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  21 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ val%   : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ %cum   : num  5 10 15 20 25 30 35 40 45 50 ...
##  $ val%cum: num  5 10 15 20 25 30 35 40 45 50 ...
# Build a plain value/frequency data frame from the freq() result, dropping
# its final "Total" row.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element and, unlike v[1:(length(v) - 1)],
# returns an empty vector (not element 1) when only the "Total" row exists.
names <- head(x, -1)
freqs <- head(y, -1)
# NOTE(review): this overwrites the catalog previously held in `df`.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.03471 1
0.11071 1
0.19079 1
0.27505 1
0.51272 1
1.31659 1
1.33931 1
1.58489 1
1.80063 1
2.63565 1
2.72168 1
3.50201 1
4.41574 1
4.72379 1
5.23459 1
7.67473 1
7.86436 1
8.70343 1
12.13199 1
17.29836 1
library(ggplot2)

# Bar chart of the ungrouped frequency table; x tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n). log2() is exact for
# powers of two, whereas log(n) / log(2) can land a hair above the integer and
# push ceiling() one class too high.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split over the classes, rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Identical (or single) observations give a zero width, which would leave
# seq() with a single break; fall back to one unit-wide class instead.
if (w == 0) {
  w <- 1
}
# Break points starting at the minimum and extending up to max + w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.03471  4.03471  8.03471 12.03471 16.03471 20.03471
# Assign each distance to its class. Intervals are right-closed, so
# include.lowest = TRUE is required to keep the minimum (which equals the first
# break) in the first class; without it that observation becomes NA and is
# missing from the table (19 of 20 events counted).
# NOTE: the rendered tables below predate this fix and are one observation
# short; re-knit to refresh them.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0347,4.03] 11 0.5789474 11
(4.03,8.03] 5 0.2631579 16
(8.03,12] 1 0.0526316 17
(12,16] 1 0.0526316 18
(16,20] 1 0.0526316 19
# Inspect the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.0347,4.03]",..: 1 2 3 4 5
##  $ Freq    : int  11 5 1 1 1
##  $ Rel_Freq: num  0.5789 0.2632 0.0526 0.0526 0.0526
##  $ Cum_Freq: int  11 16 17 18 19
# Two-column copy of the grouped table (class label, count) used for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(0.0347,4.03] 11
(4.03,8.03] 5
(8.03,12] 1
(12,16] 1
(16,20] 1
library(ggplot2)

# Bar chart of the grouped frequency table, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column; non-numeric columns come back as NA.
pastecs::stat.desc(df_HT)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.970000e+02   NA   NA             NA           NA           NA
## max          6.722000e+03   NA   NA             NA           NA           NA
## range        6.425000e+03   NA   NA             NA           NA           NA
## sum          4.809400e+04   NA   NA             NA           NA           NA
## median       2.017000e+03   NA   NA             NA           NA           NA
## mean         2.404700e+03   NA   NA             NA           NA           NA
## SE.mean      4.506116e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 9.431408e+02   NA   NA             NA           NA           NA
## var          4.061016e+06   NA   NA             NA           NA           NA
## std.dev      2.015196e+03   NA   NA             NA           NA           NA
## coef.var     8.380240e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 2.000000e+01   NA 20.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 3.951000e+03   NA  0.034710                   NA
## max             NA 1.234742e+06   NA 17.298360                   NA
## range           NA 1.230791e+06   NA 17.263650                   NA
## sum             NA 6.625032e+06   NA 84.071730                   NA
## median          NA 1.363905e+05   NA  2.678665                   NA
## mean            NA 3.312516e+05   NA  4.203587                   NA
## SE.mean         NA 9.247531e+04   NA  1.019568                   NA
## CI.mean.0.95    NA 1.935530e+05   NA  2.133979                   NA
## var             NA 1.710337e+11   NA 20.790360                   NA
## std.dev         NA 4.135622e+05   NA  4.559645                   NA
## coef.var        NA 1.248483e+00   NA  1.084703                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       20.00000000  2.000000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           18.23480000 -7.275400e+01          NA          NA             NA
## max           19.76980000 -7.197470e+01          NA          NA             NA
## range          1.53500000  7.793000e-01          NA          NA             NA
## sum          378.03620000 -1.448005e+03          NA          NA             NA
## median        18.53150000 -7.240515e+01          NA          NA             NA
## mean          18.90181000 -7.240024e+01          NA          NA             NA
## SE.mean        0.12729585  4.201268e-02          NA          NA             NA
## CI.mean.0.95   0.26643329  8.793354e-02          NA          NA             NA
## var            0.32408469  3.530130e-02          NA          NA             NA
## std.dev        0.56928437  1.878864e-01          NA          NA             NA
## coef.var       0.03011798 -2.595107e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA  2.0000000  18.000000
## nbr.null                 NA      NA         NA  1.0000000   1.000000
## nbr.na                   NA      NA         NA 18.0000000   2.000000
## min                      NA      NA         NA  0.0000000   0.000000
## max                      NA      NA         NA  1.0000000  26.000000
## range                    NA      NA         NA  1.0000000  26.000000
## sum                      NA      NA         NA  1.0000000 128.000000
## median                   NA      NA         NA  0.5000000   3.000000
## mean                     NA      NA         NA  0.5000000   7.111111
## SE.mean                  NA      NA         NA  0.5000000   2.035452
## CI.mean.0.95             NA      NA         NA  6.3531024   4.294428
## var                      NA      NA         NA  0.5000000  74.575163
## std.dev                  NA      NA         NA  0.7071068   8.635691
## coef.var                 NA      NA         NA  1.4142136   1.214394
##              source_name source_link         prop       ypos
## nbr.val               NA          NA  20.00000000  20.000000
## nbr.null              NA          NA   0.00000000   0.000000
## nbr.na                NA          NA   0.00000000   0.000000
## min                   NA          NA   0.04128617   2.626174
## max                   NA          NA  20.57571552  89.712142
## range                 NA          NA  20.53442935  87.085968
## sum                   NA          NA 100.00000000 741.305853
## median                NA          NA   3.18616615  43.143272
## mean                  NA          NA   5.00000000  37.065293
## SE.mean               NA          NA   1.21273530   5.707890
## CI.mean.0.95          NA          NA   2.53828416  11.946751
## var                   NA          NA  29.41453837 651.600189
## std.dev               NA          NA   5.42351716  25.526461
## coef.var              NA          NA   1.08470343   0.688689
# Horizontal box-and-whisker plot of the distance series, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Artibonite (Haití)

library(readr)
library(knitr)
# Re-download the landslide catalog CSV into `df` (it was overwritten by the
# plotting data frames of the previous section).
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the analysis.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the Haiti records and preview the first rows.
df_HT <- subset(df, subset = country_name == "Haiti")
knitr::kable(head(df_HT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html
747 8/26/08 NA NA Haiti HT Sud-Est 137966 Jacmel 4.41574 NA 18.2640 -72.5070 (18.263999999999999, -72.507000000000005) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 25 NA http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD92RGO9O1
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Restrict the data to the Artibonite state of Haiti. The country condition is
# part of the filter so that a same-named state/province in another country
# cannot end up in df_HT (filtering on `state` alone ignores the country).
df_HT <- subset(df, country_name == "Haiti" & state == "Artibonite")
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285
771 9/3/08 NA NA Haiti HT Artibonite 84961 Gonaïves 4.72379 NA 19.4300 -72.6480 (19.43, -72.647999999999996) Landslide Mudslide Medium Tropical cyclone Hurricane Hannah NA 26 NA http://www.miamiherald.com/news/americas/cuba/story/671682.html
1140 9/7/09 Early morning NA Haiti HT Artibonite 66226 Saint-Marc 17.29836 NA 18.9523 -72.7053 (18.952300000000001, -72.705299999999994) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.google.com/hostednews/ap/article/ALeqM5hdjzxxFRHymhlrd1BpUjDSV3HK6AD9AIQ5OO0

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per state, with one side-by-side bar for each city.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Labelled pie chart.
# `prop` is each record's share (in %) of the total distance and `ypos` is the
# midpoint of its slice along the stacked axis, used to place the label.
df_HT <- df_HT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# library() stops with an error if scales is not installed; require() would
# only return FALSE and let percent() fail later.
library(scales)

ggplot(df_HT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and raised the warning
  # "Unknown palette Set4"; "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a series with 12
# observations per year whose first observation is placed at 2008.
data <- ts(data = df_HT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
17.29836
8.70343
4.72379
# Plot the series with a green line. The colour is passed to autoplot() so it
# reaches the line layer; inside labs(), `colour = "green"` only names a colour
# legend and does not change the colour of the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances, labelled by city, used as the Pareto chart input.
distance <- setNames(df_HT$distance, df_HT$city)

# One heat.colors() colour per bar; cumulative-percentage axis ticked every 10.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Saint-Marc  17.29836  17.29836   56.29954     56.29954
##   Gros Morne   8.70343  26.00179   28.32633     84.62587
##   Gonaïves     4.72379  30.72558   15.37413    100.00000
stem(df_HT$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 
##   0 | 59
##   1 | 
##   1 | 7
head(df_HT)
## # A tibble: 3 x 25
##      id date    time   continent_code country_name country_code state population
##   <dbl> <chr>   <chr>  <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  1140 9/7/09  Early~ <NA>           Haiti        HT           Arti~      66226
## 2   297 10/8/07 <NA>   <NA>           Haiti        HT           Arti~       7294
## 3   771 9/3/08  <NA>   <NA>           Haiti        HT           Arti~      84961
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1140 9/7/09 Early morning NA Haiti HT Artibonite 66226 Saint-Marc 17.29836 NA 18.9523 -72.7053 (18.952300000000001, -72.705299999999994) Landslide Mudslide Medium Downpour NA NA 1 NA http://www.google.com/hostednews/ap/article/ALeqM5hdjzxxFRHymhlrd1BpUjDSV3HK6AD9AIQ5OO0 56.29954 28.14977
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285 28.32633 70.46271
771 9/3/08 NA NA Haiti HT Artibonite 84961 Gonaïves 4.72379 NA 19.4300 -72.6480 (19.43, -72.647999999999996) Landslide Mudslide Medium Tropical cyclone Hurricane Hannah NA 26 NA http://www.miamiherald.com/news/americas/cuba/story/671682.html 15.37413 92.31294
stem(df_HT$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 
##   0 | 59
##   1 | 
##   1 | 7
stem(df_HT$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    4 | 7
##    6 | 
##    8 | 7
##   10 | 
##   12 | 
##   14 | 
##   16 | 3

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values, sorted by decreasing count, with
# cumulative percentages and a final "Total" row.
# NOTE: the result is stored as `table`, which shadows base::table() as a
# variable; function calls such as table(x) still resolve to the base function.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
4.72379 1 33.3 33.3 33.3 33.3
8.70343 1 33.3 33.3 66.7 66.7
17.29836 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Build a plotting data frame from the frequency table, leaving out its last
# row (the "Total" line requested with total = TRUE).
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and yields an empty vector when only one
# element is present; v[1:(length(v) - 1)] would index with 1:0 in that case
# and keep the "Total" entry.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
4.72379 1
8.70343 1
17.29836 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; the x-axis
# labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log(n) / log(2).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down value when the rounded-up one is even, otherwise the
# rounded-up value.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: the data range divided by the number of classes, rounded up to
# a whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)

# Break points from the minimum upwards in steps of w, allowed to run up to
# one step past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  4.72379  9.72379 14.72379 19.72379
# Assign every distance to its class. cut() builds right-closed intervals
# (a, b], so without include.lowest = TRUE the smallest observation -- which
# equals the first break -- becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)

# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(4.72,9.72] 1 0.5 1
(9.72,14.7] 0 0.0 1
(14.7,19.7] 1 0.5 2
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(4.72,9.72]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Two-column data frame (class label, count) for plotting the grouped table.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(4.72,9.72] 1
(9.72,14.7] 0
(14.7,19.7] 1
library(ggplot2)

# Bar chart of the number of records falling in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_HT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.970000e+02   NA   NA             NA           NA           NA
## max          1.140000e+03   NA   NA             NA           NA           NA
## range        8.430000e+02   NA   NA             NA           NA           NA
## sum          2.208000e+03   NA   NA             NA           NA           NA
## median       7.710000e+02   NA   NA             NA           NA           NA
## mean         7.360000e+02   NA   NA             NA           NA           NA
## SE.mean      2.439816e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.049768e+03   NA   NA             NA           NA           NA
## var          1.785810e+05   NA   NA             NA           NA           NA
## std.dev      4.225885e+02   NA   NA             NA           NA           NA
## coef.var     5.741691e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 7.294000e+03   NA  4.7237900                   NA
## max             NA 8.496100e+04   NA 17.2983600                   NA
## range           NA 7.766700e+04   NA 12.5745700                   NA
## sum             NA 1.584810e+05   NA 30.7255800                   NA
## median          NA 6.622600e+04   NA  8.7034300                   NA
## mean            NA 5.282700e+04   NA 10.2418600                   NA
## SE.mean         NA 2.340008e+04   NA  3.7105717                   NA
## CI.mean.0.95    NA 1.006824e+05   NA 15.9653016                   NA
## var             NA 1.642691e+09   NA 41.3050278                   NA
## std.dev         NA 4.053012e+04   NA  6.4268988                   NA
## coef.var        NA 7.672236e-01   NA  0.6275129                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.00000000  3.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          18.95230000 -7.275400e+01          NA          NA             NA
## max          19.69900000 -7.264800e+01          NA          NA             NA
## range         0.74670000  1.060000e-01          NA          NA             NA
## sum          58.08130000 -2.181073e+02          NA          NA             NA
## median       19.43000000 -7.270530e+01          NA          NA             NA
## mean         19.36043333 -7.270243e+01          NA          NA             NA
## SE.mean       0.21834213  3.063312e-02          NA          NA             NA
## CI.mean.0.95  0.93945038  1.318037e-01          NA          NA             NA
## var           0.14301986  2.815163e-03          NA          NA             NA
## std.dev       0.37817967  5.305811e-02          NA          NA             NA
## coef.var      0.01953364 -7.297983e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   0.000000          NA
## nbr.na                   NA      NA         NA        3   1.000000          NA
## min                      NA      NA         NA      Inf   1.000000          NA
## max                      NA      NA         NA     -Inf  26.000000          NA
## range                    NA      NA         NA     -Inf  25.000000          NA
## sum                      NA      NA         NA        0  27.000000          NA
## median                   NA      NA         NA       NA  13.500000          NA
## mean                     NA      NA         NA      NaN  13.500000          NA
## SE.mean                  NA      NA         NA       NA  12.500000          NA
## CI.mean.0.95             NA      NA         NA      NaN 158.827559          NA
## var                      NA      NA         NA       NA 312.500000          NA
## std.dev                  NA      NA         NA       NA  17.677670          NA
## coef.var                 NA      NA         NA       NA   1.309457          NA
##              source_link        prop         ypos
## nbr.val               NA   3.0000000    3.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  15.3741280   28.1497697
## max                   NA  56.2995393   92.3129360
## range                 NA  40.9254113   64.1631663
## sum                   NA 100.0000000  190.9254113
## median                NA  28.3263327   70.4627057
## mean                  NA  33.3333333   63.6418038
## SE.mean               NA  12.0764905   18.8336711
## CI.mean.0.95          NA  51.9609446   81.0347465
## var                   NA 437.5248651 1064.1215052
## std.dev               NA  20.9170950   32.6208753
## coef.var              NA   0.6275129    0.5125699
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Nord (Haiti)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city" that the
# later chunks refer to.
names(df)[c(7, 9)] <- c("state", "city")

# Keep only the records reported for Haiti and preview the first rows.
df_HT <- subset(df, subset = country_name == "Haiti")
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html
747 8/26/08 NA NA Haiti HT Sud-Est 137966 Jacmel 4.41574 NA 18.2640 -72.5070 (18.263999999999999, -72.507000000000005) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 25 NA http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD92RGO9O1
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Restrict the data to the Nord state of Haiti. The country condition is part
# of the filter so that a same-named state/province in another country cannot
# end up in df_HT (filtering on `state` alone ignores the country).
df_HT <- subset(df, country_name == "Haiti" & state == "Nord")
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1506 2/15/10 12:00 NA Haiti HT Nord 134815 Cap-Haïtien 0.27505 Urban area 19.7560 -72.2060 (19.756, -72.206000000000003) Landslide Mudslide Medium Downpour NA NA 4 Times Live http://www.timeslive.co.za/world/article311411.ece
4312 4/8/12 NA NA Haiti HT Nord 32645 Limbé 0.03471 NA 19.7041 -72.4006 (19.7041, -72.400599999999997) Landslide Landslide Medium Downpour NA NA 2 NA http://www.usatoday.com/news/world/story/2012-04-10/Haiti-floods/54160810/1
6713 11/1/14 NA NA Haiti HT Nord 134815 Okap 5.23459 Urban area 19.7450 -72.2152 (19.745000000000001, -72.215199999999996) Landslide Landslide Medium Downpour NA 0 1 reliefweb http://reliefweb.int/report/haiti/undp-government-haiti-provide-immediate-support-flood-affected-victims
6722 5/27/14 NA NA Haiti HT Nord 134815 Okap 1.58489 Unknown 19.7698 -72.2085 (19.7698, -72.208500000000001) Landslide Landslide Small Continuous rain NA 1 3 Business Recorder http://www.brecorder.com/world/north-america/15393-three-children-die-in-haiti-landslide.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: distance per state, with one side-by-side bar for each city.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(data = df_HT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Labelled pie chart.
# `prop` is each record's share (in %) of the total distance and `ypos` is the
# midpoint of its slice along the stacked axis, used to place the label.
df_HT <- df_HT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# library() stops with an error if scales is not installed; require() would
# only return FALSE and let percent() fail later.
library(scales)

ggplot(df_HT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and raised the warning
  # "Unknown palette Set4"; "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a series with 12
# observations per year whose first observation is placed at 2008.
data <- ts(data = df_HT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
5.23459
1.58489
0.03471
0.27505
# Plot the series with a green line. The colour is passed to autoplot() so it
# reaches the line layer; inside labs(), `colour = "green"` only names a colour
# legend and does not change the colour of the line.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances, labelled by city, used as the Pareto chart input.
distance <- setNames(df_HT$distance, df_HT$city)

# One heat.colors() colour per bar; cumulative-percentage axis ticked every 10.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##              
## Pareto chart analysis for distance
##                 Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Okap          5.2345900   5.2345900  73.4242360   73.4242360
##   Okap          1.5848900   6.8194800  22.2308409   95.6550768
##   Cap-Haïtien   0.2750500   7.0945300   3.8580550   99.5131318
##   Limbé         0.0347100   7.1292400   0.4868682  100.0000000
stem(df_HT$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 036
##   2 | 
##   4 | 2
head(df_HT)
## # A tibble: 4 x 25
##      id date    time  continent_code country_name country_code state population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6713 11/1/14 <NA>  <NA>           Haiti        HT           Nord      134815
## 2  6722 5/27/14 <NA>  <NA>           Haiti        HT           Nord      134815
## 3  4312 4/8/12  <NA>  <NA>           Haiti        HT           Nord       32645
## 4  1506 2/15/10 12:00 <NA>           Haiti        HT           Nord      134815
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6713 11/1/14 NA NA Haiti HT Nord 134815 Okap 5.23459 Urban area 19.7450 -72.2152 (19.745000000000001, -72.215199999999996) Landslide Landslide Medium Downpour NA 0 1 reliefweb http://reliefweb.int/report/haiti/undp-government-haiti-provide-immediate-support-flood-affected-victims 73.4242360 36.71212
6722 5/27/14 NA NA Haiti HT Nord 134815 Okap 1.58489 Unknown 19.7698 -72.2085 (19.7698, -72.208500000000001) Landslide Landslide Small Continuous rain NA 1 3 Business Recorder http://www.brecorder.com/world/north-america/15393-three-children-die-in-haiti-landslide.html 22.2308409 84.53966
4312 4/8/12 NA NA Haiti HT Nord 32645 Limbé 0.03471 NA 19.7041 -72.4006 (19.7041, -72.400599999999997) Landslide Landslide Medium Downpour NA NA 2 NA http://www.usatoday.com/news/world/story/2012-04-10/Haiti-floods/54160810/1 0.4868682 95.89851
1506 2/15/10 12:00 NA Haiti HT Nord 134815 Cap-Haïtien 0.27505 Urban area 19.7560 -72.2060 (19.756, -72.206000000000003) Landslide Mudslide Medium Downpour NA NA 4 Times Live http://www.timeslive.co.za/world/article311411.ece 3.8580550 98.07097
stem(df_HT$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 036
##   2 | 
##   4 | 2
stem(df_HT$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 03
##   1 | 6
##   2 | 
##   3 | 
##   4 | 
##   5 | 2

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values, sorted by decreasing count, with
# cumulative percentages and a final "Total" row.
# NOTE: the result is stored as `table`, which shadows base::table() as a
# variable; function calls such as table(x) still resolve to the base function.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.03471 1 25 25 25 25
0.27505 1 25 25 50 50
1.58489 1 25 25 75 75
5.23459 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Build a plotting data frame from the frequency table, leaving out its last
# row (the "Total" line requested with total = TRUE).
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and yields an empty vector when only one
# element is present; v[1:(length(v) - 1)] would index with 1:0 in that case
# and keep the "Total" entry.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.03471 1
0.27505 1
1.58489 1
5.23459 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; the x-axis
# labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log(n) / log(2).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down value when the rounded-up one is even, otherwise the
# rounded-up value.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: the data range divided by the number of classes, rounded up to
# a whole number.
R <- diff(range(distance))
w <- ceiling(R / n_clases)

# Break points from the minimum upwards in steps of w, allowed to run up to
# one step past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.03471 2.03471 4.03471 6.03471
# Assign every distance to its class. cut() builds right-closed intervals
# (a, b], so without include.lowest = TRUE the smallest observation -- which
# equals the first break -- becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)

# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0347,2.03] 2 0.6666667 2
(2.03,4.03] 0 0.0000000 2
(4.03,6.03] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.0347,2.03]",..: 1 2 3
##  $ Freq    : int  2 0 1
##  $ Rel_Freq: num  0.667 0 0.333
##  $ Cum_Freq: int  2 2 3
# Two-column data frame (class label, count) for plotting the grouped table.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.0347,2.03] 2
(2.03,4.03] 0
(4.03,6.03] 1
library(ggplot2)

# Bar chart of the number of records falling in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_HT)
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.506000e+03   NA   NA             NA           NA           NA
## max          6.722000e+03   NA   NA             NA           NA           NA
## range        5.216000e+03   NA   NA             NA           NA           NA
## sum          1.925300e+04   NA   NA             NA           NA           NA
## median       5.512500e+03   NA   NA             NA           NA           NA
## mean         4.813250e+03   NA   NA             NA           NA           NA
## SE.mean      1.239675e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.945199e+03   NA   NA             NA           NA           NA
## var          6.147177e+06   NA   NA             NA           NA           NA
## std.dev      2.479350e+03   NA   NA             NA           NA           NA
## coef.var     5.151094e-01   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 4.000000e+00   NA 4.000000                   NA 4.000000e+00
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA 0.000000e+00
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA 0.000000e+00
## min             NA 3.264500e+04   NA 0.034710                   NA 1.970410e+01
## max             NA 1.348150e+05   NA 5.234590                   NA 1.976980e+01
## range           NA 1.021700e+05   NA 5.199880                   NA 6.570000e-02
## sum             NA 4.370900e+05   NA 7.129240                   NA 7.897490e+01
## median          NA 1.348150e+05   NA 0.929970                   NA 1.975050e+01
## mean            NA 1.092725e+05   NA 1.782310                   NA 1.974373e+01
## SE.mean         NA 2.554250e+04   NA 1.200109                   NA 1.414905e-02
## CI.mean.0.95    NA 8.128763e+04   NA 3.819284                   NA 4.502859e-02
## var             NA 2.609677e+09   NA 5.761050                   NA 8.007825e-04
## std.dev         NA 5.108500e+04   NA 2.400219                   NA 2.829810e-02
## coef.var        NA 4.675010e-01   NA 1.346690                   NA 1.433271e-03
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.240060e+01          NA          NA             NA
## max          -7.220600e+01          NA          NA             NA
## range         1.946000e-01          NA          NA             NA
## sum          -2.890303e+02          NA          NA             NA
## median       -7.221185e+01          NA          NA             NA
## mean         -7.225758e+01          NA          NA             NA
## SE.mean       4.771454e-02          NA          NA             NA
## CI.mean.0.95  1.518490e-01          NA          NA             NA
## var           9.106709e-03          NA          NA             NA
## std.dev       9.542908e-02          NA          NA             NA
## coef.var     -1.320679e-03          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 2.0000000  4.0000000          NA
## nbr.null                 NA      NA         NA 1.0000000  0.0000000          NA
## nbr.na                   NA      NA         NA 2.0000000  0.0000000          NA
## min                      NA      NA         NA 0.0000000  1.0000000          NA
## max                      NA      NA         NA 1.0000000  4.0000000          NA
## range                    NA      NA         NA 1.0000000  3.0000000          NA
## sum                      NA      NA         NA 1.0000000 10.0000000          NA
## median                   NA      NA         NA 0.5000000  2.5000000          NA
## mean                     NA      NA         NA 0.5000000  2.5000000          NA
## SE.mean                  NA      NA         NA 0.5000000  0.6454972          NA
## CI.mean.0.95             NA      NA         NA 6.3531024  2.0542603          NA
## var                      NA      NA         NA 0.5000000  1.6666667          NA
## std.dev                  NA      NA         NA 0.7071068  1.2909944          NA
## coef.var                 NA      NA         NA 1.4142136  0.5163978          NA
##              source_link         prop       ypos
## nbr.val               NA    4.0000000   4.000000
## nbr.null              NA    0.0000000   0.000000
## nbr.na                NA    0.0000000   0.000000
## min                   NA    0.4868682  36.712118
## max                   NA   73.4242360  98.070973
## range                 NA   72.9373678  61.358855
## sum                   NA  100.0000000 315.221258
## median                NA   13.0444479  90.219084
## mean                  NA   25.0000000  78.805314
## SE.mean               NA   16.8336230  14.341266
## CI.mean.0.95          NA   53.5721012  45.640308
## var                   NA 1133.4834473 822.687604
## std.dev               NA   33.6672459  28.682531
## coef.var              NA    1.3466898   0.363967
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Ouest (Haiti)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city" that the
# later chunks refer to.
names(df)[c(7, 9)] <- c("state", "city")

# Keep only the records reported for Haiti and preview the first rows.
df_HT <- subset(df, subset = country_name == "Haiti")
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
297 10/8/07 NA NA Haiti HT Artibonite 7294 Gros Morne 8.70343 NA 19.6990 -72.7540 (19.699000000000002, -72.754000000000005) Landslide Landslide Medium Downpour NA NA NA NA https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=3285
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html
747 8/26/08 NA NA Haiti HT Sud-Est 137966 Jacmel 4.41574 NA 18.2640 -72.5070 (18.263999999999999, -72.507000000000005) Landslide Landslide Medium Tropical cyclone Hurricane Gustav NA 25 NA http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD92RGO9O1
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Restrict the data to the Ouest state of Haiti. The country condition is part
# of the filter so that a same-named state/province in another country cannot
# end up in df_HT (filtering on `state` alone ignores the country).
df_HT <- subset(df, country_name == "Haiti" & state == "Ouest")
knitr::kable(head(df_HT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
303 10/12/07 NA NA Haiti HT Ouest 3951 Cabaret 0.51272 NA 18.7335 -72.4133 (18.733499999999999, -72.413300000000007) Landslide Complex Large Rain NA NA 23 Euronews.net http://www.euronews.net/index.php?page=info&article=448067&lng=1
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
1266 10/20/09 NA NA Haiti HT Ouest 442156 Carrefour 1.31659 NA 18.5347 -72.4097 (18.534700000000001, -72.409700000000001) Landslide Landslide Small Downpour NA NA 4 NA http://www.etaiwannews.com/etn/news_content.php?id=1088959&lang=eng_news
2528 10/1/10 NA NA Haiti HT Ouest 442156 Carrefour 12.13199 NA 18.4468 -72.4577 (18.4468, -72.457700000000003) Landslide Mudslide Medium Downpour NA NA 3 NA http://www.presstv.ir/detail/144854.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Bars of distance per state, coloured by city and placed side by side.
ggplot(df_HT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the city segments stacked on top of
# each other within the state.
ggplot(df_HT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent into polar coordinates along y gives a pie
# chart of distance by city.
ggplot(df_HT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: midpoint of each record's slice along the stacked axis, used to
#       position its percentage label.
df_HT <- df_HT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_HT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it only triggers a warning and a
  # fallback); "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly time-series object that starts in 2008.
# NOTE(review): the values enter the series in the current row order of df_HT,
# not in order of the `date` column — confirm this is intended.
data <- ts(data = df_HT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
2.72168
1.80063
3.50201
0.11071
1.33931
7.67473
# Draw the series as a green line. The colour has to be given to autoplot();
# inside labs() `colour` is only the title of a colour legend and leaves the
# line unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Per-event distances, labelled with the city of each event.
distance <- df_HT$distance
names(distance) <- df_HT$city
# The chart is titled "by city", so add up the distances of all events that
# share a city: one bar per city instead of one bar per event with repeated
# labels.
distance_by_city <- vapply(split(df_HT$distance, df_HT$city), sum, numeric(1))
pareto.chart(distance_by_city,
             ylab = "distance",
             col = heat.colors(length(distance_by_city)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por ciudades"
)

##                 
## Pareto chart analysis for distance
##                    Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Carrefour       12.1319900  12.1319900  35.9508766   35.9508766
##   Léogâne          7.6747300  19.8067200  22.7426227   58.6934993
##   Port-au-Prince   3.5020100  23.3087300  10.3775497   69.0710490
##   Port-au-Prince   2.7216800  26.0304100   8.0651881   77.1362371
##   Carrefour        2.6356500  28.6660600   7.8102544   84.9464915
##   Port-au-Prince   1.8006300  30.4666900   5.3358292   90.2823207
##   Pétionville      1.3393100  31.8060000   3.9687939   94.2511146
##   Carrefour        1.3165900  33.1225900   3.9014675   98.1525821
##   Cabaret          0.5127200  33.6353100   1.5193495   99.6719317
##   Pétionville      0.1107100  33.7460200   0.3280683  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_HT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 01112334
##   0 | 8
##   1 | 2
# Print the first six rows of the Haiti subset.
head(df_HT, n = 6)
## # A tibble: 6 x 25
##      id date     time       continent_code country_name country_code state population
##   <dbl> <chr>    <chr>      <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   334 10/29/07 <NA>       <NA>           Haiti        HT           Ouest    1234742
## 2   506 4/20/08  <NA>       <NA>           Haiti        HT           Ouest    1234742
## 3   748 8/26/08  <NA>       <NA>           Haiti        HT           Ouest    1234742
## 4  3576 6/7/11   <NA>       <NA>           Haiti        HT           Ouest     283052
## 5  4289 3/30/12  Late night <NA>           Haiti        HT           Ouest     283052
## 6  2604 10/17/10 <NA>       <NA>           Haiti        HT           Ouest     134190
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first six rows as a formatted table.
df_HT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
334 10/29/07 NA NA Haiti HT Ouest 1234742 Port-au-Prince 2.72168 NA 18.5146 -72.3361 (18.514600000000002, -72.336100000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Noel NA NA ABC news http://www.abcnews.go.com/International/wireStory?id=3807131 8.0651881 4.032594
506 4/20/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 1.80063 NA 18.5283 -72.3224 (18.528300000000002, -72.322400000000002) Landslide Mudslide Medium Rain NA NA 3 NA http://www.news.com.au/heraldsun/story/0,21985,23596379-5005961,00.html 5.3358292 10.733103
748 8/26/08 NA NA Haiti HT Ouest 1234742 Port-au-Prince 3.50201 NA 18.5090 -72.3450 (18.509, -72.344999999999999) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 3 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0 10.3775497 18.589792
3576 6/7/11 NA NA Haiti HT Ouest 283052 Pétionville 0.11071 NA 18.5135 -72.2853 (18.513500000000001, -72.285300000000007) Landslide Landslide Large Downpour NA NA 13 NA http://www.bbc.co.uk/news/world-latin-america-13689711 0.3280683 23.942601
4289 3/30/12 Late night NA Haiti HT Ouest 283052 Pétionville 1.33931 NA 18.5044 -72.2947 (18.5044, -72.294700000000006) Landslide Landslide Medium Downpour NA NA 6 NA http://www.haitilibre.com/en/news-5290-haiti-weather-first-drama-of-the-rain.html 3.9687939 26.091032
2604 10/17/10 NA NA Haiti HT Ouest 134190 Léogâne 7.67473 NA 18.4674 -72.5738 (18.467400000000001, -72.573800000000006) Landslide Complex Medium Downpour NA NA 8 NA http://edition.cnn.com/2010/WORLD/americas/10/19/haiti.flooding/ 22.7426227 39.446741
# Stem-and-leaf display of the distances at the default scale.
stem(x = df_HT$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 01112334
##   0 | 8
##   1 | 2
# Same display with the plot length doubled, which spreads the stems out.
stem(x = df_HT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 15338
##    2 | 675
##    4 | 
##    6 | 7
##    8 | 
##   10 | 
##   12 | 1

Tablas de frecuencia

library(questionr)
# Frequency of every distinct distance value, sorted by decreasing count, with
# cumulative percentages and a closing "Total" row.
table <- questionr::freq(x = distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.11071 1 10 10 10 10
0.51272 1 10 10 20 20
1.31659 1 10 10 30 30
1.33931 1 10 10 40 40
1.80063 1 10 10 50 50
2.63565 1 10 10 60 60
2.72168 1 10 10 70 70
3.50201 1 10 10 80 80
7.67473 1 10 10 90 90
12.13199 1 10 10 100 100
Total 10 100 100 100 100
# Inspect the structure (classes and column types) of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  11 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ val%   : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ %cum   : num  10 20 30 40 50 60 70 80 90 100 ...
##  $ val%cum: num  10 20 30 40 50 60 70 80 90 100 ...
# Split the frequency table into category labels and counts, leaving out the
# trailing "Total" row so that it is not drawn as a bar.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], stays
# correct when only one element is present.
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor whose levels follow the numeric value; as plain
# character strings they are sorted alphabetically on a plot axis
# (e.g. "12.1" before "2.6").
df <- data.frame(
  x = factor(names, levels = names[order(as.numeric(names))]),
  y = freqs
)
knitr::kable(df)
x y
0.11071 1
0.51272 1
1.31659 1
1.33931 1
1.80063 1
2.63565 1
2.72168 1
3.50201 1
7.67473 1
12.13199 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; the x labels are
# rotated by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n) classes for n observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the number of classes, rounded up to
# a whole number. Breaks start at the minimum and run one width past the
# maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.11071  3.11071  6.11071  9.11071 12.11071 15.11071
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left: the first break equals the minimum, so without it the
# smallest observation falls outside every class and is missing from the
# counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.111,3.11] 6 0.6666667 6
(3.11,6.11] 1 0.1111111 7
(6.11,9.11] 1 0.1111111 8
(9.11,12.1] 0 0.0000000 8
(12.1,15.1] 1 0.1111111 9
# Inspect the column types of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.111,3.11]",..: 1 2 3 4 5
##  $ Freq    : int  6 1 1 0 1
##  $ Rel_Freq: num  0.667 0.111 0.111 0 0.111
##  $ Cum_Freq: int  6 7 8 8 9
# Plot-ready table: class label as x, absolute frequency as y.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.111,3.11] 6
(3.11,6.11] 1
(6.11,9.11] 1
(9.11,12.1] 0
(12.1,15.1] 1
library(ggplot2)

# Bar chart of the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics only exist for numeric data, so pass just the numeric
# columns; text columns would each produce a column made up entirely of NA.
stat.desc(df_HT[vapply(df_HT, is.numeric, logical(1))])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      1.000000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.030000e+02   NA   NA             NA           NA           NA
## max          4.289000e+03   NA   NA             NA           NA           NA
## range        3.986000e+03   NA   NA             NA           NA           NA
## sum          1.875900e+04   NA   NA             NA           NA           NA
## median       1.897000e+03   NA   NA             NA           NA           NA
## mean         1.875900e+03   NA   NA             NA           NA           NA
## SE.mean      4.542708e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.027632e+03   NA   NA             NA           NA           NA
## var          2.063619e+06   NA   NA             NA           NA           NA
## std.dev      1.436530e+03   NA   NA             NA           NA           NA
## coef.var     7.657819e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 1.000000e+01   NA 10.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 3.951000e+03   NA  0.110710                   NA
## max             NA 1.234742e+06   NA 12.131990                   NA
## range           NA 1.230791e+06   NA 12.021280                   NA
## sum             NA 5.734939e+06   NA 33.746020                   NA
## median          NA 4.421560e+05   NA  2.218140                   NA
## mean            NA 5.734939e+05   NA  3.374602                   NA
## SE.mean         NA 1.509075e+05   NA  1.182606                   NA
## CI.mean.0.95    NA 3.413764e+05   NA  2.675240                   NA
## var             NA 2.277307e+11   NA 13.985568                   NA
## std.dev         NA 4.772114e+05   NA  3.739728                   NA
## coef.var        NA 8.321123e-01   NA  1.108198                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      1.000000e+01  1.000000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.844680e+01 -7.257380e+01          NA          NA             NA
## max          1.873350e+01 -7.228530e+01          NA          NA             NA
## range        2.867000e-01  2.885000e-01          NA          NA             NA
## sum          1.852724e+02 -7.238491e+02          NA          NA             NA
## median       1.851405e+01 -7.237735e+01          NA          NA             NA
## mean         1.852724e+01 -7.238491e+01          NA          NA             NA
## SE.mean      2.447308e-02  2.769416e-02          NA          NA             NA
## CI.mean.0.95 5.536196e-02  6.264854e-02          NA          NA             NA
## var          5.989318e-03  7.669665e-03          NA          NA             NA
## std.dev      7.739069e-02  8.757663e-02          NA          NA             NA
## coef.var     4.177130e-03 -1.209874e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  9.0000000          NA
## nbr.null                 NA      NA         NA        0  0.0000000          NA
## nbr.na                   NA      NA         NA       10  1.0000000          NA
## min                      NA      NA         NA      Inf  2.0000000          NA
## max                      NA      NA         NA     -Inf 23.0000000          NA
## range                    NA      NA         NA     -Inf 21.0000000          NA
## sum                      NA      NA         NA        0 65.0000000          NA
## median                   NA      NA         NA       NA  4.0000000          NA
## mean                     NA      NA         NA      NaN  7.2222222          NA
## SE.mean                  NA      NA         NA       NA  2.2838672          NA
## CI.mean.0.95             NA      NA         NA      NaN  5.2666072          NA
## var                      NA      NA         NA       NA 46.9444444          NA
## std.dev                  NA      NA         NA       NA  6.8516016          NA
## coef.var                 NA      NA         NA       NA  0.9486833          NA
##              source_link        prop         ypos
## nbr.val               NA  10.0000000   10.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.3280683    4.0325941
## max                   NA  35.9508766   99.2403252
## range                 NA  35.6228083   95.2077312
## sum                   NA 100.0000000  442.1154554
## median                NA   6.5730418   32.7688865
## mean                  NA  10.0000000   44.2115455
## SE.mean               NA   3.5044308   10.8539511
## CI.mean.0.95          NA   7.9275733   24.5533433
## var                   NA 122.8103537 1178.0825517
## std.dev               NA  11.0819833   34.3232072
## coef.var              NA   1.1081983    0.7763404
# Horizontal box plot of the distance series, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Trinidad and Tobago

library(readr)
library(knitr)
# Reload the full landslide catalogue from the course repository.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city".
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the records for Trinidad and Tobago and preview the first rows.
df_TT <- df[which(df$country_name == "Trinidad and Tobago"), ]
knitr::kable(head(df_TT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
224 9/1/07 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 9.11607 NA 11.2415 -60.6742 (11.2415, -60.674199999999999) Landslide Landslide Medium Tropical cyclone Hurricane Felix NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161197580
357 11/17/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 7.33295 NA 11.2965 -60.6312 (11.2965, -60.6312) Landslide Landslide Medium Rain NA NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161237574
390 12/11/07 NA NA Trinidad and Tobago TT Sangre Grande 15968 Sangre Grande 29.28864 NA 10.8410 -61.0550 (10.840999999999999, -61.055) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 3 Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
391 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 8.62938 NA 11.3000 -60.6440 (11.3, -60.643999999999998) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
392 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 2.66802 NA 11.2670 -60.5660 (11.266999999999999, -60.566000000000003) Landslide Landslide Small Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
780 9/7/08 NA NA Trinidad and Tobago TT Diego Martin 8140 Petit Valley 10.61854 NA 10.7603 -61.4578 (10.760300000000001, -61.457799999999999) Landslide Landslide Medium Downpour NA NA NA NA http://www.newsday.co.tt/news/0,85847.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Bars of distance for the country, coloured by state and placed side by side.
ggplot(df_TT, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the state segments stacked into a
# single bar for the country.
ggplot(df_TT, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent into polar coordinates along y gives a pie
# chart of distance by state.
ggplot(df_TT, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: midpoint of each record's slice along the stacked axis, used to
#       position its percentage label.
df_TT <- df_TT %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# "Set4" is not a ColorBrewer palette, and the fallback palette has fewer
# colours than there are states. Interpolate the 12 colours of "Set3" so that
# every state gets its own fill colour.
state_colours <- colorRampPalette(brewer_pal(palette = "Set3")(12))(
  n_distinct(df_TT$state)
)

ggplot(df_TT, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  scale_fill_manual(values = state_colours)
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Wrap the distances in a monthly time-series object that starts in 2008.
# NOTE(review): the values enter the series in the current row order of df_TT,
# not in order of the `date` column — confirm this is intended.
data <- ts(data = df_TT$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
2.64003
16.73194
13.75900
2.63186
9.11607
4.68038
# Draw the series as a green line. The colour has to be given to autoplot();
# inside labs() `colour` is only the title of a colour legend and leaves the
# line unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Per-event distances, labelled with the city of each event.
distance <- df_TT$distance
names(distance) <- df_TT$city
# The chart is titled "by state", so add up the distances of all events that
# share a state: one bar per state instead of one bar per event labelled with
# its city.
distance_by_state <- vapply(split(df_TT$distance, df_TT$state), sum, numeric(1))
pareto.chart(distance_by_state,
             ylab = "distance",
             col = heat.colors(length(distance_by_state)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por estados"
)

##                
## Pareto chart analysis for distance
##                    Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Sangre Grande  33.51924000  33.51924000   6.44876727   6.44876727
##   Sangre Grande  33.10893000  66.62817000   6.36982772  12.81859499
##   Sangre Grande  33.10893000  99.73710000   6.36982772  19.18842271
##   Sangre Grande  30.29383000 130.03093000   5.82823057  25.01665328
##   Sangre Grande  29.28864000 159.31957000   5.63484205  30.65149533
##   Sangre Grande  26.68822000 186.00779000   5.13454719  35.78604252
##   Point Fortin   22.47289000 208.48068000   4.32355977  40.10960229
##   Paradise       16.73194000 225.21262000   3.21905828  43.32866056
##   Paradise       13.75900000 238.97162000   2.64709429  45.97575486
##   Arima          13.34116000 252.31278000   2.56670605  48.54246090
##   Petit Valley   11.75674000 264.06952000   2.26187945  50.80434035
##   Petit Valley   10.61854000 274.68806000   2.04290113  52.84724148
##   Marabella       9.42494000 284.11300000   1.81326440  54.66050588
##   Scarborough     9.11607000 293.22907000   1.75384089  56.41434677
##   Roxborough      8.62938000 301.85845000   1.66020659  58.07455336
##   Tabaquite       8.55164000 310.41009000   1.64525019  59.71980356
##   Roxborough      8.44112000 318.85121000   1.62398725  61.34379081
##   Princes Town    8.41931000 327.27052000   1.61979122  62.96358203
##   Roxborough      8.24676000 335.51728000   1.58659432  64.55017636
##   Roxborough      7.87263000 343.38991000   1.51461545  66.06479180
##   Roxborough      7.47816000 350.86807000   1.43872336  67.50351516
##   Mucurapo        7.43310000 358.30117000   1.43005426  68.93356942
##   Laventille      7.37181000 365.67298000   1.41826268  70.35183210
##   Roxborough      7.33295000 373.00593000   1.41078640  71.76261850
##   Mucurapo        7.24469000 380.25062000   1.39380606  73.15642455
##   Petit Valley    6.95807000 387.20869000   1.33866323  74.49508778
##   Tabaquite       6.91642000 394.12511000   1.33065019  75.82573797
##   Scarborough     6.81393000 400.93904000   1.31093213  77.13667010
##   Petit Valley    6.58396000 407.52300000   1.26668820  78.40335830
##   Petit Valley    6.39375000 413.91675000   1.23009369  79.63345199
##   Scarborough     6.35974000 420.27649000   1.22355051  80.85700250
##   Petit Valley    5.91101000 426.18750000   1.13721933  81.99422184
##   Petit Valley    5.73985000 431.92735000   1.10428986  83.09851169
##   Roxborough      5.62092000 437.54827000   1.08140891  84.17992061
##   Scarborough     4.68038000 442.22865000   0.90045840  85.08037901
##   Petit Valley    4.66234000 446.89099000   0.89698769  85.97736670
##   Petit Valley    4.64073000 451.53172000   0.89283014  86.87019684
##   Petit Valley    4.50278000 456.03450000   0.86628994  87.73648678
##   Petit Valley    4.33904000 460.37354000   0.83478799  88.57127477
##   Peñal,          4.21321000 464.58675000   0.81057956  89.38185433
##   Petit Valley    4.00979000 468.59654000   0.77144358  90.15329791
##   Scarborough     3.88123000 472.47777000   0.74670992  90.90000783
##   Siparia         3.75200000 476.22977000   0.72184736  91.62185518
##   Scarborough     3.48176000 479.71153000   0.66985588  92.29171106
##   Roxborough      3.36240000 483.07393000   0.64689220  92.93860327
##   Petit Valley    3.33629000 486.41022000   0.64186890  93.58047217
##   Scarborough     3.22335000 489.63357000   0.62014037  94.20061254
##   Petit Valley    3.08955000 492.72312000   0.59439859  94.79501113
##   Roxborough      2.66802000 495.39114000   0.51330042  95.30831155
##   Paradise        2.64003000 498.03117000   0.50791543  95.81622697
##   Tunapuna        2.63186000 500.66303000   0.50634360  96.32257057
##   Peñal,          2.57071000 503.23374000   0.49457895  96.81714952
##   Port-of-Spain   2.54016000 505.77390000   0.48870143  97.30585096
##   Petit Valley    2.24772000 508.02162000   0.43243890  97.73828985
##   Port-of-Spain   2.15046000 510.17208000   0.41372704  98.15201690
##   Sangre Grande   2.00931000 512.18139000   0.38657119  98.53858809
##   Petit Valley    1.84331000 514.02470000   0.35463445  98.89322254
##   Petit Valley    1.83626000 515.86096000   0.35327810  99.24650064
##   Port-of-Spain   1.07831000 516.93927000   0.20745608  99.45395672
##   San Fernando    0.92162000 517.86089000   0.17731049  99.63126721
##   Roxborough      0.91163000 518.77252000   0.17538851  99.80665573
##   Tabaquite       0.61975000 519.39227000   0.11923372  99.92588944
##   Laventille      0.38521000 519.77748000   0.07411056 100.00000000
# Stem-and-leaf display of the distance column.
stem(df_TT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0111122222333333333344444
##   0 | 55556666677777777788889999
##   1 | 1234
##   1 | 7
##   2 | 2
##   2 | 79
##   3 | 0334
# Print the first six rows of the Trinidad and Tobago subset.
head(df_TT, n = 6)
## # A tibble: 6 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2667 10/31/10 Morning <NA>           Trinidad an~ TT           Tuna~      15067
## 2  4108 12/25/11 <NA>    <NA>           Trinidad an~ TT           Tuna~      15067
## 3  4374 5/30/12  <NA>    <NA>           Trinidad an~ TT           Tuna~      15067
## 4  4919 6/14/13  Morning <NA>           Trinidad an~ TT           Tuna~      17758
## 5   224 9/1/07   <NA>    <NA>           Trinidad an~ TT           Toba~      17000
## 6  2669 10/31/10 <NA>    <NA>           Trinidad an~ TT           Toba~      17000
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first six rows as a formatted table.
df_TT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2667 10/31/10 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 2.64003 NA 10.6660 -61.3832 (10.666, -61.383200000000002) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://stormcarib.com/reports/current/report.php?id=1288616789_49546 0.5079154 0.2539577
4108 12/25/11 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 16.73194 NA 10.7901 -61.2984 (10.790100000000001, -61.298400000000001) Landslide Landslide Large Downpour NA NA 0 NA http://www.guardian.co.tt/news/thursday-december-29-2011/blanchisseuse-residents-cut-road-collapses 3.2190583 2.1174446
4374 5/30/12 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 13.75900 NA 10.7692 -61.4078 (10.7692, -61.407800000000002) Landslide Landslide Large Downpour NA NA NA NA http://www.guardian.co.tt/news/2012-05-31/landslides-choke-north-coast-road 2.6470943 5.0505208
4919 6/14/13 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 17758 Tunapuna 2.63186 NA 10.6501 -61.4004 (10.6501, -61.400399999999998) Landslide Landslide Small Downpour NA NA 0 www.newsday.co.tt http://www.newsday.co.tt/news/0,179174.html 0.5063436 6.6272398
224 9/1/07 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 9.11607 NA 11.2415 -60.6742 (11.2415, -60.674199999999999) Landslide Landslide Medium Tropical cyclone Hurricane Felix NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161197580 1.7538409 7.7573320
2669 10/31/10 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 4.68038 NA 11.2246 -60.7428 (11.224600000000001, -60.742800000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.trinidadexpress.com/news/Kamla_tours_affected_Tobago_areas_for_2nd_day-106494398.html 0.9004584 9.0844817
# Stem-and-leaf display of the distances at the default scale.
stem(x = df_TT$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0111122222333333333344444
##   0 | 55556666677777777788889999
##   1 | 1234
##   1 | 7
##   2 | 2
##   2 | 79
##   3 | 0334
# Same display with the plot length doubled, which spreads the stems out.
stem(x = df_TT$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 4699188
##    2 | 022566671234589
##    4 | 0235677679
##    6 | 446890234459
##    8 | 2446614
##   10 | 68
##   12 | 38
##   14 | 
##   16 | 7
##   18 | 
##   20 | 
##   22 | 5
##   24 | 
##   26 | 7
##   28 | 3
##   30 | 3
##   32 | 115

Tablas de frecuencia

library(questionr)
# Frequency of every distinct distance value, sorted by decreasing count, with
# cumulative percentages and a closing "Total" row.
table <- questionr::freq(x = distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
33.10893 2 3.2 3.2 3.2 3.2
0.38521 1 1.6 1.6 4.8 4.8
0.61975 1 1.6 1.6 6.3 6.3
0.91163 1 1.6 1.6 7.9 7.9
0.92162 1 1.6 1.6 9.5 9.5
1.07831 1 1.6 1.6 11.1 11.1
1.83626 1 1.6 1.6 12.7 12.7
1.84331 1 1.6 1.6 14.3 14.3
2.00931 1 1.6 1.6 15.9 15.9
2.15046 1 1.6 1.6 17.5 17.5
2.24772 1 1.6 1.6 19.0 19.0
2.54016 1 1.6 1.6 20.6 20.6
2.57071 1 1.6 1.6 22.2 22.2
2.63186 1 1.6 1.6 23.8 23.8
2.64003 1 1.6 1.6 25.4 25.4
2.66802 1 1.6 1.6 27.0 27.0
3.08955 1 1.6 1.6 28.6 28.6
3.22335 1 1.6 1.6 30.2 30.2
3.33629 1 1.6 1.6 31.7 31.7
3.3624 1 1.6 1.6 33.3 33.3
3.48176 1 1.6 1.6 34.9 34.9
3.752 1 1.6 1.6 36.5 36.5
3.88123 1 1.6 1.6 38.1 38.1
4.00979 1 1.6 1.6 39.7 39.7
4.21321 1 1.6 1.6 41.3 41.3
4.33904 1 1.6 1.6 42.9 42.9
4.50278 1 1.6 1.6 44.4 44.4
4.64073 1 1.6 1.6 46.0 46.0
4.66234 1 1.6 1.6 47.6 47.6
4.68038 1 1.6 1.6 49.2 49.2
5.62092 1 1.6 1.6 50.8 50.8
5.73985 1 1.6 1.6 52.4 52.4
5.91101 1 1.6 1.6 54.0 54.0
6.35974 1 1.6 1.6 55.6 55.6
6.39375 1 1.6 1.6 57.1 57.1
6.58396 1 1.6 1.6 58.7 58.7
6.81393 1 1.6 1.6 60.3 60.3
6.91642 1 1.6 1.6 61.9 61.9
6.95807 1 1.6 1.6 63.5 63.5
7.24469 1 1.6 1.6 65.1 65.1
7.33295 1 1.6 1.6 66.7 66.7
7.37181 1 1.6 1.6 68.3 68.3
7.4331 1 1.6 1.6 69.8 69.8
7.47816 1 1.6 1.6 71.4 71.4
7.87263 1 1.6 1.6 73.0 73.0
8.24676 1 1.6 1.6 74.6 74.6
8.41931 1 1.6 1.6 76.2 76.2
8.44112 1 1.6 1.6 77.8 77.8
8.55164 1 1.6 1.6 79.4 79.4
8.62938 1 1.6 1.6 81.0 81.0
9.11607 1 1.6 1.6 82.5 82.5
9.42494 1 1.6 1.6 84.1 84.1
10.61854 1 1.6 1.6 85.7 85.7
11.75674 1 1.6 1.6 87.3 87.3
13.34116 1 1.6 1.6 88.9 88.9
13.759 1 1.6 1.6 90.5 90.5
16.73194 1 1.6 1.6 92.1 92.1
22.47289 1 1.6 1.6 93.7 93.7
26.68822 1 1.6 1.6 95.2 95.2
29.28864 1 1.6 1.6 96.8 96.8
30.29383 1 1.6 1.6 98.4 98.4
33.51924 1 1.6 1.6 100.0 100.0
Total 63 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  63 obs. of  5 variables:
##  $ n      : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  3.2 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 ...
##  $ val%   : num  3.2 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 ...
##  $ %cum   : num  3.2 4.8 6.3 7.9 9.5 11.1 12.7 14.3 15.9 17.5 ...
##  $ val%cum: num  3.2 4.8 6.3 7.9 9.5 11.1 12.7 14.3 15.9 17.5 ...
# Split the frequency table into category labels and counts, leaving out the
# trailing "Total" row so that it is not drawn as a bar.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], stays
# correct when only one element is present.
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor whose levels follow the numeric value; as plain
# character strings they are sorted alphabetically on a plot axis
# (e.g. "10.6" before "2.0").
df <- data.frame(
  x = factor(names, levels = names[order(as.numeric(names))]),
  y = freqs
)
knitr::kable(df)
x y
33.10893 2
0.38521 1
0.61975 1
0.91163 1
0.92162 1
1.07831 1
1.83626 1
1.84331 1
2.00931 1
2.15046 1
2.24772 1
2.54016 1
2.57071 1
2.63186 1
2.64003 1
2.66802 1
3.08955 1
3.22335 1
3.33629 1
3.3624 1
3.48176 1
3.752 1
3.88123 1
4.00979 1
4.21321 1
4.33904 1
4.50278 1
4.64073 1
4.66234 1
4.68038 1
5.62092 1
5.73985 1
5.91101 1
6.35974 1
6.39375 1
6.58396 1
6.81393 1
6.91642 1
6.95807 1
7.24469 1
7.33295 1
7.37181 1
7.4331 1
7.47816 1
7.87263 1
8.24676 1
8.41931 1
8.44112 1
8.55164 1
8.62938 1
9.11607 1
9.42494 1
10.61854 1
11.75674 1
13.34116 1
13.759 1
16.73194 1
22.47289 1
26.68822 1
29.28864 1
30.29383 1
33.51924 1
library(ggplot2)

# Bar chart of the frequency (y) of each distinct distance value (x);
# the x tick labels are rotated 90 degrees.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even; then use the rounded-down one.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: range of the data divided by the class count, rounded up.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance in steps of w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.38521  5.38521 10.38521 15.38521 20.38521 25.38521 30.38521 35.38521
# cut() builds right-closed intervals (a, b] and the first break is
# min(distance), so without include.lowest = TRUE the smallest observation
# falls outside every class and is turned into NA (one event goes missing
# from the table). include.lowest = TRUE closes the first class on the left.
distance <- cut(distance, bins, include.lowest = TRUE)
# One row per class with absolute, relative and cumulative frequencies.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.385,5.39] 28 0.4516129 28
(5.39,10.4] 22 0.3548387 50
(10.4,15.4] 4 0.0645161 54
(15.4,20.4] 1 0.0161290 55
(20.4,25.4] 1 0.0161290 56
(25.4,30.4] 3 0.0483871 59
(30.4,35.4] 3 0.0483871 62
# Show the structure (column names and types) of the grouped frequency table.
str(Freq_table)
## 'data.frame':    7 obs. of  4 variables:
##  $ distance: Factor w/ 7 levels "(0.385,5.39]",..: 1 2 3 4 5 6 7
##  $ Freq    : int  28 22 4 1 1 3 3
##  $ Rel_Freq: num  0.4516 0.3548 0.0645 0.0161 0.0161 ...
##  $ Cum_Freq: int  28 50 54 55 56 59 62
# Keep only the class labels (x) and their absolute frequencies (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(0.385,5.39] 28
(5.39,10.4] 22
(10.4,15.4] 4
(15.4,20.4] 1
(20.4,25.4] 1
(25.4,30.4] 3
(30.4,35.4] 3
library(ggplot2)

# Bar chart of the number of events (y) in each distance class (x).
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_TT.
pastecs::stat.desc(x = df_TT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      6.300000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.240000e+02   NA   NA             NA           NA           NA
## max          6.301000e+03   NA   NA             NA           NA           NA
## range        6.077000e+03   NA   NA             NA           NA           NA
## sum          1.939570e+05   NA   NA             NA           NA           NA
## median       2.759000e+03   NA   NA             NA           NA           NA
## mean         3.078683e+03   NA   NA             NA           NA           NA
## SE.mean      1.907885e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 3.813809e+02   NA   NA             NA           NA           NA
## var          2.293217e+06   NA   NA             NA           NA           NA
## std.dev      1.514337e+03   NA   NA             NA           NA           NA
## coef.var     4.918783e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 6.300000e+01   NA  63.000000                   NA
## nbr.null        NA 1.000000e+01   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 0.000000e+00   NA   0.385210                   NA
## max             NA 1.572580e+05   NA  33.519240                   NA
## range           NA 1.572580e+05   NA  33.134030                   NA
## sum             NA 1.091655e+06   NA 519.777480                   NA
## median          NA 8.140000e+03   NA   5.911010                   NA
## mean            NA 1.732786e+04   NA   8.250436                   NA
## SE.mean         NA 3.563658e+03   NA   1.064793                   NA
## CI.mean.0.95    NA 7.123651e+03   NA   2.128491                   NA
## var             NA 8.000786e+08   NA  71.428394                   NA
## std.dev         NA 2.828566e+04   NA   8.451532                   NA
## coef.var        NA 1.632381e+00   NA   1.024374                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       63.00000000  6.300000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           10.08760000 -6.186420e+01          NA          NA             NA
## max           11.31760000 -6.054170e+01          NA          NA             NA
## range          1.23000000  1.322500e+00          NA          NA             NA
## sum          679.61780000 -3.856206e+03          NA          NA             NA
## median        10.71420000 -6.139170e+01          NA          NA             NA
## mean          10.78758413 -6.120962e+01          NA          NA             NA
## SE.mean        0.04203106  4.867603e-02          NA          NA             NA
## CI.mean.0.95   0.08401889  9.730199e-02          NA          NA             NA
## var            0.11129642  1.492694e-01          NA          NA             NA
## std.dev        0.33361118  3.863540e-01          NA          NA             NA
## coef.var       0.03092548 -6.311982e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries  fatalities source_name
## nbr.val                  NA      NA         NA        1 44.00000000          NA
## nbr.null                 NA      NA         NA        1 40.00000000          NA
## nbr.na                   NA      NA         NA       62 19.00000000          NA
## min                      NA      NA         NA        0  0.00000000          NA
## max                      NA      NA         NA        0  3.00000000          NA
## range                    NA      NA         NA        0  3.00000000          NA
## sum                      NA      NA         NA        0  6.00000000          NA
## median                   NA      NA         NA        0  0.00000000          NA
## mean                     NA      NA         NA        0  0.13636364          NA
## SE.mean                  NA      NA         NA       NA  0.07687686          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.15503697          NA
## var                      NA      NA         NA       NA  0.26004228          NA
## std.dev                  NA      NA         NA       NA  0.50994341          NA
## coef.var                 NA      NA         NA       NA  3.73958502          NA
##              source_link         prop         ypos
## nbr.val               NA  63.00000000   63.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA   0.07411056    0.2539577
## max                   NA   6.44876727   99.8226828
## range                 NA   6.37465671   99.5687251
## sum                   NA 100.00000000 3733.7070298
## median                NA   1.13721933   66.3125093
## mean                  NA   1.58730159   59.2651909
## SE.mean               NA   0.20485554    3.8579049
## CI.mean.0.95          NA   0.40950038    7.7118419
## var                   NA   2.64384485  937.6560850
## std.dev               NA   1.62599042   30.6211705
## coef.var              NA   1.02437397    0.5166805
# Horizontal green box plot of `data` (object created earlier in the document).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para City of Port of Spain (Trinidad and Tobago)

library(readr)
library(knitr)
# Read the catalog CSV straight from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city".
names(df)[c(7, 9)] <- c("state", "city")
# Keep the rows recorded for Trinidad and Tobago and preview the first ones.
df_TT <- df %>% subset(country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
224 9/1/07 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 9.11607 NA 11.2415 -60.6742 (11.2415, -60.674199999999999) Landslide Landslide Medium Tropical cyclone Hurricane Felix NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161197580
357 11/17/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 7.33295 NA 11.2965 -60.6312 (11.2965, -60.6312) Landslide Landslide Medium Rain NA NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161237574
390 12/11/07 NA NA Trinidad and Tobago TT Sangre Grande 15968 Sangre Grande 29.28864 NA 10.8410 -61.0550 (10.840999999999999, -61.055) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 3 Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
391 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 8.62938 NA 11.3000 -60.6440 (11.3, -60.643999999999998) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
392 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 2.66802 NA 11.2670 -60.5660 (11.266999999999999, -60.566000000000003) Landslide Landslide Small Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
780 9/7/08 NA NA Trinidad and Tobago TT Diego Martin 8140 Petit Valley 10.61854 NA 10.7603 -61.4578 (10.760300000000001, -61.457799999999999) Landslide Landslide Medium Downpour NA NA NA NA http://www.newsday.co.tt/news/0,85847.html
library(dplyr)
# Keep the rows of the full catalog whose state is "City of Port of Spain".
df_TT <- df %>% subset(state == "City of Port of Spain")
df_TT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
891 11/16/08 NA NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 1.07831 NA 10.6760 -61.5160 (10.676, -61.515999999999998) Landslide Complex Medium Downpour NA NA 1 NA http://www.newsday.co.tt/news/0,90097.html
2157 7/29/10 NA NA Trinidad and Tobago TT City of Port of Spain 4342 Mucurapo 7.43310 NA 10.6871 -61.6002 (10.687099999999999, -61.600200000000001) Landslide Landslide Small Downpour NA NA 0 NA http://www.i955fm.com/News.aspx?id=8920
2158 7/29/10 NA NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 2.15046 NA 10.6738 -61.5006 (10.6738, -61.500599999999999) Landslide Landslide Small Downpour NA NA 0 NA http://www.i955fm.com/News.aspx?id=8920
2199 8/5/10 13:45:00 NA Trinidad and Tobago TT City of Port of Spain 4342 Mucurapo 7.24469 NA 10.6857 -61.5989 (10.685700000000001, -61.5989) Landslide Landslide Small Downpour NA NA 0 NA NA
4558 9/16/12 Early morning NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 2.54016 NA 10.6771 -61.4982 (10.677099999999999, -61.498199999999997) Landslide Landslide Medium Downpour NA NA NA NA http://92.f9.78ae.static.theplanet.com/news/2012-09-16/morvant-landslide-leaves-motorists-frustrated

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Distance per state, with one side-by-side bar per city.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Distance per state, with the cities stacked inside each bar.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates on the y axis.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() fails loudly if scales is missing; require() would only warn and
# let the later percent() call fail instead.
library(scales)

# prop: share of the total distance contributed by each row, in percent.
# ypos: mid-point of each row's slice on the cumulative share, used as the
# position of its label.
df_TT <- df_TT %>%
  arrange(desc(city)) %>%
  mutate(prop = distance / sum(distance) * 100) %>%
  mutate(ypos = cumsum(prop) - 0.5 * prop)

# Pie chart with a percentage label on every slice and no legend.
ggplot(df_TT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (it triggers "Unknown palette Set4");
  # the qualitative sets are Set1, Set2 and Set3.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a series with
# 12 observations per year starting in 2008.
data <- ts(data = df_TT$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
1.07831
2.15046
2.54016
7.43310
7.24469
# Line plot of the series with a black-and-white theme.
# Note: inside labs(), `colour` is the title of the colour scale, not the
# colour of the line.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Label every distance with the city of its event; each row stays a
# separate bar, so a city can appear more than once.
distance <- setNames(df_TT$distance, df_TT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                
## Pareto chart analysis for distance
##                  Frequency  Cum.Freq. Percentage Cum.Percent.
##   Mucurapo        7.433100   7.433100  36.353508    36.353508
##   Mucurapo        7.244690  14.677790  35.432040    71.785548
##   Port-of-Spain   2.540160  17.217950  12.423313    84.208861
##   Port-of-Spain   2.150460  19.368410  10.517384    94.726245
##   Port-of-Spain   1.078310  20.446720   5.273755   100.000000
# Stem-and-leaf display of the distances.
stem(df_TT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 1
##   2 | 25
##   4 | 
##   6 | 24
# Print the first rows (at most six) of df_TT.
head(x = df_TT, n = 6L)
## # A tibble: 5 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   891 11/16/08 <NA>  <NA>           Trinidad an~ TT           City~      49031
## 2  2158 7/29/10  <NA>  <NA>           Trinidad an~ TT           City~      49031
## 3  4558 9/16/12  Earl~ <NA>           Trinidad an~ TT           City~      49031
## 4  2157 7/29/10  <NA>  <NA>           Trinidad an~ TT           City~       4342
## 5  2199 8/5/10   13:4~ <NA>           Trinidad an~ TT           City~       4342
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_TT as a formatted table.
df_TT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
891 11/16/08 NA NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 1.07831 NA 10.6760 -61.5160 (10.676, -61.515999999999998) Landslide Complex Medium Downpour NA NA 1 NA http://www.newsday.co.tt/news/0,90097.html 5.273755 2.636878
2158 7/29/10 NA NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 2.15046 NA 10.6738 -61.5006 (10.6738, -61.500599999999999) Landslide Landslide Small Downpour NA NA 0 NA http://www.i955fm.com/News.aspx?id=8920 10.517384 10.532447
4558 9/16/12 Early morning NA Trinidad and Tobago TT City of Port of Spain 49031 Port-of-Spain 2.54016 NA 10.6771 -61.4982 (10.677099999999999, -61.498199999999997) Landslide Landslide Medium Downpour NA NA NA NA http://92.f9.78ae.static.theplanet.com/news/2012-09-16/morvant-landslide-leaves-motorists-frustrated 12.423313 22.002796
2157 7/29/10 NA NA Trinidad and Tobago TT City of Port of Spain 4342 Mucurapo 7.43310 NA 10.6871 -61.6002 (10.687099999999999, -61.600200000000001) Landslide Landslide Small Downpour NA NA 0 NA http://www.i955fm.com/News.aspx?id=8920 36.353508 46.391206
2199 8/5/10 13:45:00 NA Trinidad and Tobago TT City of Port of Spain 4342 Mucurapo 7.24469 NA 10.6857 -61.5989 (10.685700000000001, -61.5989) Landslide Landslide Small Downpour NA NA 0 NA NA 35.432040 82.283980
# Stem-and-leaf display of the distances.
stem(df_TT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 1
##   2 | 25
##   4 | 
##   6 | 24
# Same display with scale = 2, which spreads the values over more stems.
stem(x = df_TT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 1
##   2 | 25
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 24

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values, sorted by decreasing count, with
# cumulative percentages and a final "Total" row.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
1.07831 1 20 20 20 20
2.15046 1 20 20 40 40
2.54016 1 20 20 60 60
7.24469 1 20 20 80 80
7.4331 1 20 20 100 100
Total 5 100 100 100 100
# Show the structure (classes, column names and types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
# Build the data frame used for plotting from the frequency table: the
# categories are its row names and the counts are its column `n`.
x <- row.names(table)
y <- table$n
# The last row of the table is the "Total" summary row. head(, -1) drops it
# and, unlike 1:(length(x) - 1), returns an empty vector (instead of the
# "Total" row itself) when that row is the only one.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.07831 1
2.15046 1
2.54016 1
7.24469 1
7.4331 1
library(ggplot2)
# Bar chart of the frequency (y) of each distinct distance value (x);
# the x tick labels are rotated 90 degrees.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even; then use the rounded-down one.
n_clases <- if (n_sturgesc %% 2 != 0) n_sturgesc else n_sturgesf

# Class width: range of the data divided by the class count, rounded up.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance in steps of w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  1.07831  4.07831  7.07831 10.07831
# cut() builds right-closed intervals (a, b] and the first break is
# min(distance), so without include.lowest = TRUE the smallest observation
# falls outside every class and is turned into NA (one event goes missing
# from the table). include.lowest = TRUE closes the first class on the left.
distance <- cut(distance, bins, include.lowest = TRUE)
# One row per class with absolute, relative and cumulative frequencies.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.08,4.08] 2 0.5 2
(4.08,7.08] 0 0.0 2
(7.08,10.1] 2 0.5 4
# Show the structure (column names and types) of the grouped frequency table.
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(1.08,4.08]",..: 1 2 3
##  $ Freq    : int  2 0 2
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  2 2 4
# Keep only the class labels (x) and their absolute frequencies (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(1.08,4.08] 2
(4.08,7.08] 0
(7.08,10.1] 2
library(ggplot2)

# Bar chart of the number of events (y) in each distance class (x).
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_TT.
pastecs::stat.desc(x = df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      5.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.910000e+02   NA   NA             NA           NA           NA
## max          4.558000e+03   NA   NA             NA           NA           NA
## range        3.667000e+03   NA   NA             NA           NA           NA
## sum          1.196300e+04   NA   NA             NA           NA           NA
## median       2.158000e+03   NA   NA             NA           NA           NA
## mean         2.392600e+03   NA   NA             NA           NA           NA
## SE.mean      5.954740e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.653301e+03   NA   NA             NA           NA           NA
## var          1.772946e+06   NA   NA             NA           NA           NA
## std.dev      1.331520e+03   NA   NA             NA           NA           NA
## coef.var     5.565160e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 5.000000e+00   NA  5.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 4.342000e+03   NA  1.0783100                   NA
## max             NA 4.903100e+04   NA  7.4331000                   NA
## range           NA 4.468900e+04   NA  6.3547900                   NA
## sum             NA 1.557770e+05   NA 20.4467200                   NA
## median          NA 4.903100e+04   NA  2.5401600                   NA
## mean            NA 3.115540e+04   NA  4.0893440                   NA
## SE.mean         NA 1.094652e+04   NA  1.3483783                   NA
## CI.mean.0.95    NA 3.039242e+04   NA  3.7436983                   NA
## var             NA 5.991320e+08   NA  9.0906201                   NA
## std.dev         NA 2.447717e+04   NA  3.0150655                   NA
## coef.var        NA 7.856479e-01   NA  0.7372981                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      5.000000e+00  5.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.067380e+01 -6.160020e+01          NA          NA             NA
## max          1.068710e+01 -6.149820e+01          NA          NA             NA
## range        1.330000e-02  1.020000e-01          NA          NA             NA
## sum          5.339970e+01 -3.077139e+02          NA          NA             NA
## median       1.067710e+01 -6.151600e+01          NA          NA             NA
## mean         1.067994e+01 -6.154278e+01          NA          NA             NA
## SE.mean      2.699370e-03  2.337756e-02          NA          NA             NA
## CI.mean.0.95 7.494653e-03  6.490652e-02          NA          NA             NA
## var          3.643300e-05  2.732552e-03          NA          NA             NA
## std.dev      6.035975e-03  5.227382e-02          NA          NA             NA
## coef.var     5.651694e-04 -8.493899e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  4.0000000          NA
## nbr.null                 NA      NA         NA        0  3.0000000          NA
## nbr.na                   NA      NA         NA        5  1.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  1.0000000          NA
## range                    NA      NA         NA     -Inf  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA       NA  0.0000000          NA
## mean                     NA      NA         NA      NaN  0.2500000          NA
## SE.mean                  NA      NA         NA       NA  0.2500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.7956116          NA
## var                      NA      NA         NA       NA  0.2500000          NA
## std.dev                  NA      NA         NA       NA  0.5000000          NA
## coef.var                 NA      NA         NA       NA  2.0000000          NA
##              source_link        prop        ypos
## nbr.val               NA   5.0000000    5.000000
## nbr.null              NA   0.0000000    0.000000
## nbr.na                NA   0.0000000    0.000000
## min                   NA   5.2737554    2.636878
## max                   NA  36.3535080   82.283980
## range                 NA  31.0797526   79.647102
## sum                   NA 100.0000000  163.847307
## median                NA  12.4233129   22.002796
## mean                  NA  20.0000000   32.769461
## SE.mean               NA   6.5945946   14.414607
## CI.mean.0.95          NA  18.3095299   40.021365
## var                   NA 217.4433905 1038.904493
## std.dev               NA  14.7459618   32.232041
## coef.var              NA   0.7372981    0.983600
# Horizontal green box plot of `data` (object created earlier in the document).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Tunapuna/Piarco (Trinidad and Tobago)

library(readr)
library(knitr)
# Read the catalog CSV straight from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city".
names(df)[c(7, 9)] <- c("state", "city")
# Keep the rows recorded for Trinidad and Tobago and preview the first ones.
df_TT <- df %>% subset(country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
224 9/1/07 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 9.11607 NA 11.2415 -60.6742 (11.2415, -60.674199999999999) Landslide Landslide Medium Tropical cyclone Hurricane Felix NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161197580
357 11/17/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 7.33295 NA 11.2965 -60.6312 (11.2965, -60.6312) Landslide Landslide Medium Rain NA NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161237574
390 12/11/07 NA NA Trinidad and Tobago TT Sangre Grande 15968 Sangre Grande 29.28864 NA 10.8410 -61.0550 (10.840999999999999, -61.055) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 3 Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
391 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 8.62938 NA 11.3000 -60.6440 (11.3, -60.643999999999998) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
392 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 2.66802 NA 11.2670 -60.5660 (11.266999999999999, -60.566000000000003) Landslide Landslide Small Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
780 9/7/08 NA NA Trinidad and Tobago TT Diego Martin 8140 Petit Valley 10.61854 NA 10.7603 -61.4578 (10.760300000000001, -61.457799999999999) Landslide Landslide Medium Downpour NA NA NA NA http://www.newsday.co.tt/news/0,85847.html
library(dplyr)
# Keep the rows of the full catalog whose state is "Tunapuna/Piarco".
df_TT <- df %>% subset(state == "Tunapuna/Piarco")
df_TT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2667 10/31/10 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 2.64003 NA 10.6660 -61.3832 (10.666, -61.383200000000002) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://stormcarib.com/reports/current/report.php?id=1288616789_49546
4108 12/25/11 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 16.73194 NA 10.7901 -61.2984 (10.790100000000001, -61.298400000000001) Landslide Landslide Large Downpour NA NA 0 NA http://www.guardian.co.tt/news/thursday-december-29-2011/blanchisseuse-residents-cut-road-collapses
4374 5/30/12 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 13.75900 NA 10.7692 -61.4078 (10.7692, -61.407800000000002) Landslide Landslide Large Downpour NA NA NA NA http://www.guardian.co.tt/news/2012-05-31/landslides-choke-north-coast-road
4919 6/14/13 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 17758 Tunapuna 2.63186 NA 10.6501 -61.4004 (10.6501, -61.400399999999998) Landslide Landslide Small Downpour NA NA 0 www.newsday.co.tt http://www.newsday.co.tt/news/0,179174.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Distance per state, with one side-by-side bar per city.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Distance per state, with the cities stacked inside each bar.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates on the y axis.
ggplot(data = df_TT, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() fails loudly if scales is missing; require() would only warn and
# let the later percent() call fail instead.
library(scales)

# prop: share of the total distance contributed by each row, in percent.
# ypos: mid-point of each row's slice on the cumulative share, used as the
# position of its label.
df_TT <- df_TT %>%
  arrange(desc(city)) %>%
  mutate(prop = distance / sum(distance) * 100) %>%
  mutate(ypos = cumsum(prop) - 0.5 * prop)

# Pie chart with a percentage label on every slice and no legend.
ggplot(df_TT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (it triggers "Unknown palette Set4");
  # the qualitative sets are Set1, Set2 and Set3.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a series with
# 12 observations per year starting in 2008.
data <- ts(data = df_TT$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
2.63186
2.64003
16.73194
13.75900
# Line plot of the series with a black-and-white theme.
# Note: inside labs(), `colour` is the title of the colour scale, not the
# colour of the line.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Label every distance with the city of its event; each row stays a
# separate bar, so a city can appear more than once.
distance <- setNames(df_TT$distance, df_TT$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##           
## Pareto chart analysis for distance
##             Frequency  Cum.Freq. Percentage Cum.Percent.
##   Paradise  16.731940  16.731940  46.785839    46.785839
##   Paradise  13.759000  30.490940  38.472906    85.258745
##   Paradise   2.640030  33.130970   7.382050    92.640795
##   Tunapuna   2.631860  35.762830   7.359205   100.000000
# Stem-and-leaf display of the distances.
stem(df_TT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 33
##   0 | 
##   1 | 4
##   1 | 7
# Print the first rows (at most six) of df_TT.
head(x = df_TT, n = 6L)
## # A tibble: 4 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  4919 6/14/13  Morning <NA>           Trinidad an~ TT           Tuna~      17758
## 2  2667 10/31/10 Morning <NA>           Trinidad an~ TT           Tuna~      15067
## 3  4108 12/25/11 <NA>    <NA>           Trinidad an~ TT           Tuna~      15067
## 4  4374 5/30/12  <NA>    <NA>           Trinidad an~ TT           Tuna~      15067
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the first rows of df_TT as a formatted table.
df_TT %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4919 6/14/13 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 17758 Tunapuna 2.63186 NA 10.6501 -61.4004 (10.6501, -61.400399999999998) Landslide Landslide Small Downpour NA NA 0 www.newsday.co.tt http://www.newsday.co.tt/news/0,179174.html 7.359205 3.679603
2667 10/31/10 Morning NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 2.64003 NA 10.6660 -61.3832 (10.666, -61.383200000000002) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://stormcarib.com/reports/current/report.php?id=1288616789_49546 7.382050 11.050230
4108 12/25/11 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 16.73194 NA 10.7901 -61.2984 (10.790100000000001, -61.298400000000001) Landslide Landslide Large Downpour NA NA 0 NA http://www.guardian.co.tt/news/thursday-december-29-2011/blanchisseuse-residents-cut-road-collapses 46.785839 38.134175
4374 5/30/12 NA NA Trinidad and Tobago TT Tunapuna/Piarco 15067 Paradise 13.75900 NA 10.7692 -61.4078 (10.7692, -61.407800000000002) Landslide Landslide Large Downpour NA NA NA NA http://www.guardian.co.tt/news/2012-05-31/landslides-choke-north-coast-road 38.472906 80.763547
# Stem-and-leaf display of the distance column.
stem(df_TT[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 33
##   0 | 
##   1 | 4
##   1 | 7
# Stem-and-leaf display again, with the plot length doubled.
stem(df_TT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    2 | 66
##    4 | 
##    6 | 
##    8 | 
##   10 | 
##   12 | 8
##   14 | 
##   16 | 7

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values, sorted by decreasing count,
# with cumulative percentages and a closing total row.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
2.63186 1 25 25 25 25
2.64003 1 25 25 50 50
13.759 1 25 25 75 75
16.73194 1 25 25 100 100
Total 4 100 100 100 100
# Inspect the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Category labels (row names) and counts taken from the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(v, -1) stays correct when only one
# row is present, where 1:(length(v) - 1) counts down as c(1, 0) and
# would keep that row.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column data frame used by the bar chart.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.63186 1
2.64003 1
13.759 1
16.73194 1
library(ggplot2)
# One bar per distinct distance value; bar height is its frequency.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule, k = 1 + log2(n), written as a ratio of natural logs.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even,
# otherwise use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance in steps of w; the upper
# limit is padded by w so the maximum is covered.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  2.63186  7.63186 12.63186 17.63186
# Assign each distance to its class. The first break equals min(distance)
# and cut() builds (a, b] intervals by default, so without
# include.lowest = TRUE the smallest observation falls outside every class
# and disappears from the counts.
# NOTE(review): re-knit after this change; tables rendered from the earlier
# run omit the minimum observation.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(2.63,7.63] 1 0.3333333 1
(7.63,12.6] 0 0.0000000 1
(12.6,17.6] 2 0.6666667 3
# Inspect the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(2.63,7.63]",..: 1 2 3
##  $ Freq    : int  1 0 2
##  $ Rel_Freq: num  0.333 0 0.667
##  $ Cum_Freq: int  1 1 3
# Class labels and counts, reshaped into the x/y layout used for plotting.
df <- data.frame(
  x = Freq_table[["distance"]],
  y = Freq_table[["Freq"]]
)
knitr::kable(x = df)
x y
(2.63,7.63] 1
(7.63,12.6] 0
(12.6,17.6] 2
library(ggplot2)

# Bar chart of the grouped frequencies: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset.
stat.desc(x = df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.667000e+03   NA   NA             NA           NA           NA
## max          4.919000e+03   NA   NA             NA           NA           NA
## range        2.252000e+03   NA   NA             NA           NA           NA
## sum          1.606800e+04   NA   NA             NA           NA           NA
## median       4.241000e+03   NA   NA             NA           NA           NA
## mean         4.017000e+03   NA   NA             NA           NA           NA
## SE.mean      4.806102e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.529516e+03   NA   NA             NA           NA           NA
## var          9.239447e+05   NA   NA             NA           NA           NA
## std.dev      9.612204e+02   NA   NA             NA           NA           NA
## coef.var     2.392881e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.506700e+04   NA  2.6318600                   NA
## max             NA 1.775800e+04   NA 16.7319400                   NA
## range           NA 2.691000e+03   NA 14.1000800                   NA
## sum             NA 6.295900e+04   NA 35.7628300                   NA
## median          NA 1.506700e+04   NA  8.1995150                   NA
## mean            NA 1.573975e+04   NA  8.9407075                   NA
## SE.mean         NA 6.727500e+02   NA  3.6902952                   NA
## CI.mean.0.95    NA 2.140991e+03   NA 11.7441662                   NA
## var             NA 1.810370e+06   NA 54.4731134                   NA
## std.dev         NA 1.345500e+03   NA  7.3805903                   NA
## coef.var        NA 8.548420e-02   NA  0.8255041                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.000000000  4.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          10.650100000 -6.140780e+01          NA          NA             NA
## max          10.790100000 -6.129840e+01          NA          NA             NA
## range         0.140000000  1.094000e-01          NA          NA             NA
## sum          42.875400000 -2.454898e+02          NA          NA             NA
## median       10.717600000 -6.139180e+01          NA          NA             NA
## mean         10.718850000 -6.137245e+01          NA          NA             NA
## SE.mean       0.035509823  2.521539e-02          NA          NA             NA
## CI.mean.0.95  0.113008104  8.024662e-02          NA          NA             NA
## var           0.005043790  2.543263e-03          NA          NA             NA
## std.dev       0.071019645  5.043078e-02          NA          NA             NA
## coef.var      0.006625678 -8.217169e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          3          NA
## nbr.null                 NA      NA         NA        0          3          NA
## nbr.na                   NA      NA         NA        4          1          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA   4.0000000    4.000000
## nbr.null              NA   0.0000000    0.000000
## nbr.na                NA   0.0000000    0.000000
## min                   NA   7.3592051    3.679603
## max                   NA  46.7858388   80.763547
## range                 NA  39.4266337   77.083944
## sum                   NA 100.0000000  133.627554
## median                NA  22.9274781   24.592202
## mean                  NA  25.0000000   33.406889
## SE.mean               NA  10.3188007   17.436867
## CI.mean.0.95          NA  32.8390292   55.491893
## var                   NA 425.9105928 1216.177346
## std.dev               NA  20.6376014   34.873734
## coef.var              NA   0.8255041    1.043908
# Horizontal box plot of the series stored in `data`.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para City of San Fernando (Trinidad and Tobago)

library(readr)
library(knitr)
# Load the catalog CSV directly from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the seventh and ninth columns short names.
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep the rows recorded for Trinidad and Tobago and preview them.
df_TT <- subset(df, subset = country_name == "Trinidad and Tobago")
knitr::kable(x = head(df_TT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
224 9/1/07 NA NA Trinidad and Tobago TT Tobago 17000 Scarborough 9.11607 NA 11.2415 -60.6742 (11.2415, -60.674199999999999) Landslide Landslide Medium Tropical cyclone Hurricane Felix NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161197580
357 11/17/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 7.33295 NA 11.2965 -60.6312 (11.2965, -60.6312) Landslide Landslide Medium Rain NA NA NA Trinadad Express http://www.trinidadexpress.com/index.pl/article_news?id=161237574
390 12/11/07 NA NA Trinidad and Tobago TT Sangre Grande 15968 Sangre Grande 29.28864 NA 10.8410 -61.0550 (10.840999999999999, -61.055) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 3 Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
391 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 8.62938 NA 11.3000 -60.6440 (11.3, -60.643999999999998) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
392 12/11/07 NA NA Trinidad and Tobago TT Eastern Tobago 0 Roxborough 2.66802 NA 11.2670 -60.5660 (11.266999999999999, -60.566000000000003) Landslide Landslide Small Tropical cyclone Tropical Storm Olga NA NA Trinidad and Tobago’s Newsday http://www.newsday.co.tt/news/0,69681.html
780 9/7/08 NA NA Trinidad and Tobago TT Diego Martin 8140 Petit Valley 10.61854 NA 10.7603 -61.4578 (10.760300000000001, -61.457799999999999) Landslide Landslide Medium Downpour NA NA NA NA http://www.newsday.co.tt/news/0,85847.html
library(dplyr)
# Keep only the rows whose state is "City of San Fernando" and preview them.
df_TT <- subset(df, subset = state == "City of San Fernando")
knitr::kable(x = head(df_TT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1117 8/21/09 NA NA Trinidad and Tobago TT City of San Fernando 26700 Marabella 9.42494 NA 10.3717 -61.3917 (10.371700000000001, -61.3917) Landslide Landslide Small Downpour NA NA 0 NA http://www.trinidadexpress.com/index.pl/article_news?id=161522186
2300 8/21/10 Morning NA Trinidad and Tobago TT City of San Fernando 55419 San Fernando 0.92162 NA 10.2753 -61.4689 (10.2753, -61.468899999999998) Landslide Landslide Medium Downpour NA NA 0 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance per state, one bar colour per city.
ggplot(df_TT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance per state, with the cities stacked on top of each other.
ggplot(df_TT, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar bent around the y axis.
ggplot(df_TT, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each row (in percent), and the
# midpoint of each slice on the cumulative axis, used to position its label.
df_TT <- df_TT %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: a single stacked bar drawn in polar coordinates.
ggplot(df_TT, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only triggered an
  # "Unknown palette" warning; the qualitative sets are Set1, Set2 and Set3.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series
# (12 observations per year) starting in 2008.
data <- ts(data = df_TT$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
0.92162
9.42494
# Line plot of the series with a black-and-white theme.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with the city of each record.
distance <- setNames(df_TT$distance, df_TT$city)
# Pareto chart with the cumulative-percentage axis marked every 10 %.
pareto.chart(distance,
             main = "Grafico de Pareto por ciudades",
             ylab = "distance",
             ylab2 = "Porcentaje acumulado",
             cumperc = seq(0, 100, by = 10),
             col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                 Frequency  Cum.Freq. Percentage Cum.Percent.
##   Marabella      9.424940   9.424940  91.092498    91.092498
##   San Fernando   0.921620  10.346560   8.907502   100.000000
# Stem-and-leaf display of the distance column.
stem(df_TT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 9
##   2 | 
##   4 | 
##   6 | 
##   8 | 4
# Print the first rows (at most six) of the subset.
head(df_TT, n = 6L)
## # A tibble: 2 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2300 8/21/10 Morning <NA>           Trinidad an~ TT           City~      55419
## 2  1117 8/21/09 <NA>    <NA>           Trinidad an~ TT           City~      26700
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(x = head(df_TT, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2300 8/21/10 Morning NA Trinidad and Tobago TT City of San Fernando 55419 San Fernando 0.92162 NA 10.2753 -61.4689 (10.2753, -61.468899999999998) Landslide Landslide Medium Downpour NA NA 0 NA NA 8.907502 4.453751
1117 8/21/09 NA NA Trinidad and Tobago TT City of San Fernando 26700 Marabella 9.42494 NA 10.3717 -61.3917 (10.371700000000001, -61.3917) Landslide Landslide Small Downpour NA NA 0 NA http://www.trinidadexpress.com/index.pl/article_news?id=161522186 91.092498 54.453751
# Stem-and-leaf display of the distance column.
stem(df_TT[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 9
##   2 | 
##   4 | 
##   6 | 
##   8 | 4
# Stem-and-leaf display again, with the plot length doubled.
stem(df_TT[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 9
##   1 | 
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 
##   9 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values, sorted by decreasing count,
# with cumulative percentages and a closing total row.
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
0.92162 1 50 50 50 50
9.42494 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Category labels (row names) and counts taken from the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(v, -1) stays correct when only one
# row is present, where 1:(length(v) - 1) counts down as c(1, 0) and
# would keep that row.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column data frame used by the bar chart.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.92162 1
9.42494 1
library(ggplot2)
# One bar per distinct distance value; bar height is its frequency.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule, k = 1 + log2(n), written as a ratio of natural logs.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even,
# otherwise use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance in steps of w; the upper
# limit is padded by w so the maximum is covered.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.92162  5.92162 10.92162
# Assign each distance to its class. The first break equals min(distance)
# and cut() builds (a, b] intervals by default, so without
# include.lowest = TRUE the smallest observation falls outside every class
# and disappears from the counts.
# NOTE(review): re-knit after this change; tables rendered from the earlier
# run omit the minimum observation.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.922,5.92] 0 0 0
(5.92,10.9] 1 1 1
# Inspect the structure of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.922,5.92]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Class labels and counts, reshaped into the x/y layout used for plotting.
df <- data.frame(
  x = Freq_table[["distance"]],
  y = Freq_table[["Freq"]]
)
knitr::kable(x = df)
x y
(0.922,5.92] 0
(5.92,10.9] 1
library(ggplot2)

# Bar chart of the grouped frequencies: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset.
stat.desc(x = df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.117000e+03   NA   NA             NA           NA           NA
## max          2.300000e+03   NA   NA             NA           NA           NA
## range        1.183000e+03   NA   NA             NA           NA           NA
## sum          3.417000e+03   NA   NA             NA           NA           NA
## median       1.708500e+03   NA   NA             NA           NA           NA
## mean         1.708500e+03   NA   NA             NA           NA           NA
## SE.mean      5.915000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 7.515720e+03   NA   NA             NA           NA           NA
## var          6.997445e+05   NA   NA             NA           NA           NA
## std.dev      8.365073e+02   NA   NA             NA           NA           NA
## coef.var     4.896151e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA
## min             NA 2.670000e+04   NA  0.921620                   NA
## max             NA 5.541900e+04   NA  9.424940                   NA
## range           NA 2.871900e+04   NA  8.503320                   NA
## sum             NA 8.211900e+04   NA 10.346560                   NA
## median          NA 4.105950e+04   NA  5.173280                   NA
## mean            NA 4.105950e+04   NA  5.173280                   NA
## SE.mean         NA 1.435950e+04   NA  4.251660                   NA
## CI.mean.0.95    NA 1.824547e+05   NA 54.022462                   NA
## var             NA 4.123905e+08   NA 36.153226                   NA
## std.dev         NA 2.030740e+04   NA  6.012755                   NA
## coef.var        NA 4.945847e-01   NA  1.162271                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          10.275300000 -6.146890e+01          NA          NA             NA
## max          10.371700000 -6.139170e+01          NA          NA             NA
## range         0.096400000  7.720000e-02          NA          NA             NA
## sum          20.647000000 -1.228606e+02          NA          NA             NA
## median       10.323500000 -6.143030e+01          NA          NA             NA
## mean         10.323500000 -6.143030e+01          NA          NA             NA
## SE.mean       0.048200000  3.860000e-02          NA          NA             NA
## CI.mean.0.95  0.612439068  4.904595e-01          NA          NA             NA
## var           0.004646480  2.979920e-03          NA          NA             NA
## std.dev       0.068165094  5.458864e-02          NA          NA             NA
## coef.var      0.006602905 -8.886273e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          2          NA
## nbr.null                 NA      NA         NA        0          2          NA
## nbr.na                   NA      NA         NA        2          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    8.907502    4.453751
## max                   NA   91.092498   54.453751
## range                 NA   82.184997   50.000000
## sum                   NA  100.000000   58.907502
## median                NA   50.000000   29.453751
## mean                  NA   50.000000   29.453751
## SE.mean               NA   41.092498   25.000000
## CI.mean.0.95          NA  522.129697  317.655118
## var                   NA 3377.186846 1250.000000
## std.dev               NA   58.113569   35.355339
## coef.var              NA    1.162271    1.200368
# Horizontal box plot of the series stored in `data`.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Dominican Republic

library(readr)
library(knitr)
# Load the catalog CSV directly from its GitHub URL.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the seventh and ninth columns short names.
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep the rows recorded for the Dominican Republic and preview them.
df_DR <- subset(df, subset = country_name == "Dominican Republic")
knitr::kable(x = head(df_DR, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
724 8/17/08 NA NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 NA 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance for the country, one bar colour per state.
ggplot(df_DR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: distance for the country, with the states stacked on top of each other.
ggplot(df_DR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar bent around the y axis.
ggplot(df_DR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each row (in percent), and the
# midpoint of each slice on the cumulative axis, used to position its label.
df_DR <- df_DR %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: a single stacked bar drawn in polar coordinates.
ggplot(df_DR, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and only triggered an
  # "Unknown palette" warning; the qualitative sets are Set1, Set2 and Set3.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series
# (12 observations per year) starting in 2008.
data <- ts(data = df_DR$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
3.98059
1.10868
4.86398
4.31327
2.72462
1.72138
# Line plot of the series with a black-and-white theme.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled with the city of each record.
distance <- df_DR$distance
names(distance) <- df_DR$city
# Pareto chart with the cumulative-percentage axis marked every 10 %.
# The bars are labelled by city, so the title says "ciudades" (it
# previously read "estados", which did not match the labels).
pareto.chart(distance,
             ylab = "distance",
             col = heat.colors(length(distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por ciudades"
)

##                             
## Pareto chart analysis for distance
##                               Frequency  Cum.Freq. Percentage Cum.Percent.
##   La Agustina                  5.710580   5.710580  16.167021    16.167021
##   Pedro García                 4.863980  10.574560  13.770242    29.937264
##   Tamboril                     4.313270  14.887830  12.211146    42.148410
##   Santo Domingo Este           3.980590  18.868420  11.269308    53.417718
##   Río Verde Abajo              3.726370  22.594790  10.549595    63.967313
##   San José de Las Matas        2.724620  25.319410   7.713576    71.680888
##   Bajos de Haina               1.721380  27.040790   4.873338    76.554226
##   San Carlos                   1.702980  28.743770   4.821247    81.375473
##   Luperón                      1.548850  30.292620   4.384895    85.760367
##   Puerto Plata                 1.196360  31.488980   3.386973    89.147340
##   Santiago de los Caballeros   1.108680  32.597660   3.138745    92.286085
##   Altamira                     0.885000  33.482660   2.505492    94.791577
##   Sabana de La Mar             0.752840  34.235500   2.131339    96.922916
##   Santo Domingo                0.557210  34.792710   1.577498    98.500413
##   Constanza                    0.529690  35.322400   1.499587   100.000000
# Stem-and-leaf display of the distance column.
stem(df_DR[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 568912577
##   2 | 77
##   4 | 0397
# Print the first six rows of the subset.
head(df_DR, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6706 8/3/14   <NA>  <NA>           Dominican R~ DO           Sant~          0
## 2   388 12/11/07 <NA>  <NA>           Dominican R~ DO           Sant~    1200000
## 3   984 2/12/09  <NA>  <NA>           Dominican R~ DO           Sant~       1457
## 4  1178 9/20/09  <NA>  <NA>           Dominican R~ DO           Sant~      23304
## 5  3569 6/3/11   <NA>  <NA>           Dominican R~ DO           Sant~       9853
## 6   333 10/29/07 <NA>  <NA>           Dominican R~ DO           San ~      66784
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
knitr::kable(x = head(df_DR, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6706 8/3/14 NA NA Dominican Republic DO Santo Domingo 0 Santo Domingo Este 3.98059 Urban area 18.5225 -69.8693 (18.522500000000001, -69.869299999999996) Landslide Landslide Medium Tropical cyclone Bertha 0 0 Zona Oriental http://www.delazonaoriental.net/2014/08/03/derrumbes-y-deslizamientos-de-tierra-afectan-varias-viviendas-en-la-barquita-tras-paso-tormenta-bertha/ 11.269308 5.634654
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/ 3.138745 12.838680
984 2/12/09 NA NA Dominican Republic DO Santiago 1457 Pedro García 4.86398 NA 19.5500 -70.6390 (19.55, -70.638999999999996) Landslide Mudslide Medium Downpour NA NA 0 NA http://us.puerto-plata-live.com/puerto-plata/news/year-2009/february-2009.html 13.770242 21.293174
1178 9/20/09 NA NA Dominican Republic DO Santiago 23304 Tamboril 4.31327 NA 19.5167 -70.5866 (19.5167, -70.586600000000004) Landslide Landslide Small Downpour NA NA NA NA http://www.laht.com/article.asp?CategoryId=14092&ArticleId=327347 12.211146 34.283868
3569 6/3/11 NA NA Dominican Republic DO Santiago 9853 San José de Las Matas 2.72462 NA 19.3556 -70.9189 (19.355599999999999, -70.918899999999994) Landslide Landslide Medium Downpour NA NA 1 NA http://english.peopledaily.com.cn/90001/90777/90852/7402423.html 7.713576 44.246229
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007 4.873338 50.539686
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 568912577
##   2 | 77
##   4 | 0397
stem(df_DR$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 5689
##   1 | 12577
##   2 | 7
##   3 | 7
##   4 | 039
##   5 | 7

Tablas de frecuencia

# Frequency table of the values in `distance` (counts, percentages and
# cumulative percentages) with a closing "Total" row, shown as a Markdown table.
# The result keeps the name `table` because the following chunks read it.
library(questionr)
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.52969 1 6.7 6.7 6.7 6.7
0.55721 1 6.7 6.7 13.3 13.3
0.75284 1 6.7 6.7 20.0 20.0
0.885 1 6.7 6.7 26.7 26.7
1.10868 1 6.7 6.7 33.3 33.3
1.19636 1 6.7 6.7 40.0 40.0
1.54885 1 6.7 6.7 46.7 46.7
1.70298 1 6.7 6.7 53.3 53.3
1.72138 1 6.7 6.7 60.0 60.0
2.72462 1 6.7 6.7 66.7 66.7
3.72637 1 6.7 6.7 73.3 73.3
3.98059 1 6.7 6.7 80.0 80.0
4.31327 1 6.7 6.7 86.7 86.7
4.86398 1 6.7 6.7 93.3 93.3
5.71058 1 6.7 6.7 100.0 100.0
Total 15 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  16 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
##  $ val%   : num  6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
##  $ %cum   : num  6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
##  $ val%cum: num  6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
# Split the frequency table into category labels and counts, dropping the
# trailing "Total" row so it does not end up in the plotting data.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], yields
# an empty vector (not the "Total" entry) when only that row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.52969 1
0.55721 1
0.75284 1
0.885 1
1.10868 1
1.19636 1
1.54885 1
1.70298 1
1.72138 1
2.72462 1
3.72637 1
3.98059 1
4.31327 1
4.86398 1
5.71058 1
library(ggplot2)

# Bar chart of the counts in `df`, one bar per distinct value; the x-axis
# labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Class count from Sturges' rule, 1 + log2(n), followed by an equal-width grid
# of break points that starts at the smallest value of `distance`.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the rounded-up count is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- diff(range(distance))
w <- ceiling(R / n_clases)  # class width, rounded up to a whole number
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.52969 2.52969 4.52969 6.52969
# Assign every distance to its class and tabulate absolute, relative and
# cumulative frequencies per class.
# cut() builds right-closed intervals and the first break equals
# min(distance), so without include.lowest = TRUE the smallest observation
# falls outside every class and is turned into NA (one row lost from the table).
distance <- cut(distance, bins, include.lowest = TRUE)
# base::table is spelled out because an earlier chunk stores a data frame
# under the name `table`.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.53,2.53] 8 0.5714286 8
(2.53,4.53] 4 0.2857143 12
(4.53,6.53] 2 0.1428571 14
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.53,2.53]",..: 1 2 3
##  $ Freq    : int  8 4 2
##  $ Rel_Freq: num  0.571 0.286 0.143
##  $ Cum_Freq: int  8 12 14
# Two-column plotting table: class label (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.53,2.53] 8
(2.53,4.53] 4
(4.53,6.53] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only: passing the character
# columns to stat.desc() just produces columns filled with NA.
stat.desc(df_DR[vapply(df_DR, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      1.500000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.240000e+02   NA   NA             NA           NA           NA
## max          6.707000e+03   NA   NA             NA           NA           NA
## range        6.583000e+03   NA   NA             NA           NA           NA
## sum          3.542100e+04   NA   NA             NA           NA           NA
## median       1.177000e+03   NA   NA             NA           NA           NA
## mean         2.361400e+03   NA   NA             NA           NA           NA
## SE.mean      6.027818e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.292838e+03   NA   NA             NA           NA           NA
## var          5.450188e+06   NA   NA             NA           NA           NA
## std.dev      2.334564e+03   NA   NA             NA           NA           NA
## coef.var     9.886355e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.500000e+01   NA 15.0000000                   NA
## nbr.null        NA 1.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 0.000000e+00   NA  0.5296900                   NA
## max             NA 2.201941e+06   NA  5.7105800                   NA
## range           NA 2.201941e+06   NA  5.1808900                   NA
## sum             NA 3.729279e+06   NA 35.3224000                   NA
## median          NA 1.345600e+04   NA  1.7029800                   NA
## mean            NA 2.486186e+05   NA  2.3548267                   NA
## SE.mean         NA 1.601963e+05   NA  0.4456851                   NA
## CI.mean.0.95    NA 3.435870e+05   NA  0.9558995                   NA
## var             NA 3.849430e+11   NA  2.9795286                   NA
## std.dev         NA 6.204378e+05   NA  1.7261311                   NA
## coef.var        NA 2.495540e+00   NA  0.7330183                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       15.00000000  1.500000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           18.42700000 -7.096300e+01          NA          NA             NA
## max           19.90530000 -6.938220e+01          NA          NA             NA
## range          1.47830000  1.580800e+00          NA          NA             NA
## sum          286.98100000 -1.055795e+03          NA          NA             NA
## median        19.30500000 -7.060000e+01          NA          NA             NA
## mean          19.13206667 -7.038631e+01          NA          NA             NA
## SE.mean        0.13659378  1.250746e-01          NA          NA             NA
## CI.mean.0.95   0.29296451  2.682583e-01          NA          NA             NA
## var            0.27986790  2.346547e-01          NA          NA             NA
## std.dev        0.52902542  4.844117e-01          NA          NA             NA
## coef.var       0.02765124 -6.882187e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2  11.000000          NA
## nbr.null                 NA      NA         NA        2   4.000000          NA
## nbr.na                   NA      NA         NA       13   4.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  68.000000          NA
## range                    NA      NA         NA        0  68.000000          NA
## sum                      NA      NA         NA        0 100.000000          NA
## median                   NA      NA         NA        0   1.000000          NA
## mean                     NA      NA         NA        0   9.090909          NA
## SE.mean                  NA      NA         NA        0   6.092401          NA
## CI.mean.0.95             NA      NA         NA        0  13.574716          NA
## var                      NA      NA         NA        0 408.290909          NA
## std.dev                  NA      NA         NA        0  20.206210          NA
## coef.var                 NA      NA         NA      NaN   2.222683          NA
##              source_link        prop        ypos
## nbr.val               NA  15.0000000  15.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   1.4995867   5.6346539
## max                   NA  16.1670215  99.2112512
## range                 NA  14.6674348  93.5765973
## sum                   NA 100.0000000 830.1472720
## median                NA   4.8212466  57.1753335
## mean                  NA   6.6666667  55.3431515
## SE.mean               NA   1.2617635   7.1708290
## CI.mean.0.95          NA   2.7062135  15.3798986
## var                   NA  23.8807055 771.3118270
## std.dev               NA   4.8867889  27.7725013
## coef.var              NA   0.7330183   0.5018236
# Horizontal box plot of the values currently held in `data`.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Distrito Nacional (Dominican Republic)

library(readr)
library(knitr)
# Reload the full catalogue: earlier chunks reuse the name `df` for small
# plotting tables, so this section starts again from the complete data set.
catalog_url <- "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
df <- read_csv(catalog_url)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# for the Dominican Republic and show the first ones.
colnames(df)[c(7, 9)] <- c("state", "city")
df_DR <- df %>%
  filter(country_name == "Dominican Republic")
df_DR %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
724 8/17/08 NA NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 NA 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Keep the landslides recorded in Distrito Nacional. The country is part of
# the condition because `df` holds every country in the catalogue and the
# state name alone could also match a same-named region elsewhere.
df_DR <- subset(
  df,
  country_name == "Dominican Republic" & state == "Distrito Nacional"
)
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
3736 7/6/11 NA NA Dominican Republic DO Distrito Nacional 2201941 Santo Domingo 0.55721 NA 18.5000 -69.9833 (18.5, -69.9833) Landslide Landslide Medium Downpour NA NA 1 NA http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city within the state, bar height = distance.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail further down.
library(scales)

# Percentage of the total distance per city (prop) and the mid-point of each
# slice on the cumulative scale (ypos), used to place the percentage labels.
df_DR <- df_DR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_DR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and raised an "Unknown palette"
  # warning; "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a time-series object with 12 observations per year
# whose first observation is placed at the start of 2008.
# NOTE(review): the values are taken in the current row order of df_DR and the
# `date` column is not used — confirm this indexing is intended.
data <- ts(data = df_DR$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.55721
1.70298
5.71058
# Plot the series with a title, axis labels and the black-and-white theme.
# NOTE(review): `colour` inside labs() only supplies a legend title; it does
# not colour the line — confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city, drawn as a Pareto chart with cumulative
# percentage marks every 10 points on the second axis.
distance <- setNames(df_DR$distance, df_DR$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                
## Pareto chart analysis for distance
##                  Frequency  Cum.Freq. Percentage Cum.Percent.
##   La Agustina     5.710580   5.710580  71.644019    71.644019
##   San Carlos      1.702980   7.413560  21.365314    93.009333
##   Santo Domingo   0.557210   7.970770   6.990667   100.000000
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 
##   4 | 7
head(df_DR)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name  country_code state population
##   <dbl> <chr>   <chr> <chr>          <chr>         <chr>        <chr>      <dbl>
## 1  3736 7/6/11  <NA>  <NA>           Dominican Re~ DO           Dist~    2201941
## 2   124 7/13/07 Night <NA>           Dominican Re~ DO           Dist~      13456
## 3   746 8/26/08 <NA>  <NA>           Dominican Re~ DO           Dist~      10457
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3736 7/6/11 NA NA Dominican Republic DO Distrito Nacional 2201941 Santo Domingo 0.55721 NA 18.5000 -69.9833 (18.5, -69.9833) Landslide Landslide Medium Downpour NA NA 1 NA http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52 6.990667 3.495334
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682 21.365313 17.673324
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0 71.644019 64.177990
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 67
##   2 | 
##   4 | 7
stem(df_DR$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 6
##   1 | 7
##   2 | 
##   3 | 
##   4 | 
##   5 | 7

Tablas de frecuencia

# Frequency table of the values in `distance` (counts, percentages and
# cumulative percentages) with a closing "Total" row, shown as a Markdown table.
# The result keeps the name `table` because the following chunks read it.
library(questionr)
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.55721 1 33.3 33.3 33.3 33.3
1.70298 1 33.3 33.3 66.7 66.7
5.71058 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Split the frequency table into category labels and counts, dropping the
# trailing "Total" row so it does not end up in the plotting data.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], yields
# an empty vector (not the "Total" entry) when only that row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.55721 1
1.70298 1
5.71058 1
library(ggplot2)
# Bar chart of the counts in `df`, one bar per distinct value; the x-axis
# labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Class count from Sturges' rule, 1 + log2(n), followed by an equal-width grid
# of break points that starts at the smallest value of `distance`.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the rounded-up count is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- diff(range(distance))
w <- ceiling(R / n_clases)  # class width, rounded up to a whole number
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.55721 2.55721 4.55721 6.55721
# Assign every distance to its class and tabulate absolute, relative and
# cumulative frequencies per class.
# cut() builds right-closed intervals and the first break equals
# min(distance), so without include.lowest = TRUE the smallest observation
# falls outside every class and is turned into NA (one row lost from the table).
distance <- cut(distance, bins, include.lowest = TRUE)
# base::table is spelled out because an earlier chunk stores a data frame
# under the name `table`.
Freq_table <- transform(
  base::table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.557,2.56] 1 0.5 1
(2.56,4.56] 0 0.0 1
(4.56,6.56] 1 0.5 2
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.557,2.56]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Two-column plotting table: class label (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.557,2.56] 1
(2.56,4.56] 0
(4.56,6.56] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only: passing the character
# columns to stat.desc() just produces columns filled with NA.
stat.desc(df_DR[vapply(df_DR, is.numeric, logical(1))])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.240000e+02   NA   NA             NA           NA           NA
## max          3.736000e+03   NA   NA             NA           NA           NA
## range        3.612000e+03   NA   NA             NA           NA           NA
## sum          4.606000e+03   NA   NA             NA           NA           NA
## median       7.460000e+02   NA   NA             NA           NA           NA
## mean         1.535333e+03   NA   NA             NA           NA           NA
## SE.mean      1.114887e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 4.796973e+03   NA   NA             NA           NA           NA
## var          3.728921e+06   NA   NA             NA           NA           NA
## std.dev      1.931042e+03   NA   NA             NA           NA           NA
## coef.var     1.257734e+00   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 3.000000e+00   NA 3.000000                   NA  3.000000000
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## min             NA 1.045700e+04   NA 0.557210                   NA 18.475700000
## max             NA 2.201941e+06   NA 5.710580                   NA 18.550000000
## range           NA 2.191484e+06   NA 5.153370                   NA  0.074300000
## sum             NA 2.225854e+06   NA 7.970770                   NA 55.525700000
## median          NA 1.345600e+04   NA 1.702980                   NA 18.500000000
## mean            NA 7.419513e+05   NA 2.656923                   NA 18.508566667
## SE.mean         NA 7.299953e+05   NA 1.562243                   NA  0.021872078
## CI.mean.0.95    NA 3.140916e+06   NA 6.721790                   NA  0.094107954
## var             NA 1.598680e+12   NA 7.321812                   NA  0.001435163
## std.dev         NA 1.264389e+06   NA 2.705885                   NA  0.037883550
## coef.var        NA 1.704140e+00   NA 1.018428                   NA  0.002046812
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.998330e+01          NA          NA             NA
## max          -6.991400e+01          NA          NA             NA
## range         6.930000e-02          NA          NA             NA
## sum          -2.098173e+02          NA          NA             NA
## median       -6.992000e+01          NA          NA             NA
## mean         -6.993910e+01          NA          NA             NA
## SE.mean       2.216777e-02          NA          NA             NA
## CI.mean.0.95  9.538021e-02          NA          NA             NA
## var           1.474230e-03          NA          NA             NA
## std.dev       3.839570e-02          NA          NA             NA
## coef.var     -5.489877e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   0.000000          NA
## nbr.na                   NA      NA         NA        3   1.000000          NA
## min                      NA      NA         NA      Inf   1.000000          NA
## max                      NA      NA         NA     -Inf   8.000000          NA
## range                    NA      NA         NA     -Inf   7.000000          NA
## sum                      NA      NA         NA        0   9.000000          NA
## median                   NA      NA         NA       NA   4.500000          NA
## mean                     NA      NA         NA      NaN   4.500000          NA
## SE.mean                  NA      NA         NA       NA   3.500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  44.471717          NA
## var                      NA      NA         NA       NA  24.500000          NA
## std.dev                  NA      NA         NA       NA   4.949747          NA
## coef.var                 NA      NA         NA       NA   1.099944          NA
##              source_link        prop        ypos
## nbr.val               NA    3.000000    3.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    6.990667    3.495334
## max                   NA   71.644019   64.177990
## range                 NA   64.653352   60.682657
## sum                   NA  100.000000   85.346648
## median                NA   21.365314   17.673324
## mean                  NA   33.333333   28.448883
## SE.mean               NA   19.599652   18.327399
## CI.mean.0.95          NA   84.330497   78.856435
## var                   NA 1152.439111 1007.680707
## std.dev               NA   33.947594   31.743987
## coef.var              NA    1.018428    1.115825
# Horizontal box plot of the values currently held in `data`.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para La Vega (Dominican Republic)

library(readr)
library(knitr)
# Reload the full catalogue: earlier chunks reuse the name `df` for small
# plotting tables, so this section starts again from the complete data set.
catalog_url <- "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
df <- read_csv(catalog_url)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# for the Dominican Republic and show the first ones.
colnames(df)[c(7, 9)] <- c("state", "city")
df_DR <- df %>%
  filter(country_name == "Dominican Republic")
df_DR %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
724 8/17/08 NA NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 NA 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Keep the landslides recorded in La Vega. The country is part of the
# condition because `df` holds every country in the catalogue and the state
# name alone could also match a same-named region elsewhere.
df_DR <- subset(
  df,
  country_name == "Dominican Republic" & state == "La Vega"
)
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.600 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
4051 11/18/11 NA NA Dominican Republic DO La Vega 29481 Constanza 0.52969 NA 18.9045 -70.744 (18.904499999999999, -70.744) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.dominicantoday.com/dr/local/2011/11/18/41684/Mudslides-halt-traffic-to-Constanza

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city within the state, bar height = distance.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# distance mapped to the angle.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail further down.
library(scales)

# Percentage of the total distance per city (prop) and the mid-point of each
# slice on the cumulative scale (ypos), used to place the percentage labels.
df_DR <- df_DR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_DR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette and raised an "Unknown palette"
  # warning; "Set3" is an existing qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a time-series object with 12 observations per year
# whose first observation is placed at the start of 2008.
# NOTE(review): the values are taken in the current row order of df_DR and the
# `date` column is not used — confirm this indexing is intended.
data <- ts(data = df_DR$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
3.72637
0.52969
# Plot the series with a title, axis labels and the black-and-white theme.
# NOTE(review): `colour` inside labs() only supplies a legend title; it does
# not colour the line — confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city, drawn as a Pareto chart with cumulative
# percentage marks every 10 points on the second axis.
distance <- setNames(df_DR$distance, df_DR$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                  
## Pareto chart analysis for distance
##                   Frequency Cum.Freq. Percentage Cum.Percent.
##   Río Verde Abajo   3.72637   3.72637   87.55445     87.55445
##   Constanza         0.52969   4.25606   12.44555    100.00000
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 
##   2 | 
##   3 | 7
head(df_DR)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   343 11/1/07  <NA>  <NA>           Dominican R~ DO           La V~       3613
## 2  4051 11/18/11 <NA>  <NA>           Dominican R~ DO           La V~      29481
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.600 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement 87.55445 43.77723
4051 11/18/11 NA NA Dominican Republic DO La Vega 29481 Constanza 0.52969 NA 18.9045 -70.744 (18.904499999999999, -70.744) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.dominicantoday.com/dr/local/2011/11/18/41684/Mudslides-halt-traffic-to-Constanza 12.44555 93.77723
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 
##   2 | 
##   3 | 7
stem(df_DR$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 5
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 7

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.52969 1 50 50 50 50
3.72637 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Row names of the frequency table are the observed values; its last row is the
# "Total" row requested with total = TRUE and must not be plotted as a category.
x <- row.names(table)
y <- table$n
# head(., -1) drops the last element and, unlike 1:(length(x) - 1), does not
# count backwards (1:0) when the table holds nothing but the "Total" row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.52969 1
3.72637 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; the x labels are
# rotated by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: suggested number of classes for the observations in `distance`.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up one is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range split over the classes, rounded up to a whole number.
# NOTE(review): when the range is smaller than n_clases the width becomes 1,
# which produces fewer classes than n_clases -- confirm this is intended.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Class limits from the minimum up to the first limit at or above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.52969 2.52969 4.52969
# Assign every observation to its class interval. `bins` starts exactly at
# min(distance) and cut() builds left-open intervals (a, b] by default, so
# without include.lowest = TRUE the smallest observation becomes NA and is
# silently dropped from the frequency table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.53,2.53] 0 0 0
(2.53,4.53] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.53,2.53]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(0.53,2.53] 0
(2.53,4.53] 1
library(ggplot2)

# Bar chart of the number of events that fall into each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Character columns only yield all-NA columns in the stat.desc() result, so
# restrict the descriptive statistics to the numeric columns.
stat.desc(df_DR[vapply(df_DR, is.numeric, logical(1))])
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.430000e+02   NA   NA             NA           NA           NA
## max          4.051000e+03   NA   NA             NA           NA           NA
## range        3.708000e+03   NA   NA             NA           NA           NA
## sum          4.394000e+03   NA   NA             NA           NA           NA
## median       2.197000e+03   NA   NA             NA           NA           NA
## mean         2.197000e+03   NA   NA             NA           NA           NA
## SE.mean      1.854000e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 2.355730e+04   NA   NA             NA           NA           NA
## var          6.874632e+06   NA   NA             NA           NA           NA
## std.dev      2.621952e+03   NA   NA             NA           NA           NA
## coef.var     1.193424e+00   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 3.613000e+03   NA  0.529690                   NA 18.90450000
## max             NA 2.948100e+04   NA  3.726370                   NA 19.30500000
## range           NA 2.586800e+04   NA  3.196680                   NA  0.40050000
## sum             NA 3.309400e+04   NA  4.256060                   NA 38.20950000
## median          NA 1.654700e+04   NA  2.128030                   NA 19.10475000
## mean            NA 1.654700e+04   NA  2.128030                   NA 19.10475000
## SE.mean         NA 1.293400e+04   NA  1.598340                   NA  0.20025000
## CI.mean.0.95    NA 1.643421e+05   NA 20.308835                   NA  2.54441750
## var             NA 3.345767e+08   NA  5.109382                   NA  0.08020013
## std.dev         NA 1.829144e+04   NA  2.260394                   NA  0.28319627
## coef.var        NA 1.105423e+00   NA  1.062200                   NA  0.01482334
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.074400e+01          NA          NA             NA
## max          -7.060000e+01          NA          NA             NA
## range         1.440000e-01          NA          NA             NA
## sum          -1.413440e+02          NA          NA             NA
## median       -7.067200e+01          NA          NA             NA
## mean         -7.067200e+01          NA          NA             NA
## SE.mean       7.200000e-02          NA          NA             NA
## CI.mean.0.95  9.148467e-01          NA          NA             NA
## var           1.036800e-02          NA          NA             NA
## std.dev       1.018234e-01          NA          NA             NA
## coef.var     -1.440788e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries  fatalities source_name
## nbr.val                  NA      NA         NA        0    2.000000          NA
## nbr.null                 NA      NA         NA        0    1.000000          NA
## nbr.na                   NA      NA         NA        2    0.000000          NA
## min                      NA      NA         NA      Inf    0.000000          NA
## max                      NA      NA         NA     -Inf   68.000000          NA
## range                    NA      NA         NA     -Inf   68.000000          NA
## sum                      NA      NA         NA        0   68.000000          NA
## median                   NA      NA         NA       NA   34.000000          NA
## mean                     NA      NA         NA      NaN   34.000000          NA
## SE.mean                  NA      NA         NA       NA   34.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  432.010961          NA
## var                      NA      NA         NA       NA 2312.000000          NA
## std.dev                  NA      NA         NA       NA   48.083261          NA
## coef.var                 NA      NA         NA       NA    1.414214          NA
##              source_link       prop         ypos
## nbr.val               NA    2.00000    2.0000000
## nbr.null              NA    0.00000    0.0000000
## nbr.na                NA    0.00000    0.0000000
## min                   NA   12.44555   43.7772259
## max                   NA   87.55445   93.7772259
## range                 NA   75.10890   50.0000000
## sum                   NA  100.00000  137.5544518
## median                NA   50.00000   68.7772259
## mean                  NA   50.00000   68.7772259
## SE.mean               NA   37.55445   25.0000000
## CI.mean.0.95          NA  477.17455  317.6551184
## var                   NA 2820.67370 1250.0000000
## std.dev               NA   53.11002   35.3553391
## coef.var              NA    1.06220    0.5140559
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puerto Plata (Dominican Republic)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used throughout the analysis.
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the events recorded in the Dominican Republic and preview them.
df_DR <- df[df$country_name %in% "Dominican Republic", ]
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
724 8/17/08 NA NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 NA 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Narrow the Dominican Republic subset down to one province. Filtering `df_DR`
# instead of the full catalog `df` keeps the country restriction, so a
# same-named state in another country could not leak into the result.
df_DR <- subset(df_DR, state == "Puerto Plata")
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1177 9/20/09 NA NA Dominican Republic DO Puerto Plata 4563 Altamira 0.88500 NA 19.6750 -70.8362 (19.675000000000001, -70.836200000000005) Landslide Landslide Medium Downpour NA NA 2 NA http://www.laht.com/article.asp?CategoryId=14092&ArticleId=327347
4655 12/5/12 NA NA Dominican Republic DO Puerto Plata 146000 Puerto Plata 1.19636 NA 19.7827 -70.6871 (19.782699999999998, -70.687100000000001) Landslide Landslide Medium Rain NA NA NA NA http://www.dominicantoday.com/dr/local/2012/12/5/45992/Crews-clear-Santiago-Puerto-Plata-road-blocked-by-landslides
6707 11/7/14 NA NA Dominican Republic DO Puerto Plata 4393 Luperón 1.54885 Below road 19.9053 -70.9630 (19.9053, -70.962999999999994) Landslide Landslide Medium Rain NA 0 0 Hoy http://hoy.com.do/carretera-luperon-presenta-hundimientos-y-deslizamientos-por-lluvias/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, placed side by side within each state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of the cities are piled up within each state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: the stacked distances of each state are wrapped around the y axis,
# one slice per city.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each row (in percent) and the
# mid-point of each slice along the stacked axis, used to place the labels.
df_DR <- df_DR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels: a single stacked bar on polar coordinates.
ggplot(df_DR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it raised "Unknown palette Set4");
  # "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in the current row order of df_DR, as a monthly series
# (frequency = 12) whose first observation is labelled 2008.
# NOTE(review): df_DR was last sorted by city, not by `date`, and the events are
# not monthly, so the time axis may not reflect the real event dates -- confirm.
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
1.19636
1.54885
0.88500
# `colour` inside labs() only sets a legend title and does not colour the line;
# pass it to autoplot() so the series itself is drawn in green.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector: one distance per event, labelled with the city it belongs to.
distance <- setNames(df_DR$distance, df_DR$city)
# Pareto chart of the distances by city, with cumulative-percentage ticks
# every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##               
## Pareto chart analysis for distance
##                Frequency Cum.Freq. Percentage Cum.Percent.
##   Luperón        1.54885   1.54885   42.66558     42.66558
##   Puerto Plata   1.19636   2.74521   32.95567     75.62125
##   Altamira       0.88500   3.63021   24.37875    100.00000
stem(df_DR$"distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    8 | 9
##   10 | 
##   12 | 0
##   14 | 5
head(df_DR)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name  country_code state population
##   <dbl> <chr>   <chr> <chr>          <chr>         <chr>        <chr>      <dbl>
## 1  4655 12/5/12 <NA>  <NA>           Dominican Re~ DO           Puer~     146000
## 2  6707 11/7/14 <NA>  <NA>           Dominican Re~ DO           Puer~       4393
## 3  1177 9/20/09 <NA>  <NA>           Dominican Re~ DO           Puer~       4563
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4655 12/5/12 NA NA Dominican Republic DO Puerto Plata 146000 Puerto Plata 1.19636 NA 19.7827 -70.6871 (19.782699999999998, -70.687100000000001) Landslide Landslide Medium Rain NA NA NA NA http://www.dominicantoday.com/dr/local/2012/12/5/45992/Crews-clear-Santiago-Puerto-Plata-road-blocked-by-landslides 32.95567 16.47783
6707 11/7/14 NA NA Dominican Republic DO Puerto Plata 4393 Luperón 1.54885 Below road 19.9053 -70.9630 (19.9053, -70.962999999999994) Landslide Landslide Medium Rain NA 0 0 Hoy http://hoy.com.do/carretera-luperon-presenta-hundimientos-y-deslizamientos-por-lluvias/ 42.66558 54.28846
1177 9/20/09 NA NA Dominican Republic DO Puerto Plata 4563 Altamira 0.88500 NA 19.6750 -70.8362 (19.675000000000001, -70.836200000000005) Landslide Landslide Medium Downpour NA NA 2 NA http://www.laht.com/article.asp?CategoryId=14092&ArticleId=327347 24.37875 87.81062
stem(df_DR$"distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    8 | 9
##   10 | 
##   12 | 0
##   14 | 5
stem(df_DR$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    8 | 9
##    9 | 
##   10 | 
##   11 | 
##   12 | 0
##   13 | 
##   14 | 
##   15 | 5

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.885 1 33.3 33.3 33.3 33.3
1.19636 1 33.3 33.3 66.7 66.7
1.54885 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Row names of the frequency table are the observed values; its last row is the
# "Total" row requested with total = TRUE and must not be plotted as a category.
x <- row.names(table)
y <- table$n
# head(., -1) drops the last element and, unlike 1:(length(x) - 1), does not
# count backwards (1:0) when the table holds nothing but the "Total" row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.885 1
1.19636 1
1.54885 1
library(ggplot2)
# Bar chart of how often each distinct distance value occurs; the x labels are
# rotated by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: suggested number of classes for the observations in `distance`.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up one is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: data range split over the classes, rounded up to a whole number.
# NOTE(review): when the range is smaller than n_clases the width becomes 1,
# which produces fewer classes than n_clases -- confirm this is intended.
R <- diff(range(distance))
w <- ceiling(R / n_clases)
# Class limits from the minimum up to the first limit at or above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.885 1.885
# Assign every observation to its class interval. `bins` starts exactly at
# min(distance) and cut() builds left-open intervals (a, b] by default, so
# without include.lowest = TRUE the smallest observation becomes NA and is
# silently dropped from the frequency table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq = prop.table(Freq), Cum_Freq = cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.885,1.88] 2 1 2
str(Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(0.885,1.88]": 1
##  $ Freq    : int 2
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(0.885,1.88] 2
library(ggplot2)

# Bar chart of the number of events that fall into each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Character columns only yield all-NA columns in the stat.desc() result, so
# restrict the descriptive statistics to the numeric columns.
stat.desc(df_DR[vapply(df_DR, is.numeric, logical(1))])
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.177000e+03   NA   NA             NA           NA           NA
## max          6.707000e+03   NA   NA             NA           NA           NA
## range        5.530000e+03   NA   NA             NA           NA           NA
## sum          1.253900e+04   NA   NA             NA           NA           NA
## median       4.655000e+03   NA   NA             NA           NA           NA
## mean         4.179667e+03   NA   NA             NA           NA           NA
## SE.mean      1.613968e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 6.944345e+03   NA   NA             NA           NA           NA
## var          7.814681e+06   NA   NA             NA           NA           NA
## std.dev      2.795475e+03   NA   NA             NA           NA           NA
## coef.var     6.688273e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 3.000000e+00   NA 3.0000000                   NA  3.00000000
## nbr.null        NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA 0.0000000                   NA  0.00000000
## min             NA 4.393000e+03   NA 0.8850000                   NA 19.67500000
## max             NA 1.460000e+05   NA 1.5488500                   NA 19.90530000
## range           NA 1.416070e+05   NA 0.6638500                   NA  0.23030000
## sum             NA 1.549560e+05   NA 3.6302100                   NA 59.36300000
## median          NA 4.563000e+03   NA 1.1963600                   NA 19.78270000
## mean            NA 5.165200e+04   NA 1.2100700                   NA 19.78766667
## SE.mean         NA 4.717403e+04   NA 0.1917596                   NA  0.06652825
## CI.mean.0.95    NA 2.029734e+05   NA 0.8250748                   NA  0.28624795
## var             NA 6.676166e+09   NA 0.1103152                   NA  0.01327802
## std.dev         NA 8.170781e+04   NA 0.3321373                   NA  0.11523031
## coef.var        NA 1.581891e+00   NA 0.2744777                   NA  0.00582334
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.096300e+01          NA          NA             NA
## max          -7.068710e+01          NA          NA             NA
## range         2.759000e-01          NA          NA             NA
## sum          -2.124863e+02          NA          NA             NA
## median       -7.083620e+01          NA          NA             NA
## mean         -7.082877e+01          NA          NA             NA
## SE.mean       7.973214e-02          NA          NA             NA
## CI.mean.0.95  3.430597e-01          NA          NA             NA
## var           1.907164e-02          NA          NA             NA
## std.dev       1.381001e-01          NA          NA             NA
## coef.var     -1.949774e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   2.000000          NA
## nbr.null                 NA      NA         NA        1   1.000000          NA
## nbr.na                   NA      NA         NA        2   1.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   2.000000          NA
## range                    NA      NA         NA        0   2.000000          NA
## sum                      NA      NA         NA        0   2.000000          NA
## median                   NA      NA         NA        0   1.000000          NA
## mean                     NA      NA         NA        0   1.000000          NA
## SE.mean                  NA      NA         NA       NA   1.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  12.706205          NA
## var                      NA      NA         NA       NA   2.000000          NA
## std.dev                  NA      NA         NA       NA   1.414214          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA   3.0000000    3.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  24.3787549   16.4778346
## max                   NA  42.6655758   87.8106225
## range                 NA  18.2868209   71.3327879
## sum                   NA 100.0000000  158.5769143
## median                NA  32.9556692   54.2884571
## mean                  NA  33.3333333   52.8589714
## SE.mean               NA   5.2823267   20.6044027
## CI.mean.0.95          NA  22.7280175   88.6535895
## var                   NA  83.7089270 1273.6242298
## std.dev               NA   9.1492583   35.6878723
## coef.var              NA   0.2744777    0.6751526
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Santiago (Dominican Republic)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used throughout the analysis.
colnames(df)[c(7, 9)] <- c("state", "city")
# Keep only the events recorded in the Dominican Republic and preview them.
df_DR <- df[df$country_name %in% "Dominican Republic", ]
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
124 7/13/07 Night NA Dominican Republic DO Distrito Nacional 13456 San Carlos 1.70298 NA 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide Landslide Small Unknown NA NA NA Dominican Today http://www.dominicantoday.com/app/article.aspx?id=24682
333 10/29/07 NA NA Dominican Republic DO San Cristóbal 66784 Bajos de Haina 1.72138 NA 18.4270 -70.0440 (18.427, -70.043999999999997) Landslide Mudslide Medium Tropical cyclone Tropical Storm Noel NA 3 United Nations Development Programme - Relief Web http://news.scotsman.com/international.cfm?id=1730152007
343 11/1/07 NA NA Dominican Republic DO La Vega 3613 Río Verde Abajo 3.72637 NA 19.3050 -70.6000 (19.305, -70.599999999999994) Landslide Complex Large Tropical cyclone Tropical Storm Noel NA 68 United Nations Development Programme - Relief Web http://www.reliefweb.int/rw/fullMaps_Am.nsf/luFullMap/CEB72F0756431A7CC125738D003E2EF4/$File/ifrc_TC_carib071108.pdf?OpenElement
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
724 8/17/08 NA NA Dominican Republic DO Hato Mayor 13977 Sabana de La Mar 0.75284 NA 19.0560 -69.3822 (19.056000000000001, -69.382199999999997) Landslide Complex Medium Tropical cyclone Tropical Storm Fay NA NA NA http://www.dominicantoday.com/dr/economy/2008/8/18/29085/Storms-downpours-block-transit-on-newest-Dominican-highway
746 8/26/08 NA NA Dominican Republic DO Distrito Nacional 10457 La Agustina 5.71058 NA 18.5500 -69.9200 (18.55, -69.92) Landslide Mudslide Medium Tropical cyclone Hurricane Gustav NA 8 NA http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
library(dplyr)
# Narrow the Dominican Republic subset down to one province. Filtering `df_DR`
# instead of the full catalog `df` keeps the country restriction, so a
# same-named state in another country could not leak into the result.
df_DR <- subset(df_DR, state == "Santiago")
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/
984 2/12/09 NA NA Dominican Republic DO Santiago 1457 Pedro García 4.86398 NA 19.5500 -70.6390 (19.55, -70.638999999999996) Landslide Mudslide Medium Downpour NA NA 0 NA http://us.puerto-plata-live.com/puerto-plata/news/year-2009/february-2009.html
1178 9/20/09 NA NA Dominican Republic DO Santiago 23304 Tamboril 4.31327 NA 19.5167 -70.5866 (19.5167, -70.586600000000004) Landslide Landslide Small Downpour NA NA NA NA http://www.laht.com/article.asp?CategoryId=14092&ArticleId=327347
3569 6/3/11 NA NA Dominican Republic DO Santiago 9853 San José de Las Matas 2.72462 NA 19.3556 -70.9189 (19.355599999999999, -70.918899999999994) Landslide Landslide Medium Downpour NA NA 1 NA http://english.peopledaily.com.cn/90001/90777/90852/7402423.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, placed side by side within each state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of the cities are piled up within each state.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: the stacked distances of each state are wrapped around the y axis,
# one slice per city.
ggplot(df_DR, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Share of the total distance contributed by each row (in percent) and the
# mid-point of each slice along the stacked axis, used to place the labels.
df_DR <- df_DR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels: a single stacked bar on polar coordinates.
ggplot(df_DR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it raised "Unknown palette Set4");
  # "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Treat the distances, in the current row order of df_DR, as a monthly series
# (frequency = 12) whose first observation is labelled 2008.
# NOTE(review): df_DR was last sorted by city, not by `date`, and the events are
# not monthly, so the time axis may not reflect the real event dates -- confirm.
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
4.31327
1.10868
2.72462
4.86398
# `colour` inside labs() only sets a legend title and does not colour the line;
# pass it to autoplot() so the series itself is drawn in green.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector: one distance per event, labelled with the city it belongs to.
distance <- setNames(df_DR$distance, df_DR$city)
# Pareto chart of the distances by city, with cumulative-percentage ticks
# every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                             
## Pareto chart analysis for distance
##                               Frequency  Cum.Freq. Percentage Cum.Percent.
##   Pedro García                 4.863980   4.863980  37.384891    37.384891
##   Tamboril                     4.313270   9.177250  33.152096    70.536987
##   San José de Las Matas        2.724620  11.901870  20.941620    91.478608
##   Santiago de los Caballeros   1.108680  13.010550   8.521392   100.000000
stem(df_DR$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 1
##   2 | 7
##   3 | 
##   4 | 39
head(df_DR)
## # A tibble: 4 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  1178 9/20/09  <NA>  <NA>           Dominican R~ DO           Sant~      23304
## 2   388 12/11/07 <NA>  <NA>           Dominican R~ DO           Sant~    1200000
## 3  3569 6/3/11   <NA>  <NA>           Dominican R~ DO           Sant~       9853
## 4   984 2/12/09  <NA>  <NA>           Dominican R~ DO           Sant~       1457
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same first rows, rendered as a formatted table.
knitr::kable(head(df_DR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1178 9/20/09 NA NA Dominican Republic DO Santiago 23304 Tamboril 4.31327 NA 19.5167 -70.5866 (19.5167, -70.586600000000004) Landslide Landslide Small Downpour NA NA NA NA http://www.laht.com/article.asp?CategoryId=14092&ArticleId=327347 33.152096 16.57605
388 12/11/07 NA NA Dominican Republic DO Santiago 1200000 Santiago de los Caballeros 1.10868 NA 19.4550 -70.7070 (19.454999999999998, -70.706999999999994) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA 17 news.gossip.info http://clutchmagonline.com/newsgossipinfo/caribbean-storm-death-toll-rises/ 8.521392 37.41279
3569 6/3/11 NA NA Dominican Republic DO Santiago 9853 San José de Las Matas 2.72462 NA 19.3556 -70.9189 (19.355599999999999, -70.918899999999994) Landslide Landslide Medium Downpour NA NA 1 NA http://english.peopledaily.com.cn/90001/90777/90852/7402423.html 20.941620 52.14430
984 2/12/09 NA NA Dominican Republic DO Santiago 1457 Pedro García 4.86398 NA 19.5500 -70.6390 (19.55, -70.638999999999996) Landslide Mudslide Medium Downpour NA NA 0 NA http://us.puerto-plata-live.com/puerto-plata/news/year-2009/february-2009.html 37.384892 81.30755
# Stem-and-leaf display of the distance column (default scale).
stem(df_DR[["distance"]])
## 
##   The decimal point is at the |
## 
##   1 | 1
##   2 | 7
##   3 | 
##   4 | 39
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_DR[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 1
##   1 | 
##   2 | 
##   2 | 7
##   3 | 
##   3 | 
##   4 | 3
##   4 | 9

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values, sorted, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in a variable called `table`, which
# shadows base::table for value lookups (function calls still resolve).
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
1.10868 1 25 25 25 25
2.72462 1 25 25 50 50
4.31327 1 25 25 75 75
4.86398 1 25 25 100 100
Total 4 100 100 100 100
# Inspect the structure of the frequency table (column names and types).
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Rebuild a plain two-column data frame (value, count) from the frequency
# table, leaving out its last row (the "Total" line).
# NOTE: this overwrites `df`, which previously held the full catalogue.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element. Unlike v[1:(length(v) - 1)], it
# returns an empty vector instead of v[1] when v has a single element.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.10868 1
2.72462 1
4.31327 1
4.86398 1
library(ggplot2)
# Bar chart of the ungrouped frequencies; x labels are rotated 90 degrees
# so the distance values stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n), together with its
# ceiling and floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise take the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split into n_clases parts, rounded up to a
# whole number. Break points start at the minimum and extend one width
# past the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 1.10868 3.10868 5.10868
# Assign every distance to its class interval. The first break equals
# min(distance) and cut() builds (a, b] intervals, so without
# include.lowest = TRUE the smallest observation falls outside the first
# interval, becomes NA and is silently dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
# table() here is the base function; the variable also named `table`
# does not interfere with function calls.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.11,3.11] 1 0.3333333 1
(3.11,5.11] 2 0.6666667 3
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(1.11,3.11]",..: 1 2
##  $ Freq    : int  1 2
##  $ Rel_Freq: num  0.333 0.667
##  $ Cum_Freq: int  1 3
# Keep only the class label and its absolute frequency for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.11,3.11] 1
(3.11,5.11] 2
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_DR.
# The whole data frame is passed, so character columns show up as
# all-NA columns in the result.
# NOTE(review): the min/max warnings printed below appear to come from
# `injuries`, which has no non-missing values in this subset - confirm.
stat.desc(df_DR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.880000e+02   NA   NA             NA           NA           NA
## max          3.569000e+03   NA   NA             NA           NA           NA
## range        3.181000e+03   NA   NA             NA           NA           NA
## sum          6.119000e+03   NA   NA             NA           NA           NA
## median       1.081000e+03   NA   NA             NA           NA           NA
## mean         1.529750e+03   NA   NA             NA           NA           NA
## SE.mean      7.002205e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.228414e+03   NA   NA             NA           NA           NA
## var          1.961235e+06   NA   NA             NA           NA           NA
## std.dev      1.400441e+03   NA   NA             NA           NA           NA
## coef.var     9.154705e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.457000e+03   NA  1.1086800                   NA
## max             NA 1.200000e+06   NA  4.8639800                   NA
## range           NA 1.198543e+06   NA  3.7553000                   NA
## sum             NA 1.234614e+06   NA 13.0105500                   NA
## median          NA 1.657850e+04   NA  3.5189450                   NA
## mean            NA 3.086535e+05   NA  3.2526375                   NA
## SE.mean         NA 2.971496e+05   NA  0.8464003                   NA
## CI.mean.0.95    NA 9.456625e+05   NA  2.6936236                   NA
## var             NA 3.531914e+11   NA  2.8655741                   NA
## std.dev         NA 5.942991e+05   NA  1.6928007                   NA
## coef.var        NA 1.925457e+00   NA  0.5204394                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.000000000  4.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          19.355600000 -7.091890e+01          NA          NA             NA
## max          19.550000000 -7.058660e+01          NA          NA             NA
## range         0.194400000  3.323000e-01          NA          NA             NA
## sum          77.877300000 -2.828515e+02          NA          NA             NA
## median       19.485850000 -7.067300e+01          NA          NA             NA
## mean         19.469325000 -7.071287e+01          NA          NA             NA
## SE.mean       0.042711657  7.296329e-02          NA          NA             NA
## CI.mean.0.95  0.135927554  2.322018e-01          NA          NA             NA
## var           0.007297143  2.129457e-02          NA          NA             NA
## std.dev       0.085423314  1.459266e-01          NA          NA             NA
## coef.var      0.004387585 -2.063649e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   3.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        4   1.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf  17.000000          NA
## range                    NA      NA         NA     -Inf  17.000000          NA
## sum                      NA      NA         NA        0  18.000000          NA
## median                   NA      NA         NA       NA   1.000000          NA
## mean                     NA      NA         NA      NaN   6.000000          NA
## SE.mean                  NA      NA         NA       NA   5.507571          NA
## CI.mean.0.95             NA      NA         NA      NaN  23.697163          NA
## var                      NA      NA         NA       NA  91.000000          NA
## std.dev                  NA      NA         NA       NA   9.539392          NA
## coef.var                 NA      NA         NA       NA   1.589899          NA
##              source_link        prop       ypos
## nbr.val               NA   4.0000000   4.000000
## nbr.null              NA   0.0000000   0.000000
## nbr.na                NA   0.0000000   0.000000
## min                   NA   8.5213923  16.576048
## max                   NA  37.3848915  81.307554
## range                 NA  28.8634992  64.731506
## sum                   NA 100.0000000 187.440692
## median                NA  27.0468581  44.778545
## mean                  NA  25.0000000  46.860173
## SE.mean               NA   6.5054923  13.604305
## CI.mean.0.95          NA  20.7033799  43.294969
## var                   NA 169.2857203 740.308432
## std.dev               NA  13.0109846  27.208610
## coef.var              NA   0.5204394   0.580634
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Honduras

library(readr)
library(knitr)
# Reload the full landslide catalogue; `df` was overwritten by the
# frequency-table data frames in the previous section.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep only the rows for Honduras.
names(df)[c(7, 9)] <- c("state", "city")
df_HD <- subset(df, subset = country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bar chart: bars for the different states are placed side by side.
ggplot(df_HD, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar chart: distances are stacked on top of each other, coloured
# by state.
ggplot(df_HD, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart of distance by state: stacked bars drawn in polar coordinates,
# with the bar height (y) mapped to the angle.
ggplot(data = df_HD, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Add the columns needed to label the pie chart:
#   prop - share of the total distance, in percent
#   ypos - midpoint of each slice on the cumulative percentage scale,
#          used as the y position of its text label
df_HD <- df_HD %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_HD, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette: it triggered an "Unknown palette"
  # warning and a fallback palette with too few colours for the states.
  # "Set3" is a valid qualitative palette with up to 12 colours.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series
# (12 observations per period) whose first period is 2008.
data <- ts(data = df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.31238
6.66574
2.91594
2.87349
2.00805
5.79867
# Line plot of the time series with a title, axis labels and the
# black-and-white theme.
# NOTE(review): labs(colour = "green") only sets the title of a colour
# legend; it does not colour the line. Presumably a green line was
# intended - confirm and set the colour on the geom instead.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# One distance per row, labelled with its state. This vector is kept
# as-is because the frequency tables further down use it.
distance <- df_HD$distance
names(distance) <- df_HD$state
# A state can appear in several rows, so plotting `distance` directly
# draws one bar per observation with repeated state labels. Sum the
# distances within each state so the chart really is "per state".
distance_by_state <- vapply(split(distance, names(distance)), sum, numeric(1))
pareto.chart(distance_by_state,
             ylab = "distance",
             col = heat.colors(length(distance_by_state)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "Grafico de Pareto por estados"
)

##                    
## Pareto chart analysis for distance
##                       Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Colón              36.3762900  36.3762900  21.8907391   21.8907391
##   Comayagua          17.2861300  53.6624200  10.4025496   32.2932888
##   Choluteca          11.6723700  65.3347900   7.0242679   39.3175567
##   Comayagua           8.5258400  73.8606300   5.1307305   44.4482872
##   Comayagua           7.2857500  81.1463800   4.3844618   48.8327489
##   Yoro                6.6657400  87.8121200   4.0113485   52.8440974
##   Copán               5.8972100  93.7093300   3.5488579   56.3929554
##   Ocotepeque          5.7986700  99.5080000   3.4895580   59.8825133
##   Francisco Morazán   4.7579100 104.2659100   2.8632432   62.7457566
##   La Paz              4.6913300 108.9572400   2.8231763   65.5689329
##   Comayagua           4.5336200 113.4908600   2.7282687   68.2972016
##   Choluteca           3.6959600 117.1868200   2.2241767   70.5213783
##   Francisco Morazán   3.6396200 120.8264400   2.1902721   72.7116504
##   Francisco Morazán   3.5439900 124.3704300   2.1327233   74.8443736
##   Cortés              3.5373700 127.9078000   2.1287395   76.9731131
##   Francisco Morazán   3.2528100 131.1606100   1.9574953   78.9306084
##   Francisco Morazán   3.1298600 134.2904700   1.8835057   80.8141140
##   Francisco Morazán   2.9923900 137.2828600   1.8007782   82.6148922
##   Santa Bárbara       2.9159400 140.1988000   1.7547716   84.3696639
##   Francisco Morazán   2.9132600 143.1120600   1.7531588   86.1228227
##   Santa Bárbara       2.8734900 145.9855500   1.7292258   87.8520485
##   Francisco Morazán   2.2144200 148.1999700   1.3326068   89.1846553
##   Francisco Morazán   2.0083000 150.2082700   1.2085667   90.3932220
##   Ocotepeque          2.0080500 152.2163200   1.2084162   91.6016382
##   El Paraíso          1.9005200 154.1168400   1.1437062   92.7453444
##   Francisco Morazán   1.8589700 155.9758100   1.1187020   93.8640463
##   Copán               1.3909500 157.3667600   0.8370541   94.7011005
##   Francisco Morazán   1.3058300 158.6725900   0.7858301   95.4869306
##   Francisco Morazán   1.2440400 159.9166300   0.7486458   96.2355763
##   Francisco Morazán   1.2363900 161.1530200   0.7440421   96.9796184
##   Francisco Morazán   0.9837700 162.1367900   0.5920189   97.5716373
##   Cortés              0.9705700 163.1073600   0.5840754   98.1557127
##   Francisco Morazán   0.9155200 164.0228800   0.5509470   98.7066598
##   Copán               0.7441400 164.7670200   0.4478130   99.1544727
##   Copán               0.4339100 165.2009300   0.2611209   99.4155937
##   Choluteca           0.3698700 165.5708000   0.2225826   99.6381762
##   Yoro                0.3123800 165.8831800   0.1879859   99.8261621
##   Copán               0.2888700 166.1720500   0.1738379  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_HD[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000011111111222223333334444
##   0 | 55566779
##   1 | 2
##   1 | 7
##   2 | 
##   2 | 
##   3 | 
##   3 | 6
# Print the first rows of df_HD, which now also carries prop and ypos.
head(df_HD)
## # A tibble: 6 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6202 5/20/14  <NA>    <NA>           Honduras     HN           Yoro       15774
## 2  7467 1/22/15  <NA>    <NA>           Honduras     HN           Yoro        2188
## 3  6691 10/14/14 Night   <NA>           Honduras     HN           Sant~       1759
## 4  7464 9/28/15  Morning <NA>           Honduras     HN           Sant~       1811
## 5  6672 10/13/14 <NA>    <NA>           Honduras     HN           Ocot~       2389
## 6  7462 9/25/15  <NA>    <NA>           Honduras     HN           Ocot~       1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same first rows, rendered as a formatted table.
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6202 5/20/14 NA NA Honduras HN Yoro 15774 Yoro 0.31238 Unknown 15.0666 -87.3245 (15.066599999999999, -87.3245) Landslide Landslide Medium Rain NA 0 0 Prensa http://www.laprensa.hn/lasultimas24/711060-98/deslizamientos-de-tierra-dejar%C3%ADa-incomunicadas-30-aldeas-en-yoro 0.1879859 0.0939929
7467 1/22/15 NA NA Honduras HN Yoro 2188 La Sarrosa 6.66574 Above road 15.1754 -87.8164 (15.1754, -87.816400000000002) Landslide Rockfall Small Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/787961-410/conductores-tienen-que-maniobrar-para-no-chocar-con-las-rocas-que-cubren 4.0113485 2.1936601
6691 10/14/14 Night NA Honduras HN Santa Bárbara 1759 Agualote 2.91594 Mine construction 15.3090 -88.5510 (15.308999999999999, -88.551000000000002) Landslide Landslide Medium Rain NA 0 1 Mundo http://elmundo.com.sv/honduras-muere-un-minero-y-rescatan-a-otros-cinco-soterrados 1.7547716 5.0767202
7464 9/28/15 Morning NA Honduras HN Santa Bárbara 1811 Ilama 2.87349 Above road 15.0909 -88.2072 (15.0909, -88.2072) Landslide Rockfall Small Rain NA 0 0 Canal 6 http://www.canal6.com.hn/destacado/derrumbes-incomunican-paso-entre-san-pedro-sula-y-santa-barbara.html 1.7292258 6.8187189
6672 10/13/14 NA NA Honduras HN Ocotepeque 2389 Sinuapa 2.00805 Below road 14.4579 -89.1666 (14.4579, -89.166600000000003) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.laprensa.hn/economia/757783-410/evac%C3%BAan-a-familias-por-deslizamientos-en-ocotepeque 1.2084162 8.2875399
7462 9/25/15 NA NA Honduras HN Ocotepeque 1416 La Labor 5.79867 Above road 14.4810 -89.0537 (14.481, -89.053700000000006) Landslide Landslide Small Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/ 3.4895580 10.6365270
# Stem-and-leaf display of the distance column (default scale).
stem(df_HD[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000011111111222223333334444
##   0 | 55566779
##   1 | 2
##   1 | 7
##   2 | 
##   2 | 
##   3 | 
##   3 | 6
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_HD[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 33447900223499
##    2 | 0029990135567
##    4 | 57889
##    6 | 73
##    8 | 5
##   10 | 7
##   12 | 
##   14 | 
##   16 | 3
##   18 | 
##   20 | 
##   22 | 
##   24 | 
##   26 | 
##   28 | 
##   30 | 
##   32 | 
##   34 | 
##   36 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the distance values, sorted, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in a variable called `table`, which
# shadows base::table for value lookups (function calls still resolve).
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.28887 1 2.6 2.6 2.6 2.6
0.31238 1 2.6 2.6 5.3 5.3
0.36987 1 2.6 2.6 7.9 7.9
0.43391 1 2.6 2.6 10.5 10.5
0.74414 1 2.6 2.6 13.2 13.2
0.91552 1 2.6 2.6 15.8 15.8
0.97057 1 2.6 2.6 18.4 18.4
0.98377 1 2.6 2.6 21.1 21.1
1.23639 1 2.6 2.6 23.7 23.7
1.24404 1 2.6 2.6 26.3 26.3
1.30583 1 2.6 2.6 28.9 28.9
1.39095 1 2.6 2.6 31.6 31.6
1.85897 1 2.6 2.6 34.2 34.2
1.90052 1 2.6 2.6 36.8 36.8
2.00805 1 2.6 2.6 39.5 39.5
2.0083 1 2.6 2.6 42.1 42.1
2.21442 1 2.6 2.6 44.7 44.7
2.87349 1 2.6 2.6 47.4 47.4
2.91326 1 2.6 2.6 50.0 50.0
2.91594 1 2.6 2.6 52.6 52.6
2.99239 1 2.6 2.6 55.3 55.3
3.12986 1 2.6 2.6 57.9 57.9
3.25281 1 2.6 2.6 60.5 60.5
3.53737 1 2.6 2.6 63.2 63.2
3.54399 1 2.6 2.6 65.8 65.8
3.63962 1 2.6 2.6 68.4 68.4
3.69596 1 2.6 2.6 71.1 71.1
4.53362 1 2.6 2.6 73.7 73.7
4.69133 1 2.6 2.6 76.3 76.3
4.75791 1 2.6 2.6 78.9 78.9
5.79867 1 2.6 2.6 81.6 81.6
5.89721 1 2.6 2.6 84.2 84.2
6.66574 1 2.6 2.6 86.8 86.8
7.28575 1 2.6 2.6 89.5 89.5
8.52584 1 2.6 2.6 92.1 92.1
11.67237 1 2.6 2.6 94.7 94.7
17.28613 1 2.6 2.6 97.4 97.4
36.37629 1 2.6 2.6 100.0 100.0
Total 38 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table (column names and types).
str(table)
## Classes 'freqtab' and 'data.frame':  39 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
##  $ val%   : num  2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
##  $ %cum   : num  2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
##  $ val%cum: num  2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
# Rebuild a plain two-column data frame (value, count) from the frequency
# table, leaving out its last row (the "Total" line).
# NOTE: this overwrites `df`, which previously held the full catalogue.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element. Unlike v[1:(length(v) - 1)], it
# returns an empty vector instead of v[1] when v has a single element.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.28887 1
0.31238 1
0.36987 1
0.43391 1
0.74414 1
0.91552 1
0.97057 1
0.98377 1
1.23639 1
1.24404 1
1.30583 1
1.39095 1
1.85897 1
1.90052 1
2.00805 1
2.0083 1
2.21442 1
2.87349 1
2.91326 1
2.91594 1
2.99239 1
3.12986 1
3.25281 1
3.53737 1
3.54399 1
3.63962 1
3.69596 1
4.53362 1
4.69133 1
4.75791 1
5.79867 1
5.89721 1
6.66574 1
7.28575 1
8.52584 1
11.67237 1
17.28613 1
36.37629 1
library(ggplot2)

# Bar chart of the ungrouped frequencies; x labels are rotated 90 degrees
# so the distance values stay readable.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n), together with its
# ceiling and floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise take the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split into n_clases parts, rounded up to a
# whole number. Break points start at the minimum and extend one width
# past the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.28887  6.28887 12.28887 18.28887 24.28887 30.28887 36.28887 42.28887
# Assign every distance to its class interval. The first break equals
# min(distance) and cut() builds (a, b] intervals, so without
# include.lowest = TRUE the smallest observation falls outside the first
# interval, becomes NA and is silently dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
# table() here is the base function; the variable also named `table`
# does not interfere with function calls.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.289,6.29] 31 0.8378378 31
(6.29,12.3] 4 0.1081081 35
(12.3,18.3] 1 0.0270270 36
(18.3,24.3] 0 0.0000000 36
(24.3,30.3] 0 0.0000000 36
(30.3,36.3] 0 0.0000000 36
(36.3,42.3] 1 0.0270270 37
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    7 obs. of  4 variables:
##  $ distance: Factor w/ 7 levels "(0.289,6.29]",..: 1 2 3 4 5 6 7
##  $ Freq    : int  31 4 1 0 0 0 1
##  $ Rel_Freq: num  0.838 0.108 0.027 0 0 ...
##  $ Cum_Freq: int  31 35 36 36 36 36 37
# Keep only the class label and its absolute frequency for plotting.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.289,6.29] 31
(6.29,12.3] 4
(12.3,18.3] 1
(18.3,24.3] 0
(24.3,30.3] 0
(30.3,36.3] 0
(36.3,42.3] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_HD.
# The whole data frame is passed, so character columns show up as
# all-NA columns in the result.
stat.desc(df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      3.800000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.540000e+02   NA   NA             NA           NA           NA
## max          7.485000e+03   NA   NA             NA           NA           NA
## range        6.631000e+03   NA   NA             NA           NA           NA
## sum          2.290210e+05   NA   NA             NA           NA           NA
## median       7.448500e+03   NA   NA             NA           NA           NA
## mean         6.026868e+03   NA   NA             NA           NA           NA
## SE.mean      3.671138e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 7.438432e+02   NA   NA             NA           NA           NA
## var          5.121356e+06   NA   NA             NA           NA           NA
## std.dev      2.263041e+03   NA   NA             NA           NA           NA
## coef.var     3.754921e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.800000e+01   NA  38.000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 1.043000e+03   NA   0.288870                   NA
## max             NA 8.508480e+05   NA  36.376290                   NA
## range           NA 8.498050e+05   NA  36.087420                   NA
## sum             NA 7.001138e+06   NA 166.172050                   NA
## median          NA 1.936000e+03   NA   2.914600                   NA
## mean            NA 1.842405e+05   NA   4.372949                   NA
## SE.mean         NA 5.663200e+04   NA   1.023393                   NA
## CI.mean.0.95    NA 1.147473e+05   NA   2.073592                   NA
## var             NA 1.218729e+11   NA  39.798695                   NA
## std.dev         NA 3.491031e+05   NA   6.308621                   NA
## coef.var        NA 1.894823e+00   NA   1.442647                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       38.00000000  3.800000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           13.28610000 -8.916660e+01          NA          NA             NA
## max           15.52270000 -8.526500e+01          NA          NA             NA
## range          2.23660000  3.901600e+00          NA          NA             NA
## sum          546.00580000 -3.329927e+03          NA          NA             NA
## median        14.20695000 -8.728760e+01          NA          NA             NA
## mean          14.36857368 -8.762966e+01          NA          NA             NA
## SE.mean        0.08447952  1.301577e-01          NA          NA             NA
## CI.mean.0.95   0.17117178  2.637245e-01          NA          NA             NA
## var            0.27119802  6.437585e-01          NA          NA             NA
## std.dev        0.52076676  8.023457e-01          NA          NA             NA
## coef.var       0.03624346 -9.156096e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 29.0000000 38.0000000
## nbr.null                 NA      NA         NA 26.0000000 25.0000000
## nbr.na                   NA      NA         NA  9.0000000  0.0000000
## min                      NA      NA         NA  0.0000000  0.0000000
## max                      NA      NA         NA  3.0000000 29.0000000
## range                    NA      NA         NA  3.0000000 29.0000000
## sum                      NA      NA         NA  6.0000000 86.0000000
## median                   NA      NA         NA  0.0000000  0.0000000
## mean                     NA      NA         NA  0.2068966  2.2631579
## SE.mean                  NA      NA         NA  0.1253499  0.9774733
## CI.mean.0.95             NA      NA         NA  0.2567675  1.9805491
## var                      NA      NA         NA  0.4556650 36.3072546
## std.dev                  NA      NA         NA  0.6750296  6.0255502
## coef.var                 NA      NA         NA  3.2626433  2.6624524
##              source_name source_link        prop         ypos
## nbr.val               NA          NA  38.0000000 3.800000e+01
## nbr.null              NA          NA   0.0000000 0.000000e+00
## nbr.na                NA          NA   0.0000000 0.000000e+00
## min                   NA          NA   0.1738379 9.399294e-02
## max                   NA          NA  21.8907391 9.988871e+01
## range                 NA          NA  21.7169012 9.979472e+01
## sum                   NA          NA 100.0000000 1.347990e+03
## median                NA          NA   1.7539652 3.219648e+01
## mean                  NA          NA   2.6315789 3.547342e+01
## SE.mean               NA          NA   0.6158637 4.046533e+00
## CI.mean.0.95          NA          NA   1.2478585 8.199055e+00
## var                   NA          NA  14.4129500 6.222284e+02
## std.dev               NA          NA   3.7964391 2.494451e+01
## coef.var              NA          NA   1.4426469 7.031887e-01
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Choluteca (Honduras)

library(readr)
library(knitr)
# Reload the full landslide catalogue; `df` was overwritten by the
# frequency-table data frames in the previous section.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 short names, then keep only the rows for Honduras.
names(df)[c(7, 9)] <- c("state", "city")
df_HD <- subset(df, subset = country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Narrow the catalogue to the Choluteca department of Honduras. The country is
# part of the condition because `state` values are not unique across countries
# in this catalogue ("Colón", for example, appears for Honduras and Panama).
df_HD <- subset(df, country_name == "Honduras" & state == "Choluteca")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6123 7/2/14 16:30 NA Honduras HN Choluteca 75872 Ciudad Choluteca 3.69596 Mine construction 13.2875 -87.0325 (13.2875, -87.032499999999999) Landslide Landslide Small Mining digging NA 3 8 Sciency Thoughts http://sciencythoughts.blogspot.com/2014/07/miners-trapped-by-honduran-landslide.html
7448 6/11/15 NA NA Honduras HN Choluteca 1164 Duyure 11.67237 Above road 13.5807 -86.9101 (13.5807, -86.9101) Landslide Landslide Medium Rain NA 0 0 El Heraldo http://www.elheraldo.hn/regionales/848812-218/incomunicados-vecinos-del-municipio-de-morolica
7449 12/15/15 NA NA Honduras HN Choluteca 1199 Corpus 0.36987 Mine construction 13.2861 -87.0329 (13.286099999999999, -87.032899999999998) Landslide Rockfall Small Rain NA 0 1 La Prensa http://www.laprensa.hn/sucesos/911394-410/minero-muere-soterrado-en-el-corpus-choluteca

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side (dodged) bars of `distance` per state, filled by city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of `distance` per state, one fill colour per city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar of `distance` wrapped onto polar
# coordinates along the y axis.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Add each row's share of the total distance (`prop`, in percent) and the
# midpoint of its slice on the cumulative scale (`ypos`), used to place labels.
# sum(distance) refers to the column inside the pipe rather than to the global
# df_HD, so the step does not depend on the object being overwritten.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels and no legend. "Set3" is an existing
# ColorBrewer qualitative palette; the former "Set4" is not a palette name and
# only triggered an "Unknown palette" warning.
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as monthly (frequency 12) from 2008.
# NOTE(review): values are taken in df_HD's current row order, not by event
# date; confirm that is intended before reading the plot as a chronology.
data <- ts(data = df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
11.67237
0.36987
3.69596
# Line plot of the series with a title, axis labels and the black/white theme.
# NOTE(review): `colour` inside labs() is a label for a colour legend; it does
# not appear to colour the line itself. Confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so the chart labels bars by city name.
distance <- df_HD$distance
names(distance) <- df_HD$city
# Pareto chart with cumulative-percentage ticks every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                     Frequency  Cum.Freq. Percentage Cum.Percent.
##   Duyure            11.672370  11.672370  74.165851    74.165851
##   Ciudad Choluteca   3.695960  15.368330  23.484007    97.649858
##   Corpus             0.369870  15.738200   2.350142   100.000000
# Stem-and-leaf display of the distance column.
stem(df_HD[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 
##   1 | 2
# Print the first rows (at most six) of the department subset.
head(df_HD, n = 6L)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  7448 6/11/15  <NA>  <NA>           Honduras     HN           Choluteca       1164
## 2  7449 12/15/15 <NA>  <NA>           Honduras     HN           Choluteca       1199
## 3  6123 7/2/14   16:30 <NA>           Honduras     HN           Choluteca      75872
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
head(df_HD) %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7448 6/11/15 NA NA Honduras HN Choluteca 1164 Duyure 11.67237 Above road 13.5807 -86.9101 (13.5807, -86.9101) Landslide Landslide Medium Rain NA 0 0 El Heraldo http://www.elheraldo.hn/regionales/848812-218/incomunicados-vecinos-del-municipio-de-morolica 74.165851 37.08293
7449 12/15/15 NA NA Honduras HN Choluteca 1199 Corpus 0.36987 Mine construction 13.2861 -87.0329 (13.286099999999999, -87.032899999999998) Landslide Rockfall Small Rain NA 0 1 La Prensa http://www.laprensa.hn/sucesos/911394-410/minero-muere-soterrado-en-el-corpus-choluteca 2.350142 75.34092
6123 7/2/14 16:30 NA Honduras HN Choluteca 75872 Ciudad Choluteca 3.69596 Mine construction 13.2875 -87.0325 (13.2875, -87.032499999999999) Landslide Landslide Small Mining digging NA 3 8 Sciency Thoughts http://sciencythoughts.blogspot.com/2014/07/miners-trapped-by-honduran-landslide.html 23.484007 88.25800
# Stem-and-leaf display of the distances at the default scale.
stem(x = df_HD$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 
##   1 | 2
# Same display with scale = 2, i.e. a plot roughly twice as long.
stem(df_HD[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 4
##    2 | 7
##    4 | 
##    6 | 
##    8 | 
##   10 | 7

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative columns
# and a final "Total" row. The result is stored under the name `table`; calls
# such as table(x) still reach the base function.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.36987 1 33.3 33.3 33.3 33.3
3.69596 1 33.3 33.3 66.7 66.7
11.67237 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Pull the value labels (row names) and the counts out of the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(, -1) replaces 1:(length(x) - 1), which
# degenerates to c(1, 0) when only the "Total" row is present and would keep it.
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor in table order; as plain character values the
# bar chart would sort them alphabetically ("11.67237" before "3.69596").
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.36987 1
3.69596 1
11.67237 1
library(ggplot2)
# Bar chart of the count per distinct distance value; x labels rotated 90°.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n), and its roundings.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the ceiling unless it is even, in which case fall back to the floor.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split over the classes, rounded up to a whole unit.
# The lower bound of 1 keeps seq() and cut() usable when all distances are
# equal (range 0), e.g. for a department with a single event.
R <- max(distance) - min(distance)
w <- max(ceiling(R / n_clases), 1)
# Break points from the minimum up to at least the maximum, in steps of w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.36987  4.36987  8.36987 12.36987
# Assign each distance to its class. `bins` starts exactly at min(distance),
# so with the default half-open (a, b] intervals the smallest observation
# belongs to no class and is silently dropped as NA. include.lowest = TRUE
# closes the first interval on the left so every observation is counted.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.37,4.37] 1 0.5 1
(4.37,8.37] 0 0.0 1
(8.37,12.4] 1 0.5 2
# Inspect the column types of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.37,4.37]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Two-column plotting frame: class label (x) and absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.37,4.37] 1
(4.37,8.37] 0
(8.37,12.4] 1
library(ggplot2)

# Bar chart of the number of events in each distance class.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset; the printed result
# shows NA throughout for the character columns.
stat.desc(x = df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.123000e+03   NA   NA             NA           NA           NA
## max          7.449000e+03   NA   NA             NA           NA           NA
## range        1.326000e+03   NA   NA             NA           NA           NA
## sum          2.102000e+04   NA   NA             NA           NA           NA
## median       7.448000e+03   NA   NA             NA           NA           NA
## mean         7.006667e+03   NA   NA             NA           NA           NA
## SE.mean      4.418334e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.901056e+03   NA   NA             NA           NA           NA
## var          5.856503e+05   NA   NA             NA           NA           NA
## std.dev      7.652779e+02   NA   NA             NA           NA           NA
## coef.var     1.092214e-01   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 3.000000e+00   NA  3.000000                   NA  3.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 1.164000e+03   NA  0.369870                   NA 13.28610000
## max             NA 7.587200e+04   NA 11.672370                   NA 13.58070000
## range           NA 7.470800e+04   NA 11.302500                   NA  0.29460000
## sum             NA 7.823500e+04   NA 15.738200                   NA 40.15430000
## median          NA 1.199000e+03   NA  3.695960                   NA 13.28750000
## mean            NA 2.607833e+04   NA  5.246067                   NA 13.38476667
## SE.mean         NA 2.489684e+04   NA  3.353543                   NA  0.09796750
## CI.mean.0.95    NA 1.071224e+05   NA 14.429130                   NA  0.42152013
## var             NA 1.859557e+09   NA 33.738750                   NA  0.02879289
## std.dev         NA 4.312258e+04   NA  5.808507                   NA  0.16968469
## coef.var        NA 1.653579e+00   NA  1.107212                   NA  0.01267745
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.703290e+01          NA          NA             NA
## max          -8.691010e+01          NA          NA             NA
## range         1.228000e-01          NA          NA             NA
## sum          -2.609755e+02          NA          NA             NA
## median       -8.703250e+01          NA          NA             NA
## mean         -8.699183e+01          NA          NA             NA
## SE.mean       4.086683e-02          NA          NA             NA
## CI.mean.0.95  1.758358e-01          NA          NA             NA
## var           5.010293e-03          NA          NA             NA
## std.dev       7.078343e-02          NA          NA             NA
## coef.var     -8.136790e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA 3.000000   3.000000          NA
## nbr.null                 NA      NA         NA 2.000000   1.000000          NA
## nbr.na                   NA      NA         NA 0.000000   0.000000          NA
## min                      NA      NA         NA 0.000000   0.000000          NA
## max                      NA      NA         NA 3.000000   8.000000          NA
## range                    NA      NA         NA 3.000000   8.000000          NA
## sum                      NA      NA         NA 3.000000   9.000000          NA
## median                   NA      NA         NA 0.000000   1.000000          NA
## mean                     NA      NA         NA 1.000000   3.000000          NA
## SE.mean                  NA      NA         NA 1.000000   2.516611          NA
## CI.mean.0.95             NA      NA         NA 4.302653  10.828105          NA
## var                      NA      NA         NA 3.000000  19.000000          NA
## std.dev                  NA      NA         NA 1.732051   4.358899          NA
## coef.var                 NA      NA         NA 1.732051   1.452966          NA
##              source_link        prop        ypos
## nbr.val               NA    3.000000   3.0000000
## nbr.null              NA    0.000000   0.0000000
## nbr.na                NA    0.000000   0.0000000
## min                   NA    2.350142  37.0829256
## max                   NA   74.165851  88.2579965
## range                 NA   71.815710  51.1750708
## sum                   NA  100.000000 200.6818442
## median                NA   23.484007  75.3409221
## mean                  NA   33.333333  66.8939481
## SE.mean               NA   21.308300  15.3648462
## CI.mean.0.95          NA   91.682215  66.1095975
## var                   NA 1362.130955 708.2354967
## std.dev               NA   36.907058  26.6126943
## coef.var              NA    1.107212   0.3978341
# Horizontal box-and-whisker plot of the distance series, filled green.
boxplot(x = data, col = "green", horizontal = TRUE)

Gráfico para Colón (Honduras)

library(readr)
library(knitr)
# Load the landslide catalogue from GitHub into `df`. The previous section
# overwrote `df` with small plotting tables, so the catalogue is read again.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th catalogue columns the short names used below.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the events recorded for Honduras and preview the first rows.
df_HD <- subset(df, subset = country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Narrow the catalogue to the Colón department of Honduras. The country must be
# part of the condition: Panama also has a "Colón" province, and filtering on
# `state` alone pulled those Panamanian events into this Honduras section.
df_HD <- subset(df, country_name == "Honduras" & state == "Colón")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2652 10/25/10 NA NA Honduras HN Colón 1441 Cusuna 36.37629 NA 15.5227 -85.2650 (15.5227, -85.265000000000001) Landslide Landslide Medium Tropical cyclone Tropical Storm Richard NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/LSGZ-8ALBZE-full_report.pdf/$File/full_report.pdf
2794 12/8/10 NA NA Panama PA Colón 1310 El Giral 1.80330 NA 9.2332 -79.6961 (9.2332000000000001, -79.696100000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.portworld.com/news/i98731/Panama_Canal_disrupted_by_floods
2795 12/9/10 Morning NA Panama PA Colón 1274 Portobelo 0.09491 Deforested slope 9.5493 -79.6505 (9.5493000000000006, -79.650499999999994) Landslide Mudslide Medium Downpour NA NA 8 Fox News Latino http://latino.foxnews.com/latino/news/2010/12/09/heavy-rains-kill-panama-force-canal-close/
4632 11/25/12 NA NA Panama PA Colón 76643 Colón 0.16894 NA 9.3600 -79.9001 (9.36, -79.900099999999995) Landslide Landslide Medium Rain NA NA 2 NA http://www.ndtv.com/article/world/two-dead-in-landslide-as-floods-hit-panama-297138
4879 5/28/13 NA NA Panama PA Colón 1274 Portobelo 2.67409 NA 9.5676 -79.6667 (9.5676000000000005, -79.666700000000006) Landslide Landslide Medium Downpour NA NA 0 www.newsroompanama.com http://www.newsroompanama.com/panama/5804-rain-brings-floods-landslides-and-traffic-chaos-to-colon.html
4880 5/28/13 NA NA Panama PA Colón 3302 Margarita 0.74760 NA 9.3381 -79.8897 (9.3381000000000007, -79.889700000000005) Landslide Landslide Medium Downpour NA NA 0 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side (dodged) bars of `distance` per state, filled by city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of `distance` per state, one fill colour per city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar of `distance` wrapped onto polar
# coordinates along the y axis.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Add each row's share of the total distance (`prop`, in percent) and the
# midpoint of its slice on the cumulative scale (`ypos`), used to place labels.
# sum(distance) refers to the column inside the pipe rather than to the global
# df_HD, so the step does not depend on the object being overwritten.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart with percentage labels and no legend. "Set3" is an existing
# ColorBrewer qualitative palette; the former "Set4" is not a palette name and
# only triggered an "Unknown palette" warning.
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as monthly (frequency 12) from 2008.
# NOTE(review): values are taken in df_HD's current row order, not by event
# date; confirm that is intended before reading the plot as a chronology.
data <- ts(data = df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.09491
2.67409
4.74914
0.18619
2.28589
0.74760
# Line plot of the series with a title, axis labels and the black/white theme.
# NOTE(review): `colour` inside labs() is a label for a colour legend; it does
# not appear to colour the line itself. Confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by city, so the chart labels bars by city name.
distance <- df_HD$distance
names(distance) <- df_HD$city
# Pareto chart with cumulative-percentage ticks every 10 %.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                    
## Pareto chart analysis for distance
##                       Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Cusuna             36.3762900  36.3762900  71.7525220   71.7525220
##   Nuevo San Juan      4.7491400  41.1254300   9.3677165   81.1202386
##   Portobelo           2.6740900  43.7995200   5.2746638   86.3949024
##   María Chiquita      2.2858900  46.0854100   4.5089363   90.9038387
##   El Giral            1.8033000  47.8887100   3.5570236   94.4608623
##   Margarita           0.7674000  48.6561100   1.5137026   95.9745649
##   Margarita           0.7476000  49.4037100   1.4746470   97.4492119
##   Cativá              0.6394800  50.0431900   1.2613794   98.7105913
##   Colón               0.2036500  50.2468400   0.4017012   99.1122925
##   Nueva Providencia   0.1861900  50.4330300   0.3672613   99.4795538
##   Colón               0.1689400  50.6019700   0.3332355   99.8127893
##   Portobelo           0.0949100  50.6968800   0.1872107  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_HD[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 00001112235
##   1 | 
##   2 | 
##   3 | 6
# Print the first rows (at most six) of the department subset.
head(df_HD, n = 6L)
## # A tibble: 6 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2795 12/9/10 Morning <NA>           Panama       PA           Colón       1274
## 2  4879 5/28/13 <NA>    <NA>           Panama       PA           Colón       1274
## 3  6702 5/9/14  <NA>    <NA>           Panama       PA           Colón       1232
## 4  7450 9/7/15  <NA>    <NA>           Panama       PA           Colón          0
## 5  7451 7/2/15  <NA>    <NA>           Panama       PA           Colón       1146
## 6  4880 5/28/13 <NA>    <NA>           Panama       PA           Colón       3302
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
head(df_HD) %>% knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2795 12/9/10 Morning NA Panama PA Colón 1274 Portobelo 0.09491 Deforested slope 9.5493 -79.6505 (9.5493000000000006, -79.650499999999994) Landslide Mudslide Medium Downpour NA NA 8 Fox News Latino http://latino.foxnews.com/latino/news/2010/12/09/heavy-rains-kill-panama-force-canal-close/ 0.1872107 0.0936054
4879 5/28/13 NA NA Panama PA Colón 1274 Portobelo 2.67409 NA 9.5676 -79.6667 (9.5676000000000005, -79.666700000000006) Landslide Landslide Medium Downpour NA NA 0 www.newsroompanama.com http://www.newsroompanama.com/panama/5804-rain-brings-floods-landslides-and-traffic-chaos-to-colon.html 5.2746638 2.8245427
6702 5/9/14 NA NA Panama PA Colón 1232 Nuevo San Juan 4.74914 Below road 9.2924 -79.7478 (9.2924000000000007, -79.747799999999998) Landslide Landslide Small Downpour NA 0 0 NEXtv http://www.nexpanama.com/videos/deslizamiento-de-tierra-en-carretera-boyd-roosevelt-3800 9.3677165 10.1457328
7450 9/7/15 NA NA Panama PA Colón 0 Nueva Providencia 0.18619 Unknown 9.2619 -79.8164 (9.2619000000000007, -79.816400000000002) Landslide Landslide Medium Downpour NA 0 0 Panamá América http://www.panamaamerica.com.pa/provincias/un-colegio-y-20-casas-afectadas-por-las-lluvias-en-colon-991260 0.3672613 15.0132217
7451 7/2/15 NA NA Panama PA Colón 1146 María Chiquita 2.28589 Mine construction 9.4405 -79.7754 (9.4405000000000001, -79.775400000000005) Landslide Other Medium Construction NA 0 1 El Siglo http://elsiglo.com/panama/alud-tierra-obrero-colon-muere/23876592 4.5089363 17.4513205
4880 5/28/13 NA NA Panama PA Colón 3302 Margarita 0.74760 NA 9.3381 -79.8897 (9.3381000000000007, -79.889700000000005) Landslide Landslide Medium Downpour NA NA 0 NA NA 1.4746470 20.4431121
# Stem-and-leaf display of the distances at the default scale.
stem(x = df_HD$distance)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 00001112235
##   1 | 
##   2 | 
##   3 | 6
# Same display with scale = 2, i.e. a plot roughly twice as long.
stem(df_HD[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0000111223
##   0 | 5
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 6

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative columns
# and a final "Total" row. The result is stored under the name `table`; calls
# such as table(x) still reach the base function.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.09491 1 8.3 8.3 8.3 8.3
0.16894 1 8.3 8.3 16.7 16.7
0.18619 1 8.3 8.3 25.0 25.0
0.20365 1 8.3 8.3 33.3 33.3
0.63948 1 8.3 8.3 41.7 41.7
0.7476 1 8.3 8.3 50.0 50.0
0.7674 1 8.3 8.3 58.3 58.3
1.8033 1 8.3 8.3 66.7 66.7
2.28589 1 8.3 8.3 75.0 75.0
2.67409 1 8.3 8.3 83.3 83.3
4.74914 1 8.3 8.3 91.7 91.7
36.37629 1 8.3 8.3 100.0 100.0
Total 12 100.0 100.0 100.0 100.0
# Inspect the structure (classes and column types) of the frequency table.
str(object = table)
## Classes 'freqtab' and 'data.frame':  13 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ val%   : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ %cum   : num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
##  $ val%cum: num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
# Pull the value labels (row names) and the counts out of the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(, -1) replaces 1:(length(x) - 1), which
# degenerates to c(1, 0) when only the "Total" row is present and would keep it.
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor in table order; as plain character values the
# bar chart would sort them alphabetically ("36.37629" before "4.74914").
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.09491 1
0.16894 1
0.18619 1
0.20365 1
0.63948 1
0.7476 1
0.7674 1
1.8033 1
2.28589 1
2.67409 1
4.74914 1
36.37629 1
library(ggplot2)
# Bar chart of the count per distinct distance value; x labels rotated 90°.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n), and its roundings.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the ceiling unless it is even, in which case fall back to the floor.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range split over the classes, rounded up to a whole unit.
# The lower bound of 1 keeps seq() and cut() usable when all distances are
# equal (range 0), e.g. for a department with a single event.
R <- max(distance) - min(distance)
w <- max(ceiling(R / n_clases), 1)
# Break points from the minimum up to at least the maximum, in steps of w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.09491  8.09491 16.09491 24.09491 32.09491 40.09491
# Assign each distance to its class. `bins` starts exactly at min(distance),
# so with the default half-open (a, b] intervals the smallest observation
# belongs to no class and is silently dropped as NA. include.lowest = TRUE
# closes the first interval on the left so every observation is counted.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.0949,8.09] 10 0.9090909 10
(8.09,16.1] 0 0.0000000 10
(16.1,24.1] 0 0.0000000 10
(24.1,32.1] 0 0.0000000 10
(32.1,40.1] 1 0.0909091 11
# Inspect the column types of the grouped frequency table.
str(object = Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.0949,8.09]",..: 1 2 3 4 5
##  $ Freq    : int  10 0 0 0 1
##  $ Rel_Freq: num  0.9091 0 0 0 0.0909
##  $ Cum_Freq: int  10 10 10 10 11
# Two-column plotting frame: class label (x) and absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.0949,8.09] 10
(8.09,16.1] 0
(16.1,24.1] 0
(24.1,32.1] 0
(32.1,40.1] 1
library(ggplot2)

# Bar chart of the number of events in each distance class.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of the subset; the printed result
# shows NA throughout for the character columns.
stat.desc(x = df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      1.200000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.652000e+03   NA   NA             NA           NA           NA
## max          7.452000e+03   NA   NA             NA           NA           NA
## range        4.800000e+03   NA   NA             NA           NA           NA
## sum          6.181400e+04   NA   NA             NA           NA           NA
## median       4.880500e+03   NA   NA             NA           NA           NA
## mean         5.151167e+03   NA   NA             NA           NA           NA
## SE.mean      5.231425e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.151429e+03   NA   NA             NA           NA           NA
## var          3.284136e+06   NA   NA             NA           NA           NA
## std.dev      1.812219e+03   NA   NA             NA           NA           NA
## coef.var     3.518074e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.200000e+01   NA  12.000000                   NA
## nbr.null        NA 1.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 0.000000e+00   NA   0.094910                   NA
## max             NA 7.664300e+04   NA  36.376290                   NA
## range           NA 7.664300e+04   NA  36.281380                   NA
## sum             NA 1.971740e+05   NA  50.696880                   NA
## median          NA 1.375500e+03   NA   0.757500                   NA
## mean            NA 1.643117e+04   NA   4.224740                   NA
## SE.mean         NA 8.446243e+03   NA   2.950226                   NA
## CI.mean.0.95    NA 1.859006e+04   NA   6.493404                   NA
## var             NA 8.560683e+08   NA 104.446004                   NA
## std.dev         NA 2.925864e+04   NA  10.219883                   NA
## coef.var        NA 1.780680e+00   NA   2.419056                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       12.0000000   12.00000000          NA          NA             NA
## nbr.null       0.0000000    0.00000000          NA          NA             NA
## nbr.na         0.0000000    0.00000000          NA          NA             NA
## min            9.2332000  -85.26500000          NA          NA             NA
## max           15.5227000  -79.65050000          NA          NA             NA
## range          6.2895000    5.61450000          NA          NA             NA
## sum          118.6112000 -963.01940000          NA          NA             NA
## median         9.3590500  -79.81925000          NA          NA             NA
## mean           9.8842667  -80.25161667          NA          NA             NA
## SE.mean        0.5134407    0.45651767          NA          NA             NA
## CI.mean.0.95   1.1300754    1.00478862          NA          NA             NA
## var            3.1634562    2.50090061          NA          NA             NA
## std.dev        1.7786107    1.58142360          NA          NA             NA
## coef.var       0.1799436   -0.01970582          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4 12.0000000          NA
## nbr.null                 NA      NA         NA        4  8.0000000          NA
## nbr.na                   NA      NA         NA        8  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  8.0000000          NA
## range                    NA      NA         NA        0  8.0000000          NA
## sum                      NA      NA         NA        0 15.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  1.2500000          NA
## SE.mean                  NA      NA         NA        0  0.7084447          NA
## CI.mean.0.95             NA      NA         NA        0  1.5592763          NA
## var                      NA      NA         NA        0  6.0227273          NA
## std.dev                  NA      NA         NA        0  2.4541245          NA
## coef.var                 NA      NA         NA      NaN  1.9632996          NA
##              source_link        prop         ypos
## nbr.val               NA  12.0000000 1.200000e+01
## nbr.null              NA   0.0000000 0.000000e+00
## nbr.na                NA   0.0000000 0.000000e+00
## min                   NA   0.1872107 9.360537e-02
## max                   NA  71.7525220 9.936931e+01
## range                 NA  71.5653113 9.927570e+01
## sum                   NA 100.0000000 4.705863e+02
## median                NA   1.4941748 2.119020e+01
## mean                  NA   8.3333333 3.921552e+01
## SE.mean               NA   5.8193444 1.127004e+01
## CI.mean.0.95          NA  12.8082906 2.480520e+01
## var                   NA 406.3772270 1.524167e+03
## std.dev               NA  20.1588002 3.904058e+01
## coef.var              NA   2.4190560 9.955389e-01
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Comayagua (Honduras)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names so they can be used as plain
# symbols in later filters and plot aesthetics.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is Honduras and preview the first ones.
df_HD <- df[which(df$country_name == "Honduras"), ]
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Restrict the catalogue to the state of Comayagua. The country condition is
# part of the same filter because `df` is the full catalogue, not a
# per-country subset; without it, a same-named state in another country
# would also be selected.
df_HD <- subset(df, country_name == "Honduras" & state == "Comayagua")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2533 10/3/10 13:00 NA Honduras HN Comayagua 1484 El Rancho 4.53362 Unknown 14.3898 -87.7654 (14.389799999999999, -87.7654) Landslide Mudslide Medium Rain NA NA 0 Honduras News http://www.hondurasnews.com/2010/10/03/landslide-in-comayagua/
7454 10/16/15 18:00 NA Honduras HN Comayagua 1389 El Sauce 7.28575 Above road 14.4837 -87.7152 (14.483700000000001, -87.715199999999996) Landslide Rockfall Medium Rain NA 0 0 NTN24 http://www.ntn24.com/video/lluvias-en-honduras-73259
7455 10/29/15 NA NA Honduras HN Comayagua 4673 La Libertad 17.28613 Unknown 14.9064 -87.5930 (14.9064, -87.593000000000004) Landslide Landslide Medium Continuous rain NA 0 1 HRN http://www.radiohrn.hn/l/noticias/menor-de-cinco-%C3%B1os-muere-en-vivienda-soterrada-por-las-lluvias-en-las-lajas-comayagua
7456 10/16/15 23:00 NA Honduras HN Comayagua 1470 Concepción de Guasistagua 8.52584 Deforested slope 14.5742 -87.7286 (14.574199999999999, -87.7286) Landslide Landslide Medium Rain NA 0 5 La Prensa http://www.laprensa.hn/honduras/891623-410/deslave-soterra-a-una-familia-en-comunidad-de-comayagua

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, drawn side by side within the state.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped into polar coordinates, with
# the y values mapped to the angle.
ggplot(df_HD, aes(fill = city, y = distance, x = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add each row's share of the total distance (in percent) and the mid-point
# of its slice on the cumulative scale, which is where its label is drawn.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; it provides percent().
library(scales)
# Labelled pie chart. "Set3" is a ColorBrewer qualitative palette; there is
# no palette called "Set4".
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances, in the current row order of df_HD, as a series with
# 12 observations per unit of time whose first observation is at 2008.
# NOTE(review): the rows are not sorted by `date` at this point; confirm the
# ordering is intended before reading the plot as a chronology.
data <- ts(data = df_HD[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
17.28613
7.28575
4.53362
8.52584
# Line plot of the series with a custom title and axis labels.
# NOTE(review): `colour = "green"` inside labs() sets the label of the colour
# scale; it does not paint the line. Confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: the distance of each event, labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)
# Pareto chart of the distances. Bars use a heat palette and the secondary
# axis shows the cumulative percentage in steps of 10.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                            
## Pareto chart analysis for distance
##                             Frequency Cum.Freq. Percentage Cum.Percent.
##   La Libertad                17.28613  17.28613   45.93546     45.93546
##   Concepción de Guasistagua   8.52584  25.81197   22.65622     68.59168
##   El Sauce                    7.28575  33.09772   19.36086     87.95254
##   El Rancho                   4.53362  37.63134   12.04746    100.00000
stem(df_HD$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 
##   0 | 579
##   1 | 
##   1 | 7
head(df_HD)
## # A tibble: 4 x 25
##      id date     time  continent_code country_name country_code state     population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>          <dbl>
## 1  7455 10/29/15 <NA>  <NA>           Honduras     HN           Comayagua       4673
## 2  7454 10/16/15 18:00 <NA>           Honduras     HN           Comayagua       1389
## 3  2533 10/3/10  13:00 <NA>           Honduras     HN           Comayagua       1484
## 4  7456 10/16/15 23:00 <NA>           Honduras     HN           Comayagua       1470
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7455 10/29/15 NA NA Honduras HN Comayagua 4673 La Libertad 17.28613 Unknown 14.9064 -87.5930 (14.9064, -87.593000000000004) Landslide Landslide Medium Continuous rain NA 0 1 HRN http://www.radiohrn.hn/l/noticias/menor-de-cinco-%C3%B1os-muere-en-vivienda-soterrada-por-las-lluvias-en-las-lajas-comayagua 45.93546 22.96773
7454 10/16/15 18:00 NA Honduras HN Comayagua 1389 El Sauce 7.28575 Above road 14.4837 -87.7152 (14.483700000000001, -87.715199999999996) Landslide Rockfall Medium Rain NA 0 0 NTN24 http://www.ntn24.com/video/lluvias-en-honduras-73259 19.36086 55.61589
2533 10/3/10 13:00 NA Honduras HN Comayagua 1484 El Rancho 4.53362 Unknown 14.3898 -87.7654 (14.389799999999999, -87.7654) Landslide Mudslide Medium Rain NA NA 0 Honduras News http://www.hondurasnews.com/2010/10/03/landslide-in-comayagua/ 12.04746 71.32005
7456 10/16/15 23:00 NA Honduras HN Comayagua 1470 Concepción de Guasistagua 8.52584 Deforested slope 14.5742 -87.7286 (14.574199999999999, -87.7286) Landslide Landslide Medium Rain NA 0 5 La Prensa http://www.laprensa.hn/honduras/891623-410/deslave-soterra-a-una-familia-en-comunidad-de-comayagua 22.65622 88.67189
stem(df_HD$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 
##   0 | 579
##   1 | 
##   1 | 7
stem(df_HD$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    4 | 5
##    6 | 3
##    8 | 5
##   10 | 
##   12 | 
##   14 | 
##   16 | 3

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a total row.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
4.53362 1 25 25 25 25
7.28575 1 25 25 50 50
8.52584 1 25 25 75 75
17.28613 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Split the frequency table into labels (row names) and counts, dropping the
# trailing total row so that it is not plotted as a category.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)],
# yields an empty vector rather than v[1] when v has a single element.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
4.53362 1
7.28575 1
8.52584 1
17.28613 1
library(ggplot2)
# Bar chart of the count of each distinct distance value; the x-axis labels
# are rotated by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n). log2() is used
# instead of log(n) / log(2) so that rounding error in the quotient cannot
# push ceiling()/floor() to a neighbouring integer.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up value unless it is even, in which case fall back to the
# rounded-down value.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range of the data divided by the number of classes,
# rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and run up to one width past the maximum, so
# the last break lies above the largest observation.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  4.53362  9.53362 14.53362 19.53362
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; otherwise every interval is (a, b] and the minimum,
# which equals the first break, is coded NA and silently dropped.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(4.53,9.53] 2 0.6666667 2
(9.53,14.5] 0 0.0000000 2
(14.5,19.5] 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(4.53,9.53]",..: 1 2 3
##  $ Freq    : int  2 0 1
##  $ Rel_Freq: num  0.667 0 0.333
##  $ Cum_Freq: int  2 2 3
# Two-column data frame (class label, count) used as input for the plot.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(4.53,9.53] 2
(9.53,14.5] 0
(14.5,19.5] 1
library(ggplot2)

# Bar chart of the number of events in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only: stat.desc() yields
# nothing but NA for character columns, which clutters the summary.
stat.desc(Filter(is.numeric, df_HD))
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.533000e+03   NA   NA             NA           NA           NA
## max          7.456000e+03   NA   NA             NA           NA           NA
## range        4.923000e+03   NA   NA             NA           NA           NA
## sum          2.489800e+04   NA   NA             NA           NA           NA
## median       7.454500e+03   NA   NA             NA           NA           NA
## mean         6.224500e+03   NA   NA             NA           NA           NA
## SE.mean      1.230500e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.916000e+03   NA   NA             NA           NA           NA
## var          6.056522e+06   NA   NA             NA           NA           NA
## std.dev      2.461000e+03   NA   NA             NA           NA           NA
## coef.var     3.953731e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.389000e+03   NA  4.5336200                   NA
## max             NA 4.673000e+03   NA 17.2861300                   NA
## range           NA 3.284000e+03   NA 12.7525100                   NA
## sum             NA 9.016000e+03   NA 37.6313400                   NA
## median          NA 1.477000e+03   NA  7.9057950                   NA
## mean            NA 2.254000e+03   NA  9.4078350                   NA
## SE.mean         NA 8.066051e+02   NA  2.7553987                   NA
## CI.mean.0.95    NA 2.566978e+03   NA  8.7689084                   NA
## var             NA 2.602447e+06   NA 30.3688880                   NA
## std.dev         NA 1.613210e+03   NA  5.5107974                   NA
## coef.var        NA 7.157100e-01   NA  0.5857668                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.00000000  4.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          14.38980000 -8.776540e+01          NA          NA             NA
## max          14.90640000 -8.759300e+01          NA          NA             NA
## range         0.51660000  1.724000e-01          NA          NA             NA
## sum          58.35410000 -3.508022e+02          NA          NA             NA
## median       14.52895000 -8.772190e+01          NA          NA             NA
## mean         14.58852500 -8.770055e+01          NA          NA             NA
## SE.mean       0.11244615  3.738755e-02          NA          NA             NA
## CI.mean.0.95  0.35785382  1.189839e-01          NA          NA             NA
## var           0.05057654  5.591317e-03          NA          NA             NA
## std.dev       0.22489229  7.477511e-02          NA          NA             NA
## coef.var      0.01541570 -8.526185e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        3   4.000000          NA
## nbr.null                 NA      NA         NA        3   2.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   5.000000          NA
## range                    NA      NA         NA        0   5.000000          NA
## sum                      NA      NA         NA        0   6.000000          NA
## median                   NA      NA         NA        0   0.500000          NA
## mean                     NA      NA         NA        0   1.500000          NA
## SE.mean                  NA      NA         NA        0   1.190238          NA
## CI.mean.0.95             NA      NA         NA        0   3.787869          NA
## var                      NA      NA         NA        0   5.666667          NA
## std.dev                  NA      NA         NA        0   2.380476          NA
## coef.var                 NA      NA         NA      NaN   1.586984          NA
##              source_link        prop        ypos
## nbr.val               NA   4.0000000   4.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA  12.0474583  22.9677312
## max                   NA  45.9354623  88.6718889
## range                 NA  33.8880040  65.7041578
## sum                   NA 100.0000000 238.5755596
## median                NA  21.0085397  63.4679698
## mean                  NA  25.0000000  59.6438899
## SE.mean               NA   7.3220850  13.9652032
## CI.mean.0.95          NA  23.3021423  44.4435093
## var                   NA 214.4517146 780.1076021
## std.dev               NA  14.6441700  27.9304064
## coef.var              NA   0.5857668   0.4682861
# Horizontal box plot of `data`, filled in green.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Copán (Honduras)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names so they can be used as plain
# symbols in later filters and plot aesthetics.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is Honduras and preview the first ones.
df_HD <- df[which(df$country_name == "Honduras"), ]
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Restrict the catalogue to the state of Copán. The country condition is
# part of the same filter because `df` is the full catalogue, not a
# per-country subset; without it, a same-named state in another country
# would also be selected.
df_HD <- subset(df, country_name == "Honduras" & state == "Copán")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
7457 9/6/15 NA NA Honduras HN Copán 27753 Santa Rosa de Copán 0.74414 Urban area 14.7698 -88.7731 (14.7698, -88.773099999999999) Landslide Landslide Medium Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/877427-410/honduras-fuertes-lluvias-afectan-el-occidente
7458 9/6/15 NA NA Honduras HN Copán 27753 Santa Rosa de Copán 0.28887 Urban area 14.7691 -88.7800 (14.7691, -88.78) Landslide Landslide Medium Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/877427-410/honduras-fuertes-lluvias-afectan-el-occidente
7459 11/21/15 22:30 NA Honduras HN Copán 1340 Ojos de Agua 1.39095 Below road 14.6893 -88.8098 (14.689299999999999, -88.809799999999996) Landslide Landslide Medium Rain NA 0 1 La Prensa http://www.laprensa.hn/honduras/904048-410/beb%C3%A9-de-25-d%C3%ADas-de-nacida-muere-soterrada-en-cop%C3%A1n
7461 9/24/15 NA NA Honduras HN Copán 1452 Lucerna 5.89721 Above road 14.6000 -88.9144 (14.6, -88.914400000000001) Landslide Landslide Small Rain NA 0 0 El Heraldo http://www.elheraldo.hn/pais/883672-214/ampl%C3%ADan-por-72-horas-alerta-verde-en-nueve-departamentos-de-honduras

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Grouped bars: one bar per city, drawn side by side within the state.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled into a single bar per state.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped into polar coordinates, with
# the y values mapped to the angle.
ggplot(df_HD, aes(fill = city, y = distance, x = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add each row's share of the total distance (in percent) and the mid-point
# of its slice on the cumulative scale, which is where its label is drawn.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() fails loudly if scales is missing; it provides percent().
library(scales)
# Labelled pie chart. "Set3" is a ColorBrewer qualitative palette; there is
# no palette called "Set4".
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distances, in the current row order of df_HD, as a series with
# 12 observations per unit of time whose first observation is at 2008.
# NOTE(review): the rows are not sorted by `date` at this point; confirm the
# ordering is intended before reading the plot as a chronology.
data <- ts(data = df_HD[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.74414
0.28887
1.39095
5.89721
0.43391
# Line plot of the series with a custom title and axis labels.
# NOTE(review): `colour = "green"` inside labs() sets the label of the colour
# scale; it does not paint the line. Confirm the intent.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: the distance of each event, labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)
# Pareto chart of the distances. Bars use a heat palette and the secondary
# axis shows the cumulative percentage in steps of 10.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                      
## Pareto chart analysis for distance
##                        Frequency  Cum.Freq. Percentage Cum.Percent.
##   Lucerna               5.897210   5.897210  67.357580    67.357580
##   Ojos de Agua          1.390950   7.288160  15.887348    83.244928
##   Santa Rosa de Copán   0.744140   8.032300   8.499523    91.744450
##   Corquín               0.433910   8.466210   4.956094    96.700544
##   Santa Rosa de Copán   0.288870   8.755080   3.299456   100.000000
stem(df_HD$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3474
##   2 | 
##   4 | 9
head(df_HD)
## # A tibble: 5 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  7457 9/6/15   <NA>  <NA>           Honduras     HN           Copán      27753
## 2  7458 9/6/15   <NA>  <NA>           Honduras     HN           Copán      27753
## 3  7459 11/21/15 22:30 <NA>           Honduras     HN           Copán       1340
## 4  7461 9/24/15  <NA>  <NA>           Honduras     HN           Copán       1452
## 5   854 10/19/08 <NA>  <NA>           Honduras     HN           Copán       4752
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7457 9/6/15 NA NA Honduras HN Copán 27753 Santa Rosa de Copán 0.74414 Urban area 14.7698 -88.7731 (14.7698, -88.773099999999999) Landslide Landslide Medium Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/877427-410/honduras-fuertes-lluvias-afectan-el-occidente 8.499523 4.249761
7458 9/6/15 NA NA Honduras HN Copán 27753 Santa Rosa de Copán 0.28887 Urban area 14.7691 -88.7800 (14.7691, -88.78) Landslide Landslide Medium Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/877427-410/honduras-fuertes-lluvias-afectan-el-occidente 3.299456 10.149251
7459 11/21/15 22:30 NA Honduras HN Copán 1340 Ojos de Agua 1.39095 Below road 14.6893 -88.8098 (14.689299999999999, -88.809799999999996) Landslide Landslide Medium Rain NA 0 1 La Prensa http://www.laprensa.hn/honduras/904048-410/beb%C3%A9-de-25-d%C3%ADas-de-nacida-muere-soterrada-en-cop%C3%A1n 15.887348 19.742652
7461 9/24/15 NA NA Honduras HN Copán 1452 Lucerna 5.89721 Above road 14.6000 -88.9144 (14.6, -88.914400000000001) Landslide Landslide Small Rain NA 0 0 El Heraldo http://www.elheraldo.hn/pais/883672-214/ampl%C3%ADan-por-72-horas-alerta-verde-en-nueve-departamentos-de-honduras 67.357580 61.365116
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html 4.956094 97.521953
stem(df_HD$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 3474
##   2 | 
##   4 | 9
stem(df_HD$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 347
##   1 | 4
##   2 | 
##   3 | 
##   4 | 
##   5 | 9

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a total row.
table <- questionr::freq(
  distance,
  total = TRUE,
  sort = "dec",
  cum = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.28887 1 20 20 20 20
0.43391 1 20 20 40 40
0.74414 1 20 20 60 60
1.39095 1 20 20 80 80
5.89721 1 20 20 100 100
Total 5 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
# Split the frequency table into labels (row names) and counts, dropping the
# trailing total row so that it is not plotted as a category.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)],
# yields an empty vector rather than v[1] when v has a single element.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.28887 1
0.43391 1
0.74414 1
1.39095 1
5.89721 1
library(ggplot2)
# Bar chart of the count of each distinct distance value; the x-axis labels
# are rotated by 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n). log2() is used
# instead of log(n) / log(2) so that rounding error in the quotient cannot
# push ceiling()/floor() to a neighbouring integer.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up value unless it is even, in which case fall back to the
# rounded-down value.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range of the data divided by the number of classes,
# rounded up to a whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and run up to one width past the maximum, so
# the last break lies above the largest observation.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.28887 2.28887 4.28887 6.28887
# Assign each distance to its class. include.lowest = TRUE closes the first
# interval on the left; otherwise every interval is (a, b] and the minimum,
# which equals the first break, is coded NA and silently dropped.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.289,2.29] 3 0.75 3
(2.29,4.29] 0 0.00 3
(4.29,6.29] 1 0.25 4
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.289,2.29]",..: 1 2 3
##  $ Freq    : int  3 0 1
##  $ Rel_Freq: num  0.75 0 0.25
##  $ Cum_Freq: int  3 3 4
# Two-column data frame (class label, count) used as input for the plot.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.289,2.29] 3
(2.29,4.29] 0
(4.29,6.29] 1
library(ggplot2)

# Bar chart of the number of events in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only: stat.desc() yields
# nothing but NA for character columns, which clutters the summary.
stat.desc(Filter(is.numeric, df_HD))
##                        id date time continent_code country_name country_code
## nbr.val      5.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.540000e+02   NA   NA             NA           NA           NA
## max          7.461000e+03   NA   NA             NA           NA           NA
## range        6.607000e+03   NA   NA             NA           NA           NA
## sum          3.068900e+04   NA   NA             NA           NA           NA
## median       7.458000e+03   NA   NA             NA           NA           NA
## mean         6.137800e+03   NA   NA             NA           NA           NA
## SE.mean      1.320950e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 3.667546e+03   NA   NA             NA           NA           NA
## var          8.724547e+06   NA   NA             NA           NA           NA
## std.dev      2.953734e+03   NA   NA             NA           NA           NA
## coef.var     4.812367e-01   NA   NA             NA           NA           NA
##              state   population city distance location_description     latitude
## nbr.val         NA 5.000000e+00   NA 5.000000                   NA  5.000000000
## nbr.null        NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## nbr.na          NA 0.000000e+00   NA 0.000000                   NA  0.000000000
## min             NA 1.340000e+03   NA 0.288870                   NA 14.563700000
## max             NA 2.775300e+04   NA 5.897210                   NA 14.769800000
## range           NA 2.641300e+04   NA 5.608340                   NA  0.206100000
## sum             NA 6.305000e+04   NA 8.755080                   NA 73.391900000
## median          NA 4.752000e+03   NA 0.744140                   NA 14.689300000
## mean            NA 1.261000e+04   NA 1.751016                   NA 14.678380000
## SE.mean         NA 6.212419e+03   NA 1.053732                   NA  0.042427531
## CI.mean.0.95    NA 1.724844e+04   NA 2.925628                   NA  0.117797711
## var             NA 1.929707e+08   NA 5.551753                   NA  0.009000477
## std.dev         NA 1.389139e+04   NA 2.356216                   NA  0.094870844
## coef.var        NA 1.101617e+00   NA 1.345628                   NA  0.006463305
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.891440e+01          NA          NA             NA
## max          -8.877310e+01          NA          NA             NA
## range         1.413000e-01          NA          NA             NA
## sum          -4.441466e+02          NA          NA             NA
## median       -8.880980e+01          NA          NA             NA
## mean         -8.882932e+01          NA          NA             NA
## SE.mean       2.721322e-02          NA          NA             NA
## CI.mean.0.95  7.555601e-02          NA          NA             NA
## var           3.702797e-03          NA          NA             NA
## std.dev       6.085061e-02          NA          NA             NA
## coef.var     -6.850285e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4   5.000000          NA
## nbr.null                 NA      NA         NA        4   3.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  23.000000          NA
## range                    NA      NA         NA        0  23.000000          NA
## sum                      NA      NA         NA        0  24.000000          NA
## median                   NA      NA         NA        0   0.000000          NA
## mean                     NA      NA         NA        0   4.800000          NA
## SE.mean                  NA      NA         NA        0   4.554119          NA
## CI.mean.0.95             NA      NA         NA        0  12.644261          NA
## var                      NA      NA         NA        0 103.700000          NA
## std.dev                  NA      NA         NA        0  10.183320          NA
## coef.var                 NA      NA         NA      NaN   2.121525          NA
##              source_link       prop        ypos
## nbr.val               NA   5.000000    5.000000
## nbr.null              NA   0.000000    0.000000
## nbr.na                NA   0.000000    0.000000
## min                   NA   3.299456    4.249761
## max                   NA  67.357580   97.521953
## range                 NA  64.058124   93.272192
## sum                   NA 100.000000  193.028733
## median                NA   8.499523   19.742652
## mean                  NA  20.000000   38.605747
## SE.mean               NA  12.035661   17.797515
## CI.mean.0.95          NA  33.416351   49.413822
## var                   NA 724.285634 1583.757624
## std.dev               NA  26.912555   39.796452
## coef.var              NA   1.345628    1.030843
# Horizontal boxplot, filled in green, of the `data` object created earlier
# in this section.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Cortés (Honduras)

# readr provides read_csv(); knitr provides kable() for table rendering.
library(readr)
library(knitr)

# Read the catalog CSV directly from its GitHub URL into `df`.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)

# Give columns 7 and 9 the short names "state" and "city" in one assignment.
colnames(df)[c(7, 9)] <- c("state", "city")

# Keep only the Honduras records and preview the first rows as a table.
df_HD <- subset(df, subset = country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)

# Keep only the records of the Cortés department of Honduras.
# The country condition is needed because `state` names are not unique in the
# catalog: filtering the full data on `state` alone can also return a
# same-named department of another country (this happens for "La Paz").
df_HD <- subset(df, country_name == "Honduras" & state == "Cortés")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
5415 8/29/13 0:05:00 NA Honduras HN Cortés 1146 Los Caminos 3.53737 NA 14.9510 -87.9338 (14.951000000000001, -87.933800000000005) Landslide Landslide Medium Downpour NA 2 6 www.reuters.com http://www.reuters.com/article/2013/08/29/us-honduras-landslide-idUSBRE97S0SI20130829
6689 7/31/14 Night NA Honduras HN Cortés 1043 Agua Azul Rancho 0.97057 Above road 14.8955 -87.9423 (14.8955, -87.942300000000003) Landslide Landslide Small Unknown NA 0 0 Tribuna http://www.latribuna.hn/2014/08/01/deslizamiento-de-tierra-impide-parcialmente-trafico-vehicular-al-norte-de-honduras/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)

# Grouped bars: one bar per record, placed side by side within each state,
# with height equal to `distance` and fill colour by city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)

# Stacked bars: the distances of each state are piled into a single bar,
# with one coloured segment per city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)

# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# y axis (distance) mapped to the angle.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Add to df_HD the share of the total distance held by each record (`prop`,
# in percent) and the mid-point of its slice along the stacked axis (`ypos`),
# which is where the percentage label is drawn.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is an existing ColorBrewer qualitative palette;
# the previous value "Set4" is not a palette name and only produced an
# "Unknown palette" warning.
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)

# Wrap the distances in a time-series object with 12 observations per time
# unit, the first one placed at 2008.
# NOTE(review): the series is built from `distance` only; the records' own
# `date` column is not used to position the observations — confirm intent.
data <- ts(df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
3.53737
0.97057
# Plot the series with custom title and axis labels on a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for the colour
# legend, not a line colour — confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)

# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)

# Pareto chart of the distances with a cumulative-percentage axis marked
# every 10 points.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                   
## Pareto chart analysis for distance
##                    Frequency Cum.Freq. Percentage Cum.Percent.
##   Los Caminos        3.53737   3.53737   78.46977     78.46977
##   Agua Azul Rancho   0.97057   4.50794   21.53023    100.00000
# Stem-and-leaf display of the distances.
stem(df_HD$distance)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 0
##   2 | 
##   3 | 5
# Print the first rows of df_HD (console preview).
head(df_HD)
## # A tibble: 2 x 25
##      id date    time    continent_code country_name country_code state  population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  5415 8/29/13 0:05:00 <NA>           Honduras     HN           Cortés       1146
## 2  6689 7/31/14 Night   <NA>           Honduras     HN           Cortés       1043
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a table; df_HD now also carries `prop` and `ypos`.
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
5415 8/29/13 0:05:00 NA Honduras HN Cortés 1146 Los Caminos 3.53737 NA 14.9510 -87.9338 (14.951000000000001, -87.933800000000005) Landslide Landslide Medium Downpour NA 2 6 www.reuters.com http://www.reuters.com/article/2013/08/29/us-honduras-landslide-idUSBRE97S0SI20130829 78.46977 39.23488
6689 7/31/14 Night NA Honduras HN Cortés 1043 Agua Azul Rancho 0.97057 Above road 14.8955 -87.9423 (14.8955, -87.942300000000003) Landslide Landslide Small Unknown NA 0 0 Tribuna http://www.latribuna.hn/2014/08/01/deslizamiento-de-tierra-impide-parcialmente-trafico-vehicular-al-norte-de-honduras/ 21.53023 89.23488
# Stem-and-leaf display of the distances at the default scale.
stem(df_HD$distance)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 0
##   2 | 
##   3 | 5
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_HD$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 0
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 5

Tablas de frecuencia

library(questionr)

# Frequency table of the distance values with cumulative columns and a
# "Total" row, requested in decreasing order of frequency.
# NOTE(review): the result is stored as `table`, which shadows base::table().
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.97057 1 50 50 50 50
3.53737 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure (classes, column names and types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build a plotting data frame from the frequency table: row names hold the
# distinct values and `n` their counts. The last row of `table` is the
# "Total" line, so it is dropped from both vectors.
# head(v, -1) replaces v[1:(length(v) - 1)]: the latter indexes with 1:0,
# i.e. c(1, 0), when only one row is present and would keep that row.
# NOTE: this reassigns `df`, replacing the catalog loaded earlier.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.97057 1
3.53737 1
library(ggplot2)

# Bar chart of the ungrouped frequencies; x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes suggested by Sturges' rule, k = 1 + log2(n), together
# with its rounded-up and rounded-down versions.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even; otherwise
# use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the number of classes, rounded up
# to a whole number. Breaks start at the minimum and advance by that width,
# going one width past the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.97057 2.97057 4.97057
# Assign each distance to its class. `include.lowest = TRUE` closes the first
# interval on the left: by default cut() builds (a, b] intervals, so the
# minimum value, which equals the first break, would fall outside every class
# and be counted nowhere.
distance <- cut(distance, breaks = bins, include.lowest = TRUE)

# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.971,2.97] 0 0 0
(2.97,4.97] 1 1 1
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.971,2.97]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column data frame for plotting: class label (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.971,2.97] 0
(2.97,4.97] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_HD. Non-numeric columns are
# reported as NA in the output, so only the numeric columns are informative.
stat.desc(df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          5.415000e+03   NA   NA             NA           NA           NA
## max          6.689000e+03   NA   NA             NA           NA           NA
## range        1.274000e+03   NA   NA             NA           NA           NA
## sum          1.210400e+04   NA   NA             NA           NA           NA
## median       6.052000e+03   NA   NA             NA           NA           NA
## mean         6.052000e+03   NA   NA             NA           NA           NA
## SE.mean      6.370000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 8.093852e+03   NA   NA             NA           NA           NA
## var          8.115380e+05   NA   NA             NA           NA           NA
## std.dev      9.008540e+02   NA   NA             NA           NA           NA
## coef.var     1.488523e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 2.000000e+00   NA  2.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.043000e+03   NA  0.9705700                   NA
## max             NA 1.146000e+03   NA  3.5373700                   NA
## range           NA 1.030000e+02   NA  2.5668000                   NA
## sum             NA 2.189000e+03   NA  4.5079400                   NA
## median          NA 1.094500e+03   NA  2.2539700                   NA
## mean            NA 1.094500e+03   NA  2.2539700                   NA
## SE.mean         NA 5.150000e+01   NA  1.2834000                   NA
## CI.mean.0.95    NA 6.543695e+02   NA 16.3071432                   NA
## var             NA 5.304500e+03   NA  3.2942311                   NA
## std.dev         NA 7.283200e+01   NA  1.8150017                   NA
## coef.var        NA 6.654363e-02   NA  0.8052466                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          14.895500000 -8.794230e+01          NA          NA             NA
## max          14.951000000 -8.793380e+01          NA          NA             NA
## range         0.055500000  8.500000e-03          NA          NA             NA
## sum          29.846500000 -1.758761e+02          NA          NA             NA
## median       14.923250000 -8.793805e+01          NA          NA             NA
## mean         14.923250000 -8.793805e+01          NA          NA             NA
## SE.mean       0.027750000  4.250000e-03          NA          NA             NA
## CI.mean.0.95  0.352597181  5.400137e-02          NA          NA             NA
## var           0.001540125  3.612500e-05          NA          NA             NA
## std.dev       0.039244426  6.010408e-03          NA          NA             NA
## coef.var      0.002629751 -6.834820e-05          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA  2.000000   2.000000          NA
## nbr.null                 NA      NA         NA  1.000000   1.000000          NA
## nbr.na                   NA      NA         NA  0.000000   0.000000          NA
## min                      NA      NA         NA  0.000000   0.000000          NA
## max                      NA      NA         NA  2.000000   6.000000          NA
## range                    NA      NA         NA  2.000000   6.000000          NA
## sum                      NA      NA         NA  2.000000   6.000000          NA
## median                   NA      NA         NA  1.000000   3.000000          NA
## mean                     NA      NA         NA  1.000000   3.000000          NA
## SE.mean                  NA      NA         NA  1.000000   3.000000          NA
## CI.mean.0.95             NA      NA         NA 12.706205  38.118614          NA
## var                      NA      NA         NA  2.000000  18.000000          NA
## std.dev                  NA      NA         NA  1.414214   4.242641          NA
## coef.var                 NA      NA         NA  1.414214   1.414214          NA
##              source_link         prop         ypos
## nbr.val               NA    2.0000000    2.0000000
## nbr.null              NA    0.0000000    0.0000000
## nbr.na                NA    0.0000000    0.0000000
## min                   NA   21.5302333   39.2348833
## max                   NA   78.4697667   89.2348833
## range                 NA   56.9395334   50.0000000
## sum                   NA  100.0000000  128.4697667
## median                NA   50.0000000   64.2348833
## mean                  NA   50.0000000   64.2348833
## SE.mean               NA   28.4697667   25.0000000
## CI.mean.0.95          NA  361.7426842  317.6551184
## var                   NA 1621.0552294 1250.0000000
## std.dev               NA   40.2623302   35.3553391
## coef.var              NA    0.8052466    0.5504071
# Horizontal boxplot, filled in green, of `data` (the distance series).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para La Paz (Honduras)

# readr provides read_csv(); knitr provides kable() for table rendering.
library(readr)
library(knitr)

# Read the catalog CSV directly from its GitHub URL into `df`.
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)

# Give columns 7 and 9 the short names "state" and "city" in one assignment.
colnames(df)[c(7, 9)] <- c("state", "city")

# Keep only the Honduras records and preview the first rows as a table.
df_HD <- subset(df, subset = country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)

# Keep only the records of the La Paz department of Honduras.
# The country condition is required: "La Paz" is also a department of
# El Salvador, and filtering the full catalog on `state` alone returned that
# record as well, mixing two countries in a section about Honduras.
df_HD <- subset(df, country_name == "Honduras" & state == "La Paz")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6683 10/15/14 NA NA El Salvador SV La Paz 2654 San Pedro Masahuat 0.31933 Above river 13.5461 -89.0401 (13.546099999999999, -89.040099999999995) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos
7460 9/25/15 NA NA Honduras HN La Paz 1463 San José 4.69133 Unknown 14.2801 -87.9369 (14.280099999999999, -87.936899999999994) Landslide Landslide Medium Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)

# Grouped bars: one bar per record, placed side by side within each state,
# with height equal to `distance` and fill colour by city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)

# Stacked bars: the distances of each state are piled into a single bar,
# with one coloured segment per city.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)

# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# y axis (distance) mapped to the angle.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Add to df_HD the share of the total distance held by each record (`prop`,
# in percent) and the mid-point of its slice along the stacked axis (`ypos`),
# which is where the percentage label is drawn.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart. "Set3" is an existing ColorBrewer qualitative palette;
# the previous value "Set4" is not a palette name and only produced an
# "Unknown palette" warning.
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)

# Wrap the distances in a time-series object with 12 observations per time
# unit, the first one placed at 2008.
# NOTE(review): the series is built from `distance` only; the records' own
# `date` column is not used to position the observations — confirm intent.
data <- ts(df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.31933
4.69133
# Plot the series with custom title and axis labels on a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for the colour
# legend, not a line colour — confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)

# Named numeric vector: one distance per record, labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)

# Pareto chart of the distances with a cumulative-percentage axis marked
# every 10 points.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                     
## Pareto chart analysis for distance
##                       Frequency  Cum.Freq. Percentage Cum.Percent.
##   San José             4.691330   4.691330  93.626987    93.626987
##   San Pedro Masahuat   0.319330   5.010660   6.373013   100.000000
# Stem-and-leaf display of the distances.
stem(df_HD$distance)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 
##   2 | 
##   3 | 
##   4 | 7
# Print the first rows of df_HD (console preview).
head(df_HD)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state  population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  6683 10/15/14 <NA>  <NA>           El Salvador  SV           La Paz       2654
## 2  7460 9/25/15  <NA>  <NA>           Honduras     HN           La Paz       1463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a table; df_HD now also carries `prop` and `ypos`.
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6683 10/15/14 NA NA El Salvador SV La Paz 2654 San Pedro Masahuat 0.31933 Above river 13.5461 -89.0401 (13.546099999999999, -89.040099999999995) Landslide Landslide Medium Continuous rain NA 0 0 reliefweb http://reliefweb.int/report/el-salvador/lluvias-causan-estragos-en-seis-departamentos 6.373013 3.186506
7460 9/25/15 NA NA Honduras HN La Paz 1463 San José 4.69133 Unknown 14.2801 -87.9369 (14.280099999999999, -87.936899999999994) Landslide Landslide Medium Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/ 93.626987 53.186506
# Stem-and-leaf display of the distances at the default scale.
stem(df_HD$distance)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   1 | 
##   2 | 
##   3 | 
##   4 | 7
# Same display with scale = 2, which makes the plot roughly twice as long.
stem(df_HD$distance, scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 3
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 
##   4 | 
##   4 | 7

Tablas de frecuencia

library(questionr)

# Frequency table of the distance values with cumulative columns and a
# "Total" row, requested in decreasing order of frequency.
# NOTE(review): the result is stored as `table`, which shadows base::table().
table <- questionr::freq(distance, sort = "dec", cum = TRUE, total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.31933 1 50 50 50 50
4.69133 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure (classes, column names and types) of the frequency table.
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build a plotting data frame from the frequency table: row names hold the
# distinct values and `n` their counts. The last row of `table` is the
# "Total" line, so it is dropped from both vectors.
# head(v, -1) replaces v[1:(length(v) - 1)]: the latter indexes with 1:0,
# i.e. c(1, 0), when only one row is present and would keep that row.
# NOTE: this reassigns `df`, replacing the catalog loaded earlier.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.31933 1
4.69133 1
library(ggplot2)

# Bar chart of the ungrouped frequencies; x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes suggested by Sturges' rule, k = 1 + log2(n), together
# with its rounded-up and rounded-down versions.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even; otherwise
# use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the number of classes, rounded up
# to a whole number. Breaks start at the minimum and advance by that width,
# going one width past the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.31933 3.31933 6.31933
# Assign each distance to its class. `include.lowest = TRUE` closes the first
# interval on the left: by default cut() builds (a, b] intervals, so the
# minimum value, which equals the first break, would fall outside every class
# and be counted nowhere.
distance <- cut(distance, breaks = bins, include.lowest = TRUE)

# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.319,3.32] 0 0 0
(3.32,6.32] 1 1 1
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(0.319,3.32]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column data frame for plotting: class label (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.319,3.32] 0
(3.32,6.32] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_HD. Non-numeric columns are
# reported as NA in the output, so only the numeric columns are informative.
stat.desc(df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.683000e+03   NA   NA             NA           NA           NA
## max          7.460000e+03   NA   NA             NA           NA           NA
## range        7.770000e+02   NA   NA             NA           NA           NA
## sum          1.414300e+04   NA   NA             NA           NA           NA
## median       7.071500e+03   NA   NA             NA           NA           NA
## mean         7.071500e+03   NA   NA             NA           NA           NA
## SE.mean      3.885000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.936361e+03   NA   NA             NA           NA           NA
## var          3.018645e+05   NA   NA             NA           NA           NA
## std.dev      5.494220e+02   NA   NA             NA           NA           NA
## coef.var     7.769525e-02   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 1.463000e+03   NA  0.319330                   NA 13.54610000
## max             NA 2.654000e+03   NA  4.691330                   NA 14.28010000
## range           NA 1.191000e+03   NA  4.372000                   NA  0.73400000
## sum             NA 4.117000e+03   NA  5.010660                   NA 27.82620000
## median          NA 2.058500e+03   NA  2.505330                   NA 13.91310000
## mean            NA 2.058500e+03   NA  2.505330                   NA 13.91310000
## SE.mean         NA 5.955000e+02   NA  2.186000                   NA  0.36700000
## CI.mean.0.95    NA 7.566545e+03   NA 27.775764                   NA  4.66317714
## var             NA 7.092405e+05   NA  9.557192                   NA  0.26937800
## std.dev         NA 8.421642e+02   NA  3.091471                   NA  0.51901638
## coef.var        NA 4.091155e-01   NA  1.233958                   NA  0.03730415
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.904010e+01          NA          NA             NA
## max          -8.793690e+01          NA          NA             NA
## range         1.103200e+00          NA          NA             NA
## sum          -1.769770e+02          NA          NA             NA
## median       -8.848850e+01          NA          NA             NA
## mean         -8.848850e+01          NA          NA             NA
## SE.mean       5.516000e-01          NA          NA             NA
## CI.mean.0.95  7.008743e+00          NA          NA             NA
## var           6.085251e-01          NA          NA             NA
## std.dev       7.800802e-01          NA          NA             NA
## coef.var     -8.815611e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2          2          NA
## nbr.null                 NA      NA         NA        2          2          NA
## nbr.na                   NA      NA         NA        0          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA    6.373013    3.186506
## max                   NA   93.626987   53.186506
## range                 NA   87.253975   50.000000
## sum                   NA  100.000000   56.373013
## median                NA   50.000000   28.186506
## mean                  NA   50.000000   28.186506
## SE.mean               NA   43.626987   25.000000
## CI.mean.0.95          NA  554.333432  317.655118
## var                   NA 3806.628035 1250.000000
## std.dev               NA   61.697877   35.355339
## coef.var              NA    1.233958    1.254336
# Horizontal boxplot, filled in green, of `data` (the distance series).
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Francisco Morazán (Honduras)

library(readr)
library(knitr)
# Download catalog.csv from GitHub into `df` (needs network access).
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# whose country is Honduras and show the first few as a table.
names(df)[c(7, 9)] <- c("state", "city")
df_HD <- df[which(df$country_name == "Honduras"), ]
knitr::kable(x = head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Keep only the Francisco Morazán rows of Honduras. The country is matched as
# well: filtering on `state` alone searches the whole catalogue, where a
# state/province name is not guaranteed to be unique across countries.
df_HD <- subset(df, country_name == "Honduras" & state == "Francisco Morazán")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
3988 9/26/11 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.23639 NA 14.0865 -87.2172 (14.086499999999999, -87.217200000000005) Landslide Landslide Medium Downpour NA NA 2 NA http://english.peopledaily.com.cn/90777/90852/7610171.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side (dodged) bars of distance, filled by city, for the selected state.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of distance, filled by city, for the selected state.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar drawn in polar coordinates on y.
ggplot(data = df_HD, mapping = aes(fill = city, x = state, y = distance)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels.
# df_HD is re-sorted by city (descending) and gains two columns:
#   prop - the row's share of the total distance, in percent
#   ypos - cumulative share minus half the row's own share, used as label position
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() only returns FALSE.
library(scales)
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it produced "Unknown palette Set4");
  # "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build the series from the distances in chronological order. df_HD was sorted
# by city for the pie chart, so passing its rows to ts() unchanged would put the
# observations in city order instead of time order.
# `date` is text in month/day/two-digit-year form (e.g. "10/20/08").
# NOTE(review): ts() still treats the events as evenly spaced monthly
# observations starting in 2008 - confirm that this is the intended reading.
data <- ts(
  df_HD$distance[order(as.Date(df_HD$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
1.30583
2.00830
2.99239
0.98377
1.24404
2.21442
# Line plot of the series with a title and axis labels on a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for a colour legend,
# not a line colour - confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of the distances, one bar per row, each labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##              
## Pareto chart analysis for distance
##                Frequency  Cum.Freq. Percentage Cum.Percent.
##   Santa Lucía   4.757910   4.757910  13.217489    13.217489
##   Río Abajo     3.639620   8.397530  10.110876    23.328364
##   El Guapinol   3.543990  11.941520   9.845215    33.173580
##   Tegucigalpa   3.252810  15.194330   9.036316    42.209896
##   El Tablón     3.129860  18.324190   8.694761    50.904657
##   Tegucigalpa   2.992390  21.316580   8.312869    59.217525
##   Tegucigalpa   2.913260  24.229840   8.093045    67.310571
##   Tegucigalpa   2.214420  26.444260   6.151666    73.462236
##   Villa Nueva   2.008300  28.452560   5.579064    79.041300
##   El Lolo       1.858970  30.311530   5.164224    84.205524
##   Yaguacire     1.305830  31.617360   3.627600    87.833124
##   Tegucigalpa   1.244040  32.861400   3.455947    91.289071
##   Tegucigalpa   1.236390  34.097790   3.434695    94.723766
##   Tegucigalpa   0.983770  35.081560   2.732916    97.456683
##   Tegucigalpa   0.915520  35.997080   2.543317   100.000000
# Stem-and-leaf display of the distance column.
stem(df_HD[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 9
##   1 | 02239
##   2 | 029
##   3 | 01356
##   4 | 8
# Print the first six rows, which now include the prop and ypos columns.
head(df_HD, n = 6L)
## # A tibble: 6 x 25
##      id date     time      continent_code country_name country_code state population
##   <dbl> <chr>    <chr>     <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  7483 9/28/15  Morning   <NA>           Honduras     HN           Fran~       1449
## 2  7463 6/13/15  Morning   <NA>           Honduras     HN           Fran~       2295
## 3   855 10/20/08 <NA>      <NA>           Honduras     HN           Fran~     850848
## 4  2062 7/12/10  5:30:00   <NA>           Honduras     HN           Fran~     850848
## 5  2093 7/18/10  <NA>      <NA>           Honduras     HN           Fran~     850848
## 6  2217 8/7/10   Overnight <NA>           Honduras     HN           Fran~     850848
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(x = head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7483 9/28/15 Morning NA Honduras HN Francisco Morazán 1449 Yaguacire 1.30583 Urban area 14.0211 -87.2055 (14.021100000000001, -87.205500000000001) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.laprensa.hn/honduras/885141-410/dos-casas-a-punto-de-caer-por-derrumbe-en-tegucigalpa 3.627600 1.813800
7463 6/13/15 Morning NA Honduras HN Francisco Morazán 2295 Villa Nueva 2.00830 Above road 14.0468 -87.1528 (14.046799999999999, -87.152799999999999) Landslide Rockfall Medium Rain NA 0 0 La Tribuna http://www.latribuna.hn/2015/06/13/lluvias-siguen-provocando-deslizamientos-en-el-oriente-de-tegucigalpa/ 5.579064 6.417132
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm 8.312869 13.363098
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html 2.732916 18.885990
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm 3.455947 21.980422
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA 6.151666 26.784228
# Stem-and-leaf display of the distance column (default scale).
stem(df_HD[["distance"]])
## 
##   The decimal point is at the |
## 
##   0 | 9
##   1 | 02239
##   2 | 029
##   3 | 01356
##   4 | 8
# Same display with scale = 2, which lengthens the plot.
stem(df_HD[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 9
##   1 | 0223
##   1 | 9
##   2 | 02
##   2 | 9
##   3 | 013
##   3 | 56
##   4 | 
##   4 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values with cumulative percentages and a
# closing "Total" row, using questionr's "dec" sort order.
# NOTE(review): the variable name `table` shadows base::table().
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
0.91552 1 6.7 6.7 6.7 6.7
0.98377 1 6.7 6.7 13.3 13.3
1.23639 1 6.7 6.7 20.0 20.0
1.24404 1 6.7 6.7 26.7 26.7
1.30583 1 6.7 6.7 33.3 33.3
1.85897 1 6.7 6.7 40.0 40.0
2.0083 1 6.7 6.7 46.7 46.7
2.21442 1 6.7 6.7 53.3 53.3
2.91326 1 6.7 6.7 60.0 60.0
2.99239 1 6.7 6.7 66.7 66.7
3.12986 1 6.7 6.7 73.3 73.3
3.25281 1 6.7 6.7 80.0 80.0
3.54399 1 6.7 6.7 86.7 86.7
3.63962 1 6.7 6.7 93.3 93.3
4.75791 1 6.7 6.7 100.0 100.0
Total 15 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  16 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
##  $ val%   : num  6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
##  $ %cum   : num  6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
##  $ val%cum: num  6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
# Turn the frequency table into a plain two-column data frame for plotting:
# x holds the row labels (distance values) and y holds the counts.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line. head(., -1) drops it and returns an empty
# vector when it is the only row, whereas 1:(length - 1) would yield c(1, 0)
# and keep it.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.91552 1
0.98377 1
1.23639 1
1.24404 1
1.30583 1
1.85897 1
2.0083 1
2.21442 1
2.91326 1
2.99239 1
3.12986 1
3.25281 1
3.54399 1
3.63962 1
4.75791 1
library(ggplot2)
# Bar chart of the per-value frequencies; x tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from 1 + log2(n), then equal-width class limits that start
# at the smallest distance.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# An even ceiling falls back to the floor; an odd ceiling is used as is.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)  # class width, rounded up to a whole number
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 0.91552 1.91552 2.91552 3.91552 4.91552
# Assign each distance to its class and tabulate absolute, relative and
# cumulative frequencies.
# cut() builds intervals that are open on the left, and the first break equals
# min(distance). Without include.lowest = TRUE the smallest observation becomes
# NA and is missing from every count.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.916,1.92] 5 0.3571429 5
(1.92,2.92] 3 0.2142857 8
(2.92,3.92] 5 0.3571429 13
(3.92,4.92] 1 0.0714286 14
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.916,1.92]",..: 1 2 3 4
##  $ Freq    : int  5 3 5 1
##  $ Rel_Freq: num  0.3571 0.2143 0.3571 0.0714
##  $ Cum_Freq: int  5 8 13 14
# Two-column frame for plotting: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(0.916,1.92] 5
(1.92,2.92] 3
(2.92,3.92] 5
(3.92,4.92] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_HD.
pastecs::stat.desc(df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      1.500000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.550000e+02   NA   NA             NA           NA           NA
## max          7.485000e+03   NA   NA             NA           NA           NA
## range        6.630000e+03   NA   NA             NA           NA           NA
## sum          8.079700e+04   NA   NA             NA           NA           NA
## median       7.447000e+03   NA   NA             NA           NA           NA
## mean         5.386467e+03   NA   NA             NA           NA           NA
## SE.mean      6.991591e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.499547e+03   NA   NA             NA           NA           NA
## var          7.332353e+06   NA   NA             NA           NA           NA
## std.dev      2.707832e+03   NA   NA             NA           NA           NA
## coef.var     5.027102e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.500000e+01   NA 15.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.121000e+03   NA  0.9155200                   NA
## max             NA 8.508480e+05   NA  4.7579100                   NA
## range           NA 8.497270e+05   NA  3.8423900                   NA
## sum             NA 6.819247e+06   NA 35.9970800                   NA
## median          NA 8.508480e+05   NA  2.2144200                   NA
## mean            NA 4.546165e+05   NA  2.3998053                   NA
## SE.mean         NA 1.132090e+05   NA  0.2996574                   NA
## CI.mean.0.95    NA 2.428092e+05   NA  0.6427013                   NA
## var             NA 1.922443e+11   NA  1.3469187                   NA
## std.dev         NA 4.384567e+05   NA  1.1605683                   NA
## coef.var        NA 9.644541e-01   NA  0.4836093                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      1.500000e+01  1.500000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.376450e+01 -8.743400e+01          NA          NA             NA
## max          1.413380e+01 -8.715280e+01          NA          NA             NA
## range        3.693000e-01  2.812000e-01          NA          NA             NA
## sum          2.109138e+02 -1.308243e+03          NA          NA             NA
## median       1.408140e+01 -8.720870e+01          NA          NA             NA
## mean         1.406092e+01 -8.721617e+01          NA          NA             NA
## SE.mean      2.234844e-02  1.715587e-02          NA          NA             NA
## CI.mean.0.95 4.793264e-02  3.679568e-02          NA          NA             NA
## var          7.491792e-03  4.414858e-03          NA          NA             NA
## std.dev      8.655514e-02  6.644440e-02          NA          NA             NA
## coef.var     6.155724e-03 -7.618358e-04          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 9.0000000  15.000000          NA
## nbr.null                 NA      NA         NA 8.0000000  10.000000          NA
## nbr.na                   NA      NA         NA 6.0000000   0.000000          NA
## min                      NA      NA         NA 0.0000000   0.000000          NA
## max                      NA      NA         NA 1.0000000  29.000000          NA
## range                    NA      NA         NA 1.0000000  29.000000          NA
## sum                      NA      NA         NA 1.0000000  40.000000          NA
## median                   NA      NA         NA 0.0000000   0.000000          NA
## mean                     NA      NA         NA 0.1111111   2.666667          NA
## SE.mean                  NA      NA         NA 0.1111111   1.918994          NA
## CI.mean.0.95             NA      NA         NA 0.2562227   4.115834          NA
## var                      NA      NA         NA 0.1111111  55.238095          NA
## std.dev                  NA      NA         NA 0.3333333   7.432234          NA
## coef.var                 NA      NA         NA 3.0000000   2.787088          NA
##              source_link        prop        ypos
## nbr.val               NA  15.0000000  15.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   2.5433174   1.8137999
## max                   NA  13.2174888  95.0773924
## range                 NA  10.6741713  93.2635925
## sum                   NA 100.0000000 645.2684218
## median                NA   6.1516656  37.8129143
## mean                  NA   6.6666667  43.0178948
## SE.mean               NA   0.8324493   7.7398433
## CI.mean.0.95          NA   1.7854262  16.6003128
## var                   NA  10.3945777 898.5776101
## std.dev               NA   3.2240623  29.9762841
## coef.var              NA   0.4836093   0.6968329
# Horizontal box plot of the distance series stored in `data`.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Ocotepeque (Honduras)

library(readr)
library(knitr)
# Download catalog.csv from GitHub into `df` (needs network access).
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names "state" and "city", then keep the rows
# whose country is Honduras and show the first few as a table.
names(df)[c(7, 9)] <- c("state", "city")
df_HD <- df[which(df$country_name == "Honduras"), ]
knitr::kable(x = head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Keep only the Ocotepeque rows of Honduras. The country is matched as well:
# filtering on `state` alone searches the whole catalogue, where a
# state/province name is not guaranteed to be unique across countries.
df_HD <- subset(df, country_name == "Honduras" & state == "Ocotepeque")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6672 10/13/14 NA NA Honduras HN Ocotepeque 2389 Sinuapa 2.00805 Below road 14.4579 -89.1666 (14.4579, -89.166600000000003) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.laprensa.hn/economia/757783-410/evac%C3%BAan-a-familias-por-deslizamientos-en-ocotepeque
7462 9/25/15 NA NA Honduras HN Ocotepeque 1416 La Labor 5.79867 Above road 14.4810 -89.0537 (14.481, -89.053700000000006) Landslide Landslide Small Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side (dodged) bars of distance, filled by city, for the selected state.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars of distance, filled by city, for the selected state.
ggplot(data = df_HD, mapping = aes(x = state, y = distance, fill = city)) +
  geom_bar(stat = "identity", position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar drawn in polar coordinates on y.
ggplot(data = df_HD, mapping = aes(fill = city, x = state, y = distance)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels.
# df_HD is re-sorted by city (descending) and gains two columns:
#   prop - the row's share of the total distance, in percent
#   ypos - cumulative share minus half the row's own share, used as label position
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() only returns FALSE.
library(scales)
ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = percent(prop / 100)), color = "black", size = 3) +
  # "Set4" is not a ColorBrewer palette (it produced "Unknown palette Set4");
  # "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Build the series from the distances in chronological order. df_HD was sorted
# by city for the pie chart, so passing its rows to ts() unchanged would put the
# observations in city order instead of time order.
# `date` is text in month/day/two-digit-year form (e.g. "10/13/14").
# NOTE(review): ts() still treats the events as evenly spaced monthly
# observations starting in 2008 - confirm that this is the intended reading.
data <- ts(
  df_HD$distance[order(as.Date(df_HD$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
2.00805
5.79867
# Line plot of the series with a title and axis labels on a black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for a colour legend,
# not a line colour - confirm whether a green line was intended.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of the distances, one bar per row, each labelled with its city.
distance <- setNames(df_HD$distance, df_HD$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##           
## Pareto chart analysis for distance
##            Frequency Cum.Freq. Percentage Cum.Percent.
##   La Labor   5.79867   5.79867   74.27793     74.27793
##   Sinuapa    2.00805   7.80672   25.72207    100.00000
# Stem-and-leaf display of the distance column.
stem(df_HD[["distance"]])
## 
##   The decimal point is at the |
## 
##   2 | 0
##   3 | 
##   4 | 
##   5 | 8
# Print the first rows (up to six), which now include the prop and ypos columns.
head(df_HD, n = 6L)
## # A tibble: 2 x 25
##      id date     time  continent_code country_name country_code state      population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>           <dbl>
## 1  6672 10/13/14 <NA>  <NA>           Honduras     HN           Ocotepeque       2389
## 2  7462 9/25/15  <NA>  <NA>           Honduras     HN           Ocotepeque       1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(x = head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6672 10/13/14 NA NA Honduras HN Ocotepeque 2389 Sinuapa 2.00805 Below road 14.4579 -89.1666 (14.4579, -89.166600000000003) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.laprensa.hn/economia/757783-410/evac%C3%BAan-a-familias-por-deslizamientos-en-ocotepeque 25.72207 12.86104
7462 9/25/15 NA NA Honduras HN Ocotepeque 1416 La Labor 5.79867 Above road 14.4810 -89.0537 (14.481, -89.053700000000006) Landslide Landslide Small Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/ 74.27793 62.86104
# Stem-and-leaf display of the distance column (default scale).
stem(df_HD[["distance"]])
## 
##   The decimal point is at the |
## 
##   2 | 0
##   3 | 
##   4 | 
##   5 | 8
# Same display with scale = 2, which lengthens the plot.
stem(df_HD[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   2 | 0
##   2 | 
##   3 | 
##   3 | 
##   4 | 
##   4 | 
##   5 | 
##   5 | 8

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values with cumulative percentages and a
# closing "Total" row, using questionr's "dec" sort order.
# NOTE(review): the variable name `table` shadows base::table().
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
2.00805 1 50 50 50 50
5.79867 1 50 50 100 100
Total 2 100 100 100 100
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Turn the frequency table into a plain two-column data frame for plotting:
# x holds the row labels (distance values) and y holds the counts.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line. head(., -1) drops it and returns an empty
# vector when it is the only row, whereas 1:(length - 1) would yield c(1, 0)
# and keep it.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.00805 1
5.79867 1
library(ggplot2)
# Bar chart of the per-value frequencies; x tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from 1 + log2(n), then equal-width class limits that start
# at the smallest distance.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# An even ceiling falls back to the floor; an odd ceiling is used as is.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)  # class width, rounded up to a whole number
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 2.00805 4.00805 6.00805
# Assign each distance to its class and tabulate absolute, relative and
# cumulative frequencies.
# cut() builds intervals that are open on the left, and the first break equals
# min(distance). Without include.lowest = TRUE the smallest observation becomes
# NA and is missing from every count.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(2.01,4.01] 0 0 0
(4.01,6.01] 1 1 1
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(2.01,4.01]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column frame for plotting: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(2.01,4.01] 0
(4.01,6.01] 1
library(ggplot2)

# Bar chart of the grouped frequencies, one bar per class interval.
ggplot(df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", fill = "aquamarine", color = "green") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_HD.
pastecs::stat.desc(df_HD)
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.672000e+03   NA   NA             NA           NA           NA
## max          7.462000e+03   NA   NA             NA           NA           NA
## range        7.900000e+02   NA   NA             NA           NA           NA
## sum          1.413400e+04   NA   NA             NA           NA           NA
## median       7.067000e+03   NA   NA             NA           NA           NA
## mean         7.067000e+03   NA   NA             NA           NA           NA
## SE.mean      3.950000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 5.018951e+03   NA   NA             NA           NA           NA
## var          3.120500e+05   NA   NA             NA           NA           NA
## std.dev      5.586144e+02   NA   NA             NA           NA           NA
## coef.var     7.904547e-02   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 2.000000e+00   NA  2.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.416000e+03   NA  2.0080500                   NA
## max             NA 2.389000e+03   NA  5.7986700                   NA
## range           NA 9.730000e+02   NA  3.7906200                   NA
## sum             NA 3.805000e+03   NA  7.8067200                   NA
## median          NA 1.902500e+03   NA  3.9033600                   NA
## mean            NA 1.902500e+03   NA  3.9033600                   NA
## SE.mean         NA 4.865000e+02   NA  1.8953100                   NA
## CI.mean.0.95    NA 6.181569e+03   NA 24.0821969                   NA
## var             NA 4.733645e+05   NA  7.1844000                   NA
## std.dev         NA 6.880149e+02   NA  2.6803731                   NA
## coef.var        NA 3.616373e-01   NA  0.6866836                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.000000000  2.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          14.457900000 -8.916660e+01          NA          NA             NA
## max          14.481000000 -8.905370e+01          NA          NA             NA
## range         0.023100000  1.129000e-01          NA          NA             NA
## sum          28.938900000 -1.782203e+02          NA          NA             NA
## median       14.469450000 -8.911015e+01          NA          NA             NA
## mean         14.469450000 -8.911015e+01          NA          NA             NA
## SE.mean       0.011550000  5.645000e-02          NA          NA             NA
## CI.mean.0.95  0.146756665  7.172653e-01          NA          NA             NA
## var           0.000266805  6.373205e-03          NA          NA             NA
## std.dev       0.016334167  7.983236e-02          NA          NA             NA
## coef.var      0.001128873 -8.958840e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2          2          NA
## nbr.null                 NA      NA         NA        2          2          NA
## nbr.na                   NA      NA         NA        0          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link         prop         ypos
## nbr.val               NA    2.0000000    2.0000000
## nbr.null              NA    0.0000000    0.0000000
## nbr.na                NA    0.0000000    0.0000000
## min                   NA   25.7220702   12.8610351
## max                   NA   74.2779298   62.8610351
## range                 NA   48.5558596   50.0000000
## sum                   NA  100.0000000   75.7220702
## median                NA   50.0000000   37.8610351
## mean                  NA   50.0000000   37.8610351
## SE.mean               NA   24.2779298   25.0000000
## CI.mean.0.95          NA  308.4803464  317.6551184
## var                   NA 1178.8357492 1250.0000000
## std.dev               NA   34.3341776   35.3553391
## coef.var              NA    0.6866836    0.9338186
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Santa Bárbara (Honduras)

library(readr)
library(knitr)
# Load the landslide catalog. Column types are still guessed by readr;
# show_col_types = FALSE only silences the column-specification message.
df <- read_csv(
  "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv",
  show_col_types = FALSE
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used in the rest of the section.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the Honduran records and preview the first rows.
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA
library(dplyr)
# Narrow the working table to the records whose state is Santa Bárbara.
df_HD <- dplyr::filter(df, state == "Santa Bárbara")
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
6691 10/14/14 Night NA Honduras HN Santa Bárbara 1759 Agualote 2.91594 Mine construction 15.3090 -88.5510 (15.308999999999999, -88.551000000000002) Landslide Landslide Medium Rain NA 0 1 Mundo http://elmundo.com.sv/honduras-muere-un-minero-y-rescatan-a-otros-cinco-soterrados
7464 9/28/15 Morning NA Honduras HN Santa Bárbara 1811 Ilama 2.87349 Above road 15.0909 -88.2072 (15.0909, -88.2072) Landslide Rockfall Small Rain NA 0 0 Canal 6 http://www.canal6.com.hn/destacado/derrumbes-incomunican-paso-entre-san-pedro-sula-y-santa-barbara.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# One bar per city, drawn side by side within the state; height is distance.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the city segments stacked in one bar.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar wrapped into polar coordinates, so that the angle
# of each slice is driven by distance.
ggplot(df_HD, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Build the pie-chart table: rows ordered by city (descending), each with its
# share of the total distance in percent (prop) and the midpoint of its
# cumulative share (ypos), which is where the label is drawn.
df_HD <- df_HD %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_HD, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (the qualitative sets are Set1-Set3);
  # Set3 offers up to 12 distinct colours.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as 12 observations per
# period, with the first observation dated 2008.
data <- ts(df_HD$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
2.87349
2.91594
# Plot the ts object with autoplot() and set the title and axis labels.
# NOTE(review): colour = "green" inside labs() only names a colour legend; it
# does not draw the line in green. Confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by city, so each bar of the chart carries its city name.
distance <- setNames(df_HD$distance, df_HD$city)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance)),
  main = "Grafico de Pareto por ciudades"
)

##           
## Pareto chart analysis for distance
##            Frequency Cum.Freq. Percentage Cum.Percent.
##   Agualote   2.91594   2.91594   50.36662     50.36662
##   Ilama      2.87349   5.78943   49.63338    100.00000
stem(df_HD$"distance")
## 
##   The decimal point is 2 digit(s) to the left of the |
## 
##   287 | 3
##   288 | 
##   289 | 
##   290 | 
##   291 | 6
head(df_HD)
## # A tibble: 2 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  7464 9/28/15  Morning <NA>           Honduras     HN           Sant~       1811
## 2  6691 10/14/14 Night   <NA>           Honduras     HN           Sant~       1759
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_HD))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7464 9/28/15 Morning NA Honduras HN Santa Bárbara 1811 Ilama 2.87349 Above road 15.0909 -88.2072 (15.0909, -88.2072) Landslide Rockfall Small Rain NA 0 0 Canal 6 http://www.canal6.com.hn/destacado/derrumbes-incomunican-paso-entre-san-pedro-sula-y-santa-barbara.html 49.63338 24.81669
6691 10/14/14 Night NA Honduras HN Santa Bárbara 1759 Agualote 2.91594 Mine construction 15.3090 -88.5510 (15.308999999999999, -88.551000000000002) Landslide Landslide Medium Rain NA 0 1 Mundo http://elmundo.com.sv/honduras-muere-un-minero-y-rescatan-a-otros-cinco-soterrados 50.36662 74.81669
stem(df_HD$"distance")
## 
##   The decimal point is 2 digit(s) to the left of the |
## 
##   287 | 3
##   288 | 
##   289 | 
##   290 | 
##   291 | 6
stem(df_HD$"distance", scale = 2)
## 
##   The decimal point is 2 digit(s) to the left of the |
## 
##   287 | 3
##   287 | 
##   288 | 
##   288 | 
##   289 | 
##   289 | 
##   290 | 
##   290 | 
##   291 | 
##   291 | 6

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in `table`, which shadows base::table();
# calls such as table(distance) still resolve to the function.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.87349 1 50 50 50 50
2.91594 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Split the frequency table into labels and counts, dropping the trailing
# "Total" row that questionr::freq(total = TRUE) appends.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], does
# not pick up a stray element when only one row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.87349 1
2.91594 1
library(ggplot2)
# Bar height is the count of each distinct distance value; the x labels are
# rotated so the values do not overlap.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up to a
# whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and may extend up to one width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 2.87349 3.87349
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum (which equals the first break)
# falls outside every class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(2.87,3.87] 1 1 1
str(Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ distance: Factor w/ 1 level "(2.87,3.87]": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(2.87,3.87] 1
library(ggplot2)

# Bar height is the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics are only defined for numeric columns; passing the
# character columns as well just adds all-NA columns to the result.
stat.desc(df_HD[vapply(df_HD, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          6.691000e+03   NA   NA             NA           NA           NA
## max          7.464000e+03   NA   NA             NA           NA           NA
## range        7.730000e+02   NA   NA             NA           NA           NA
## sum          1.415500e+04   NA   NA             NA           NA           NA
## median       7.077500e+03   NA   NA             NA           NA           NA
## mean         7.077500e+03   NA   NA             NA           NA           NA
## SE.mean      3.865000e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 4.910948e+03   NA   NA             NA           NA           NA
## var          2.987645e+05   NA   NA             NA           NA           NA
## std.dev      5.465935e+02   NA   NA             NA           NA           NA
## coef.var     7.722975e-02   NA   NA             NA           NA           NA
##              state   population city     distance location_description
## nbr.val         NA 2.000000e+00   NA 2.0000000000                   NA
## nbr.null        NA 0.000000e+00   NA 0.0000000000                   NA
## nbr.na          NA 0.000000e+00   NA 0.0000000000                   NA
## min             NA 1.759000e+03   NA 2.8734900000                   NA
## max             NA 1.811000e+03   NA 2.9159400000                   NA
## range           NA 5.200000e+01   NA 0.0424500000                   NA
## sum             NA 3.570000e+03   NA 5.7894300000                   NA
## median          NA 1.785000e+03   NA 2.8947150000                   NA
## mean            NA 1.785000e+03   NA 2.8947150000                   NA
## SE.mean         NA 2.600000e+01   NA 0.0212250000                   NA
## CI.mean.0.95    NA 3.303613e+02   NA 0.2696891955                   NA
## var             NA 1.352000e+03   NA 0.0009010013                   NA
## std.dev         NA 3.676955e+01   NA 0.0300166829                   NA
## coef.var        NA 2.059919e-02   NA 0.0103694778                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       2.00000000  2.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          15.09090000 -8.855100e+01          NA          NA             NA
## max          15.30900000 -8.820720e+01          NA          NA             NA
## range         0.21810000  3.438000e-01          NA          NA             NA
## sum          30.39990000 -1.767582e+02          NA          NA             NA
## median       15.19995000 -8.837910e+01          NA          NA             NA
## mean         15.19995000 -8.837910e+01          NA          NA             NA
## SE.mean       0.10905000  1.719000e-01          NA          NA             NA
## CI.mean.0.95  1.38561163  2.184197e+00          NA          NA             NA
## var           0.02378380  5.909922e-02          NA          NA             NA
## std.dev       0.15421999  2.431033e-01          NA          NA             NA
## coef.var      0.01014609 -2.750688e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2  2.0000000          NA
## nbr.null                 NA      NA         NA        2  1.0000000          NA
## nbr.na                   NA      NA         NA        0  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  1.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA        0  0.5000000          NA
## mean                     NA      NA         NA        0  0.5000000          NA
## SE.mean                  NA      NA         NA        0  0.5000000          NA
## CI.mean.0.95             NA      NA         NA        0  6.3531024          NA
## var                      NA      NA         NA        0  0.5000000          NA
## std.dev                  NA      NA         NA        0  0.7071068          NA
## coef.var                 NA      NA         NA      NaN  1.4142136          NA
##              source_link         prop         ypos
## nbr.val               NA   2.00000000    2.0000000
## nbr.null              NA   0.00000000    0.0000000
## nbr.na                NA   0.00000000    0.0000000
## min                   NA  49.63338360   24.8166918
## max                   NA  50.36661640   74.8166918
## range                 NA   0.73323281   50.0000000
## sum                   NA 100.00000000   99.6333836
## median                NA  50.00000000   49.8166918
## mean                  NA  50.00000000   49.8166918
## SE.mean               NA   0.36661640   25.0000000
## CI.mean.0.95          NA   4.65830307  317.6551184
## var                   NA   0.26881517 1250.0000000
## std.dev               NA   0.51847389   35.3553391
## coef.var              NA   0.01036948    0.7097087
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Nicaragua

library(readr)
library(knitr)
# Load the landslide catalog. Column types are still guessed by readr;
# show_col_types = FALSE only silences the column-specification message.
df <- read_csv(
  "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv",
  show_col_types = FALSE
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used in the rest of the section.
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
# This section covers Nicaragua, so filter on that country rather than on
# Honduras.
df_NC <- subset(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
854 10/19/08 NA NA Honduras HN Copán 4752 Corquín 0.43391 NA 14.5637 -88.8693 (14.563700000000001, -88.869299999999996) Landslide Landslide Large Tropical cyclone Tropical Depression 16 NA 23 NA http://www.chron.com/disp/story.mpl/ap/world/6068144.html
855 10/20/08 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.99239 NA 14.1080 -87.2137 (14.108000000000001, -87.213700000000003) Landslide Mudslide Large Tropical cyclone Tropical Depression 16 NA 29 NA http://in.ibtimes.com/articles/20081021/honduras-landslide-tegucigalpa-victim.htm
2062 7/12/10 5:30:00 NA Honduras HN Francisco Morazán 850848 Tegucigalpa 0.98377 NA 14.0831 -87.1978 (14.0831, -87.197800000000001) Landslide Mudslide Medium Downpour NA NA 1 NA http://mdn.mainichi.jp/mdnnews/news/20100713p2a00m0na013000c.html
2093 7/18/10 NA NA Honduras HN Francisco Morazán 850848 Tegucigalpa 1.24404 NA 14.0814 -87.1953 (14.0814, -87.195300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/july/19/centralamerica10071903.htm
2217 8/7/10 Overnight NA Honduras HN Francisco Morazán 850848 Tegucigalpa 2.21442 NA 14.0783 -87.2270 (14.0783, -87.227000000000004) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2358 8/29/10 4:30:00 NA Honduras HN Francisco Morazán 2288 Santa Lucía 4.75791 NA 14.1015 -87.1607 (14.1015, -87.160700000000006) Landslide Rockfall Medium Downpour NA NA 5 NA NA

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Bars coloured by state, drawn side by side within the country; height is
# distance.
ggplot(df_NC, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the state segments stacked in one bar.
ggplot(df_NC, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar wrapped into polar coordinates, so that the angle
# of each slice is driven by distance.
ggplot(df_NC, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Build the pie-chart table: rows ordered by state (descending), each with its
# share of the total distance in percent (prop) and the midpoint of its
# cumulative share (ypos), which is where the label is drawn.
df_NC <- df_NC %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_NC, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette (the qualitative sets are Set1-Set3).
  # Set3 offers up to 12 distinct colours; more states than that would need a
  # different palette.
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as 12 observations per
# period, with the first observation dated 2008.
data <- ts(df_NC$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
0.31238
6.66574
2.91594
2.87349
2.00805
5.79867
# Plot the ts object with autoplot() and set the title and axis labels.
# NOTE(review): colour = "green" inside labs() only names a colour legend; it
# does not draw the line in green. Confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances labelled by state, so each bar of the chart carries the state of
# its record (one bar per record, not per state).
distance <- setNames(df_NC$distance, df_NC$state)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance)),
  main = "Grafico de Pareto por estados"
)

##                    
## Pareto chart analysis for distance
##                       Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Colón              36.3762900  36.3762900  21.8907391   21.8907391
##   Comayagua          17.2861300  53.6624200  10.4025496   32.2932888
##   Choluteca          11.6723700  65.3347900   7.0242679   39.3175567
##   Comayagua           8.5258400  73.8606300   5.1307305   44.4482872
##   Comayagua           7.2857500  81.1463800   4.3844618   48.8327489
##   Yoro                6.6657400  87.8121200   4.0113485   52.8440974
##   Copán               5.8972100  93.7093300   3.5488579   56.3929554
##   Ocotepeque          5.7986700  99.5080000   3.4895580   59.8825133
##   Francisco Morazán   4.7579100 104.2659100   2.8632432   62.7457566
##   La Paz              4.6913300 108.9572400   2.8231763   65.5689329
##   Comayagua           4.5336200 113.4908600   2.7282687   68.2972016
##   Choluteca           3.6959600 117.1868200   2.2241767   70.5213783
##   Francisco Morazán   3.6396200 120.8264400   2.1902721   72.7116504
##   Francisco Morazán   3.5439900 124.3704300   2.1327233   74.8443736
##   Cortés              3.5373700 127.9078000   2.1287395   76.9731131
##   Francisco Morazán   3.2528100 131.1606100   1.9574953   78.9306084
##   Francisco Morazán   3.1298600 134.2904700   1.8835057   80.8141140
##   Francisco Morazán   2.9923900 137.2828600   1.8007782   82.6148922
##   Santa Bárbara       2.9159400 140.1988000   1.7547716   84.3696639
##   Francisco Morazán   2.9132600 143.1120600   1.7531588   86.1228227
##   Santa Bárbara       2.8734900 145.9855500   1.7292258   87.8520485
##   Francisco Morazán   2.2144200 148.1999700   1.3326068   89.1846553
##   Francisco Morazán   2.0083000 150.2082700   1.2085667   90.3932220
##   Ocotepeque          2.0080500 152.2163200   1.2084162   91.6016382
##   El Paraíso          1.9005200 154.1168400   1.1437062   92.7453444
##   Francisco Morazán   1.8589700 155.9758100   1.1187020   93.8640463
##   Copán               1.3909500 157.3667600   0.8370541   94.7011005
##   Francisco Morazán   1.3058300 158.6725900   0.7858301   95.4869306
##   Francisco Morazán   1.2440400 159.9166300   0.7486458   96.2355763
##   Francisco Morazán   1.2363900 161.1530200   0.7440421   96.9796184
##   Francisco Morazán   0.9837700 162.1367900   0.5920189   97.5716373
##   Cortés              0.9705700 163.1073600   0.5840754   98.1557127
##   Francisco Morazán   0.9155200 164.0228800   0.5509470   98.7066598
##   Copán               0.7441400 164.7670200   0.4478130   99.1544727
##   Copán               0.4339100 165.2009300   0.2611209   99.4155937
##   Choluteca           0.3698700 165.5708000   0.2225826   99.6381762
##   Yoro                0.3123800 165.8831800   0.1879859   99.8261621
##   Copán               0.2888700 166.1720500   0.1738379  100.0000000
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000011111111222223333334444
##   0 | 55566779
##   1 | 2
##   1 | 7
##   2 | 
##   2 | 
##   3 | 
##   3 | 6
head(df_NC)
## # A tibble: 6 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6202 5/20/14  <NA>    <NA>           Honduras     HN           Yoro       15774
## 2  7467 1/22/15  <NA>    <NA>           Honduras     HN           Yoro        2188
## 3  6691 10/14/14 Night   <NA>           Honduras     HN           Sant~       1759
## 4  7464 9/28/15  Morning <NA>           Honduras     HN           Sant~       1811
## 5  6672 10/13/14 <NA>    <NA>           Honduras     HN           Ocot~       2389
## 6  7462 9/25/15  <NA>    <NA>           Honduras     HN           Ocot~       1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6202 5/20/14 NA NA Honduras HN Yoro 15774 Yoro 0.31238 Unknown 15.0666 -87.3245 (15.066599999999999, -87.3245) Landslide Landslide Medium Rain NA 0 0 Prensa http://www.laprensa.hn/lasultimas24/711060-98/deslizamientos-de-tierra-dejar%C3%ADa-incomunicadas-30-aldeas-en-yoro 0.1879859 0.0939929
7467 1/22/15 NA NA Honduras HN Yoro 2188 La Sarrosa 6.66574 Above road 15.1754 -87.8164 (15.1754, -87.816400000000002) Landslide Rockfall Small Continuous rain NA 0 0 La Prensa http://www.laprensa.hn/honduras/787961-410/conductores-tienen-que-maniobrar-para-no-chocar-con-las-rocas-que-cubren 4.0113485 2.1936601
6691 10/14/14 Night NA Honduras HN Santa Bárbara 1759 Agualote 2.91594 Mine construction 15.3090 -88.5510 (15.308999999999999, -88.551000000000002) Landslide Landslide Medium Rain NA 0 1 Mundo http://elmundo.com.sv/honduras-muere-un-minero-y-rescatan-a-otros-cinco-soterrados 1.7547716 5.0767202
7464 9/28/15 Morning NA Honduras HN Santa Bárbara 1811 Ilama 2.87349 Above road 15.0909 -88.2072 (15.0909, -88.2072) Landslide Rockfall Small Rain NA 0 0 Canal 6 http://www.canal6.com.hn/destacado/derrumbes-incomunican-paso-entre-san-pedro-sula-y-santa-barbara.html 1.7292258 6.8187189
6672 10/13/14 NA NA Honduras HN Ocotepeque 2389 Sinuapa 2.00805 Below road 14.4579 -89.1666 (14.4579, -89.166600000000003) Landslide Landslide Medium Downpour NA 0 0 La Prensa http://www.laprensa.hn/economia/757783-410/evac%C3%BAan-a-familias-por-deslizamientos-en-ocotepeque 1.2084162 8.2875399
7462 9/25/15 NA NA Honduras HN Ocotepeque 1416 La Labor 5.79867 Above road 14.4810 -89.0537 (14.481, -89.053700000000006) Landslide Landslide Small Rain NA 0 0 Tiempo http://www.tiempo.hn/lluvias-comienzan-a-causar-deslizamientos-en-carreteras-del-occidente-de-honduras/ 3.4895580 10.6365270
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 000011111111222223333334444
##   0 | 55566779
##   1 | 2
##   1 | 7
##   2 | 
##   2 | 
##   3 | 
##   3 | 6
stem(df_NC$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 33447900223499
##    2 | 0029990135567
##    4 | 57889
##    6 | 73
##    8 | 5
##   10 | 7
##   12 | 
##   14 | 
##   16 | 3
##   18 | 
##   20 | 
##   22 | 
##   24 | 
##   26 | 
##   28 | 
##   30 | 
##   32 | 
##   34 | 
##   36 | 4

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored in `table`, which shadows base::table();
# calls such as table(distance) still resolve to the function.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.28887 1 2.6 2.6 2.6 2.6
0.31238 1 2.6 2.6 5.3 5.3
0.36987 1 2.6 2.6 7.9 7.9
0.43391 1 2.6 2.6 10.5 10.5
0.74414 1 2.6 2.6 13.2 13.2
0.91552 1 2.6 2.6 15.8 15.8
0.97057 1 2.6 2.6 18.4 18.4
0.98377 1 2.6 2.6 21.1 21.1
1.23639 1 2.6 2.6 23.7 23.7
1.24404 1 2.6 2.6 26.3 26.3
1.30583 1 2.6 2.6 28.9 28.9
1.39095 1 2.6 2.6 31.6 31.6
1.85897 1 2.6 2.6 34.2 34.2
1.90052 1 2.6 2.6 36.8 36.8
2.00805 1 2.6 2.6 39.5 39.5
2.0083 1 2.6 2.6 42.1 42.1
2.21442 1 2.6 2.6 44.7 44.7
2.87349 1 2.6 2.6 47.4 47.4
2.91326 1 2.6 2.6 50.0 50.0
2.91594 1 2.6 2.6 52.6 52.6
2.99239 1 2.6 2.6 55.3 55.3
3.12986 1 2.6 2.6 57.9 57.9
3.25281 1 2.6 2.6 60.5 60.5
3.53737 1 2.6 2.6 63.2 63.2
3.54399 1 2.6 2.6 65.8 65.8
3.63962 1 2.6 2.6 68.4 68.4
3.69596 1 2.6 2.6 71.1 71.1
4.53362 1 2.6 2.6 73.7 73.7
4.69133 1 2.6 2.6 76.3 76.3
4.75791 1 2.6 2.6 78.9 78.9
5.79867 1 2.6 2.6 81.6 81.6
5.89721 1 2.6 2.6 84.2 84.2
6.66574 1 2.6 2.6 86.8 86.8
7.28575 1 2.6 2.6 89.5 89.5
8.52584 1 2.6 2.6 92.1 92.1
11.67237 1 2.6 2.6 94.7 94.7
17.28613 1 2.6 2.6 97.4 97.4
36.37629 1 2.6 2.6 100.0 100.0
Total 38 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  39 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
##  $ val%   : num  2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
##  $ %cum   : num  2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
##  $ val%cum: num  2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
# Split the frequency table into labels and counts, dropping the trailing
# "Total" row that questionr::freq(total = TRUE) appends.
x <- row.names(table)
y <- table$n
# head(v, -1) drops the last element and, unlike v[1:(length(v) - 1)], does
# not pick up a stray element when only one row is present.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.28887 1
0.31238 1
0.36987 1
0.43391 1
0.74414 1
0.91552 1
0.97057 1
0.98377 1
1.23639 1
1.24404 1
1.30583 1
1.39095 1
1.85897 1
1.90052 1
2.00805 1
2.0083 1
2.21442 1
2.87349 1
2.91326 1
2.91594 1
2.99239 1
3.12986 1
3.25281 1
3.53737 1
3.54399 1
3.63962 1
3.69596 1
4.53362 1
4.69133 1
4.75791 1
5.79867 1
5.89721 1
6.66574 1
7.28575 1
8.52584 1
11.67237 1
17.28613 1
36.37629 1
library(ggplot2)

# Bar height is the count of each distinct distance value; the x labels are
# rotated so the values do not overlap.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule for the number of classes: 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Take the floor when the ceiling is even, otherwise the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up to a
# whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Breaks start at the minimum and may extend up to one width past the maximum.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.28887  6.28887 12.28887 18.28887 24.28887 30.28887 36.28887 42.28887
# Assign every distance to its class. include.lowest = TRUE closes the first
# interval on the left; without it the minimum (which equals the first break)
# falls outside every class and is silently dropped from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.289,6.29] 31 0.8378378 31
(6.29,12.3] 4 0.1081081 35
(12.3,18.3] 1 0.0270270 36
(18.3,24.3] 0 0.0000000 36
(24.3,30.3] 0 0.0000000 36
(30.3,36.3] 0 0.0000000 36
(36.3,42.3] 1 0.0270270 37
str(Freq_table)
## 'data.frame':    7 obs. of  4 variables:
##  $ distance: Factor w/ 7 levels "(0.289,6.29]",..: 1 2 3 4 5 6 7
##  $ Freq    : int  31 4 1 0 0 0 1
##  $ Rel_Freq: num  0.838 0.108 0.027 0 0 ...
##  $ Cum_Freq: int  31 35 36 36 36 36 37
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
(0.289,6.29] 31
(6.29,12.3] 4
(12.3,18.3] 1
(18.3,24.3] 0
(24.3,30.3] 0
(30.3,36.3] 0
(36.3,42.3] 1
library(ggplot2)

# Bar height is the number of observations in each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics are only defined for numeric columns; passing the
# character columns as well just adds all-NA columns to the result.
stat.desc(df_NC[vapply(df_NC, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      3.800000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.540000e+02   NA   NA             NA           NA           NA
## max          7.485000e+03   NA   NA             NA           NA           NA
## range        6.631000e+03   NA   NA             NA           NA           NA
## sum          2.290210e+05   NA   NA             NA           NA           NA
## median       7.448500e+03   NA   NA             NA           NA           NA
## mean         6.026868e+03   NA   NA             NA           NA           NA
## SE.mean      3.671138e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 7.438432e+02   NA   NA             NA           NA           NA
## var          5.121356e+06   NA   NA             NA           NA           NA
## std.dev      2.263041e+03   NA   NA             NA           NA           NA
## coef.var     3.754921e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.800000e+01   NA  38.000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 1.043000e+03   NA   0.288870                   NA
## max             NA 8.508480e+05   NA  36.376290                   NA
## range           NA 8.498050e+05   NA  36.087420                   NA
## sum             NA 7.001138e+06   NA 166.172050                   NA
## median          NA 1.936000e+03   NA   2.914600                   NA
## mean            NA 1.842405e+05   NA   4.372949                   NA
## SE.mean         NA 5.663200e+04   NA   1.023393                   NA
## CI.mean.0.95    NA 1.147473e+05   NA   2.073592                   NA
## var             NA 1.218729e+11   NA  39.798695                   NA
## std.dev         NA 3.491031e+05   NA   6.308621                   NA
## coef.var        NA 1.894823e+00   NA   1.442647                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       38.00000000  3.800000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min           13.28610000 -8.916660e+01          NA          NA             NA
## max           15.52270000 -8.526500e+01          NA          NA             NA
## range          2.23660000  3.901600e+00          NA          NA             NA
## sum          546.00580000 -3.329927e+03          NA          NA             NA
## median        14.20695000 -8.728760e+01          NA          NA             NA
## mean          14.36857368 -8.762966e+01          NA          NA             NA
## SE.mean        0.08447952  1.301577e-01          NA          NA             NA
## CI.mean.0.95   0.17117178  2.637245e-01          NA          NA             NA
## var            0.27119802  6.437585e-01          NA          NA             NA
## std.dev        0.52076676  8.023457e-01          NA          NA             NA
## coef.var       0.03624346 -9.156096e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 29.0000000 38.0000000
## nbr.null                 NA      NA         NA 26.0000000 25.0000000
## nbr.na                   NA      NA         NA  9.0000000  0.0000000
## min                      NA      NA         NA  0.0000000  0.0000000
## max                      NA      NA         NA  3.0000000 29.0000000
## range                    NA      NA         NA  3.0000000 29.0000000
## sum                      NA      NA         NA  6.0000000 86.0000000
## median                   NA      NA         NA  0.0000000  0.0000000
## mean                     NA      NA         NA  0.2068966  2.2631579
## SE.mean                  NA      NA         NA  0.1253499  0.9774733
## CI.mean.0.95             NA      NA         NA  0.2567675  1.9805491
## var                      NA      NA         NA  0.4556650 36.3072546
## std.dev                  NA      NA         NA  0.6750296  6.0255502
## coef.var                 NA      NA         NA  3.2626433  2.6624524
##              source_name source_link        prop         ypos
## nbr.val               NA          NA  38.0000000 3.800000e+01
## nbr.null              NA          NA   0.0000000 0.000000e+00
## nbr.na                NA          NA   0.0000000 0.000000e+00
## min                   NA          NA   0.1738379 9.399294e-02
## max                   NA          NA  21.8907391 9.988871e+01
## range                 NA          NA  21.7169012 9.979472e+01
## sum                   NA          NA 100.0000000 1.347990e+03
## median                NA          NA   1.7539652 3.219648e+01
## mean                  NA          NA   2.6315789 3.547342e+01
## SE.mean               NA          NA   0.6158637 4.046533e+00
## CI.mean.0.95          NA          NA   1.2478585 8.199055e+00
## var                   NA          NA  14.4129500 6.222284e+02
## std.dev               NA          NA   3.7964391 2.494451e+01
## coef.var              NA          NA   1.4426469 7.031887e-01
# Horizontal green box plot of `data`.
# NOTE(review): `data` is presumably the ts of distances built earlier in this
# section — confirm against the time-series chunk.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Masaya (Nicaragua)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th catalog columns the short names "state" and "city",
# then preview the landslide records reported for Nicaragua.
colnames(df)[c(7, 9)] <- c("state", "city")
df_NC <- dplyr::filter(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
229 9/4/07 NA NA Nicaragua NI Atlántico Norte 6315 Bonanza 54.90196 NA 13.6670 -84.2435 (13.667, -84.243499999999997) Landslide Complex Medium Tropical cyclone Hurricane Felix NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/
826 10/3/08 NA NA Nicaragua NI Masaya 5182 Tisma 14.49301 NA 12.1200 -85.8900 (12.12, -85.89) Landslide Landslide Medium Downpour NA NA 9 CBC http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html
2289 8/20/10 NA NA Nicaragua NI Managua 16469 El Crucero 5.84054 NA 12.0420 -86.2998 (12.042, -86.299800000000005) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2330 8/25/10 NA NA Nicaragua NI Jinotega 2367 San José de Bocay 1.36745 NA 13.5317 -85.5325 (13.531700000000001, -85.532499999999999) Landslide Landslide Medium Downpour NA NA NA NA NA
6089 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.14242 Unknown 12.3535 -84.8095 (12.3535, -84.8095) Landslide Landslide Small Continuous rain NA 0 0 Wilfried Strauch NA
6090 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.24511 Unknown 12.3521 -84.8080 (12.3521, -84.808000000000007) Landslide Landslide Medium Continuous rain NA 0 0 Wilfried Strauch NA
library(dplyr)
# Keep only the Nicaraguan records whose state is Masaya.
# The country condition is part of the filter because the rows are taken from
# the full catalog `df`; matching on the state name alone would also pick up a
# same-named state in any other country.
df_NC <- subset(df, country_name == "Nicaragua" & state == "Masaya")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
826 10/3/08 NA NA Nicaragua NI Masaya 5182 Tisma 14.49301 NA 12.1200 -85.89 (12.12, -85.89) Landslide Landslide Medium Downpour NA NA 9 CBC http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html
7481 5/13/15 NA NA Nicaragua NI Masaya 2111 San Juan de Oriente 1.56730 Natural slope 11.9013 -86.06 (11.901300000000001, -86.06) Landslide Debris flow Large Rain NA 0 0 La Gente http://www.radiolaprimerisima.com/noticias/183345/reportan-despales-en-la-reserva-rio-indio-maiz

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance of each record within the state, filled by city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of the state's records piled up, filled by city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar drawn in polar coordinates, with
# the distance mapped to the angle and one slice colour per city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to each record its share of the total distance (prop, in percent) and
# the mid-point of its slice on the cumulative percentage scale (ypos), which
# is where the label is drawn. Rows are sorted by descending city before the
# cumulative sum is taken.
df_NC <- df_NC %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
library(scales)
# Labelled pie chart: slices outlined in black, no legend, percentage text
# placed at ypos.
ggplot(df_NC, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared with 12 observations per
# unit of time and starting at 2008, then preview its first values.
# NOTE(review): df_NC is ordered by descending city at this point and the
# records carry their own irregular `date` values — confirm that treating the
# rows as an evenly spaced series in this order is intended.
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
14.49301
1.56730
# Plot the series with a title and axis labels on the black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for the colour
# aesthetic, not a line colour — confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of the distances, one bar per city: the distances are labelled
# with their city names so the chart and its printed table are keyed by city.
distance <- setNames(df_NC$distance, df_NC$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                      
## Pareto chart analysis for distance
##                       Frequency Cum.Freq. Percentage Cum.Percent.
##   Tisma                14.49301  14.49301   90.24116     90.24116
##   San Juan de Oriente   1.56730  16.06031    9.75884    100.00000
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 
##   1 | 4
head(df_NC)
## # A tibble: 2 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1   826 10/3/08 <NA>  <NA>           Nicaragua    NI           Masaya       5182
## 2  7481 5/13/15 <NA>  <NA>           Nicaragua    NI           Masaya       2111
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
826 10/3/08 NA NA Nicaragua NI Masaya 5182 Tisma 14.49301 NA 12.1200 -85.89 (12.12, -85.89) Landslide Landslide Medium Downpour NA NA 9 CBC http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html 90.24116 45.12058
7481 5/13/15 NA NA Nicaragua NI Masaya 2111 San Juan de Oriente 1.56730 Natural slope 11.9013 -86.06 (11.901300000000001, -86.06) Landslide Debris flow Large Rain NA 0 0 La Gente http://www.radiolaprimerisima.com/noticias/183345/reportan-despales-en-la-reserva-rio-indio-maiz 9.75884 95.12058
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 2
##   0 | 
##   1 | 4
stem(df_NC$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 6
##    2 | 
##    4 | 
##    6 | 
##    8 | 
##   10 | 
##   12 | 
##   14 | 5

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values: counts, percentages,
# cumulative percentages (cum = TRUE) and a final "Total" row (total = TRUE).
# Note that the result is stored under the name `table`, the same name as the
# base function used further down to tabulate the grouped distances.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.5673 1 50 50 50 50
14.49301 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Build the plotting frame from the frequency table: the row names hold the
# distance values and column `n` their counts. The last row is the "Total"
# summary and is dropped.
# head(., -1) replaces 1:(length(.) - 1), which evaluates to c(1, 0) when only
# one row exists and would then keep that row instead of dropping it.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.5673 1
14.49301 1
library(ggplot2)
# Bar chart of the number of landslides recorded at each distance value; the
# x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n): use the ceiling unless it
# is even, in which case use the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Breaks start at the minimum and advance by w, up to max + w.
rango <- range(distance)
R <- rango[2] - rango[1]
w <- ceiling(R / n_clases)
bins <- seq(rango[1], rango[2] + w, by = w)
bins
## [1]  1.5673  8.5673 15.5673
# Bin the distances into the class intervals and tabulate the absolute,
# relative and cumulative frequency of each class.
# include.lowest = TRUE closes the first interval on the left: the first break
# is min(distance), so with the default (a, b] intervals the minimum would fall
# outside every class, become NA and be missing from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.57,8.57] 0 0 0
(8.57,15.6] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(1.57,8.57]",..: 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Two-column plotting frame: class interval (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.57,8.57] 0
(8.57,15.6] 1
library(ggplot2)

# Bar chart of the absolute frequency of each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_NC. The whole data frame is
# passed in, so the character columns appear in the result filled with NA.
stat.desc(df_NC)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.260000e+02   NA   NA             NA           NA           NA
## max          7.481000e+03   NA   NA             NA           NA           NA
## range        6.655000e+03   NA   NA             NA           NA           NA
## sum          8.307000e+03   NA   NA             NA           NA           NA
## median       4.153500e+03   NA   NA             NA           NA           NA
## mean         4.153500e+03   NA   NA             NA           NA           NA
## SE.mean      3.327500e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 4.227990e+04   NA   NA             NA           NA           NA
## var          2.214451e+07   NA   NA             NA           NA           NA
## std.dev      4.705796e+03   NA   NA             NA           NA           NA
## coef.var     1.132971e+00   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 2.111000e+03   NA  1.567300                   NA 11.90130000
## max             NA 5.182000e+03   NA 14.493010                   NA 12.12000000
## range           NA 3.071000e+03   NA 12.925710                   NA  0.21870000
## sum             NA 7.293000e+03   NA 16.060310                   NA 24.02130000
## median          NA 3.646500e+03   NA  8.030155                   NA 12.01065000
## mean            NA 3.646500e+03   NA  8.030155                   NA 12.01065000
## SE.mean         NA 1.535500e+03   NA  6.462855                   NA  0.10935000
## CI.mean.0.95    NA 1.951038e+04   NA 82.118359                   NA  1.38942349
## var             NA 4.715521e+06   NA 83.536990                   NA  0.02391484
## std.dev         NA 2.171525e+03   NA  9.139857                   NA  0.15464425
## coef.var        NA 5.955094e-01   NA  1.138192                   NA  0.01287559
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.606000e+01          NA          NA             NA
## max          -8.589000e+01          NA          NA             NA
## range         1.700000e-01          NA          NA             NA
## sum          -1.719500e+02          NA          NA             NA
## median       -8.597500e+01          NA          NA             NA
## mean         -8.597500e+01          NA          NA             NA
## SE.mean       8.500000e-02          NA          NA             NA
## CI.mean.0.95  1.080027e+00          NA          NA             NA
## var           1.445000e-02          NA          NA             NA
## std.dev       1.202082e-01          NA          NA             NA
## coef.var     -1.398176e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   2.000000          NA
## nbr.null                 NA      NA         NA        1   1.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   9.000000          NA
## range                    NA      NA         NA        0   9.000000          NA
## sum                      NA      NA         NA        0   9.000000          NA
## median                   NA      NA         NA        0   4.500000          NA
## mean                     NA      NA         NA        0   4.500000          NA
## SE.mean                  NA      NA         NA       NA   4.500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  57.177921          NA
## var                      NA      NA         NA       NA  40.500000          NA
## std.dev                  NA      NA         NA       NA   6.363961          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA    2.000000    2.0000000
## nbr.null              NA    0.000000    0.0000000
## nbr.na                NA    0.000000    0.0000000
## min                   NA    9.758840   45.1205799
## max                   NA   90.241160   95.1205799
## range                 NA   80.482319   50.0000000
## sum                   NA  100.000000  140.2411597
## median                NA   50.000000   70.1205799
## mean                  NA   50.000000   70.1205799
## SE.mean               NA   40.241160   25.0000000
## CI.mean.0.95          NA  511.312414  317.6551184
## var                   NA 3238.701873 1250.0000000
## std.dev               NA   56.909594   35.3553391
## coef.var              NA    1.138192    0.5042077
# Horizontal green box plot of `data`, the ts object built from
# df_NC$distance in the time-series chunk of this section.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Managua (Nicaragua)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th catalog columns the short names "state" and "city",
# then preview the landslide records reported for Nicaragua.
colnames(df)[c(7, 9)] <- c("state", "city")
df_NC <- dplyr::filter(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
229 9/4/07 NA NA Nicaragua NI Atlántico Norte 6315 Bonanza 54.90196 NA 13.6670 -84.2435 (13.667, -84.243499999999997) Landslide Complex Medium Tropical cyclone Hurricane Felix NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/
826 10/3/08 NA NA Nicaragua NI Masaya 5182 Tisma 14.49301 NA 12.1200 -85.8900 (12.12, -85.89) Landslide Landslide Medium Downpour NA NA 9 CBC http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html
2289 8/20/10 NA NA Nicaragua NI Managua 16469 El Crucero 5.84054 NA 12.0420 -86.2998 (12.042, -86.299800000000005) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2330 8/25/10 NA NA Nicaragua NI Jinotega 2367 San José de Bocay 1.36745 NA 13.5317 -85.5325 (13.531700000000001, -85.532499999999999) Landslide Landslide Medium Downpour NA NA NA NA NA
6089 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.14242 Unknown 12.3535 -84.8095 (12.3535, -84.8095) Landslide Landslide Small Continuous rain NA 0 0 Wilfried Strauch NA
6090 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.24511 Unknown 12.3521 -84.8080 (12.3521, -84.808000000000007) Landslide Landslide Medium Continuous rain NA 0 0 Wilfried Strauch NA
library(dplyr)
# Keep only the Nicaraguan records whose state is Managua.
# The country condition is part of the filter because the rows are taken from
# the full catalog `df`; matching on the state name alone would also pick up a
# same-named state in any other country.
df_NC <- subset(df, country_name == "Nicaragua" & state == "Managua")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2289 8/20/10 NA NA Nicaragua NI Managua 16469 El Crucero 5.84054 NA 12.0420 -86.2998 (12.042, -86.299800000000005) Landslide Mudslide Medium Downpour NA NA 3 NA NA
6270 10/16/14 Night NA Nicaragua NI Managua 70013 Ciudad Sandino 5.59574 Retaining wall 12.1137 -86.2409 (12.1137, -86.240899999999996) Landslide Mudslide Small Downpour NA 0 9 The Washington Post http://www.washingtonpost.com/posttv/world/at-least-nine-killed-in-nicaragua-mudslide/2014/10/17/4c9559dc-5633-11e4-b86d-184ac281388d_video.html
7477 6/12/15 NA NA Nicaragua NI Managua 1902 Terrabona 18.92056 Above road 12.5769 -86.0418 (12.5769, -86.041799999999995) Landslide Landslide Medium Rain NA 0 0 La Prensa http://www.laprensa.com.ni/2015/06/12/nacionales/1849021-managua-colapsa

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: distance of each record within the state, filled by city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances of the state's records piled up, filled by city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a single full-width stacked bar drawn in polar coordinates, with
# the distance mapped to the angle and one slice colour per city.
ggplot(df_NC, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add to each record its share of the total distance (prop, in percent) and
# the mid-point of its slice on the cumulative percentage scale (ypos), which
# is where the label is drawn. Rows are sorted by descending city before the
# cumulative sum is taken.
df_NC <- df_NC %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
library(scales)
# Labelled pie chart: slices outlined in black, no legend, percentage text
# placed at ypos.
ggplot(df_NC, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and triggered an "Unknown palette"
  # warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared with 12 observations per
# unit of time and starting at 2008, then preview its first values.
# NOTE(review): df_NC is ordered by descending city at this point and the
# records carry their own irregular `date` values — confirm that treating the
# rows as an evenly spaced series in this order is intended.
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
18.92056
5.84054
5.59574
# Plot the series with a title and axis labels on the black-and-white theme.
# NOTE(review): `colour = "green"` inside labs() is a label for the colour
# aesthetic, not a line colour — confirm whether a green line was intended.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of the distances, one bar per city: the distances are labelled
# with their city names so the chart and its printed table are keyed by city.
distance <- setNames(df_NC$distance, df_NC$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                 
## Pareto chart analysis for distance
##                  Frequency Cum.Freq. Percentage Cum.Percent.
##   Terrabona       18.92056  18.92056   62.32717     62.32717
##   El Crucero       5.84054  24.76110   19.23962     81.56679
##   Ciudad Sandino   5.59574  30.35684   18.43321    100.00000
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 66
##   1 | 
##   1 | 9
head(df_NC)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state   population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>        <dbl>
## 1  7477 6/12/15  <NA>  <NA>           Nicaragua    NI           Managua       1902
## 2  2289 8/20/10  <NA>  <NA>           Nicaragua    NI           Managua      16469
## 3  6270 10/16/14 Night <NA>           Nicaragua    NI           Managua      70013
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7477 6/12/15 NA NA Nicaragua NI Managua 1902 Terrabona 18.92056 Above road 12.5769 -86.0418 (12.5769, -86.041799999999995) Landslide Landslide Medium Rain NA 0 0 La Prensa http://www.laprensa.com.ni/2015/06/12/nacionales/1849021-managua-colapsa 62.32717 31.16359
2289 8/20/10 NA NA Nicaragua NI Managua 16469 El Crucero 5.84054 NA 12.0420 -86.2998 (12.042, -86.299800000000005) Landslide Mudslide Medium Downpour NA NA 3 NA NA 19.23962 71.94698
6270 10/16/14 Night NA Nicaragua NI Managua 70013 Ciudad Sandino 5.59574 Retaining wall 12.1137 -86.2409 (12.1137, -86.240899999999996) Landslide Mudslide Small Downpour NA 0 9 The Washington Post http://www.washingtonpost.com/posttv/world/at-least-nine-killed-in-nicaragua-mudslide/2014/10/17/4c9559dc-5633-11e4-b86d-184ac281388d_video.html 18.43321 90.78340
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 66
##   1 | 
##   1 | 9
stem(df_NC$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    4 | 68
##    6 | 
##    8 | 
##   10 | 
##   12 | 
##   14 | 
##   16 | 
##   18 | 9

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values: counts, percentages,
# cumulative percentages (cum = TRUE) and a final "Total" row (total = TRUE).
# Note that the result is stored under the name `table`, the same name as the
# base function used further down to tabulate the grouped distances.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
5.59574 1 33.3 33.3 33.3 33.3
5.84054 1 33.3 33.3 66.7 66.7
18.92056 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Build the plotting frame from the frequency table: the row names hold the
# distance values and column `n` their counts. The last row is the "Total"
# summary and is dropped.
# head(., -1) replaces 1:(length(.) - 1), which evaluates to c(1, 0) when only
# one row exists and would then keep that row instead of dropping it.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
5.59574 1
5.84054 1
18.92056 1
library(ggplot2)
# Bar chart of the number of landslides recorded at each distance value; the
# x-axis labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n): use the ceiling unless it
# is even, in which case use the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole number. Breaks start at the minimum and advance by w, up to max + w.
rango <- range(distance)
R <- rango[2] - rango[1]
w <- ceiling(R / n_clases)
bins <- seq(rango[1], rango[2] + w, by = w)
bins
## [1]  5.59574 10.59574 15.59574 20.59574
# Bin the distances into the class intervals and tabulate the absolute,
# relative and cumulative frequency of each class.
# include.lowest = TRUE closes the first interval on the left: the first break
# is min(distance), so with the default (a, b] intervals the minimum would fall
# outside every class, become NA and be missing from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(5.6,10.6] 1 0.5 1
(10.6,15.6] 0 0.0 1
(15.6,20.6] 1 0.5 2
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(5.6,10.6]","(10.6,15.6]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Two-column plotting frame: class interval (x) and its absolute frequency (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(5.6,10.6] 1
(10.6,15.6] 0
(15.6,20.6] 1
library(ggplot2)

# Bar chart of the absolute frequency of each distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_NC. The whole data frame is
# passed in, so the character columns appear in the result filled with NA.
stat.desc(df_NC)
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.289000e+03   NA   NA             NA           NA           NA
## max          7.477000e+03   NA   NA             NA           NA           NA
## range        5.188000e+03   NA   NA             NA           NA           NA
## sum          1.603600e+04   NA   NA             NA           NA           NA
## median       6.270000e+03   NA   NA             NA           NA           NA
## mean         5.345333e+03   NA   NA             NA           NA           NA
## SE.mean      1.567386e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 6.743916e+03   NA   NA             NA           NA           NA
## var          7.370092e+06   NA   NA             NA           NA           NA
## std.dev      2.714791e+03   NA   NA             NA           NA           NA
## coef.var     5.078807e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.902000e+03   NA  5.5957400                   NA
## max             NA 7.001300e+04   NA 18.9205600                   NA
## range           NA 6.811100e+04   NA 13.3248200                   NA
## sum             NA 8.838400e+04   NA 30.3568400                   NA
## median          NA 1.646900e+04   NA  5.8405400                   NA
## mean            NA 2.946133e+04   NA 10.1189467                   NA
## SE.mean         NA 2.070731e+04   NA  4.4013740                   NA
## CI.mean.0.95    NA 8.909635e+04   NA 18.9375839                   NA
## var             NA 1.286378e+09   NA 58.1162797                   NA
## std.dev         NA 3.586611e+04   NA  7.6234034                   NA
## coef.var        NA 1.217396e+00   NA  0.7533791                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.00000000    3.00000000          NA          NA             NA
## nbr.null      0.00000000    0.00000000          NA          NA             NA
## nbr.na        0.00000000    0.00000000          NA          NA             NA
## min          12.04200000  -86.29980000          NA          NA             NA
## max          12.57690000  -86.04180000          NA          NA             NA
## range         0.53490000    0.25800000          NA          NA             NA
## sum          36.73260000 -258.58250000          NA          NA             NA
## median       12.11370000  -86.24090000          NA          NA             NA
## mean         12.24420000  -86.19416667          NA          NA             NA
## SE.mean       0.16763272    0.07805768          NA          NA             NA
## CI.mean.0.95  0.72126540    0.33585508          NA          NA             NA
## var           0.08430219    0.01827900          NA          NA             NA
## std.dev       0.29034839    0.13519986          NA          NA             NA
## coef.var      0.02371314   -0.00156855          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2   3.000000          NA
## nbr.null                 NA      NA         NA        2   1.000000          NA
## nbr.na                   NA      NA         NA        1   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   9.000000          NA
## range                    NA      NA         NA        0   9.000000          NA
## sum                      NA      NA         NA        0  12.000000          NA
## median                   NA      NA         NA        0   3.000000          NA
## mean                     NA      NA         NA        0   4.000000          NA
## SE.mean                  NA      NA         NA        0   2.645751          NA
## CI.mean.0.95             NA      NA         NA        0  11.383749          NA
## var                      NA      NA         NA        0  21.000000          NA
## std.dev                  NA      NA         NA        0   4.582576          NA
## coef.var                 NA      NA         NA      NaN   1.145644          NA
##              source_link        prop        ypos
## nbr.val               NA   3.0000000   3.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA  18.4332098  31.1635862
## max                   NA  62.3271724  90.7833951
## range                 NA  43.8939626  59.6198089
## sum                   NA 100.0000000 193.8939626
## median                NA  19.2396178  71.9469813
## mean                  NA  33.3333333  64.6313209
## SE.mean               NA  14.4987885  17.5951657
## CI.mean.0.95          NA  62.3832518  75.7058878
## var                   NA 630.6446011 928.7695694
## std.dev               NA  25.1126383  30.4757210
## coef.var              NA   0.7533791   0.4715318
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Jinotega (Nicaragua)

# Load readr (for read_csv) and knitr (for kable), then read the landslide
# catalogue.
library(readr)
library(knitr)
# The CSV is fetched directly from GitHub, so this step needs network access.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used by the rest of the script.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows recorded for Nicaragua and preview the first ones.
df_NC <- dplyr::filter(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
229 9/4/07 NA NA Nicaragua NI Atlántico Norte 6315 Bonanza 54.90196 NA 13.6670 -84.2435 (13.667, -84.243499999999997) Landslide Complex Medium Tropical cyclone Hurricane Felix NA NA United Nations Development Programme - Relief Web http://www.reliefweb.int/
826 10/3/08 NA NA Nicaragua NI Masaya 5182 Tisma 14.49301 NA 12.1200 -85.8900 (12.12, -85.89) Landslide Landslide Medium Downpour NA NA 9 CBC http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html
2289 8/20/10 NA NA Nicaragua NI Managua 16469 El Crucero 5.84054 NA 12.0420 -86.2998 (12.042, -86.299800000000005) Landslide Mudslide Medium Downpour NA NA 3 NA NA
2330 8/25/10 NA NA Nicaragua NI Jinotega 2367 San José de Bocay 1.36745 NA 13.5317 -85.5325 (13.531700000000001, -85.532499999999999) Landslide Landslide Medium Downpour NA NA NA NA NA
6089 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.14242 Unknown 12.3535 -84.8095 (12.3535, -84.8095) Landslide Landslide Small Continuous rain NA 0 0 Wilfried Strauch NA
6090 6/23/14 NA NA Nicaragua NI Chontales 5827 Santo Domingo 31.24511 Unknown 12.3521 -84.8080 (12.3521, -84.808000000000007) Landslide Landslide Medium Continuous rain NA 0 0 Wilfried Strauch NA
library(dplyr)
# Narrow the Nicaraguan records down to the state of Jinotega. The filter is
# applied to df_NC (already restricted to Nicaragua) instead of the full
# catalogue, so a same-named state in another country can never leak in.
df_NC <- subset(df_NC, state == "Jinotega")
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2330 8/25/10 NA NA Nicaragua NI Jinotega 2367 San José de Bocay 1.36745 NA 13.5317 -85.5325 (13.531700000000001, -85.532499999999999) Landslide Landslide Medium Downpour NA NA NA NA NA
7470 10/8/15 NA NA Nicaragua NI Jinotega 6955 Wiwilí 25.81514 Unknown 13.8176 -85.6880 (13.817600000000001, -85.688000000000002) Landslide Landslide Medium Rain NA 0 0 El Nuevo Diario http://www.elnuevodiario.com.ni/nacionales/372954-evacuan-cinco-familias-deslizamiento-tierra-jinote/
7471 2/19/16 NA NA Nicaragua NI Jinotega 51073 Jinotega 2.44880 Below road 13.0805 -85.9925 (13.080500000000001, -85.992500000000007) Landslide Landslide Small Rain NA 0 0 Hoy http://www.hoy.com.ni/2015/09/26/r%C3%ADo-ahoga-a-tres-ni%C3%B1as/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per city within the state, bar height = distance.
ggplot(data = df_NC, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the cities' distances are piled on top of each other per state.
ggplot(data = df_NC, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Full-width stacked bar bent into a circle by mapping y onto the polar angle.
ggplot(data = df_NC, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Pie chart with percentage labels.
#   prop: share (in %) of the total distance contributed by each row.
#   ypos: midpoint of each slice on the cumulative percentage axis, used to
#         position the label.
df_NC <- df_NC %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_NC, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
  # warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# starting in 2008, then preview its first values.
# NOTE(review): df_NC was reordered with arrange(desc(city)) earlier and the
# landslide dates are irregular, so the values are neither in chronological
# order nor truly monthly — confirm that this time axis is intended.
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
25.81514
1.36745
2.44880
# Line plot of the ts object with a title, axis labels and the bw theme.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per row, labelled with its city.
distance <- setNames(df_NC$distance, df_NC$city)
# Pareto chart of the distances with a cumulative-percentage axis in 10% steps.
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##                    
## Pareto chart analysis for distance
##                      Frequency  Cum.Freq. Percentage Cum.Percent.
##   Wiwilí             25.815140  25.815140  87.120921    87.120921
##   Jinotega            2.448800  28.263940   8.264209    95.385130
##   San José de Bocay   1.367450  29.631390   4.614870   100.000000
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 12
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 6
head(df_NC)
## # A tibble: 3 x 25
##      id date    time  continent_code country_name country_code state    population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1  7470 10/8/15 <NA>  <NA>           Nicaragua    NI           Jinotega       6955
## 2  2330 8/25/10 <NA>  <NA>           Nicaragua    NI           Jinotega       2367
## 3  7471 2/19/16 <NA>  <NA>           Nicaragua    NI           Jinotega      51073
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_NC))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
7470 10/8/15 NA NA Nicaragua NI Jinotega 6955 Wiwilí 25.81514 Unknown 13.8176 -85.6880 (13.817600000000001, -85.688000000000002) Landslide Landslide Medium Rain NA 0 0 El Nuevo Diario http://www.elnuevodiario.com.ni/nacionales/372954-evacuan-cinco-familias-deslizamiento-tierra-jinote/ 87.120921 43.56046
2330 8/25/10 NA NA Nicaragua NI Jinotega 2367 San José de Bocay 1.36745 NA 13.5317 -85.5325 (13.531700000000001, -85.532499999999999) Landslide Landslide Medium Downpour NA NA NA NA NA 4.614870 89.42836
7471 2/19/16 NA NA Nicaragua NI Jinotega 51073 Jinotega 2.44880 Below road 13.0805 -85.9925 (13.080500000000001, -85.992500000000007) Landslide Landslide Small Rain NA 0 0 Hoy http://www.hoy.com.ni/2015/09/26/r%C3%ADo-ahoga-a-tres-ni%C3%B1as/ 8.264209 95.86790
stem(df_NC$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 12
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 6
stem(df_NC$"distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 12
##   0 | 
##   1 | 
##   1 | 
##   2 | 
##   2 | 6

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which shadows
# base::table for plain variable lookups — consider a more specific name.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.36745 1 33.3 33.3 33.3 33.3
2.4488 1 33.3 33.3 66.7 66.7
25.81514 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Split the frequency table into its labels (row names) and counts.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(., -1) replaces 1:(length(.) - 1), which
# evaluates to c(1, 0) on a one-row table and would keep that row instead of
# dropping it.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column data frame (value, count) consumed by the bar chart below.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.36745 1
2.4488 1
25.81514 1
library(ggplot2)
# Bar chart of the ungrouped frequency table: one bar per distinct distance.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, 1 + log2(n).
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even, in which case fall back to the
# rounded-down count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split into n_clases parts, rounded up to a whole
# number; breaks start at the minimum and extend past the maximum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  1.36745 10.36745 19.36745 28.36745
# The first break equals min(distance) and cut() intervals are open on the
# left by default, so include.lowest = TRUE is required: without it the
# smallest observation becomes NA and is missing from the frequency table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequencies per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.37,10.4] 1 0.5 1
(10.4,19.4] 0 0.0 1
(19.4,28.4] 1 0.5 2
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(1.37,10.4]",..: 1 2 3
##  $ Freq    : int  1 0 1
##  $ Rel_Freq: num  0.5 0 0.5
##  $ Cum_Freq: int  1 1 2
# Reshape the grouped frequency table into the x/y layout used by the chart.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.37,10.4] 1
(10.4,19.4] 0
(19.4,28.4] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_NC; the character columns
# (date, state, city, ...) come out as NA in the printed table.
stat.desc(df_NC)
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.330000e+03   NA   NA             NA           NA           NA
## max          7.471000e+03   NA   NA             NA           NA           NA
## range        5.141000e+03   NA   NA             NA           NA           NA
## sum          1.727100e+04   NA   NA             NA           NA           NA
## median       7.470000e+03   NA   NA             NA           NA           NA
## mean         5.757000e+03   NA   NA             NA           NA           NA
## SE.mean      1.713500e+03   NA   NA             NA           NA           NA
## CI.mean.0.95 7.372596e+03   NA   NA             NA           NA           NA
## var          8.808247e+06   NA   NA             NA           NA           NA
## std.dev      2.967869e+03   NA   NA             NA           NA           NA
## coef.var     5.155236e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 3.000000e+00   NA   3.000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 2.367000e+03   NA   1.367450                   NA
## max             NA 5.107300e+04   NA  25.815140                   NA
## range           NA 4.870600e+04   NA  24.447690                   NA
## sum             NA 6.039500e+04   NA  29.631390                   NA
## median          NA 6.955000e+03   NA   2.448800                   NA
## mean            NA 2.013167e+04   NA   9.877130                   NA
## SE.mean         NA 1.552726e+04   NA   7.975117                   NA
## CI.mean.0.95    NA 6.680839e+04   NA  34.314157                   NA
## var             NA 7.232870e+08   NA 190.807452                   NA
## std.dev         NA 2.689400e+04   NA  13.813307                   NA
## coef.var        NA 1.335905e+00   NA   1.398514                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       3.00000000  3.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min          13.08050000 -8.599250e+01          NA          NA             NA
## max          13.81760000 -8.553250e+01          NA          NA             NA
## range         0.73710000  4.600000e-01          NA          NA             NA
## sum          40.42980000 -2.572130e+02          NA          NA             NA
## median       13.53170000 -8.568800e+01          NA          NA             NA
## mean         13.47660000 -8.573767e+01          NA          NA             NA
## SE.mean       0.21455855  1.350927e-01          NA          NA             NA
## CI.mean.0.95  0.92317092  5.812568e-01          NA          NA             NA
## var           0.13810611  5.475008e-02          NA          NA             NA
## std.dev       0.37162630  2.339874e-01          NA          NA             NA
## coef.var      0.02757567 -2.729108e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2          2          NA
## nbr.null                 NA      NA         NA        2          2          NA
## nbr.na                   NA      NA         NA        1          1          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    3.000000   3.0000000
## nbr.null              NA    0.000000   0.0000000
## nbr.na                NA    0.000000   0.0000000
## min                   NA    4.614870  43.5604607
## max                   NA   87.120921  95.8678955
## range                 NA   82.506052  52.3074348
## sum                   NA  100.000000 228.8567124
## median                NA    8.264209  89.4283562
## mean                  NA   33.333333  76.2855708
## SE.mean               NA   26.914419  16.4678124
## CI.mean.0.95          NA  115.803400  70.8552781
## var                   NA 2173.157894 813.5665396
## std.dev               NA   46.617142  28.5230878
## coef.var              NA    1.398514   0.3738989
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Costa Rica

# Load readr (for read_csv) and knitr (for kable), then re-read the landslide
# catalogue; `df` was overwritten with a small plotting data frame above.
library(readr)
library(knitr)
# The CSV is fetched directly from GitHub, so this step needs network access.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names used by the rest of the script.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows recorded for Costa Rica and preview the first ones.
df_CR <- dplyr::filter(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one group of bars per state, bar height = distance.
ggplot(data = df_CR, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the states' distances are piled on top of each other.
ggplot(data = df_CR, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Full-width stacked bar bent into a circle by mapping y onto the polar angle.
ggplot(data = df_CR, mapping = aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# Pie chart with percentage labels.
#   prop: share (in %) of the total distance contributed by each row.
#   ypos: midpoint of each slice on the cumulative percentage axis, used to
#         position the label.
df_CR <- df_CR %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_CR, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and only produced an "Unknown palette"
  # warning; "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# starting in 2008, then preview its first values.
# NOTE(review): df_CR was reordered with arrange(desc(state)) earlier and the
# landslide dates are irregular, so the values are neither in chronological
# order nor truly monthly — confirm that this time axis is intended.
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
4.57763
9.56251
1.85787
16.24937
12.85801
0.25254
# Line plot of the ts object with a title, axis labels and the bw theme.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: one distance per row, labelled with its state.
# NOTE(review): state names repeat across rows, so the chart shows one bar per
# landslide rather than one per state — confirm whether the distances should
# be aggregated by state first.
distance <- setNames(df_CR$distance, df_CR$state)
# Pareto chart of the distances with a cumulative-percentage axis in 10% steps.
pareto.chart(
  distance,
  main = "Grafico de Pareto por estados",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(from = 0, to = 100, by = 10),
  col = heat.colors(length(distance))
)

##             
## Pareto chart analysis for distance
##                 Frequency    Cum.Freq.   Percentage Cum.Percent.
##   San José    22.32368000  22.32368000   3.45492296   3.45492296
##   Heredia     21.95470000  44.27838000   3.39781780   6.85274076
##   San José    21.67452000  65.95290000   3.35445576  10.20719651
##   Puntarenas  20.06633000  86.01923000   3.10556433  13.31276084
##   Heredia     19.54581000 105.56504000   3.02500608  16.33776693
##   Heredia     19.51432000 125.07936000   3.02013254  19.35789946
##   Cartago     19.28722000 144.36658000   2.98498542  22.34288488
##   Puntarenas  18.00524000 162.37182000   2.78657986  25.12946474
##   Guanacaste  17.65521000 180.02703000   2.73240749  27.86187223
##   Limón       17.23264000 197.25967000   2.66700847  30.52888070
##   San José    16.24937000 213.50904000   2.51483275  33.04371345
##   San José    15.64997000 229.15901000   2.42206664  35.46578009
##   Heredia     15.05161000 244.21062000   2.32946150  37.79524159
##   Heredia     14.81614000 259.02676000   2.29301900  40.08826059
##   Puntarenas  13.48919000 272.51595000   2.08765366  42.17591425
##   San José    12.85801000 285.37396000   1.98996913  44.16588338
##   Guanacaste  12.33807000 297.71203000   1.90950065  46.07538402
##   Guanacaste  12.21952000 309.93155000   1.89115326  47.96653728
##   Guanacaste  12.18115000 322.11270000   1.88521493  49.85175221
##   Alajuela    11.96524000 334.07794000   1.85179963  51.70355185
##   Puntarenas  11.74074000 345.81868000   1.81705490  53.52060675
##   San José    11.31047000 357.12915000   1.75046419  55.27107094
##   San José    10.73752000 367.86667000   1.66179162  56.93286257
##   Alajuela    10.32968000 378.19635000   1.59867229  58.53153486
##   Guanacaste  10.21631000 388.41266000   1.58112659  60.11266145
##   Heredia     10.01310000 398.42576000   1.54967681  61.66233825
##   San José    10.01198000 408.43774000   1.54950347  63.21184172
##   Heredia      9.85736000 418.29510000   1.52557371  64.73741544
##   Alajuela     9.84213000 428.13723000   1.52321664  66.26063208
##   Cartago      9.63616000 437.77339000   1.49133971  67.75197179
##   Alajuela     9.61692000 447.39031000   1.48836203  69.24033382
##   San José     9.56251000 456.95282000   1.47994127  70.72027510
##   San José     9.53611000 466.48893000   1.47585548  72.19613057
##   Cartago      9.53493000 476.02386000   1.47567286  73.67180343
##   Puntarenas   8.92048000 484.94434000   1.38057754  75.05238097
##   San José     8.39161000 493.33595000   1.29872701  76.35110797
##   San José     8.27042000 501.60637000   1.27997104  77.63107902
##   San José     8.21372000 509.82009000   1.27119587  78.90227489
##   Puntarenas   7.87044000 517.69053000   1.21806816  80.12034305
##   Alajuela     6.92174000 524.61227000   1.07124267  81.19158572
##   Alajuela     6.88715000 531.49942000   1.06588935  82.25747506
##   Alajuela     6.80061000 538.30003000   1.05249599  83.30997105
##   San José     6.49523000 544.79526000   1.00523387  84.31520492
##   Alajuela     5.96634000 550.76160000   0.92338024  85.23858516
##   Alajuela     5.95519000 556.71679000   0.92165461  86.16023978
##   Alajuela     5.57523000 562.29202000   0.86285013  87.02308991
##   Alajuela     5.43516000 567.72718000   0.84117220  87.86426211
##   Limón        5.36500000 573.09218000   0.83031390  88.69457601
##   Cartago      5.15142000 578.24360000   0.79725920  89.49183521
##   Alajuela     5.12667000 583.37027000   0.79342877  90.28526397
##   Puntarenas   4.93053000 588.30080000   0.76307317  91.04833715
##   San José     4.89954000 593.20034000   0.75827701  91.80661415
##   Alajuela     4.87432000 598.07466000   0.75437384  92.56098799
##   San José     4.57763000 602.65229000   0.70845663  93.26944461
##   Alajuela     4.24199000 606.89428000   0.65651132  93.92595593
##   Puntarenas   3.82425000 610.71853000   0.59185982  94.51781575
##   San José     3.71407000 614.43260000   0.57480782  95.09262357
##   San José     3.67691000 618.10951000   0.56905675  95.66168032
##   Alajuela     3.21979000 621.32930000   0.49831060  96.15999092
##   Alajuela     3.08916000 624.41846000   0.47809366  96.63808457
##   Alajuela     3.08459000 627.50305000   0.47738638  97.11547096
##   Cartago      3.07297000 630.57602000   0.47558801  97.59105897
##   Cartago      2.94804000 633.52406000   0.45625323  98.04731220
##   San José     2.92605000 636.45011000   0.45284995  98.50016215
##   Alajuela     2.08469000 638.53480000   0.32263692  98.82279907
##   San José     1.85787000 640.39267000   0.28753314  99.11033220
##   Alajuela     1.47396000 641.86663000   0.22811733  99.33844953
##   San José     1.16705000 643.03368000   0.18061842  99.51906795
##   San José     0.72957000 643.76325000   0.11291186  99.63197981
##   Alajuela     0.70048000 644.46373000   0.10840974  99.74038955
##   San José     0.55804000 645.02177000   0.08636503  99.82675458
##   Puntarenas   0.35225000 645.37402000   0.05451595  99.88127053
##   Heredia      0.26208000 645.63610000   0.04056080  99.92183132
##   San José     0.25254000 645.88864000   0.03908434  99.96091566
##   San José     0.25254000 646.14118000   0.03908434 100.00000000
stem(df_CR$"distance")
## 
##   The decimal point is at the |
## 
##    0 | 3334677259
##    2 | 1991112778
##    4 | 2699912446
##    6 | 0058999
##    8 | 23495566689
##   10 | 0023737
##   12 | 022395
##   14 | 816
##   16 | 227
##   18 | 0355
##   20 | 17
##   22 | 03
head(df_CR)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state    population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1   299 10/9/07  <NA>  <NA>           Costa Rica   CR           San José       3072
## 2   302 10/11/07 <NA>  <NA>           Costa Rica   CR           San José      26669
## 3   776 9/6/08   <NA>  <NA>           Costa Rica   CR           San José      10028
## 4   838 10/12/08 <NA>  <NA>           Costa Rica   CR           San José      34877
## 5   839 10/12/08 <NA>  <NA>           Costa Rica   CR           San José       8292
## 6  2526 10/1/10  <NA>  <NA>           Costa Rica   CR           San José       2833
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm 0.7084566 0.3542283
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php 1.4799413 1.4484273
776 9/6/08 NA NA Costa Rica CR San José 10028 Daniel Flores 1.85787 NA 9.3500 -83.6780 (9.35, -83.677999999999997) Landslide Mudslide Medium Downpour NA NA NA NA http://insidecostarica.com/dailynews/2008/september/07/nac02.htm 0.2875331 2.3321645
838 10/12/08 NA NA Costa Rica CR San José 34877 San Isidro 16.24937 NA 9.5190 -83.7060 (9.5190000000000001, -83.706000000000003) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1014081.htm 2.5148327 3.7333474
839 10/12/08 NA NA Costa Rica CR San José 8292 Santiago 12.85801 NA 9.7640 -84.3970 (9.7639999999999993, -84.397000000000006) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1014082.htm 1.9899691 5.9857483
2526 10/1/10 NA NA Costa Rica CR San José 2833 Salitral 0.25254 NA 9.9108 -84.1764 (9.9108000000000001, -84.176400000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.ticotimes.net/News/Mudslides-Force-63-Out-of-Santa-Ana-Homes_Friday-October-01-2010 0.0390843 7.0002751
stem(df_CR$"distance")
## 
##   The decimal point is at the |
## 
##    0 | 3334677259
##    2 | 1991112778
##    4 | 2699912446
##    6 | 0058999
##    8 | 23495566689
##   10 | 0023737
##   12 | 022395
##   14 | 816
##   16 | 227
##   18 | 0355
##   20 | 17
##   22 | 03
stem(df_CR$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 3334677259
##    2 | 1991112778
##    4 | 2699912446
##    6 | 0058999
##    8 | 23495566689
##   10 | 0023737
##   12 | 022395
##   14 | 816
##   16 | 227
##   18 | 0355
##   20 | 17
##   22 | 03

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which shadows
# base::table for plain variable lookups — consider a more specific name.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.25254 2 2.7 2.7 2.7 2.7
0.26208 1 1.3 1.3 4.0 4.0
0.35225 1 1.3 1.3 5.3 5.3
0.55804 1 1.3 1.3 6.7 6.7
0.70048 1 1.3 1.3 8.0 8.0
0.72957 1 1.3 1.3 9.3 9.3
1.16705 1 1.3 1.3 10.7 10.7
1.47396 1 1.3 1.3 12.0 12.0
1.85787 1 1.3 1.3 13.3 13.3
2.08469 1 1.3 1.3 14.7 14.7
2.92605 1 1.3 1.3 16.0 16.0
2.94804 1 1.3 1.3 17.3 17.3
3.07297 1 1.3 1.3 18.7 18.7
3.08459 1 1.3 1.3 20.0 20.0
3.08916 1 1.3 1.3 21.3 21.3
3.21979 1 1.3 1.3 22.7 22.7
3.67691 1 1.3 1.3 24.0 24.0
3.71407 1 1.3 1.3 25.3 25.3
3.82425 1 1.3 1.3 26.7 26.7
4.24199 1 1.3 1.3 28.0 28.0
4.57763 1 1.3 1.3 29.3 29.3
4.87432 1 1.3 1.3 30.7 30.7
4.89954 1 1.3 1.3 32.0 32.0
4.93053 1 1.3 1.3 33.3 33.3
5.12667 1 1.3 1.3 34.7 34.7
5.15142 1 1.3 1.3 36.0 36.0
5.365 1 1.3 1.3 37.3 37.3
5.43516 1 1.3 1.3 38.7 38.7
5.57523 1 1.3 1.3 40.0 40.0
5.95519 1 1.3 1.3 41.3 41.3
5.96634 1 1.3 1.3 42.7 42.7
6.49523 1 1.3 1.3 44.0 44.0
6.80061 1 1.3 1.3 45.3 45.3
6.88715 1 1.3 1.3 46.7 46.7
6.92174 1 1.3 1.3 48.0 48.0
7.87044 1 1.3 1.3 49.3 49.3
8.21372 1 1.3 1.3 50.7 50.7
8.27042 1 1.3 1.3 52.0 52.0
8.39161 1 1.3 1.3 53.3 53.3
8.92048 1 1.3 1.3 54.7 54.7
9.53493 1 1.3 1.3 56.0 56.0
9.53611 1 1.3 1.3 57.3 57.3
9.56251 1 1.3 1.3 58.7 58.7
9.61692 1 1.3 1.3 60.0 60.0
9.63616 1 1.3 1.3 61.3 61.3
9.84213 1 1.3 1.3 62.7 62.7
9.85736 1 1.3 1.3 64.0 64.0
10.01198 1 1.3 1.3 65.3 65.3
10.0131 1 1.3 1.3 66.7 66.7
10.21631 1 1.3 1.3 68.0 68.0
10.32968 1 1.3 1.3 69.3 69.3
10.73752 1 1.3 1.3 70.7 70.7
11.31047 1 1.3 1.3 72.0 72.0
11.74074 1 1.3 1.3 73.3 73.3
11.96524 1 1.3 1.3 74.7 74.7
12.18115 1 1.3 1.3 76.0 76.0
12.21952 1 1.3 1.3 77.3 77.3
12.33807 1 1.3 1.3 78.7 78.7
12.85801 1 1.3 1.3 80.0 80.0
13.48919 1 1.3 1.3 81.3 81.3
14.81614 1 1.3 1.3 82.7 82.7
15.05161 1 1.3 1.3 84.0 84.0
15.64997 1 1.3 1.3 85.3 85.3
16.24937 1 1.3 1.3 86.7 86.7
17.23264 1 1.3 1.3 88.0 88.0
17.65521 1 1.3 1.3 89.3 89.3
18.00524 1 1.3 1.3 90.7 90.7
19.28722 1 1.3 1.3 92.0 92.0
19.51432 1 1.3 1.3 93.3 93.3
19.54581 1 1.3 1.3 94.7 94.7
20.06633 1 1.3 1.3 96.0 96.0
21.67452 1 1.3 1.3 97.3 97.3
21.9547 1 1.3 1.3 98.7 98.7
22.32368 1 1.3 1.3 100.0 100.0
Total 75 100.0 100.0 100.0 100.0
# Inspect the classes and column types of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  75 obs. of  5 variables:
##  $ n      : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  2.7 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 ...
##  $ val%   : num  2.7 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 ...
##  $ %cum   : num  2.7 4 5.3 6.7 8 9.3 10.7 12 13.3 14.7 ...
##  $ val%cum: num  2.7 4 5.3 6.7 8 9.3 10.7 12 13.3 14.7 ...
# Split the frequency table into category labels and counts, dropping the
# trailing "Total" row so it is not drawn as a bar.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element. Unlike v[1:(length(v) - 1)], it
# returns an empty vector (not the first element) when v has a single entry.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.25254 2
0.26208 1
0.35225 1
0.55804 1
0.70048 1
0.72957 1
1.16705 1
1.47396 1
1.85787 1
2.08469 1
2.92605 1
2.94804 1
3.07297 1
3.08459 1
3.08916 1
3.21979 1
3.67691 1
3.71407 1
3.82425 1
4.24199 1
4.57763 1
4.87432 1
4.89954 1
4.93053 1
5.12667 1
5.15142 1
5.365 1
5.43516 1
5.57523 1
5.95519 1
5.96634 1
6.49523 1
6.80061 1
6.88715 1
6.92174 1
7.87044 1
8.21372 1
8.27042 1
8.39161 1
8.92048 1
9.53493 1
9.53611 1
9.56251 1
9.61692 1
9.63616 1
9.84213 1
9.85736 1
10.01198 1
10.0131 1
10.21631 1
10.32968 1
10.73752 1
11.31047 1
11.74074 1
11.96524 1
12.18115 1
12.21952 1
12.33807 1
12.85801 1
13.48919 1
14.81614 1
15.05161 1
15.64997 1
16.24937 1
17.23264 1
17.65521 1
18.00524 1
19.28722 1
19.51432 1
19.54581 1
20.06633 1
21.67452 1
21.9547 1
22.32368 1
library(ggplot2)

# Bar chart with one bar per row of the frequency table; the x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n). log2() is used
# instead of log(n) / log(2) so that a sample size that is an exact power of
# two cannot be pushed across an integer by floating-point rounding before
# ceiling() / floor() are applied.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise use the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Breaks start at the minimum; the upper limit max + w guarantees that the
# last break lies above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.25254  4.25254  8.25254 12.25254 16.25254 20.25254 24.25254
# include.lowest = TRUE closes the first class on the left. cut() otherwise
# builds (a, b] intervals, so every observation equal to the first break
# (the minimum) becomes NA and is silently left out of the table.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.253,4.25] 21 0.2800000 21
(4.25,8.25] 17 0.2266667 38
(8.25,12.3] 20 0.2666667 58
(12.3,16.3] 7 0.0933333 65
(16.3,20.3] 7 0.0933333 72
(20.3,24.3] 3 0.0400000 75
str(Freq_table)
## 'data.frame':    6 obs. of  4 variables:
##  $ distance: Factor w/ 6 levels "[0.253,4.25]",..: 1 2 3 4 5 6
##  $ Freq    : int  21 17 20 7 7 3
##  $ Rel_Freq: num  0.28 0.2267 0.2667 0.0933 0.0933 ...
##  $ Cum_Freq: int  21 38 58 65 72 75
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.253,4.25] 21
(4.25,8.25] 17
(8.25,12.3] 20
(12.3,16.3] 7
(16.3,20.3] 7
(20.3,24.3] 3
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_CR; the non-numeric columns
# are reported as NA.
pastecs::stat.desc(df_CR)
##                        id date time continent_code country_name country_code
## nbr.val      7.500000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.490000e+02   NA   NA             NA           NA           NA
## max          7.496000e+03   NA   NA             NA           NA           NA
## range        7.247000e+03   NA   NA             NA           NA           NA
## sum          3.121850e+05   NA   NA             NA           NA           NA
## median       3.762000e+03   NA   NA             NA           NA           NA
## mean         4.162467e+03   NA   NA             NA           NA           NA
## SE.mean      2.838021e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 5.654880e+02   NA   NA             NA           NA           NA
## var          6.040771e+06   NA   NA             NA           NA           NA
## std.dev      2.457798e+03   NA   NA             NA           NA           NA
## coef.var     5.904667e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 7.500000e+01   NA  75.0000000                   NA
## nbr.null        NA 4.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 0.000000e+00   NA   0.2525400                   NA
## max             NA 3.350070e+05   NA  22.3236800                   NA
## range           NA 3.350070e+05   NA  22.0711400                   NA
## sum             NA 1.127522e+06   NA 646.1411800                   NA
## median          NA 6.784000e+03   NA   8.2137200                   NA
## mean            NA 1.503363e+04   NA   8.6152157                   NA
## SE.mean         NA 4.498709e+03   NA   0.6924979                   NA
## CI.mean.0.95    NA 8.963873e+03   NA   1.3798322                   NA
## var             NA 1.517878e+09   NA  35.9665004                   NA
## std.dev         NA 3.895996e+04   NA   5.9972077                   NA
## coef.var        NA 2.591521e+00   NA   0.6961181                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       75.00000000  7.500000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min            8.61170000 -8.535550e+01          NA          NA             NA
## max           10.89160000 -8.294180e+01          NA          NA             NA
## range          2.27990000  2.413700e+00          NA          NA             NA
## sum          740.80410000 -6.307925e+03          NA          NA             NA
## median         9.96430000 -8.408790e+01          NA          NA             NA
## mean           9.87738800 -8.410567e+01          NA          NA             NA
## SE.mean        0.05198610  5.429493e-02          NA          NA             NA
## CI.mean.0.95   0.10358456  1.081850e-01          NA          NA             NA
## var            0.20269158  2.210955e-01          NA          NA             NA
## std.dev        0.45021281  4.702079e-01          NA          NA             NA
## coef.var       0.04558015 -5.590680e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 27.0000000 60.0000000
## nbr.null                 NA      NA         NA 26.0000000 50.0000000
## nbr.na                   NA      NA         NA 48.0000000 15.0000000
## min                      NA      NA         NA  0.0000000  0.0000000
## max                      NA      NA         NA  3.0000000 23.0000000
## range                    NA      NA         NA  3.0000000 23.0000000
## sum                      NA      NA         NA  3.0000000 61.0000000
## median                   NA      NA         NA  0.0000000  0.0000000
## mean                     NA      NA         NA  0.1111111  1.0166667
## SE.mean                  NA      NA         NA  0.1111111  0.4750805
## CI.mean.0.95             NA      NA         NA  0.2283922  0.9506339
## var                      NA      NA         NA  0.3333333 13.5420904
## std.dev                  NA      NA         NA  0.5773503  3.6799579
## coef.var                 NA      NA         NA  5.1961524  3.6196308
##              source_name source_link         prop         ypos
## nbr.val               NA          NA  75.00000000   75.0000000
## nbr.null              NA          NA   0.00000000    0.0000000
## nbr.na                NA          NA   0.00000000    0.0000000
## min                   NA          NA   0.03908434    0.3542283
## max                   NA          NA   3.45492296   99.8386815
## range                 NA          NA   3.41583863   99.4844532
## sum                   NA          NA 100.00000000 3898.8052162
## median                NA          NA   1.27119587   49.1708732
## mean                  NA          NA   1.33333333   51.9840695
## SE.mean               NA          NA   0.10717439    3.7309554
## CI.mean.0.95          NA          NA   0.21354964    7.4340909
## var                   NA          NA   0.86147631 1044.0021043
## std.dev               NA          NA   0.92815748   32.3110214
## coef.var              NA          NA   0.69611811    0.6215562
# Horizontal box plot of `data`.
# NOTE(review): presumably the ts object built in this section's time-series
# chunk -- confirm.
boxplot(data, horizontal = TRUE, col = "green")

Gráfico para Heredia (Costa Rica)

library(readr)
library(knitr)
# Download the landslide catalogue (CSV) from GitHub into `df`.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city", then keep
# only the Costa Rica rows and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_CR <- subset(df, subset = country_name == "Costa Rica")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
library(dplyr)
# Narrow the Costa Rica rows down to the state of Heredia. Filtering df_CR
# (rather than the full catalogue `df`) keeps the country condition, so a
# same-named state in another country cannot leak into the subset.
df_CR <- subset(df_CR, state == "Heredia")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
1786 4/27/10 Early morning NA Costa Rica CR Heredia 1355 Ángeles 19.51432 NA 10.1452 -83.9564 (10.145200000000001, -83.956400000000002) Landslide Landslide Medium Downpour NA NA 0 NA http://en.trend.az/news/incident/1678592.html
2598 10/15/10 NA NA Costa Rica CR Heredia 1355 Ángeles 14.81614 NA 10.1067 -83.9753 (10.1067, -83.975300000000004) Landslide Rockfall Medium Downpour NA NA 2 NA http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010
2742 11/21/10 NA NA Costa Rica CR Heredia 1355 Ángeles 19.54581 NA 10.1433 -83.9529 (10.1433, -83.9529) Landslide Landslide Medium Downpour NA NA 0 NA http://insidecostarica.com/dailynews/2010/november/22/costarica10112204.htm
3472 5/8/11 Night NA Costa Rica CR Heredia 1355 Ángeles 15.05161 NA 10.1118 -83.9793 (10.111800000000001, -83.979299999999995) Landslide Landslide Medium Rain NA NA 0 NA http://insidecostarica.com/dailynews/2011/may/10/costarica11051010.htm
4358 5/13/12 NA NA Costa Rica CR Heredia 5745 Santo Domingo 21.95470 NA 10.1981 -84.0074 (10.1981, -84.007400000000004) Landslide Landslide Medium Downpour NA NA NA NA http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged bar chart: distance by state, with side-by-side bars filled by city.
ggplot(df_CR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar chart: distance by state, with segments filled by city.
ggplot(df_CR, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar of distance, filled by city, drawn in
# polar coordinates with the y axis mapped to the angle.
ggplot(df_CR, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Percentage share of each event in the total distance (prop) and the
# mid-point of its slice on the cumulative scale (ypos), used to place the
# percentage labels.
df_CR <- df_CR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if the package is missing; require() would
# only return FALSE and let percent() fail later.
library(scales)

# "Set4" is not a ColorBrewer palette, so the previous scale_fill_brewer()
# call only raised a warning and fell back to another palette. The base
# hcl.colors() "Set 3" palette yields one colour per city for any number of
# cities.
n_cities <- length(unique(df_CR$city))
city_palette <- grDevices::hcl.colors(n_cities, palette = "Set 3")

ggplot(df_CR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_manual(values = city_palette)

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object with 12 observations per unit of
# time, the first one placed at 2008.
# NOTE(review): values are taken in the current row order of df_CR, which
# was sorted by city in the pie-chart chunk, not by event date -- confirm
# this is intended.
data <- ts(data = df_CR$distance, start = 2008, frequency = 12)
knitr::kable(head(data))
x
21.95470
9.85736
0.26208
10.01310
19.51432
14.81614
# Line plot of the series. The colour is passed to autoplot() so that the
# line itself is drawn in green; inside labs(), colour = "green" only names
# the legend of an aesthetic that is not mapped and leaves the line unchanged.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Pareto chart of the distances; each value is labelled with the city of
# its event.
distance <- setNames(df_CR$distance, df_CR$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                        
## Pareto chart analysis for distance
##                          Frequency  Cum.Freq. Percentage Cum.Percent.
##   Santo Domingo          21.954700  21.954700  19.776315    19.776315
##   Ángeles                19.545810  41.500510  17.606440    37.382755
##   Ángeles                19.514320  61.014830  17.578074    54.960829
##   Ángeles                15.051610  76.066440  13.558162    68.518991
##   Ángeles                14.816140  90.882580  13.346056    81.865047
##   Dulce Nombre de Jesus  10.013100 100.895680   9.019582    90.884629
##   Santo Domingo           9.857360 110.753040   8.879295    99.763924
##   Heredia                 0.262080 111.015120   0.236076   100.000000
# Stem-and-leaf display of the distance column at the default scale.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002
# Print the first six rows of the subset.
head(df_CR, n = 6L)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  4358 5/13/12  <NA>  <NA>           Costa Rica   CR           Here~       5745
## 2  5541 9/16/13  <NA>  <NA>           Costa Rica   CR           Here~       5745
## 3   249 9/9/07   <NA>  <NA>           Costa Rica   CR           Here~      21947
## 4  6696 12/13/14 Night <NA>           Costa Rica   CR           Here~          0
## 5  1786 4/27/10  Earl~ <NA>           Costa Rica   CR           Here~       1355
## 6  2598 10/15/10 <NA>  <NA>           Costa Rica   CR           Here~       1355
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Render the same six rows as a formatted table.
knitr::kable(head(df_CR, n = 6L))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4358 5/13/12 NA NA Costa Rica CR Heredia 5745 Santo Domingo 21.95470 NA 10.1981 -84.0074 (10.1981, -84.007400000000004) Landslide Landslide Medium Downpour NA NA NA NA http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm 19.776315 9.888158
5541 9/16/13 NA NA Costa Rica CR Heredia 5745 Santo Domingo 9.85736 NA 10.1528 -84.1489 (10.152799999999999, -84.148899999999998) Landslide Landslide Medium Tropical cyclone Manuel NA 0 insidecostarica.com http://insidecostarica.com/2013/09/17/torrential-rains-flooding-washed-out-bridges-and-landslides-wreak-havoc/ 8.879295 24.215963
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm 0.236076 28.773648
6696 12/13/14 Night NA Costa Rica CR Heredia 0 Dulce Nombre de Jesus 10.01310 Unknown 10.2054 -83.9041 (10.205399999999999, -83.9041) Landslide Landslide Medium Unknown NA 0 0 Columbia http://www.columbia.co.cr/index.php/nacionales/transporte/6953-la-ruta-32-se-mantiene-cerrada-desde-anoche-por-un-deslizamiento-en-el-sector-de-rio-sucio 9.019582 33.401477
1786 4/27/10 Early morning NA Costa Rica CR Heredia 1355 Ángeles 19.51432 NA 10.1452 -83.9564 (10.145200000000001, -83.956400000000002) Landslide Landslide Medium Downpour NA NA 0 NA http://en.trend.az/news/incident/1678592.html 17.578074 46.700305
2598 10/15/10 NA NA Costa Rica CR Heredia 1355 Ángeles 14.81614 NA 10.1067 -83.9753 (10.1067, -83.975300000000004) Landslide Rockfall Medium Downpour NA NA 2 NA http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010 13.346056 62.162370
# Stem-and-leaf display of the distance column at the default scale.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002
# Same stem-and-leaf display, requested with scale = 2.
stem(df_CR[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002

Tablas de frecuencia

library(questionr)
# Frequency table of the raw distance values: counts, percentages and
# cumulative percentages, sorted by decreasing count, with a "Total" row.
# NOTE(review): the variable shadows base::table() by name; later chunks read
# it under this name, so it is kept.
table <- questionr::freq(
  distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
n % val% %cum val%cum
0.26208 1 12.5 12.5 12.5 12.5
9.85736 1 12.5 12.5 25.0 25.0
10.0131 1 12.5 12.5 37.5 37.5
14.81614 1 12.5 12.5 50.0 50.0
15.05161 1 12.5 12.5 62.5 62.5
19.51432 1 12.5 12.5 75.0 75.0
19.54581 1 12.5 12.5 87.5 87.5
21.9547 1 12.5 12.5 100.0 100.0
Total 8 100.0 100.0 100.0 100.0
# Inspect the classes and column types of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  9 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 8
##  $ %      : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ val%   : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ %cum   : num  12.5 25 37.5 50 62.5 75 87.5 100 100
##  $ val%cum: num  12.5 25 37.5 50 62.5 75 87.5 100 100
# Split the frequency table into category labels and counts, dropping the
# trailing "Total" row so it is not drawn as a bar.
x <- row.names(table)
y <- table$n
# head(v, -1) removes the last element. Unlike v[1:(length(v) - 1)], it
# returns an empty vector (not the first element) when v has a single entry.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.26208 1
9.85736 1
10.0131 1
14.81614 1
15.05161 1
19.51432 1
19.54581 1
21.9547 1
library(ggplot2)
# Bar chart with one bar per row of the frequency table; the x labels are
# rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes from Sturges' rule, k = 1 + log2(n). log2() is used
# instead of log(n) / log(2) so that a sample size that is an exact power of
# two cannot be pushed across an integer by floating-point rounding before
# ceiling() / floor() are applied.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even, otherwise use the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the number of classes, rounded up to a
# whole unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Breaks start at the minimum; the upper limit max + w guarantees that the
# last break lies above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.26208  6.26208 12.26208 18.26208 24.26208
# include.lowest = TRUE closes the first class on the left. cut() otherwise
# builds (a, b] intervals, so every observation equal to the first break
# (the minimum) becomes NA and is silently left out of the table.
distance <- cut(distance, bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.262,6.26] 1 0.125 1
(6.26,12.3] 2 0.250 3
(12.3,18.3] 2 0.250 5
(18.3,24.3] 3 0.375 8
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "[0.262,6.26]",..: 1 2 3 4
##  $ Freq    : int  1 2 2 3
##  $ Rel_Freq: num  0.125 0.25 0.25 0.375
##  $ Cum_Freq: int  1 3 5 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.262,6.26] 1
(6.26,12.3] 2
(12.3,18.3] 2
(18.3,24.3] 3
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_CR; the non-numeric columns
# are reported as NA.
pastecs::stat.desc(df_CR)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      8.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.490000e+02   NA   NA             NA           NA           NA
## max          6.696000e+03   NA   NA             NA           NA           NA
## range        6.447000e+03   NA   NA             NA           NA           NA
## sum          2.744200e+04   NA   NA             NA           NA           NA
## median       3.107000e+03   NA   NA             NA           NA           NA
## mean         3.430250e+03   NA   NA             NA           NA           NA
## SE.mean      7.315967e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.729951e+03   NA   NA             NA           NA           NA
## var          4.281870e+06   NA   NA             NA           NA           NA
## std.dev      2.069268e+03   NA   NA             NA           NA           NA
## coef.var     6.032412e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 8.000000e+00   NA   8.0000000                   NA
## nbr.null        NA 1.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 0.000000e+00   NA   0.2620800                   NA
## max             NA 2.194700e+04   NA  21.9547000                   NA
## range           NA 2.194700e+04   NA  21.6926200                   NA
## sum             NA 3.885700e+04   NA 111.0151200                   NA
## median          NA 1.355000e+03   NA  14.9338750                   NA
## mean            NA 4.857125e+03   NA  13.8768900                   NA
## SE.mean         NA 2.557523e+03   NA   2.4924134                   NA
## CI.mean.0.95    NA 6.047580e+03   NA   5.8936213                   NA
## var             NA 5.232738e+07   NA  49.6969984                   NA
## std.dev         NA 7.233767e+03   NA   7.0496098                   NA
## coef.var        NA 1.489310e+00   NA   0.5080108                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       8.000000000  8.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          10.000000000 -8.414890e+01          NA          NA             NA
## max          10.205400000 -8.390410e+01          NA          NA             NA
## range         0.205400000  2.448000e-01          NA          NA             NA
## sum          81.063300000 -6.720410e+02          NA          NA             NA
## median       10.144250000 -8.397730e+01          NA          NA             NA
## mean         10.132912500 -8.400512e+01          NA          NA             NA
## SE.mean       0.022739522  2.987758e-02          NA          NA             NA
## CI.mean.0.95  0.053770426  7.064924e-02          NA          NA             NA
## var           0.004136687  7.141356e-03          NA          NA             NA
## std.dev       0.064317081  8.450655e-02          NA          NA             NA
## coef.var      0.006347344 -1.005969e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  6.0000000          NA
## nbr.null                 NA      NA         NA        1  5.0000000          NA
## nbr.na                   NA      NA         NA        7  2.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  2.0000000          NA
## range                    NA      NA         NA        0  2.0000000          NA
## sum                      NA      NA         NA        0  2.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.3333333          NA
## SE.mean                  NA      NA         NA       NA  0.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.8568606          NA
## var                      NA      NA         NA       NA  0.6666667          NA
## std.dev                  NA      NA         NA       NA  0.8164966          NA
## coef.var                 NA      NA         NA       NA  2.4494897          NA
##              source_link        prop        ypos
## nbr.val               NA   8.0000000   8.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.2360760   9.8881576
## max                   NA  19.7763152  93.2209189
## range                 NA  19.5402392  83.3327613
## sum                   NA 100.0000000 376.0014582
## median                NA  13.4521090  40.0508913
## mean                  NA  12.5000000  47.0001823
## SE.mean               NA   2.2451117  10.1053438
## CI.mean.0.95          NA   5.3088456  23.8953410
## var                   NA  40.3242124 816.9437832
## std.dev               NA   6.3501348  28.5822285
## coef.var              NA   0.5080108   0.6081302
# Horizontal box plot of `data`, the ts object built from df_CR$distance in
# the time-series chunk.
boxplot(data, horizontal = TRUE, col = "green")

Gráfico para San José (Costa Rica)

library(readr)
library(knitr)
# Download the landslide catalogue (CSV) from GitHub into `df`.
df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns the short names "state" and "city", then keep
# only the Costa Rica rows and preview them.
names(df)[c(7, 9)] <- c("state", "city")
df_CR <- subset(df, subset = country_name == "Costa Rica")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
library(dplyr)
# Narrow the Costa Rica rows down to the state of San José. Filtering df_CR
# (rather than the full catalogue `df`) keeps the country condition, so a
# same-named state in another country cannot leak into the subset.
df_CR <- subset(df_CR, state == "San José")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
776 9/6/08 NA NA Costa Rica CR San José 10028 Daniel Flores 1.85787 NA 9.3500 -83.6780 (9.35, -83.677999999999997) Landslide Mudslide Medium Downpour NA NA NA NA http://insidecostarica.com/dailynews/2008/september/07/nac02.htm
838 10/12/08 NA NA Costa Rica CR San José 34877 San Isidro 16.24937 NA 9.5190 -83.7060 (9.5190000000000001, -83.706000000000003) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1014081.htm
839 10/12/08 NA NA Costa Rica CR San José 8292 Santiago 12.85801 NA 9.7640 -84.3970 (9.7639999999999993, -84.397000000000006) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1014082.htm
2526 10/1/10 NA NA Costa Rica CR San José 2833 Salitral 0.25254 NA 9.9108 -84.1764 (9.9108000000000001, -84.176400000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.ticotimes.net/News/Mudslides-Force-63-Out-of-Santa-Ana-Homes_Friday-October-01-2010

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged bar chart: distance on the y axis, state on the x axis,
# bars filled by city and placed side by side.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar chart: same mapping as the dodged chart, but the bars of each
# state are piled on top of each other.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped around the y axis
# with polar coordinates.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add two helper columns for the labelled pie chart:
#   prop - each record's share of the total distance, in percent
#   ypos - midpoint of the record's slice on the cumulative percent scale,
#          used as the label position
# Rows are sorted by descending city before the cumulative sum is taken.
df_CR <- df_CR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_CR, aes(x = state, y = prop, fill = city)) +
  geom_col(width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and Brewer palettes are capped at a
  # small number of colours; the hue scale works for any number of cities.
  scale_fill_hue()
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Build the series from the distances in chronological order. df_CR was
# previously sorted by city, so its row order is not a time order; dates are
# stored as month/day/two-digit-year strings.
# NOTE(review): frequency = 12 and start = 2008 label the points as monthly
# observations from 2008 although the events are irregularly dated - confirm
# that this nominal time axis is intended.
data <- ts(
  df_CR$distance[order(as.Date(df_CR$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
6.49523
12.85801
8.27042
0.55804
0.72957
1.16705
# Line plot of the series. The colour is passed to autoplot() so it is applied
# to the line; inside labs() it would only set the title of a colour legend.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances of the selected records, labelled by city so that the bars of the
# "por ciudades" chart can be told apart (the state is the same for every row).
# Records without a city get the literal label "NA".
distance <- df_CR$distance
names(distance) <- ifelse(is.na(df_CR$city), "NA", df_CR$city)
pareto.chart(
  distance,
  ylab = "distance",
  col = heat.colors(length(distance)),
  # Tick marks of the cumulative-percentage axis; a cumulative percentage
  # cannot exceed 100.
  cumperc = seq(0, 100, by = 10),
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por ciudades"
)

##           
## Pareto chart analysis for distance
##              Frequency   Cum.Freq.  Percentage Cum.Percent.
##   San José  22.3236800  22.3236800  11.3956252   11.3956252
##   San José  21.6745200  43.9982000  11.0642469   22.4598721
##   San José  16.2493700  60.2475700   8.2948569   30.7547290
##   San José  15.6499700  75.8975400   7.9888797   38.7436087
##   San José  12.8580100  88.7555500   6.5636608   45.3072695
##   San José  11.3104700 100.0660200   5.7736842   51.0809536
##   San José  10.7375200 110.8035400   5.4812089   56.5621626
##   San José  10.0119800 120.8155200   5.1108407   61.6730032
##   San José   9.5625100 130.3780300   4.8813986   66.5544018
##   San José   9.5361100 139.9141400   4.8679221   71.4223240
##   San José   8.3916100 148.3057500   4.2836863   75.7060103
##   San José   8.2704200 156.5761700   4.2218222   79.9278325
##   San José   8.2137200 164.7898900   4.1928784   84.1207108
##   San José   6.4952300 171.2851200   3.3156364   87.4363473
##   San José   4.8995400 176.1846600   2.5010805   89.9374278
##   San José   4.5776300 180.7622900   2.3367543   92.2741821
##   San José   3.7140700 184.4763600   1.8959307   94.1701128
##   San José   3.6769100 188.1532700   1.8769615   96.0470743
##   San José   2.9260500 191.0793200   1.4936681   97.5407425
##   San José   1.8578700 192.9371900   0.9483916   98.4891341
##   San José   1.1670500 194.1042400   0.5957470   99.0848810
##   San José   0.7295700 194.8338100   0.3724254   99.4573065
##   San José   0.5580400 195.3918500   0.2848641   99.7421705
##   San José   0.2525400 195.6443900   0.1289147   99.8710853
##   San José   0.2525400 195.8969300   0.1289147  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 001112344
##   0 | 556888
##   1 | 000113
##   1 | 66
##   2 | 22
# Print the first rows of the data frame.
df_CR %>% head()
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state    population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1  2686 11/4/10  <NA>  <NA>           Costa Rica   CR           San José      22433
## 2   839 10/12/08 <NA>  <NA>           Costa Rica   CR           San José       8292
## 3  7496 11/11/15 <NA>  <NA>           Costa Rica   CR           San José       8292
## 4  2681 11/4/10  <NA>  <NA>           Costa Rica   CR           San José       4255
## 5  7444 10/29/15 <NA>  <NA>           Costa Rica   CR           San José      26047
## 6  7494 9/25/15  <NA>  <NA>           Costa Rica   CR           San José     335007
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
df_CR %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2686 11/4/10 NA NA Costa Rica CR San José 22433 Tejar 6.49523 NA 9.7112 -84.2812 (9.7111999999999998, -84.281199999999998) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 3.3156364 1.657818
839 10/12/08 NA NA Costa Rica CR San José 8292 Santiago 12.85801 NA 9.7640 -84.3970 (9.7639999999999993, -84.397000000000006) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1014082.htm 6.5636608 6.597467
7496 11/11/15 NA NA Costa Rica CR San José 8292 Santiago 8.27042 Above road 9.8747 -84.3841 (9.8747000000000007, -84.384100000000004) Landslide Landslide Small Rain NA 0 0 La Nacion http://www.nacion.com/sucesos/Camino-Puriscal-Turrubares-cerrado-derrumbe_0_1523647757.html 4.2218222 11.990208
2681 11/4/10 NA NA Costa Rica CR San José 4255 San Marcos 0.55804 NA 9.6563 -84.0236 (9.6562999999999999, -84.023600000000002) Landslide Mudslide Medium Tropical cyclone Tropical Storm Tomas NA 3 NA http://articles.cnn.com/2010-11-05/world/costa.rica.fatal.mudslides_1_mud-slides-drinking-or-cooking-safe-water-supplies?_s=PM:WORLD 0.2848641 14.243551
7444 10/29/15 NA NA Costa Rica CR San José 26047 San Juan 0.72957 Above road 9.9574 -84.0879 (9.9573999999999998, -84.087900000000005) Landslide Landslide Medium Rain NA 0 0 La Nacion http://www.nacion.com/nacional/transportes/Deslizamiento-Tibas-afecta-servicio-Heredia_0_1521048003.html 0.3724254 14.572196
7494 9/25/15 NA NA Costa Rica CR San José 335007 San José 1.16705 Urban area 9.9402 -84.0771 (9.9402000000000008, -84.077100000000002) Landslide Landslide Small Rain NA 0 0 Repretel http://www.repretel.com/actualidad/rally-amarralo-3337 0.5957470 15.056282
# Stem-and-leaf display of the distance column (default scale).
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 001112344
##   0 | 556888
##   1 | 000113
##   1 | 66
##   2 | 22
# Stem-and-leaf display of the distance column, requesting a doubled scale.
stem(df_CR[["distance"]], scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 001112344
##   0 | 556888
##   1 | 000113
##   1 | 66
##   2 | 22

Tablas de frecuencia

library(questionr)
# One-way frequency table of the distance values: counts, percentages and
# cumulative percentages, sorted by decreasing frequency, with a total row.
table <- questionr::freq(distance, sort = "dec", total = TRUE, cum = TRUE)
table %>% knitr::kable()
n % val% %cum val%cum
0.25254 2 8 8 8 8
0.55804 1 4 4 12 12
0.72957 1 4 4 16 16
1.16705 1 4 4 20 20
1.85787 1 4 4 24 24
2.92605 1 4 4 28 28
3.67691 1 4 4 32 32
3.71407 1 4 4 36 36
4.57763 1 4 4 40 40
4.89954 1 4 4 44 44
6.49523 1 4 4 48 48
8.21372 1 4 4 52 52
8.27042 1 4 4 56 56
8.39161 1 4 4 60 60
9.53611 1 4 4 64 64
9.56251 1 4 4 68 68
10.01198 1 4 4 72 72
10.73752 1 4 4 76 76
11.31047 1 4 4 80 80
12.85801 1 4 4 84 84
15.64997 1 4 4 88 88
16.24937 1 4 4 92 92
21.67452 1 4 4 96 96
22.32368 1 4 4 100 100
Total 25 100 100 100 100
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  25 obs. of  5 variables:
##  $ n      : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  8 4 4 4 4 4 4 4 4 4 ...
##  $ val%   : num  8 4 4 4 4 4 4 4 4 4 ...
##  $ %cum   : num  8 12 16 20 24 28 32 36 40 44 ...
##  $ val%cum: num  8 12 16 20 24 28 32 36 40 44 ...
# Row names of the frequency table are the distinct distance values and
# column n holds their counts; the last row is the "Total" row.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(, -1) also behaves correctly when only
# one row exists, unlike 1:(length - 1).
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor in table order. As plain character strings they
# would be sorted alphabetically on a plot axis ("10.0..." before "2.9...").
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.25254 2
0.55804 1
0.72957 1
1.16705 1
1.85787 1
2.92605 1
3.67691 1
3.71407 1
4.57763 1
4.89954 1
6.49523 1
8.21372 1
8.27042 1
8.39161 1
9.53611 1
9.56251 1
10.01198 1
10.73752 1
11.31047 1
12.85801 1
15.64997 1
16.24937 1
21.67452 1
22.32368 1
library(ggplot2)
# Bar chart of the frequency of each distinct distance value,
# with the x tick labels rotated 90 degrees.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n) suggested classes, rounded both ways.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even,
# otherwise use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split over the classes, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
print(bins)
## [1]  0.25254  5.25254 10.25254 15.25254 20.25254 25.25254
# Assign every distance to its class interval. The first break equals the
# minimum of the data, and cut() builds right-closed intervals by default, so
# include.lowest = TRUE is needed to keep the smallest observations; without it
# they become NA and are missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.253,5.25] 9 0.3913043 9
(5.25,10.3] 7 0.3043478 16
(10.3,15.3] 3 0.1304348 19
(15.3,20.3] 2 0.0869565 21
(20.3,25.3] 2 0.0869565 23
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.253,5.25]",..: 1 2 3 4 5
##  $ Freq    : int  9 7 3 2 2
##  $ Rel_Freq: num  0.391 0.304 0.13 0.087 0.087
##  $ Cum_Freq: int  9 16 19 21 23
# Two-column data frame for plotting: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
df %>% knitr::kable()
x y
(0.253,5.25] 9
(5.25,10.3] 7
(10.3,15.3] 3
(15.3,20.3] 2
(20.3,25.3] 2
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only; character columns
# cannot be summarised and would just add all-NA columns to the result.
stat.desc(df_CR[vapply(df_CR, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      2.500000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.990000e+02   NA   NA             NA           NA           NA
## max          7.496000e+03   NA   NA             NA           NA           NA
## range        7.197000e+03   NA   NA             NA           NA           NA
## sum          1.039670e+05   NA   NA             NA           NA           NA
## median       2.692000e+03   NA   NA             NA           NA           NA
## mean         4.158680e+03   NA   NA             NA           NA           NA
## SE.mean      5.330458e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.100153e+03   NA   NA             NA           NA           NA
## var          7.103446e+06   NA   NA             NA           NA           NA
## std.dev      2.665229e+03   NA   NA             NA           NA           NA
## coef.var     6.408834e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 2.500000e+01   NA  25.000000                   NA
## nbr.null        NA 3.000000e+00   NA   0.000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.000000                   NA
## min             NA 0.000000e+00   NA   0.252540                   NA
## max             NA 3.350070e+05   NA  22.323680                   NA
## range           NA 3.350070e+05   NA  22.071140                   NA
## sum             NA 6.934110e+05   NA 195.896930                   NA
## median          NA 1.002800e+04   NA   8.213720                   NA
## mean            NA 2.773644e+04   NA   7.835877                   NA
## SE.mean         NA 1.308803e+04   NA   1.270388                   NA
## CI.mean.0.95    NA 2.701236e+04   NA   2.621953                   NA
## var             NA 4.282410e+09   NA  40.347169                   NA
## std.dev         NA 6.544013e+04   NA   6.351942                   NA
## coef.var        NA 2.359356e+00   NA   0.810623                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       25.00000000  2.500000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min            9.19220000 -8.439700e+01          NA          NA             NA
## max           10.14950000 -8.355650e+01          NA          NA             NA
## range          0.95730000  8.405000e-01          NA          NA             NA
## sum          242.83950000 -2.098785e+03          NA          NA             NA
## median         9.77890000 -8.396730e+01          NA          NA             NA
## mean           9.71358000 -8.395140e+01          NA          NA             NA
## SE.mean        0.06054644  4.905849e-02          NA          NA             NA
## CI.mean.0.95   0.12496171  1.012517e-01          NA          NA             NA
## var            0.09164678  6.016838e-02          NA          NA             NA
## std.dev        0.30273219  2.452924e-01          NA          NA             NA
## coef.var       0.03116587 -2.921839e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA       10  21.000000          NA
## nbr.null                 NA      NA         NA       10  17.000000          NA
## nbr.na                   NA      NA         NA       15   4.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  23.000000          NA
## range                    NA      NA         NA        0  23.000000          NA
## sum                      NA      NA         NA        0  40.000000          NA
## median                   NA      NA         NA        0   0.000000          NA
## mean                     NA      NA         NA        0   1.904762          NA
## SE.mean                  NA      NA         NA        0   1.172918          NA
## CI.mean.0.95             NA      NA         NA        0   2.446664          NA
## var                      NA      NA         NA        0  28.890476          NA
## std.dev                  NA      NA         NA        0   5.374986          NA
## coef.var                 NA      NA         NA      NaN   2.821868          NA
##              source_link        prop         ypos
## nbr.val               NA  25.0000000   25.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.1289147    1.6578182
## max                   NA  11.3956252   97.5593007
## range                 NA  11.2667105   95.9014825
## sum                   NA 100.0000000 1339.6995323
## median                NA   4.1928784   66.5210348
## mean                  NA   4.0000000   53.5879813
## SE.mean               NA   0.6484984    6.3076988
## CI.mean.0.95          NA   1.3384349   13.0184506
## var                   NA  10.5137540  994.6766161
## std.dev               NA   3.2424919   31.5384942
## coef.var              NA   0.8106230    0.5885367
# Horizontal box plot of the distance series.
boxplot(data, col = "green", horizontal = TRUE)

Gráfico para Alajuela (Costa Rica)

library(readr)
library(knitr)
# Reload the full landslide catalog. `df` was reassigned to small plotting data
# frames earlier in this document. show_col_types = FALSE silences the
# column-specification message, as readr itself suggests.
df <- read_csv(
  "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv",
  show_col_types = FALSE
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names.
names(df)[c(7, 9)] <- c("state", "city")
# Keep the Costa Rica records and preview the first rows as a table.
df_CR <- dplyr::filter(df, country_name == "Costa Rica")
df_CR %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
library(dplyr)
# Keep only the landslides recorded in the Alajuela province of Costa Rica.
# The country condition is explicit so that a province with the same name in
# another country cannot be included by accident.
df_CR <- subset(df, country_name == "Costa Rica" & state == "Alajuela")
# Preview the first rows of the subset as a formatted table.
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
1749 4/14/10 NA NA Costa Rica CR Alajuela 14448 Desamparados 6.88715 Above road 9.9323 -84.4453 (9.9322999999999997, -84.445300000000003) Landslide Landslide Medium Downpour NA NA 0 NA http://www.insidecostarica.com/dailynews/2010/april/16/costarica10041602.htm
1886 5/22/10 18:00:00 NA Costa Rica CR Alajuela 14448 Desamparados 6.92174 Above road 9.9290 -84.4428 (9.9290000000000003, -84.442800000000005) Landslide Landslide Medium Downpour NA 3 0 Costa Rica News http://thecostaricanews.com/rains-cause-landslides-and-road-accidents-on-caldera/3255
2174 7/30/10 9:30:00 NA Costa Rica CR Alajuela 14448 Desamparados 4.24199 Above road 9.9271 -84.4568 (9.9270999999999994, -84.456800000000001) Landslide Landslide Medium Rain NA NA 0 La Fortuna https://lafortunatimes.wordpress.com/2010/07/30/landslide-caused-closure-of-san-jose-caldera-for-most-of-the-day-friday/
2516 9/29/10 NA NA Costa Rica CR Alajuela 3624 San Rafael 1.47396 NA 10.0757 -84.4793 (10.075699999999999, -84.479299999999995) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010
2682 11/4/10 NA NA Costa Rica CR Alajuela 3624 San Rafael 9.61692 NA 10.0421 -84.5577 (10.0421, -84.557699999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged bar chart: distance on the y axis, state on the x axis,
# bars filled by city and placed side by side.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar chart: same mapping as the dodged chart, but the bars of each
# state are piled on top of each other.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar wrapped around the y axis
# with polar coordinates.
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Add two helper columns for the labelled pie chart:
#   prop - each record's share of the total distance, in percent
#   ypos - midpoint of the record's slice on the cumulative percent scale,
#          used as the label position
# Rows are sorted by descending city before the cumulative sum is taken.
df_CR <- df_CR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() stops with an error if scales is missing; require() would only
# return FALSE and let percent() fail later.
library(scales)
ggplot(df_CR, aes(x = state, y = prop, fill = city)) +
  geom_col(width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black", size = 3
  ) +
  # "Set4" is not a ColorBrewer palette and Brewer palettes are capped at a
  # small number of colours; the hue scale works for any number of cities.
  scale_fill_hue()
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Gráfico de series temporales

library(forecast)
# Build the series from the distances in chronological order. df_CR was
# previously sorted by city, so its row order is not a time order; dates are
# stored as month/day/two-digit-year strings.
# NOTE(review): frequency = 12 and start = 2008 label the points as monthly
# observations from 2008 although the events are irregularly dated - confirm
# that this nominal time axis is intended.
data <- ts(
  df_CR$distance[order(as.Date(df_CR$date, format = "%m/%d/%y"))],
  frequency = 12,
  start = 2008
)
knitr::kable(head(data))
x
0.70048
3.21979
5.43516
1.47396
9.61692
4.87432
# Line plot of the series. The colour is passed to autoplot() so it is applied
# to the line; inside labs() it would only set the title of a colour legend.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Distances of the selected records, labelled by city so that the bars of the
# "por ciudades" chart can be told apart (the state is the same for every row).
# Records without a city get the literal label "NA".
distance <- df_CR$distance
names(distance) <- ifelse(is.na(df_CR$city), "NA", df_CR$city)
pareto.chart(
  distance,
  ylab = "distance",
  col = heat.colors(length(distance)),
  # Tick marks of the cumulative-percentage axis, every 10 points up to 100.
  cumperc = seq(0, 100, by = 10),
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por ciudades"
)

##           
## Pareto chart analysis for distance
##              Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Alajuela  11.9652400  11.9652400  10.5708367   10.5708367
##   Alajuela  10.3296800  22.2949200   9.1258813   19.6967180
##   Alajuela   9.8421300  32.1370500   8.6951494   28.3918674
##   Alajuela   9.6169200  41.7539700   8.4961849   36.8880523
##   Alajuela   6.9217400  48.6757100   6.1150953   43.0031476
##   Alajuela   6.8871500  55.5628600   6.0845364   49.0876840
##   Alajuela   6.8006100  62.3634700   6.0080816   55.0957655
##   Alajuela   5.9663400  68.3298100   5.2710356   60.3668011
##   Alajuela   5.9551900  74.2850000   5.2611850   65.6279861
##   Alajuela   5.5752300  79.8602300   4.9255047   70.5534908
##   Alajuela   5.4351600  85.2953900   4.8017582   75.3552490
##   Alajuela   5.1266700  90.4220600   4.5292189   79.8844679
##   Alajuela   4.8743200  95.2963800   4.3062772   84.1907451
##   Alajuela   4.2419900  99.5383700   3.7476376   87.9383828
##   Alajuela   3.2197900 102.7581600   2.8445626   90.7829454
##   Alajuela   3.0891600 105.8473200   2.7291559   93.5121013
##   Alajuela   3.0845900 108.9319100   2.7251185   96.2372198
##   Alajuela   2.0846900 111.0166000   1.8417447   98.0789646
##   Alajuela   1.4739600 112.4905600   1.3021879   99.3811524
##   Alajuela   0.7004800 113.1910400   0.6188476  100.0000000
# Stem-and-leaf display of the distance column.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1123334
##   0 | 555666777
##   1 | 0002
# Print the first rows of the data frame.
df_CR %>% head()
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state    population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>         <dbl>
## 1  3762 7/12/11  <NA>  <NA>           Costa Rica   CR           Alajuela       4185
## 2  7486 10/27/15 <NA>  <NA>           Costa Rica   CR           Alajuela       5745
## 3  2703 11/5/10  <NA>  <NA>           Costa Rica   CR           Alajuela       2107
## 4  2516 9/29/10  <NA>  <NA>           Costa Rica   CR           Alajuela       3624
## 5  2682 11/4/10  <NA>  <NA>           Costa Rica   CR           Alajuela       3624
## 6  5408 8/27/13  <NA>  <NA>           Costa Rica   CR           Alajuela       1015
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview, rendered as a formatted table.
df_CR %>%
  head() %>%
  knitr::kable()
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3762 7/12/11 NA NA Costa Rica CR Alajuela 4185 Upala 0.70048 NA 10.8916 -85.0141 (10.8916, -85.014099999999999) Landslide Landslide Medium Downpour NA NA 0 NA http://www.ticotimes.net/Current-Edition/News-Briefs/Upala-on-yellow-alert-after-earthquake-heavy-rains_Wednesday-July-13-2011 0.6188476 0.3094238
7486 10/27/15 NA NA Costa Rica CR Alajuela 5745 Santo Domingo 3.21979 Above road 10.0600 -84.1841 (10.06, -84.184100000000001) Landslide Mudslide Small Rain NA 0 0 La Prensa Libre http://www.laprensalibre.cr/Noticias/detalle/45060/430/deslizamiento-deja-bus-atrapado-en-alajuela 2.8445626 2.0411289
2703 11/5/10 NA NA Costa Rica CR Alajuela 2107 Santiago 5.43516 NA 9.9990 -84.4876 (9.9990000000000006, -84.4876) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 4.8017582 5.8642893
2516 9/29/10 NA NA Costa Rica CR Alajuela 3624 San Rafael 1.47396 NA 10.0757 -84.4793 (10.075699999999999, -84.479299999999995) Landslide Mudslide Medium Downpour NA NA 0 NA http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010 1.3021879 8.9162623
2682 11/4/10 NA NA Costa Rica CR Alajuela 3624 San Rafael 9.61692 NA 10.0421 -84.5577 (10.0421, -84.557699999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 8.4961849 13.8154486
5408 8/27/13 NA NA Costa Rica CR Alajuela 1015 Sabanilla 4.87432 NA 10.1181 -84.2146 (10.1181, -84.214600000000004) Landslide Landslide Medium Downpour NA NA NA insidecostarica.com http://insidecostarica.com/2013/08/28/flooding-and-landslides-after-heavy-downpours-across-the-country/ 4.3062772 20.2166797
# Stem-and-leaf display of the distance column (default scale).
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1123334
##   0 | 555666777
##   1 | 0002
# Stem-and-leaf display of the distance column, requesting a doubled scale.
stem(df_CR[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 75
##    2 | 1112
##    4 | 29146
##    6 | 00899
##    8 | 68
##   10 | 3
##   12 | 0

Tablas de frecuencia

library(questionr)
# One-way frequency table of the distance values: counts, percentages and
# cumulative percentages, sorted by decreasing frequency, with a total row.
table <- questionr::freq(distance, sort = "dec", total = TRUE, cum = TRUE)
table %>% knitr::kable()
n % val% %cum val%cum
0.70048 1 5 5 5 5
1.47396 1 5 5 10 10
2.08469 1 5 5 15 15
3.08459 1 5 5 20 20
3.08916 1 5 5 25 25
3.21979 1 5 5 30 30
4.24199 1 5 5 35 35
4.87432 1 5 5 40 40
5.12667 1 5 5 45 45
5.43516 1 5 5 50 50
5.57523 1 5 5 55 55
5.95519 1 5 5 60 60
5.96634 1 5 5 65 65
6.80061 1 5 5 70 70
6.88715 1 5 5 75 75
6.92174 1 5 5 80 80
9.61692 1 5 5 85 85
9.84213 1 5 5 90 90
10.32968 1 5 5 95 95
11.96524 1 5 5 100 100
Total 20 100 100 100 100
# Inspect the structure of the frequency table.
utils::str(table)
## Classes 'freqtab' and 'data.frame':  21 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ val%   : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ %cum   : num  5 10 15 20 25 30 35 40 45 50 ...
##  $ val%cum: num  5 10 15 20 25 30 35 40 45 50 ...
# Row names of the frequency table are the distinct distance values and
# column n holds their counts; the last row is the "Total" row.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(, -1) also behaves correctly when only
# one row exists, unlike 1:(length - 1).
names <- head(x, -1)
freqs <- head(y, -1)
# Store the labels as a factor in table order. As plain character strings they
# would be sorted alphabetically on a plot axis ("10.3..." before "2.0...").
df <- data.frame(x = factor(names, levels = names), y = freqs)
knitr::kable(df)
x y
0.70048 1
1.47396 1
2.08469 1
3.08459 1
3.08916 1
3.21979 1
4.24199 1
4.87432 1
5.12667 1
5.43516 1
5.57523 1
5.95519 1
5.96634 1
6.80061 1
6.88715 1
6.92174 1
9.61692 1
9.84213 1
10.32968 1
11.96524 1
library(ggplot2)
# Bar chart of the frequency of each distinct distance value,
# with the x tick labels rotated 90 degrees.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n) suggested classes, rounded both ways.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even,
# otherwise use the rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split over the classes, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w up to at most max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
print(bins)
## [1]  0.70048  3.70048  6.70048  9.70048 12.70048
# Assign every distance to its class interval. The first break equals the
# minimum of the data, and cut() builds right-closed intervals by default, so
# include.lowest = TRUE is needed to keep the smallest observation; without it
# that value becomes NA and is missing from the counts.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.7,3.7] 5 0.2631579 5
(3.7,6.7] 7 0.3684211 12
(6.7,9.7] 4 0.2105263 16
(9.7,12.7] 3 0.1578947 19
# Inspect the structure of the grouped frequency table.
utils::str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ distance: Factor w/ 4 levels "(0.7,3.7]","(3.7,6.7]",..: 1 2 3 4
##  $ Freq    : int  5 7 4 3
##  $ Rel_Freq: num  0.263 0.368 0.211 0.158
##  $ Cum_Freq: int  5 12 16 19
# Two-column data frame for plotting: class interval (x) and its count (y).
df <- with(Freq_table, data.frame(x = distance, y = Freq))
df %>% knitr::kable()
x y
(0.7,3.7] 5
(3.7,6.7] 7
(6.7,9.7] 4
(9.7,12.7] 3
library(ggplot2)

# Bar chart of the number of records in each distance class.
ggplot(df, aes(x, y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns only; character columns
# cannot be summarised and would just add all-NA columns to the result.
stat.desc(df_CR[vapply(df_CR, is.numeric, logical(1))])
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.010000e+02   NA   NA             NA           NA           NA
## max          7.488000e+03   NA   NA             NA           NA           NA
## range        7.187000e+03   NA   NA             NA           NA           NA
## sum          9.718800e+04   NA   NA             NA           NA           NA
## median       5.878000e+03   NA   NA             NA           NA           NA
## mean         4.859400e+03   NA   NA             NA           NA           NA
## SE.mean      5.261514e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.101248e+03   NA   NA             NA           NA           NA
## var          5.536707e+06   NA   NA             NA           NA           NA
## std.dev      2.353021e+03   NA   NA             NA           NA           NA
## coef.var     4.842204e-01   NA   NA             NA           NA           NA
##              state   population city    distance location_description
## nbr.val         NA 2.000000e+01   NA  20.0000000                   NA
## nbr.null        NA 0.000000e+00   NA   0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA   0.0000000                   NA
## min             NA 1.015000e+03   NA   0.7004800                   NA
## max             NA 4.749400e+04   NA  11.9652400                   NA
## range           NA 4.647900e+04   NA  11.2647600                   NA
## sum             NA 1.924900e+05   NA 113.1910400                   NA
## median          NA 7.014000e+03   NA   5.5051950                   NA
## mean            NA 9.624500e+03   NA   5.6595520                   NA
## SE.mean         NA 2.281502e+03   NA   0.6812501                   NA
## CI.mean.0.95    NA 4.775238e+03   NA   1.4258729                   NA
## var             NA 1.041050e+08   NA   9.2820347                   NA
## std.dev         NA 1.020319e+04   NA   3.0466432                   NA
## coef.var        NA 1.060126e+00   NA   0.5383188                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       20.00000000  2.000000e+01          NA          NA             NA
## nbr.null       0.00000000  0.000000e+00          NA          NA             NA
## nbr.na         0.00000000  0.000000e+00          NA          NA             NA
## min            9.91890000 -8.501410e+01          NA          NA             NA
## max           10.89160000 -8.418070e+01          NA          NA             NA
## range          0.97270000  8.334000e-01          NA          NA             NA
## sum          202.24760000 -1.688552e+03          NA          NA             NA
## median        10.04315000 -8.444405e+01          NA          NA             NA
## mean          10.11238000 -8.442758e+01          NA          NA             NA
## SE.mean        0.05493583  4.594981e-02          NA          NA             NA
## CI.mean.0.95   0.11498201  9.617405e-02          NA          NA             NA
## var            0.06035891  4.222770e-02          NA          NA             NA
## std.dev        0.24568050  2.054938e-01          NA          NA             NA
## coef.var       0.02429502 -2.433965e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 11.0000000 18.0000000
## nbr.null                 NA      NA         NA 10.0000000 15.0000000
## nbr.na                   NA      NA         NA  9.0000000  2.0000000
## min                      NA      NA         NA  0.0000000  0.0000000
## max                      NA      NA         NA  3.0000000 14.0000000
## range                    NA      NA         NA  3.0000000 14.0000000
## sum                      NA      NA         NA  3.0000000 16.0000000
## median                   NA      NA         NA  0.0000000  0.0000000
## mean                     NA      NA         NA  0.2727273  0.8888889
## SE.mean                  NA      NA         NA  0.2727273  0.7749716
## CI.mean.0.95             NA      NA         NA  0.6076742  1.6350471
## var                      NA      NA         NA  0.8181818 10.8104575
## std.dev                  NA      NA         NA  0.9045340  3.2879260
## coef.var                 NA      NA         NA  3.3166248  3.6989168
##              source_name source_link        prop         ypos
## nbr.val               NA          NA  20.0000000   20.0000000
## nbr.null              NA          NA   0.0000000    0.0000000
## nbr.na                NA          NA   0.0000000    0.0000000
## min                   NA          NA   0.6188476    0.3094238
## max                   NA          NA  10.5708367   97.5372476
## range                 NA          NA   9.9519891   97.2278239
## sum                   NA          NA 100.0000000  984.6035428
## median                NA          NA   4.8636314   51.7474020
## mean                  NA          NA   5.0000000   49.2301771
## SE.mean               NA          NA   0.6018587    7.4144621
## CI.mean.0.95          NA          NA   1.2597047   15.5186476
## var                   NA          NA   7.2446780 1099.4849742
## std.dev               NA          NA   2.6915940   33.1584827
## coef.var              NA          NA   0.5383188    0.6735398
# Horizontal box plot of the values stored in `data`.
boxplot(
  x = data,
  col = "green",
  horizontal = TRUE
)

Gráfico para Puntarenas (Costa Rica)

# Attach the reader and table-rendering packages, then load the full
# landslide catalogue into `df`.
library(knitr)
library(readr)
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give columns 7 and 9 short names, keep only the Costa Rica rows and
# preview the first ones.
library(dplyr)
names(df)[c(7, 9)] <- c("state", "city")
df_CR <- subset(x = df, subset = country_name == "Costa Rica")
knitr::kable(x = head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
# Restrict the working set to landslides in Puntarenas province. The
# country condition is repeated because `df` holds the whole catalogue
# and a province name alone does not identify Costa Rica.
library(dplyr)
df_CR <- subset(df, country_name == "Costa Rica" & state == "Puntarenas")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
845 10/15/08 NA NA Costa Rica CR Puntarenas 6777 Golfito 11.74074 NA 8.6700 -83.0640 (8.67, -83.063999999999993) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1016081.htm
848 10/16/08 NA NA Costa Rica CR Puntarenas 6540 Miramar 8.92048 NA 10.1110 -84.8090 (10.111000000000001, -84.808999999999997) Landslide Complex Medium Downpour NA NA NA NA http://insidecostarica.com/dailynews/2008/october/17/nac01.htm
1296 11/13/09 NA NA Costa Rica CR Puntarenas 3981 San Vito 18.00524 NA 8.8021 -83.1335 (8.8020999999999994, -83.133499999999998) Landslide Landslide Medium Earthquake NA NA 1 NA http://www.ticotimes.net/dailyarchive/2009_11/1116092.cfm
2685 11/4/10 NA NA Costa Rica CR Puntarenas 6777 Golfito 7.87044 NA 8.6117 -83.1000 (8.6117000000000008, -83.1) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
2688 11/4/10 NA NA Costa Rica CR Puntarenas 7084 Corredor 4.93053 NA 8.6844 -82.9418 (8.6844000000000001, -82.941800000000001) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/14/12-roads-remain-closed/

Gráfico de barras agrupadas

# Dodged bars: one bar per record, filled by city, grouped by state.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

# Stacked bars: the records of each state piled up, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

# Pie-style chart: full-width stacked bars drawn in polar coordinates
# with the y aesthetic mapped to the angle.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels. `prop` is each record's share of the
# total distance (in percent) and `ypos` is the midpoint of its slice on
# the cumulative scale, used to place the label.
library(scales)
df_CR <- df_CR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
ggplot(df_CR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette; "Set3" is the qualitative set
  # with the most colours (12).
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

# Wrap the distances in a ts object with 12 observations per time unit,
# the first one at 2008, and preview its first values.
library(forecast)
data <- ts(data = df_CR$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
18.00524
13.48919
3.82425
8.92048
11.74074
7.87044
# Line plot of the series with its title and axis labels.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

# Pareto chart of distance labelled by city; pareto.chart() also prints
# the table of frequencies and cumulative percentages.
library(qcc)
distance <- setNames(df_CR$distance, df_CR$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                
## Pareto chart analysis for distance
##                   Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Ciudad Cortés  20.0663300  20.0663300  22.4960244   22.4960244
##   San Vito       18.0052400  38.0715700  20.1853711   42.6813955
##   Parrita        13.4891900  51.5607600  15.1225036   57.8038990
##   Golfito        11.7407400  63.3015000  13.1623457   70.9662447
##   Miramar         8.9204800  72.2219800  10.0005998   80.9668445
##   Golfito         7.8704400  80.0924200   8.8234176   89.7902622
##   Corredor        4.9305300  85.0229500   5.5275341   95.3177962
##   Miramar         3.8242500  88.8472000   4.2873022   99.6050985
##   Buenos Aires    0.3522500  89.1994500   0.3949015  100.0000000
# Stem-and-leaf display of the distances.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 589
##   1 | 23
##   1 | 8
##   2 | 0
# First rows of df_CR, which now also carries the `prop` and `ypos` columns.
head(df_CR)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state      population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>           <dbl>
## 1  1296 11/13/09 <NA>  <NA>           Costa Rica   CR           Puntarenas       3981
## 2  2689 11/4/10  <NA>  <NA>           Costa Rica   CR           Puntarenas       3734
## 3   323 10/24/07 <NA>  <NA>           Costa Rica   CR           Puntarenas       6540
## 4   848 10/16/08 <NA>  <NA>           Costa Rica   CR           Puntarenas       6540
## 5   845 10/15/08 <NA>  <NA>           Costa Rica   CR           Puntarenas       6777
## 6  2685 11/4/10  <NA>  <NA>           Costa Rica   CR           Puntarenas       6777
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1296 11/13/09 NA NA Costa Rica CR Puntarenas 3981 San Vito 18.00524 NA 8.8021 -83.1335 (8.8020999999999994, -83.133499999999998) Landslide Landslide Medium Earthquake NA NA 1 NA http://www.ticotimes.net/dailyarchive/2009_11/1116092.cfm 20.185371 10.09269
2689 11/4/10 NA NA Costa Rica CR Puntarenas 3734 Parrita 13.48919 NA 9.6341 -84.3662 (9.6341000000000001, -84.366200000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/14/12-roads-remain-closed/ 15.122504 27.74662
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025 4.287302 37.45153
848 10/16/08 NA NA Costa Rica CR Puntarenas 6540 Miramar 8.92048 NA 10.1110 -84.8090 (10.111000000000001, -84.808999999999997) Landslide Complex Medium Downpour NA NA NA NA http://insidecostarica.com/dailynews/2008/october/17/nac01.htm 10.000600 44.59548
845 10/15/08 NA NA Costa Rica CR Puntarenas 6777 Golfito 11.74074 NA 8.6700 -83.0640 (8.67, -83.063999999999993) Landslide Landslide Medium Downpour NA NA NA NA http://www.ticotimes.net/dailyarchive/2008_10/1016081.htm 13.162346 56.17695
2685 11/4/10 NA NA Costa Rica CR Puntarenas 6777 Golfito 7.87044 NA 8.6117 -83.1000 (8.6117000000000008, -83.1) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 8.823418 67.16983
# Stem-and-leaf display of the distances at the default scale.
stem(df_CR[["distance"]])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 589
##   1 | 23
##   1 | 8
##   2 | 0
# Same display stretched to twice the default length (scale = 2).
stem(x = df_CR[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 4
##    2 | 8
##    4 | 9
##    6 | 9
##    8 | 9
##   10 | 7
##   12 | 5
##   14 | 
##   16 | 
##   18 | 0
##   20 | 1

Tablas de frecuencia

# Frequency table of the raw distance values with cumulative percentages
# and a closing "Total" row, rendered with kable.
library(questionr)
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
0.35225 1 11.1 11.1 11.1 11.1
3.82425 1 11.1 11.1 22.2 22.2
4.93053 1 11.1 11.1 33.3 33.3
7.87044 1 11.1 11.1 44.4 44.4
8.92048 1 11.1 11.1 55.6 55.6
11.74074 1 11.1 11.1 66.7 66.7
13.48919 1 11.1 11.1 77.8 77.8
18.00524 1 11.1 11.1 88.9 88.9
20.06633 1 11.1 11.1 100.0 100.0
Total 9 100.0 100.0 100.0 100.0
# Inspect the structure of the frequency table (classes and columns).
str(table)
## Classes 'freqtab' and 'data.frame':  10 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 9
##  $ %      : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ val%   : num  11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
##  $ %cum   : num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
##  $ val%cum: num  11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
# Build the plotting frame from the frequency table: labels come from
# the row names and counts from column `n`. The last row is the "Total"
# line, so it is dropped. head(v, -1) replaces v[1:(length(v) - 1)],
# which returns the first element (instead of nothing) when the table
# holds only the "Total" row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.35225 1
3.82425 1
4.93053 1
7.87044 1
8.92048 1
11.74074 1
13.48919 1
18.00524 1
20.06633 1
# Bar chart of the frequency of each distinct distance value. `x` holds
# the values as text, which ggplot2 would order alphabetically
# ("11.74074" before "3.82425"); fixing the factor levels to the row
# order keeps the bars in the same order as the table.
library(ggplot2)
ggplot(data = df, aes(x = factor(x, levels = unique(x)), y = y)) +
  geom_bar(stat = "identity", color = "green", fill = "aquamarine") +
  xlab("Distancia") +
  ylab("Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Class count from Sturges' rule, 1 + log2(n), rounded to a whole number:
# take the ceiling unless it is even, in which case take the floor.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range divided by the class count, rounded up.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by w up to max + w.
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1]  0.35225  4.35225  8.35225 12.35225 16.35225 20.35225
# Assign every distance to its class and tabulate absolute, relative and
# cumulative frequencies. cut() builds (a, b] intervals by default, so
# the minimum -- which is exactly the first break -- would become NA
# and drop out of the counts. include.lowest = TRUE closes the first
# interval on the left so that every observation is counted.
distance <- cut(distance, breaks = bins, include.lowest = TRUE)
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.352,4.35] 1 0.125 1
(4.35,8.35] 2 0.250 3
(8.35,12.4] 2 0.250 5
(12.4,16.4] 1 0.125 6
(16.4,20.4] 2 0.250 8
# Inspect the structure of the grouped frequency table.
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "(0.352,4.35]",..: 1 2 3 4 5
##  $ Freq    : int  1 2 2 1 2
##  $ Rel_Freq: num  0.125 0.25 0.25 0.125 0.25
##  $ Cum_Freq: int  1 3 5 6 8
# Two-column frame (class label, count) for the grouped bar chart.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(x = df)
x y
(0.352,4.35] 1
(4.35,8.35] 2
(8.35,12.4] 2
(12.4,16.4] 1
(16.4,20.4] 2
# Bar chart of the count in each distance class.
library(ggplot2)

ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

# Descriptive statistics for every column of df_CR; stat.desc() reports
# NA for the non-numeric (character) columns.
library(pastecs)
df_CR %>% stat.desc()
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      9.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.230000e+02   NA   NA             NA           NA           NA
## max          7.493000e+03   NA   NA             NA           NA           NA
## range        7.170000e+03   NA   NA             NA           NA           NA
## sum          2.155700e+04   NA   NA             NA           NA           NA
## median       2.685000e+03   NA   NA             NA           NA           NA
## mean         2.395222e+03   NA   NA             NA           NA           NA
## SE.mean      7.132643e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.644790e+03   NA   NA             NA           NA           NA
## var          4.578713e+06   NA   NA             NA           NA           NA
## std.dev      2.139793e+03   NA   NA             NA           NA           NA
## coef.var     8.933588e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 9.000000e+00   NA  9.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 3.734000e+03   NA  0.3522500                   NA
## max             NA 1.168000e+04   NA 20.0663300                   NA
## range           NA 7.946000e+03   NA 19.7140800                   NA
## sum             NA 5.696300e+04   NA 89.1994500                   NA
## median          NA 6.540000e+03   NA  8.9204800                   NA
## mean            NA 6.329222e+03   NA  9.9110500                   NA
## SE.mean         NA 8.172298e+02   NA  2.1831653                   NA
## CI.mean.0.95    NA 1.884535e+03   NA  5.0343882                   NA
## var             NA 6.010781e+06   NA 42.8958955                   NA
## std.dev         NA 2.451689e+03   NA  6.5494958                   NA
## coef.var        NA 3.873603e-01   NA  0.6608276                   NA
##                 latitude     longitude geolocation hazard_type landslide_type
## nbr.val       9.00000000  9.000000e+00          NA          NA             NA
## nbr.null      0.00000000  0.000000e+00          NA          NA             NA
## nbr.na        0.00000000  0.000000e+00          NA          NA             NA
## min           8.61170000 -8.480900e+01          NA          NA             NA
## max          10.11100000 -8.294180e+01          NA          NA             NA
## range         1.49930000  1.867200e+00          NA          NA             NA
## sum          82.72080000 -7.528426e+02          NA          NA             NA
## median        8.98960000 -8.332680e+01          NA          NA             NA
## mean          9.19120000 -8.364918e+01          NA          NA             NA
## SE.mean       0.19984316  2.553648e-01          NA          NA             NA
## CI.mean.0.95  0.46083916  5.888723e-01          NA          NA             NA
## var           0.35943561  5.869007e-01          NA          NA             NA
## std.dev       0.59952949  7.660945e-01          NA          NA             NA
## coef.var      0.06522864 -9.158422e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  6.0000000          NA
## nbr.null                 NA      NA         NA        1  5.0000000          NA
## nbr.na                   NA      NA         NA        8  3.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  1.0000000          NA
## range                    NA      NA         NA        0  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.1666667          NA
## SE.mean                  NA      NA         NA       NA  0.1666667          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.4284303          NA
## var                      NA      NA         NA       NA  0.1666667          NA
## std.dev                  NA      NA         NA       NA  0.4082483          NA
## coef.var                 NA      NA         NA       NA  2.4494897          NA
##              source_link        prop        ypos
## nbr.val               NA   9.0000000   9.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.3949015  10.0926855
## max                   NA  22.4960244  99.8025492
## range                 NA  22.1011228  89.7098637
## sum                   NA 100.0000000 505.7380343
## median                NA  10.0005998  56.1769495
## mean                  NA  11.1111111  56.1931149
## SE.mean               NA   2.4475098   9.7228963
## CI.mean.0.95          NA   5.6439677  22.4210391
## var                   NA  53.9127377 850.8124161
## std.dev               NA   7.3425294  29.1686890
## coef.var              NA   0.6608276   0.5190794
# Horizontal box plot of the distance series stored in `data`.
boxplot(
  x = data,
  col = "green",
  horizontal = TRUE
)

Gráfico para Guanacaste (Costa Rica)

# Reload the full landslide catalogue: `df` was replaced by a small
# plotting frame in the previous section.
library(knitr)
library(readr)
df <- read_csv(
  file = "https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv"
)
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give columns 7 and 9 short names, keep only the Costa Rica rows and
# preview the first ones.
library(dplyr)
names(df)[c(7, 9)] <- c("state", "city")
df_CR <- subset(x = df, subset = country_name == "Costa Rica")
knitr::kable(x = head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
249 9/9/07 NA NA Costa Rica CR Heredia 21947 Heredia 0.26208 NA 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
299 10/9/07 NA NA Costa Rica CR San José 3072 San Ignacio 4.57763 NA 9.7789 -84.1250 (9.7789000000000001, -84.125) Landslide Complex Medium Rain NA NA 4 ticotimes.net http://www.ticotimes.net/dailyarchive/2007_10/1010071.htm
301 10/11/07 NA NA Costa Rica CR Alajuela 7014 Atenas 3.08459 NA 9.9869 -84.4070 (9.9869000000000003, -84.406999999999996) Landslide Mudslide Large Rain NA NA 14 Agence France-Presse, afp.google.com http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
302 10/11/07 NA NA Costa Rica CR San José 26669 NA 9.56251 NA 10.0214 -83.9451 (10.0214, -83.945099999999996) Landslide Landslide Large Rain NA NA 10 International Herald http://www.iht.com/articles/ap/2007/10/12/america/LA-GEN-Costa-Rica-Mudslide.php
323 10/24/07 NA NA Costa Rica CR Puntarenas 6540 Miramar 3.82425 Mine construction 10.0715 -84.7575 (10.0715, -84.757499999999993) Landslide Mudslide Medium Downpour NA NA NA Reuters - AlertNet.org http://www.reuters.com/article/companyNewsAndPR/idUSN2435152820071025
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
# Restrict the working set to landslides in Guanacaste province. The
# country condition is repeated because `df` holds the whole catalogue
# and a province name alone does not identify Costa Rica.
library(dplyr)
df_CR <- subset(df, country_name == "Costa Rica" & state == "Guanacaste")
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
2683 11/4/10 NA NA Costa Rica CR Guanacaste 7301 Tilarán 10.21631 NA 10.4548 -84.8751 (10.454800000000001, -84.875100000000003) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
4375 5/31/12 NA NA Costa Rica CR Guanacaste 7301 Tilarán 12.33807 NA 10.5562 -84.8952 (10.5562, -84.895200000000003) Landslide Landslide Large Downpour NA NA NA NA http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129
5571 10/3/13 NA NA Costa Rica CR Guanacaste 7301 Tilarán 12.21952 NA 10.5543 -84.8946 (10.5543, -84.894599999999997) Landslide Landslide Medium Mining digging NA NA NA www.ticotimes.net http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013
5591 10/8/13 Morning NA Costa Rica CR Guanacaste 7301 Tilarán 12.18115 NA 10.5546 -84.8955 (10.554600000000001, -84.895499999999998) Landslide Landslide Medium Rain NA NA 2 insidecostarica.com http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/

Gráfico de barras agrupadas

# Dodged bars: one bar per record, filled by city, grouped by state.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

# Stacked bars: the records of each state piled up, filled by city.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

# Pie-style chart: full-width stacked bars drawn in polar coordinates
# with the y aesthetic mapped to the angle.
library(ggplot2)
library(dplyr)
ggplot(data = df_CR, mapping = aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Pie chart with percentage labels. `prop` is each record's share of the
# total distance (in percent) and `ypos` is the midpoint of its slice on
# the cumulative scale, used to place the label.
library(scales)
df_CR <- df_CR %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
ggplot(df_CR, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" is not a ColorBrewer palette; "Set3" is the qualitative set
  # with the most colours (12).
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

# Wrap the distances in a ts object with 12 observations per time unit,
# the first one at 2008, and preview its first values.
library(forecast)
data <- ts(data = df_CR$distance, start = 2008, frequency = 12)
knitr::kable(x = head(data))
x
10.21631
12.33807
12.21952
12.18115
17.65521
# Line plot of the series with its title and axis labels.
autoplot(data) +
  labs(
    title = "Gráfico de series temporales",
    x = "date",
    y = "distancia",
    colour = "green"
  ) +
  theme_bw()

Diagrama de Pareto

# Pareto chart of distance labelled by city; pareto.chart() also prints
# the table of frequencies and cumulative percentages.
library(qcc)
distance <- setNames(df_CR$distance, df_CR$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##          
## Pareto chart analysis for distance
##           Frequency Cum.Freq. Percentage Cum.Percent.
##   Bagaces  17.65521  17.65521   27.32571     27.32571
##   Tilarán  12.33807  29.99328   19.09615     46.42185
##   Tilarán  12.21952  42.21280   18.91266     65.33451
##   Tilarán  12.18115  54.39395   18.85328     84.18779
##   Tilarán  10.21631  64.61026   15.81221    100.00000
# Stem-and-leaf display of the distances.
stem(df_CR[["distance"]])
## 
##   The decimal point is at the |
## 
##   10 | 2
##   12 | 223
##   14 | 
##   16 | 7
# First rows of df_CR, which now also carries the `prop` and `ypos` columns.
head(df_CR)
## # A tibble: 5 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2683 11/4/10 <NA>    <NA>           Costa Rica   CR           Guan~       7301
## 2  4375 5/31/12 <NA>    <NA>           Costa Rica   CR           Guan~       7301
## 3  5571 10/3/13 <NA>    <NA>           Costa Rica   CR           Guan~       7301
## 4  5591 10/8/13 Morning <NA>           Costa Rica   CR           Guan~       7301
## 5   556 5/29/08 <NA>    <NA>           Costa Rica   CR           Guan~       4108
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
# Same preview rendered as a formatted table.
knitr::kable(head(df_CR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2683 11/4/10 NA NA Costa Rica CR Guanacaste 7301 Tilarán 10.21631 NA 10.4548 -84.8751 (10.454800000000001, -84.875100000000003) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 NA http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 15.81221 7.906105
4375 5/31/12 NA NA Costa Rica CR Guanacaste 7301 Tilarán 12.33807 NA 10.5562 -84.8952 (10.5562, -84.895200000000003) Landslide Landslide Large Downpour NA NA NA NA http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129 19.09615 25.360283
5571 10/3/13 NA NA Costa Rica CR Guanacaste 7301 Tilarán 12.21952 NA 10.5543 -84.8946 (10.5543, -84.894599999999997) Landslide Landslide Medium Mining digging NA NA NA www.ticotimes.net http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013 18.91266 44.364688
5591 10/8/13 Morning NA Costa Rica CR Guanacaste 7301 Tilarán 12.18115 NA 10.5546 -84.8955 (10.554600000000001, -84.895499999999998) Landslide Landslide Medium Rain NA NA 2 insidecostarica.com http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/ 18.85328 63.247656
556 5/29/08 NA NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 NA 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument 27.32571 86.337147
# Stem-and-leaf display of the distances at the default scale.
stem(df_CR[["distance"]])
## 
##   The decimal point is at the |
## 
##   10 | 2
##   12 | 223
##   14 | 
##   16 | 7
# Same display stretched to twice the default length (scale = 2).
stem(x = df_CR[["distance"]], scale = 2)
## 
##   The decimal point is at the |
## 
##   10 | 2
##   11 | 
##   12 | 223
##   13 | 
##   14 | 
##   15 | 
##   16 | 
##   17 | 7

Tablas de frecuencia

# Frequency table of the raw distance values with cumulative percentages
# and a closing "Total" row, rendered with kable.
library(questionr)
table <- questionr::freq(
  distance,
  sort = "dec",
  cum = TRUE,
  total = TRUE
)
knitr::kable(x = table)
n % val% %cum val%cum
10.21631 1 20 20 20 20
12.18115 1 20 20 40 40
12.21952 1 20 20 60 60
12.33807 1 20 20 80 80
17.65521 1 20 20 100 100
Total 5 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
# Turn the questionr frequency table into a two-column data frame for
# plotting: the row labels become `x`, the counts in column `n` become `y`.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line requested with total = TRUE; drop it.
# head(., -1) stays correct (empty result) when the table holds only that row,
# whereas 1:(length(x) - 1) would become c(1, 0) and keep the Total row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
10.21631 1
12.18115 1
12.21952 1
12.33807 1
17.65521 1
library(ggplot2)
# Bar chart of the ungrouped frequency table: one bar per distinct distance
# value (column `x` of `df`), bar height taken from the count column `y`.
# The x-axis tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n), where n is the number
# of observations in `distance`.
# log2() replaces log(n) / log(2): the quotient of two rounded natural logs can
# land a hair above an exact integer, which would make ceiling() overshoot.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split across the classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by `w`; the upper limit is
# padded by one width so the last break lies above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 10.21631 13.21631 16.21631 19.21631
# Assign each observation to its class interval. cut() builds right-closed
# intervals (a, b] by default, and the first break is exactly min(distance),
# so the minimum fell outside every interval, became NA and was silently
# dropped from the table. include.lowest = TRUE closes the first interval on
# the left so every observation is counted.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(10.2,13.2] 3 0.75 3
(13.2,16.2] 0 0.00 3
(16.2,19.2] 1 0.25 4
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(10.2,13.2]",..: 1 2 3
##  $ Freq    : int  3 0 1
##  $ Rel_Freq: num  0.75 0 0.25
##  $ Cum_Freq: int  3 3 4
# Plotting data frame: class interval as `x`, absolute frequency as `y`.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(10.2,13.2] 3
(13.2,16.2] 0
(16.2,19.2] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class interval (`x`),
# bar height equal to the number of observations in that class (`y`).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_CR. Non-numeric columns come
# back as NA. The min/max/qt warnings printed below stem from `injuries`,
# which has no non-missing values in this subset (nbr.val = 0, nbr.na = 5).
stat.desc(df_CR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      5.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          5.560000e+02   NA   NA             NA           NA           NA
## max          5.591000e+03   NA   NA             NA           NA           NA
## range        5.035000e+03   NA   NA             NA           NA           NA
## sum          1.877600e+04   NA   NA             NA           NA           NA
## median       4.375000e+03   NA   NA             NA           NA           NA
## mean         3.755200e+03   NA   NA             NA           NA           NA
## SE.mean      9.601025e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.665672e+03   NA   NA             NA           NA           NA
## var          4.608984e+06   NA   NA             NA           NA           NA
## std.dev      2.146854e+03   NA   NA             NA           NA           NA
## coef.var     5.717018e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 5.000000e+00   NA  5.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 4.108000e+03   NA 10.2163100                   NA
## max             NA 7.301000e+03   NA 17.6552100                   NA
## range           NA 3.193000e+03   NA  7.4389000                   NA
## sum             NA 3.331200e+04   NA 64.6102600                   NA
## median          NA 7.301000e+03   NA 12.2195200                   NA
## mean            NA 6.662400e+03   NA 12.9220520                   NA
## SE.mean         NA 6.386000e+02   NA  1.2471437                   NA
## CI.mean.0.95    NA 1.773038e+03   NA  3.4626259                   NA
## var             NA 2.039050e+06   NA  7.7768366                   NA
## std.dev         NA 1.427953e+03   NA  2.7886980                   NA
## coef.var        NA 2.143301e-01   NA  0.2158092                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       5.000000000  5.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          10.402400000 -8.535550e+01          NA          NA             NA
## max          10.556200000 -8.487510e+01          NA          NA             NA
## range         0.153800000  4.804000e-01          NA          NA             NA
## sum          52.522300000 -4.249159e+02          NA          NA             NA
## median       10.554300000 -8.489520e+01          NA          NA             NA
## mean         10.504460000 -8.498318e+01          NA          NA             NA
## SE.mean       0.032060437  9.316065e-02          NA          NA             NA
## CI.mean.0.95  0.089014042  2.586554e-01          NA          NA             NA
## var           0.005139358  4.339454e-02          NA          NA             NA
## std.dev       0.071689316  2.083136e-01          NA          NA             NA
## coef.var      0.006824655 -2.451233e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        5   3.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf   2.000000          NA
## range                    NA      NA         NA     -Inf   2.000000          NA
## sum                      NA      NA         NA        0   2.000000          NA
## median                   NA      NA         NA       NA   1.000000          NA
## mean                     NA      NA         NA      NaN   1.000000          NA
## SE.mean                  NA      NA         NA       NA   1.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  12.706205          NA
## var                      NA      NA         NA       NA   2.000000          NA
## std.dev                  NA      NA         NA       NA   1.414214          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop       ypos
## nbr.val               NA   5.0000000   5.000000
## nbr.null              NA   0.0000000   0.000000
## nbr.na                NA   0.0000000   0.000000
## min                   NA  15.8122100   7.906105
## max                   NA  27.3257065  86.337147
## range                 NA  11.5134965  78.431042
## sum                   NA 100.0000000 227.215879
## median                NA  18.9126619  44.364688
## mean                  NA  20.0000000  45.443176
## SE.mean               NA   1.9302564  13.789187
## CI.mean.0.95          NA   5.3592509  38.284922
## var                   NA  18.6294484 950.708442
## std.dev               NA   4.3161845  30.833560
## coef.var              NA   0.2158092   0.678508
# Horizontal box plot of `data`.
# NOTE(review): `data` is defined before this excerpt; presumably it is the ts
# object built from the Costa Rica distances in the time-series step - verify.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Lucia

library(readr)
library(knitr)
# Reload the full catalog.csv from GitHub. `df` has to be read again because
# the frequency-table steps of the previous country overwrote it with small
# plotting data frames.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the rows whose
# country is Saint Lucia and show the first ones.
names(df)[c(7, 9)] <- c("state", "city")
df_SL <- df[df$country_name %in% "Saint Lucia", ]
knitr::kable(head(df_SL))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
841 10/12/08 NA NA Saint Lucia LC Vieux-Fort 4574 Vieux Fort 2.61752 NA 13.7400 -60.9460 (13.74, -60.945999999999998) Landslide Landslide Medium Downpour NA NA NA NA http://www.htsstlucia.com/2008_News/October/HTS_News_October_13th_2008.html
2658 10/30/10 NA NA Saint Lucia LC Laborie 1131 Laborie 2.74715 NA 13.7705 -60.9690 (13.7705, -60.969000000000001) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA NA
2659 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 1.56516 NA 13.8516 -61.0429 (13.851599999999999, -61.042900000000003) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 2 NA http://gocaribbean.about.com/b/2010/11/02/hurricane-triggers-deadly-landslides-on-st-lucia.htm
2660 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 1.24758 NA 13.8491 -61.0656 (13.8491, -61.065600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 2 NA http://gocaribbean.about.com/b/2010/11/02/hurricane-triggers-deadly-landslides-on-st-lucia.htm
2661 10/31/10 NA NA Saint Lucia LC Dennery 2870 Dennery 6.13309 NA 13.9210 -60.9402 (13.920999999999999, -60.940199999999997) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://www.youtube.com/watch?v=TbPh_Rh7hvE&feature=player_embedded
2662 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 2.60663 NA 13.8328 -61.0535 (13.832800000000001, -61.0535) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://one-little-house.blogspot.com/2010/11/tomas-update.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of distance: a single x position for the country, bars
# coloured by state.
ggplot(df_SL, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar of distance for the country, one segment per record, coloured
# by state.
ggplot(df_SL, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar of distance, coloured by state,
# drawn in polar coordinates with the y axis mapped to the angle.
ggplot(df_SL, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Share of the total distance contributed by each record (`prop`, in percent)
# and the midpoint of its slice on the cumulative scale (`ypos`), used to
# place the labels. Rows are ordered by state, descending, before the
# cumulative sum is taken. sum(distance) refers to the column inside the
# pipeline instead of reaching back to the outer df_SL.
df_SL <- df_SL %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() instead of require(): a missing package stops the script here
# instead of returning FALSE and failing later at percent().
library(scales)
# Labelled pie chart: a single stacked bar bent into a circle by coord_polar().
# "Set4" is not a ColorBrewer palette (the call only raised an
# "Unknown palette" warning); "Set3" is a valid qualitative palette.
ggplot(df_SL, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as monthly (frequency = 12)
# starting in 2008; the values are taken in the current row order of df_SL.
# NOTE(review): df_SL was re-sorted by state (arrange(desc(state))) in the pie
# chart step and the `date` column is not used here, so the series is not in
# chronological order and the monthly spacing is nominal - confirm intent.
data<- ts(df_SL$distance, frequency=12, start=2008)
# Preview the first values as a table.
knitr::kable(head(data))
x
2.61752
1.56516
1.24758
2.60663
0.96933
3.81469
# Line plot of the series with a title and axis labels on a black-and-white
# theme.
# NOTE(review): inside labs(), colour = "green" only names the colour legend;
# it does not paint the line green. If a green line was intended, the colour
# has to be set on the geom instead - confirm.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances, each value labelled with the state of its record,
# so the Pareto chart shows one bar per record under its state name.
distance <- setNames(df_SL$distance, df_SL$state)
pareto.chart(
  distance,
  ylab = "distance",
  col = heat.colors(length(distance)),
  cumperc = seq(0, 100, by = 10),
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por estados"
)

##             
## Pareto chart analysis for distance
##               Frequency  Cum.Freq. Percentage Cum.Percent.
##   Dennery      6.133090   6.133090  28.261590    28.261590
##   Praslin      3.814690   9.947780  17.578285    45.839875
##   Laborie      2.747150  12.694930  12.659007    58.498881
##   Vieux-Fort   2.617520  15.312450  12.061665    70.560546
##   Soufrière    2.606630  17.919080  12.011483    82.572030
##   Soufrière    1.565160  19.484240   7.212337    89.784366
##   Soufrière    1.247580  20.731820   5.748912    95.533278
##   Soufrière    0.969330  21.701150   4.466722   100.000000
stem(df_SL$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 026
##   2 | 6678
##   4 | 
##   6 | 1
head(df_SL)
## # A tibble: 6 x 25
##      id date     time  continent_code country_name country_code state      population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>           <dbl>
## 1   841 10/12/08 <NA>  <NA>           Saint Lucia  LC           Vieux-Fort       4574
## 2  2659 10/31/10 <NA>  <NA>           Saint Lucia  LC           Soufrière        2918
## 3  2660 10/31/10 <NA>  <NA>           Saint Lucia  LC           Soufrière        2918
## 4  2662 10/31/10 <NA>  <NA>           Saint Lucia  LC           Soufrière        2918
## 5  2663 10/31/10 <NA>  <NA>           Saint Lucia  LC           Soufrière        2918
## 6  5718 12/24/13 Night <NA>           Saint Lucia  LC           Praslin          1906
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_SL))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
841 10/12/08 NA NA Saint Lucia LC Vieux-Fort 4574 Vieux Fort 2.61752 NA 13.7400 -60.9460 (13.74, -60.945999999999998) Landslide Landslide Medium Downpour NA NA NA NA http://www.htsstlucia.com/2008_News/October/HTS_News_October_13th_2008.html 12.061665 6.030832
2659 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 1.56516 NA 13.8516 -61.0429 (13.851599999999999, -61.042900000000003) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 2 NA http://gocaribbean.about.com/b/2010/11/02/hurricane-triggers-deadly-landslides-on-st-lucia.htm 7.212337 15.667833
2660 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 1.24758 NA 13.8491 -61.0656 (13.8491, -61.065600000000003) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 2 NA http://gocaribbean.about.com/b/2010/11/02/hurricane-triggers-deadly-landslides-on-st-lucia.htm 5.748912 22.148458
2662 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 2.60663 NA 13.8328 -61.0535 (13.832800000000001, -61.0535) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://one-little-house.blogspot.com/2010/11/tomas-update.html 12.011483 31.028655
2663 10/31/10 NA NA Saint Lucia LC Soufrière 2918 Soufrière 0.96933 NA 13.8487 -61.0613 (13.848699999999999, -61.061300000000003) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://one-little-house.blogspot.com/2010/11/tomas-update.html 4.466722 39.267758
5718 12/24/13 Night NA Saint Lucia LC Praslin 1906 Praslin 3.81469 NA 13.8891 -60.9268 (13.889099999999999, -60.9268) Landslide Landslide Large Downpour NA NA NA reliefweb.int NA 17.578285 50.290261
stem(df_SL$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 026
##   2 | 6678
##   4 | 
##   6 | 1
stem(df_SL$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 
##   1 | 026
##   2 | 667
##   3 | 8
##   4 | 
##   5 | 
##   6 | 1

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which shadows
# base::table() as a variable. Calls such as table(distance) further down still
# resolve to the function, but a different name would be clearer.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.96933 1 12.5 12.5 12.5 12.5
1.24758 1 12.5 12.5 25.0 25.0
1.56516 1 12.5 12.5 37.5 37.5
2.60663 1 12.5 12.5 50.0 50.0
2.61752 1 12.5 12.5 62.5 62.5
2.74715 1 12.5 12.5 75.0 75.0
3.81469 1 12.5 12.5 87.5 87.5
6.13309 1 12.5 12.5 100.0 100.0
Total 8 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  9 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 8
##  $ %      : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ val%   : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ %cum   : num  12.5 25 37.5 50 62.5 75 87.5 100 100
##  $ val%cum: num  12.5 25 37.5 50 62.5 75 87.5 100 100
# Turn the questionr frequency table into a two-column data frame for
# plotting: the row labels become `x`, the counts in column `n` become `y`.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line requested with total = TRUE; drop it.
# head(., -1) stays correct (empty result) when the table holds only that row,
# whereas 1:(length(x) - 1) would become c(1, 0) and keep the Total row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.96933 1
1.24758 1
1.56516 1
2.60663 1
2.61752 1
2.74715 1
3.81469 1
6.13309 1
library(ggplot2)

# Bar chart of the ungrouped frequency table: one bar per distinct distance
# value (column `x` of `df`), bar height taken from the count column `y`.
# The x-axis tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n), where n is the number
# of observations in `distance`.
# log2() replaces log(n) / log(2): the quotient of two rounded natural logs can
# land a hair above an exact integer, which would make ceiling() overshoot.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split across the classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by `w`; the upper limit is
# padded by one width so the last break lies above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 0.96933 2.96933 4.96933 6.96933
# Assign each observation to its class interval. cut() builds right-closed
# intervals (a, b] by default, and the first break is exactly min(distance),
# so the minimum fell outside every interval, became NA and was silently
# dropped from the table. include.lowest = TRUE closes the first interval on
# the left so every observation is counted.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(0.969,2.97] 5 0.7142857 5
(2.97,4.97] 1 0.1428571 6
(4.97,6.97] 1 0.1428571 7
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "(0.969,2.97]",..: 1 2 3
##  $ Freq    : int  5 1 1
##  $ Rel_Freq: num  0.714 0.143 0.143
##  $ Cum_Freq: int  5 6 7
# Plotting data frame: class interval as `x`, absolute frequency as `y`.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(0.969,2.97] 5
(2.97,4.97] 1
(4.97,6.97] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class interval (`x`),
# bar height equal to the number of observations in that class (`y`).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_SL. Non-numeric columns come
# back as NA. The min/max/qt warnings printed below stem from `injuries`,
# which has no non-missing values in this subset (nbr.val = 0, nbr.na = 8).
stat.desc(df_SL)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      8.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          8.410000e+02   NA   NA             NA           NA           NA
## max          5.718000e+03   NA   NA             NA           NA           NA
## range        4.877000e+03   NA   NA             NA           NA           NA
## sum          2.252200e+04   NA   NA             NA           NA           NA
## median       2.660500e+03   NA   NA             NA           NA           NA
## mean         2.815250e+03   NA   NA             NA           NA           NA
## SE.mean      4.718377e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.115719e+03   NA   NA             NA           NA           NA
## var          1.781046e+06   NA   NA             NA           NA           NA
## std.dev      1.334558e+03   NA   NA             NA           NA           NA
## coef.var     4.740462e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 8.000000e+00   NA  8.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.131000e+03   NA  0.9693300                   NA
## max             NA 4.574000e+03   NA  6.1330900                   NA
## range           NA 3.443000e+03   NA  5.1637600                   NA
## sum             NA 2.215300e+04   NA 21.7011500                   NA
## median          NA 2.918000e+03   NA  2.6120750                   NA
## mean            NA 2.769125e+03   NA  2.7126437                   NA
## SE.mean         NA 3.480774e+02   NA  0.5891536                   NA
## CI.mean.0.95    NA 8.230724e+02   NA  1.3931269                   NA
## var             NA 9.692633e+05   NA  2.7768157                   NA
## std.dev         NA 9.845117e+02   NA  1.6663780                   NA
## coef.var        NA 3.555317e-01   NA  0.6143004                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      8.000000e+00  8.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.374000e+01 -6.106560e+01          NA          NA             NA
## max          1.392100e+01 -6.092680e+01          NA          NA             NA
## range        1.810000e-01  1.388000e-01          NA          NA             NA
## sum          1.107028e+02 -4.880053e+02          NA          NA             NA
## median       1.384890e+01 -6.100595e+01          NA          NA             NA
## mean         1.383785e+01 -6.100066e+01          NA          NA             NA
## SE.mean      2.074935e-02  2.136899e-02          NA          NA             NA
## CI.mean.0.95 4.906441e-02  5.052962e-02          NA          NA             NA
## var          3.444283e-03  3.653068e-03          NA          NA             NA
## std.dev      5.868801e-02  6.044062e-02          NA          NA             NA
## coef.var     4.241122e-03 -9.908191e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  6.0000000          NA
## nbr.null                 NA      NA         NA        0  4.0000000          NA
## nbr.na                   NA      NA         NA        8  2.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  2.0000000          NA
## range                    NA      NA         NA     -Inf  2.0000000          NA
## sum                      NA      NA         NA        0  4.0000000          NA
## median                   NA      NA         NA       NA  0.0000000          NA
## mean                     NA      NA         NA      NaN  0.6666667          NA
## SE.mean                  NA      NA         NA       NA  0.4216370          NA
## CI.mean.0.95             NA      NA         NA      NaN  1.0838525          NA
## var                      NA      NA         NA       NA  1.0666667          NA
## std.dev                  NA      NA         NA       NA  1.0327956          NA
## coef.var                 NA      NA         NA       NA  1.5491933          NA
##              source_link        prop        ypos
## nbr.val               NA   8.0000000   8.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   4.4667218   6.0308325
## max                   NA  28.2615898  85.8692051
## range                 NA  23.7948680  79.8383726
## sum                   NA 100.0000000 315.7119093
## median                NA  12.0365741  35.1482064
## mean                  NA  12.5000000  39.4639887
## SE.mean               NA   2.7148497   9.4480204
## CI.mean.0.95          NA   6.4195994  22.3410181
## var                   NA  58.9632706 714.1207126
## std.dev               NA   7.6787545  26.7230371
## coef.var              NA   0.6143004   0.6771499
# Horizontal box plot of `data`, the ts object built from df_SL$distance in the
# time-series step of this section.
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Grenada

library(readr)
library(knitr)
# Reload the full catalog.csv from GitHub. `df` has to be read again because
# the frequency-table steps of the previous country overwrote it with small
# plotting data frames.
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the rows whose
# country is Grenada and show the first ones.
names(df)[c(7, 9)] <- c("state", "city")
df_GN <- df[df$country_name %in% "Grenada", ]
knitr::kable(head(df_GN))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
3355 4/12/11 3:00:00 NA Grenada GD Saint Mark 2256 Victoria 1.19981 NA 12.1979 -61.6990 (12.197900000000001, -61.698999999999998) Landslide Landslide Small Downpour NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1302618053_40629
3356 4/12/11 Morning NA Grenada GD Saint John 3378 Gouyave 5.65355 NA 12.1353 -61.6871 (12.135300000000001, -61.687100000000001) Landslide Landslide Small Downpour NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1302618053_40629

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars of distance: a single x position for the country, bars
# coloured by state.
ggplot(df_GN, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bar of distance for the country, one segment per record, coloured
# by state.
ggplot(df_GN, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Basic pie chart: a full-width stacked bar of distance, coloured by state,
# drawn in polar coordinates with the y axis mapped to the angle.
ggplot(df_GN, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)

# Share of the total distance contributed by each record (`prop`, in percent)
# and the midpoint of its slice on the cumulative scale (`ypos`), used to
# place the labels. Rows are ordered by state, descending, before the
# cumulative sum is taken. sum(distance) refers to the column inside the
# pipeline instead of reaching back to the outer df_GN.
df_GN <- df_GN %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )
# library() instead of require(): a missing package stops the script here
# instead of returning FALSE and failing later at percent().
library(scales)
# Labelled pie chart: a single stacked bar bent into a circle by coord_polar().
# "Set4" is not a ColorBrewer palette (the call only raised an
# "Unknown palette" warning); "Set3" is a valid qualitative palette.
ggplot(df_GN, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")
## Warning in pal_name(palette, type): Unknown palette Set4

Gráfico de series temporales

library(forecast)
# Wrap the distances in a ts object declared as monthly (frequency = 12)
# starting in 2008; the values are taken in the current row order of df_GN.
# NOTE(review): df_GN was re-sorted by state (arrange(desc(state))) in the pie
# chart step and the `date` column is not used here (both records shown are
# dated 4/12/11), so the monthly spacing from 2008 is nominal - confirm intent.
data<- ts(df_GN$distance, frequency=12, start=2008)
# Preview the first values as a table.
knitr::kable(head(data))
x
1.19981
5.65355
# Line plot of the series with a title and axis labels on a black-and-white
# theme.
# NOTE(review): inside labs(), colour = "green" only names the colour legend;
# it does not paint the line green. If a green line was intended, the colour
# has to be set on the geom instead - confirm.
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Named vector of distances, each value labelled with the state of its record,
# so the Pareto chart shows one bar per record under its state name.
distance <- setNames(df_GN$distance, df_GN$state)
pareto.chart(
  distance,
  ylab = "distance",
  col = heat.colors(length(distance)),
  cumperc = seq(0, 100, by = 10),
  ylab2 = "Porcentaje acumulado",
  main = "Grafico de Pareto por estados"
)

##             
## Pareto chart analysis for distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Saint John   5.65355   5.65355   82.49311     82.49311
##   Saint Mark   1.19981   6.85336   17.50689    100.00000
stem(df_GN$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 2
##   2 | 
##   3 | 
##   4 | 
##   5 | 7
head(df_GN)
## # A tibble: 2 x 25
##      id date    time    continent_code country_name country_code state population
##   <dbl> <chr>   <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  3355 4/12/11 3:00:00 <NA>           Grenada      GD           Sain~       2256
## 2  3356 4/12/11 Morning <NA>           Grenada      GD           Sain~       3378
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_GN))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3355 4/12/11 3:00:00 NA Grenada GD Saint Mark 2256 Victoria 1.19981 NA 12.1979 -61.6990 (12.197900000000001, -61.698999999999998) Landslide Landslide Small Downpour NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1302618053_40629 17.50689 8.753444
3356 4/12/11 Morning NA Grenada GD Saint John 3378 Gouyave 5.65355 NA 12.1353 -61.6871 (12.135300000000001, -61.687100000000001) Landslide Landslide Small Downpour NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1302618053_40629 82.49311 58.753444
stem(df_GN$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 2
##   2 | 
##   3 | 
##   4 | 
##   5 | 7
stem(df_GN$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 2
##   1 | 
##   2 | 
##   2 | 
##   3 | 
##   3 | 
##   4 | 
##   4 | 
##   5 | 
##   5 | 7

Tablas de frecuencia

library(questionr)
# Frequency table of the individual distance values, with cumulative
# percentages and a final "Total" row.
# NOTE(review): the result is stored under the name `table`, which shadows
# base::table() as a variable. Calls such as table(distance) further down still
# resolve to the function, but a different name would be clearer.
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.19981 1 50 50 50 50
5.65355 1 50 50 100 100
Total 2 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
# Turn the questionr frequency table into a two-column data frame for
# plotting: the row labels become `x`, the counts in column `n` become `y`.
x <- row.names(table)
y <- table$n
# The last row is the "Total" line requested with total = TRUE; drop it.
# head(., -1) stays correct (empty result) when the table holds only that row,
# whereas 1:(length(x) - 1) would become c(1, 0) and keep the Total row.
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.19981 1
5.65355 1
library(ggplot2)

# Bar chart of the ungrouped frequency table: one bar per distinct distance
# value (column `x` of `df`), bar height taken from the count column `y`.
# The x-axis tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Number of classes by Sturges' rule, k = 1 + log2(n), where n is the number
# of observations in `distance`.
# log2() replaces log(n) / log(2): the quotient of two rounded natural logs can
# land a hair above an exact integer, which would make ceiling() overshoot.
n_sturges <- 1 + log2(length(distance))
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-down count when the rounded-up count is even, otherwise the
# rounded-up count.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the data range split across the classes, rounded up to a whole
# unit.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
# Break points start at the minimum and advance by `w`; the upper limit is
# padded by one width so the last break lies above the maximum.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 1.19981 4.19981 7.19981
# Assign each observation to its class interval. cut() builds right-closed
# intervals (a, b] by default, and the first break is exactly min(distance),
# so the minimum fell outside every interval, became NA and was silently
# dropped from the table. include.lowest = TRUE closes the first interval on
# the left so every observation is counted.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
(1.2,4.2] 0 0 0
(4.2,7.2] 1 1 1
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "(1.2,4.2]","(4.2,7.2]": 1 2
##  $ Freq    : int  0 1
##  $ Rel_Freq: num  0 1
##  $ Cum_Freq: int  0 1
# Plotting data frame: class interval as `x`, absolute frequency as `y`.
df <- with(Freq_table, data.frame(x = distance, y = Freq))
knitr::kable(df)
x y
(1.2,4.2] 0
(4.2,7.2] 1
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per class interval (`x`),
# bar height equal to the number of observations in that class (`y`).
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_GN. Non-numeric columns come
# back as NA.
# NOTE(review): the min/max/qt warnings printed below presumably come from
# `injuries`, which is NA in both Grenada rows shown earlier - confirm.
stat.desc(df_GN)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      2.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.355000e+03   NA   NA             NA           NA           NA
## max          3.356000e+03   NA   NA             NA           NA           NA
## range        1.000000e+00   NA   NA             NA           NA           NA
## sum          6.711000e+03   NA   NA             NA           NA           NA
## median       3.355500e+03   NA   NA             NA           NA           NA
## mean         3.355500e+03   NA   NA             NA           NA           NA
## SE.mean      5.000000e-01   NA   NA             NA           NA           NA
## CI.mean.0.95 6.353102e+00   NA   NA             NA           NA           NA
## var          5.000000e-01   NA   NA             NA           NA           NA
## std.dev      7.071068e-01   NA   NA             NA           NA           NA
## coef.var     2.107307e-04   NA   NA             NA           NA           NA
##              state   population city  distance location_description    latitude
## nbr.val         NA 2.000000e+00   NA  2.000000                   NA  2.00000000
## nbr.null        NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na          NA 0.000000e+00   NA  0.000000                   NA  0.00000000
## min             NA 2.256000e+03   NA  1.199810                   NA 12.13530000
## max             NA 3.378000e+03   NA  5.653550                   NA 12.19790000
## range           NA 1.122000e+03   NA  4.453740                   NA  0.06260000
## sum             NA 5.634000e+03   NA  6.853360                   NA 24.33320000
## median          NA 2.817000e+03   NA  3.426680                   NA 12.16660000
## mean            NA 2.817000e+03   NA  3.426680                   NA 12.16660000
## SE.mean         NA 5.610000e+02   NA  2.226870                   NA  0.03130000
## CI.mean.0.95    NA 7.128181e+03   NA 28.295066                   NA  0.39770421
## var             NA 6.294420e+05   NA  9.917900                   NA  0.00195938
## std.dev         NA 7.933738e+02   NA  3.149270                   NA  0.04426488
## coef.var        NA 2.816378e-01   NA  0.919044                   NA  0.00363823
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.169900e+01          NA          NA             NA
## max          -6.168710e+01          NA          NA             NA
## range         1.190000e-02          NA          NA             NA
## sum          -1.233861e+02          NA          NA             NA
## median       -6.169305e+01          NA          NA             NA
## mean         -6.169305e+01          NA          NA             NA
## SE.mean       5.950000e-03          NA          NA             NA
## CI.mean.0.95  7.560192e-02          NA          NA             NA
## var           7.080500e-05          NA          NA             NA
## std.dev       8.414571e-03          NA          NA             NA
## coef.var     -1.363941e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          2          NA
## nbr.null                 NA      NA         NA        0          2          NA
## nbr.na                   NA      NA         NA        2          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA    2.000000    2.000000
## nbr.null              NA    0.000000    0.000000
## nbr.na                NA    0.000000    0.000000
## min                   NA   17.506887    8.753444
## max                   NA   82.493113   58.753444
## range                 NA   64.986226   50.000000
## sum                   NA  100.000000   67.506887
## median                NA   50.000000   33.753444
## mean                  NA   50.000000   33.753444
## SE.mean               NA   32.493113   25.000000
## CI.mean.0.95          NA  412.864145  317.655118
## var                   NA 2111.604768 1250.000000
## std.dev               NA   45.952201   35.355339
## coef.var              NA    0.919044    1.047459
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Cuba

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the rows for Cuba
# (rows whose country_name is missing are dropped as well).
names(df)[c(7, 9)] <- c("state", "city")
df_CB <- df[which(df$country_name == "Cuba"), ]
knitr::kable(head(df_CB))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
2611 10/18/10 NA NA Cuba CU Provincia de La Habana 132351 Cerro 0.89865 NA 23.1098 -82.3691 (23.1098, -82.369100000000003) Landslide Complex Medium Tropical cyclone Tropical Storm Paula NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/VDUX-8ADM53-full_report.pdf/$File/full_report.pdf
2706 11/7/10 NA NA Cuba CU Guantanamo 48362 Baracoa 10.45795 NA 20.2526 -74.4867 (20.252600000000001, -74.486699999999999) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://www.solvision.co.cu/english/index.php?option=com_content&view=article&id=1631:viaduct-la-farola-in-baracoa-traffic-restored&catid=34:portada&Itemid=171
5067 7/9/13 NA NA Cuba CU Artemisa Province 7205 Soroa 11.87914 NA 22.7943 -83.1322 (22.7943, -83.132199999999997) Landslide Landslide Medium Downpour NA NA 0 www.havanatimes.org http://www.havanatimes.org/?p=96131

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged bars: distance on the vertical axis, fill colour taken from state.
ggplot(df_CB, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances are piled on top of each other, filled by state.
ggplot(df_CB, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# stacked distance mapped to the angle.
ggplot(df_CB, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: midpoint of each record's slice along the stacked axis, used as the
#       position of its percentage label.
df_CB <- df_CB %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: a single stacked bar drawn in polar coordinates.
# "Set3" replaces "Set4", which is not a ColorBrewer palette and triggered an
# "Unknown palette" warning.
ggplot(df_CB, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# and starting in 2008.
# NOTE(review): the values are taken in df_CB's current row order and the
# `date` column is not consulted; confirm this is the intended time axis.
data<- ts(df_CB$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
0.89865
10.45795
11.87914
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by state, which pareto.chart() uses as labels.
distance <- setNames(df_CB$distance, df_CB$state)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance)),
  main = "Grafico de Pareto por estados"
)

##                         
## Pareto chart analysis for distance
##                           Frequency  Cum.Freq. Percentage Cum.Percent.
##   Artemisa Province       11.879140  11.879140  51.124432    51.124432
##   Guantanamo              10.457950  22.337090  45.008035    96.132467
##   Provincia de La Habana   0.898650  23.235740   3.867533   100.000000
stem(df_CB$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1
##   0 | 
##   1 | 02
head(df_CB)
## # A tibble: 3 x 25
##      id date     time  continent_code country_name country_code state population
##   <dbl> <chr>    <chr> <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  2611 10/18/10 <NA>  <NA>           Cuba         CU           Prov~     132351
## 2  2706 11/7/10  <NA>  <NA>           Cuba         CU           Guan~      48362
## 3  5067 7/9/13   <NA>  <NA>           Cuba         CU           Arte~       7205
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_CB))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2611 10/18/10 NA NA Cuba CU Provincia de La Habana 132351 Cerro 0.89865 NA 23.1098 -82.3691 (23.1098, -82.369100000000003) Landslide Complex Medium Tropical cyclone Tropical Storm Paula NA 0 NA http://www.reliefweb.int/rw/RWFiles2010.nsf/FilesByRWDocUnidFilename/VDUX-8ADM53-full_report.pdf/$File/full_report.pdf 3.867533 1.933767
2706 11/7/10 NA NA Cuba CU Guantanamo 48362 Baracoa 10.45795 NA 20.2526 -74.4867 (20.252600000000001, -74.486699999999999) Landslide Landslide Medium Tropical cyclone Hurricane Tomas NA 0 NA http://www.solvision.co.cu/english/index.php?option=com_content&view=article&id=1631:viaduct-la-farola-in-baracoa-traffic-restored&catid=34:portada&Itemid=171 45.008035 26.371551
5067 7/9/13 NA NA Cuba CU Artemisa Province 7205 Soroa 11.87914 NA 22.7943 -83.1322 (22.7943, -83.132199999999997) Landslide Landslide Medium Downpour NA NA 0 www.havanatimes.org http://www.havanatimes.org/?p=96131 51.124432 74.437784
stem(df_CB$"distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1
##   0 | 
##   1 | 02
stem(df_CB$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 9
##    2 | 
##    4 | 
##    6 | 
##    8 | 
##   10 | 59

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.89865 1 33.3 33.3 33.3 33.3
10.45795 1 33.3 33.3 66.7 66.7
11.87914 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
# Split the freq() result into labels and counts, dropping the trailing
# "Total" row. head(x, -1) is used instead of x[1:(length(x) - 1)] because the
# latter indexes with c(1, 0) when only one row is present and would keep the
# "Total" row instead of returning nothing.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.89865 1
10.45795 1
11.87914 1
library(ggplot2)

# Bar chart of the ungrouped frequency table: column x of df supplies the
# labels on the horizontal axis and column y the bar heights.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n), evaluated on the number of observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even; otherwise use the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and advance in steps of w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1]  0.89865  4.89865  8.89865 12.89865
# Bin the distances into the classes defined by `bins`. include.lowest = TRUE
# closes the first interval on the left: bins[1] is min(distance), so with the
# default (a, b] intervals that observation matched no class, became NA and
# the frequencies added up to n - 1 instead of n.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[0.899,4.9] 1 0.3333333 1
(4.9,8.9] 0 0.0000000 1
(8.9,12.9] 2 0.6666667 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[0.899,4.9]",..: 1 2 3
##  $ Freq    : int  1 0 2
##  $ Rel_Freq: num  0.333 0 0.667
##  $ Cum_Freq: int  1 1 3
# Two-column frame (class label, count) used by the bar chart that follows.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[0.899,4.9] 1
(4.9,8.9] 0
(8.9,12.9] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_CB.
# NOTE(review): the whole data frame is passed, so the character columns come
# back as NA in the printed result, and the min/max/qt warnings appear to stem
# from the `injuries` column, which has no non-missing values here. Consider
# restricting the call to the numeric columns of interest.
stat.desc(df_CB)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      3.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          2.611000e+03   NA   NA             NA           NA           NA
## max          5.067000e+03   NA   NA             NA           NA           NA
## range        2.456000e+03   NA   NA             NA           NA           NA
## sum          1.038400e+04   NA   NA             NA           NA           NA
## median       2.706000e+03   NA   NA             NA           NA           NA
## mean         3.461333e+03   NA   NA             NA           NA           NA
## SE.mean      8.033016e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 3.456328e+03   NA   NA             NA           NA           NA
## var          1.935880e+06   NA   NA             NA           NA           NA
## std.dev      1.391359e+03   NA   NA             NA           NA           NA
## coef.var     4.019720e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description   latitude
## nbr.val         NA 3.000000e+00   NA  3.0000000                   NA  3.0000000
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA  0.0000000
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA  0.0000000
## min             NA 7.205000e+03   NA  0.8986500                   NA 20.2526000
## max             NA 1.323510e+05   NA 11.8791400                   NA 23.1098000
## range           NA 1.251460e+05   NA 10.9804900                   NA  2.8572000
## sum             NA 1.879180e+05   NA 23.2357400                   NA 66.1567000
## median          NA 4.836200e+04   NA 10.4579500                   NA 22.7943000
## mean            NA 6.263933e+04   NA  7.7452467                   NA 22.0522333
## SE.mean         NA 3.682509e+04   NA  3.4477944                   NA  0.9044142
## CI.mean.0.95    NA 1.584456e+05   NA 14.8346622                   NA  3.8913802
## var             NA 4.068262e+09   NA 35.6618597                   NA  2.4538952
## std.dev         NA 6.378293e+04   NA  5.9717552                   NA  1.5664914
## coef.var        NA 1.018257e+00   NA  0.7710219                   NA  0.0710355
##                  longitude geolocation hazard_type landslide_type
## nbr.val         3.00000000          NA          NA             NA
## nbr.null        0.00000000          NA          NA             NA
## nbr.na          0.00000000          NA          NA             NA
## min           -83.13220000          NA          NA             NA
## max           -74.48670000          NA          NA             NA
## range           8.64550000          NA          NA             NA
## sum          -239.98800000          NA          NA             NA
## median        -82.36910000          NA          NA             NA
## mean          -79.99600000          NA          NA             NA
## SE.mean         2.76344412          NA          NA             NA
## CI.mean.0.95   11.89014040          NA          NA             NA
## var            22.90987027          NA          NA             NA
## std.dev         4.78642563          NA          NA             NA
## coef.var       -0.05983331          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          3          NA
## nbr.null                 NA      NA         NA        0          3          NA
## nbr.na                   NA      NA         NA        3          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA   3.0000000    3.000000
## nbr.null              NA   0.0000000    0.000000
## nbr.na                NA   0.0000000    0.000000
## min                   NA   3.8675334    1.933767
## max                   NA  51.1244316   74.437784
## range                 NA  47.2568982   72.504018
## sum                   NA 100.0000000  102.743102
## median                NA  45.0080350   26.371551
## mean                  NA  33.3333333   34.247701
## SE.mean               NA  14.8383243   21.297366
## CI.mean.0.95          NA  63.8441565   91.635171
## var                   NA 660.5276020 1360.733440
## std.dev               NA  25.7007315   36.888121
## coef.var              NA   0.7710219    1.077098
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puerto Rico

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give the 7th and 9th columns short names, then keep only the rows for
# Puerto Rico (rows whose country_name is missing are dropped as well).
names(df)[c(7, 9)] <- c("state", "city")
df_PR <- df[which(df$country_name == "Puerto Rico"), ]
knitr::kable(head(df_PR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
393 12/12/07 NA NA Puerto Rico PR San Juan 418140 San Juan 6.91777 NA 18.4320 -66.0510 (18.431999999999999, -66.051000000000002) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA AP.google.com http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD8TFVR600
2550 10/6/10 NA NA Puerto Rico PR Orocovis 944 Orocovis 6.85760 NA 18.1652 -66.3969 (18.165199999999999, -66.396900000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Otto NA 0 NA http://www.whitehouse.gov/the-press-office/2010/10/26/president-obama-signs-puerto-rico-disaster-declaration
6708 5/18/14 16:30 NA Puerto Rico PR Vega Alta 12036 Vega Alta 3.49090 Mine construction 18.3806 -66.3319 (18.380600000000001, -66.331900000000005) Landslide Other Small Rain NA 0 0 Telemundo http://www.telemundopr.com/telenoticias/puerto-rico/Deslizamiento-deja-a-familias-incomunicadas-en-Vega-Alta-258522361.html
6709 9/24/14 NA NA Puerto Rico PR Aguada 4040 Aguada 1.40257 Unknown 18.3711 -67.1782 (18.371099999999998, -67.178200000000004) Landslide Landslide Medium Downpour NA 0 0 Telemundo http://www.telemundopr.com/telenoticias/puerto-rico/Viviendas-inhabitables-luego-de-deslizamiento-de-tierras-en-Aguada-277123031.html
6710 8/24/14 3:00 NA Puerto Rico PR Ponce 5080 Adjuntas 5.78872 Unknown 18.1283 -66.6810 (18.128299999999999, -66.680999999999997) Landslide Landslide Small Downpour NA 0 0 Perla del Sur http://www.periodicolaperla.com/index.php?option=com_content&view=article&id=6371:surgen-nuevos-deslizamientos-en-ponce&catid=135:actualidad-del-sur&Itemid=423
6711 8/24/14 NA NA Puerto Rico PR Ponce 5080 Adjuntas 6.89036 Unknown 18.1254 -66.6700 (18.125399999999999, -66.67) Landslide Landslide Medium Downpour NA 0 0 Perla del Sur http://www.periodicolaperla.com/index.php?option=com_content&view=article&id=6371:surgen-nuevos-deslizamientos-en-ponce&catid=135:actualidad-del-sur&Itemid=423

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Dodged bars: distance on the vertical axis, fill colour taken from state.
ggplot(df_PR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Stacked bars: the distances are piled on top of each other, filled by state.
ggplot(df_PR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# Pie chart: a full-width stacked bar drawn in polar coordinates, with the
# stacked distance mapped to the angle.
ggplot(df_PR, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share of the total distance contributed by each record, in percent.
# ypos: midpoint of each record's slice along the stacked axis, used as the
#       position of its percentage label.
df_PR <- df_PR %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

# Labelled pie chart: a single stacked bar drawn in polar coordinates.
# "Set3" replaces "Set4", which is not a ColorBrewer palette and triggered an
# "Unknown palette" warning.
ggplot(df_PR, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Wrap the distance column in a ts object declared as monthly (frequency = 12)
# and starting in 2008.
# NOTE(review): the values are taken in df_PR's current row order and the
# `date` column is not consulted; confirm this is the intended time axis.
data<- ts(df_PR$distance, frequency=12, start=2008)
knitr::kable(head(data))
x
3.65535
3.49090
6.91777
5.78872
6.89036
6.85760
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de Pareto

library(qcc)
# Distances as a vector named by state, which pareto.chart() uses as labels.
distance <- setNames(df_PR$distance, df_PR$state)
pareto.chart(
  distance,
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance)),
  main = "Grafico de Pareto por estados"
)

##            
## Pareto chart analysis for distance
##              Frequency  Cum.Freq. Percentage Cum.Percent.
##   San Juan    6.917770   6.917770  19.763211    19.763211
##   Ponce       6.890360  13.808130  19.684904    39.448114
##   Orocovis    6.857600  20.665730  19.591312    59.039427
##   Ponce       5.788720  26.454450  16.537655    75.577082
##   Villalba    3.655350  30.109800  10.442881    86.019963
##   Vega Alta   3.490900  33.600700   9.973068    95.993032
##   Aguada      1.402570  35.003270   4.006968   100.000000
stem(df_PR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 4
##   2 | 57
##   4 | 8
##   6 | 999
head(df_PR)
## # A tibble: 6 x 25
##      id date     time      continent_code country_name country_code state population
##   <dbl> <chr>    <chr>     <chr>          <chr>        <chr>        <chr>      <dbl>
## 1  6712 11/7/14  Afternoon <NA>           Puerto Rico  PR           Vill~       4555
## 2  6708 5/18/14  16:30     <NA>           Puerto Rico  PR           Vega~      12036
## 3   393 12/12/07 <NA>      <NA>           Puerto Rico  PR           San ~     418140
## 4  6710 8/24/14  3:00      <NA>           Puerto Rico  PR           Ponce       5080
## 5  6711 8/24/14  <NA>      <NA>           Puerto Rico  PR           Ponce       5080
## 6  2550 10/6/10  <NA>      <NA>           Puerto Rico  PR           Oroc~        944
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_PR))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6712 11/7/14 Afternoon NA Puerto Rico PR Villalba 4555 Villalba 3.65535 Unknown 18.1578 -66.4790 (18.157800000000002, -66.478999999999999) Landslide Landslide Medium Unknown NA 0 0 Villalba Online http://www.18norte66oeste.com/index.php/ley-y-orden/955-reportan-deslizamiento-de-tierra-en-barrio-mogote 10.442882 5.221441
6708 5/18/14 16:30 NA Puerto Rico PR Vega Alta 12036 Vega Alta 3.49090 Mine construction 18.3806 -66.3319 (18.380600000000001, -66.331900000000005) Landslide Other Small Rain NA 0 0 Telemundo http://www.telemundopr.com/telenoticias/puerto-rico/Deslizamiento-deja-a-familias-incomunicadas-en-Vega-Alta-258522361.html 9.973068 15.429416
393 12/12/07 NA NA Puerto Rico PR San Juan 418140 San Juan 6.91777 NA 18.4320 -66.0510 (18.431999999999999, -66.051000000000002) Landslide Landslide Medium Tropical cyclone Tropical Storm Olga NA NA AP.google.com http://ap.google.com/article/ALeqM5gVWjsPEiqe1tEu2mhBIRaxxGi8owD8TFVR600 19.763211 30.297555
6710 8/24/14 3:00 NA Puerto Rico PR Ponce 5080 Adjuntas 5.78872 Unknown 18.1283 -66.6810 (18.128299999999999, -66.680999999999997) Landslide Landslide Small Downpour NA 0 0 Perla del Sur http://www.periodicolaperla.com/index.php?option=com_content&view=article&id=6371:surgen-nuevos-deslizamientos-en-ponce&catid=135:actualidad-del-sur&Itemid=423 16.537655 48.447988
6711 8/24/14 NA NA Puerto Rico PR Ponce 5080 Adjuntas 6.89036 Unknown 18.1254 -66.6700 (18.125399999999999, -66.67) Landslide Landslide Medium Downpour NA 0 0 Perla del Sur http://www.periodicolaperla.com/index.php?option=com_content&view=article&id=6371:surgen-nuevos-deslizamientos-en-ponce&catid=135:actualidad-del-sur&Itemid=423 19.684904 66.559267
2550 10/6/10 NA NA Puerto Rico PR Orocovis 944 Orocovis 6.85760 NA 18.1652 -66.3969 (18.165199999999999, -66.396900000000002) Landslide Complex Medium Tropical cyclone Tropical Storm Otto NA 0 NA http://www.whitehouse.gov/the-press-office/2010/10/26/president-obama-signs-puerto-rico-disaster-declaration 19.591313 86.197375
stem(df_PR$"distance")
## 
##   The decimal point is at the |
## 
##   0 | 4
##   2 | 57
##   4 | 8
##   6 | 999
stem(df_PR$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 4
##   2 | 
##   3 | 57
##   4 | 
##   5 | 8
##   6 | 999

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.40257 1 14.3 14.3 14.3 14.3
3.4909 1 14.3 14.3 28.6 28.6
3.65535 1 14.3 14.3 42.9 42.9
5.78872 1 14.3 14.3 57.1 57.1
6.8576 1 14.3 14.3 71.4 71.4
6.89036 1 14.3 14.3 85.7 85.7
6.91777 1 14.3 14.3 100.0 100.0
Total 7 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  8 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 7
##  $ %      : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ val%   : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ %cum   : num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
##  $ val%cum: num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
# Split the freq() result into labels and counts, dropping the trailing
# "Total" row. head(x, -1) is used instead of x[1:(length(x) - 1)] because the
# latter indexes with c(1, 0) when only one row is present and would keep the
# "Total" row instead of returning nothing.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.40257 1
3.4909 1
3.65535 1
5.78872 1
6.8576 1
6.89036 1
6.91777 1
library(ggplot2)

# Bar chart of the ungrouped frequency table: column x of df supplies the
# labels on the horizontal axis and column y the bar heights.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: k = 1 + log2(n), evaluated on the number of observations.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the floor when the ceiling is even; otherwise use the ceiling.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Class width: the range divided by the class count, rounded up to an integer.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)

# Break points start at the minimum and advance in steps of w.
bins <- seq(min(distance), max(distance) + w, by = w)
bins
## [1] 1.40257 3.40257 5.40257 7.40257
# Bin the distances into the classes defined by `bins`. include.lowest = TRUE
# closes the first interval on the left: bins[1] is min(distance), so with the
# default (a, b] intervals that observation matched no class, became NA and
# the frequencies added up to n - 1 instead of n.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency of each class.
Freq_table <- transform(
  table(distance),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[1.4,3.4] 1 0.1428571 1
(3.4,5.4] 2 0.2857143 3
(5.4,7.4] 4 0.5714286 7
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ distance: Factor w/ 3 levels "[1.4,3.4]","(3.4,5.4]",..: 1 2 3
##  $ Freq    : int  1 2 4
##  $ Rel_Freq: num  0.143 0.286 0.571
##  $ Cum_Freq: int  1 3 7
# Two-column frame (class label, count) used by the bar chart that follows.
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[1.4,3.4] 1
(3.4,5.4] 2
(5.4,7.4] 4
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for every column of df_PR.
# NOTE(review): the whole data frame is passed, so the character columns come
# back as NA in the printed result. Consider restricting the call to the
# numeric columns of interest.
stat.desc(df_PR)
##                        id date time continent_code country_name country_code
## nbr.val      7.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          3.930000e+02   NA   NA             NA           NA           NA
## max          6.712000e+03   NA   NA             NA           NA           NA
## range        6.319000e+03   NA   NA             NA           NA           NA
## sum          3.649300e+04   NA   NA             NA           NA           NA
## median       6.709000e+03   NA   NA             NA           NA           NA
## mean         5.213286e+03   NA   NA             NA           NA           NA
## SE.mean      9.943773e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 2.433154e+03   NA   NA             NA           NA           NA
## var          6.921504e+06   NA   NA             NA           NA           NA
## std.dev      2.630875e+03   NA   NA             NA           NA           NA
## coef.var     5.046482e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 7.000000e+00   NA  7.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 9.440000e+02   NA  1.4025700                   NA
## max             NA 4.181400e+05   NA  6.9177700                   NA
## range           NA 4.171960e+05   NA  5.5152000                   NA
## sum             NA 4.498750e+05   NA 35.0032700                   NA
## median          NA 5.080000e+03   NA  5.7887200                   NA
## mean            NA 6.426786e+04   NA  5.0004671                   NA
## SE.mean         NA 5.899213e+04   NA  0.8215987                   NA
## CI.mean.0.95    NA 1.443485e+05   NA  2.0103796                   NA
## var             NA 2.436050e+10   NA  4.7251712                   NA
## std.dev         NA 1.560785e+05   NA  2.1737459                   NA
## coef.var        NA 2.428562e+00   NA  0.4347086                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      7.000000e+00  7.000000e+00          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.812540e+01 -6.717820e+01          NA          NA             NA
## max          1.843200e+01 -6.605100e+01          NA          NA             NA
## range        3.066000e-01  1.127200e+00          NA          NA             NA
## sum          1.277604e+02 -4.657880e+02          NA          NA             NA
## median       1.816520e+01 -6.647900e+01          NA          NA             NA
## mean         1.825149e+01 -6.654114e+01          NA          NA             NA
## SE.mean      5.137584e-02  1.336926e-01          NA          NA             NA
## CI.mean.0.95 1.257122e-01  3.271341e-01          NA          NA             NA
## var          1.847634e-02  1.251161e-01          NA          NA             NA
## std.dev      1.359277e-01  3.537175e-01          NA          NA             NA
## coef.var     7.447487e-03 -5.315771e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        5          6          NA
## nbr.null                 NA      NA         NA        5          6          NA
## nbr.na                   NA      NA         NA        2          1          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop         ypos
## nbr.val               NA   7.0000000    7.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   4.0069685    5.2214407
## max                   NA  19.7632107   97.9965158
## range                 NA  15.7562422   92.7750750
## sum                   NA 100.0000000  350.1495575
## median                NA  16.5376549   48.4479879
## mean                  NA  14.2857143   50.0213654
## SE.mean               NA   2.3472056   13.3443062
## CI.mean.0.95          NA   5.7434052   32.6523411
## var                   NA  38.5656195 1246.4935649
## std.dev               NA   6.2101223   35.3057158
## coef.var              NA   0.4347086    0.7058127
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Dominica

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the report.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is Dominica and preview them.
df_DO <- subset(df, country_name %in% "Dominica")
knitr::kable(head(df_DO))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
186 8/17/07 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.39516 NA 15.3379 -61.3610 (15.337899999999999, -61.360999999999997) Landslide Mudslide Small Tropical cyclone Hurricane Dean NA 2 Tribune India http://www.tribuneindia.com/2007/20070817/himachal.htm
250 9/9/07 NA NA Dominica DM Saint George 16571 Roseau 2.59849 NA 15.3055 -61.3642 (15.3055, -61.364199999999997) Landslide Landslide Medium Rain Tropical Wave NA NA RadioJamaica http://www.radiojamaica.com/content/view/1156/88/
1552 3/11/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.98646 NA 15.3356 -61.3312 (15.335599999999999, -61.331200000000003) Landslide Landslide Medium Rain NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1268397271_8827
1743 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 2.08997 NA 15.2454 -61.3017 (15.2454, -61.301699999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1744 4/12/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.78784 NA 15.4004 -61.3440 (15.400399999999999, -61.344000000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1745 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 4.08252 NA 15.2458 -61.2809 (15.245799999999999, -61.280900000000003) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per record, coloured by state.
ggplot(df_DO, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the bars stacked into a single column.
ggplot(df_DO, aes(x = country_name, y = distance, fill = state)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent around the y axis becomes a pie chart.
ggplot(df_DO, aes(x = country_name, y = distance, fill = state)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share (in %) of the total distance contributed by each record.
# ypos: midpoint of each record's slice on the cumulative scale, used to
#       position the percentage labels.
df_DO <- df_DO %>%
  arrange(desc(state)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_DO, aes(x = country_name, y = prop, fill = state)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" does not exist in ColorBrewer (it only triggered an
  # "Unknown palette" warning); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series
# (12 observations per unit of time) starting in 2008.
data <- ts(df_DO[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
3.39516
3.98646
3.78784
6.45930
4.20239
2.08997
# labs(colour = ) only sets the title of a colour legend; it never colours
# the line. The colour is passed to autoplot(), which forwards it to the
# line layer.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: every distance is labelled with the state of its record.
distance <- setNames(df_DO$distance, df_DO$state)
pareto.chart(
  distance,
  main = "Grafico de Pareto por estados",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##                
## Pareto chart analysis for distance
##                  Frequency  Cum.Freq. Percentage Cum.Percent.
##   Saint Paul      6.459300   6.459300  11.938173    11.938173
##   Saint John      5.929940  12.389240  10.959802    22.897975
##   Saint Patrick   5.614950  18.004190  10.377633    33.275607
##   Saint Patrick   5.116000  23.120190   9.455466    42.731073
##   Saint Paul      4.202390  27.322580   7.766919    50.497992
##   Saint Patrick   4.082520  31.405100   7.545373    58.043365
##   Saint Paul      3.986460  35.391560   7.367834    65.411199
##   Saint Paul      3.787840  39.179400   7.000741    72.411940
##   Saint Paul      3.395160  42.574560   6.274984    78.686925
##   Saint Andrew    2.648730  45.223290   4.895422    83.582346
##   Saint George    2.598490  47.821780   4.802567    88.384914
##   Saint Joseph    2.386050  50.207830   4.409933    92.794846
##   Saint Patrick   2.089970  52.297800   3.862713    96.657559
##   Saint Mark      1.808470  54.106270   3.342441   100.000000
stem(df_DO$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 8
##   2 | 1466
##   3 | 48
##   4 | 012
##   5 | 169
##   6 | 5
head(df_DO)
## # A tibble: 6 x 25
##      id date     time    continent_code country_name country_code state population
##   <dbl> <chr>    <chr>   <chr>          <chr>        <chr>        <chr>      <dbl>
## 1   186 8/17/07  <NA>    <NA>           Dominica     DM           Sain~        702
## 2  1552 3/11/10  <NA>    <NA>           Dominica     DM           Sain~        702
## 3  1744 4/12/10  <NA>    <NA>           Dominica     DM           Sain~        702
## 4  1754 4/16/10  <NA>    <NA>           Dominica     DM           Sain~        702
## 5  5715 12/24/13 2:00:00 <NA>           Dominica     DM           Sain~        702
## 6  1743 4/12/10  <NA>    <NA>           Dominica     DM           Sain~       2608
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_DO))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
186 8/17/07 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.39516 NA 15.3379 -61.3610 (15.337899999999999, -61.360999999999997) Landslide Mudslide Small Tropical cyclone Hurricane Dean NA 2 Tribune India http://www.tribuneindia.com/2007/20070817/himachal.htm 6.274984 3.137492
1552 3/11/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.98646 NA 15.3356 -61.3312 (15.335599999999999, -61.331200000000003) Landslide Landslide Medium Rain NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1268397271_8827 7.367834 9.958901
1744 4/12/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.78784 NA 15.4004 -61.3440 (15.400399999999999, -61.344000000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 7.000741 17.143189
1754 4/16/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 6.45930 NA 15.3211 -61.3124 (15.321099999999999, -61.312399999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.thejakartapost.com/news/2010/04/17/landslides-block-main-road-west-sumatra.html 11.938173 26.612646
5715 12/24/13 2:00:00 NA Dominica DM Saint Paul 702 Pont Cassé 4.20239 NA 15.3288 -61.3470 (15.328799999999999, -61.347000000000001) Landslide Mudslide Large Downpour NA NA 0 www.cdema.org NA 7.766919 36.465192
1743 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 2.08997 NA 15.2454 -61.3017 (15.2454, -61.301699999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 3.862713 42.280008
stem(df_DO$"distance")
## 
##   The decimal point is at the |
## 
##   1 | 8
##   2 | 1466
##   3 | 48
##   4 | 012
##   5 | 169
##   6 | 5
stem(df_DO$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   1 | 8
##   2 | 14
##   2 | 66
##   3 | 4
##   3 | 8
##   4 | 012
##   4 | 
##   5 | 1
##   5 | 69
##   6 | 
##   6 | 5

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.80847 1 7.1 7.1 7.1 7.1
2.08997 1 7.1 7.1 14.3 14.3
2.38605 1 7.1 7.1 21.4 21.4
2.59849 1 7.1 7.1 28.6 28.6
2.64873 1 7.1 7.1 35.7 35.7
3.39516 1 7.1 7.1 42.9 42.9
3.78784 1 7.1 7.1 50.0 50.0
3.98646 1 7.1 7.1 57.1 57.1
4.08252 1 7.1 7.1 64.3 64.3
4.20239 1 7.1 7.1 71.4 71.4
5.116 1 7.1 7.1 78.6 78.6
5.61495 1 7.1 7.1 85.7 85.7
5.92994 1 7.1 7.1 92.9 92.9
6.4593 1 7.1 7.1 100.0 100.0
Total 14 100.0 100.0 100.0 100.0
str(table)
## Classes 'freqtab' and 'data.frame':  15 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 ...
##  $ val%   : num  7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 ...
##  $ %cum   : num  7.1 14.3 21.4 28.6 35.7 42.9 50 57.1 64.3 71.4 ...
##  $ val%cum: num  7.1 14.3 21.4 28.6 35.7 42.9 50 57.1 64.3 71.4 ...
# Row labels and counts of the frequency table. The last row is the "Total"
# line and is dropped with head(., -1); unlike 1:(length(x) - 1), this also
# yields an empty result when the table holds nothing but the Total row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.80847 1
2.08997 1
2.38605 1
2.59849 1
2.64873 1
3.39516 1
3.78784 1
3.98646 1
4.08252 1
4.20239 1
5.116 1
5.61495 1
5.92994 1
6.4593 1
library(ggplot2)

# One bar per distinct distance value; tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept with its upper and lower integer neighbours.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even, in which case round down.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Data range, integer class width, and break points starting at the minimum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 1.80847 2.80847 3.80847 4.80847 5.80847 6.80847
# The first break equals min(distance). cut() builds (a, b] intervals by
# default, so without include.lowest = TRUE the smallest observation falls
# outside every class and is silently dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[1.81,2.81] 5 0.3571429 5
(2.81,3.81] 2 0.1428571 7
(3.81,4.81] 3 0.2142857 10
(4.81,5.81] 2 0.1428571 12
(5.81,6.81] 2 0.1428571 14
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ distance: Factor w/ 5 levels "[1.81,2.81]",..: 1 2 3 4 5
##  $ Freq    : int  5 2 3 2 2
##  $ Rel_Freq: num  0.357 0.143 0.214 0.143 0.143
##  $ Cum_Freq: int  5 7 10 12 14
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[1.81,2.81] 5
(2.81,3.81] 2
(3.81,4.81] 3
(4.81,5.81] 2
(5.81,6.81] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
stat.desc(df_DO)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      1.400000e+01   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.860000e+02   NA   NA             NA           NA           NA
## max          5.754000e+03   NA   NA             NA           NA           NA
## range        5.568000e+03   NA   NA             NA           NA           NA
## sum          3.792400e+04   NA   NA             NA           NA           NA
## median       1.746500e+03   NA   NA             NA           NA           NA
## mean         2.708857e+03   NA   NA             NA           NA           NA
## SE.mean      5.536192e+02   NA   NA             NA           NA           NA
## CI.mean.0.95 1.196022e+03   NA   NA             NA           NA           NA
## var          4.290919e+06   NA   NA             NA           NA           NA
## std.dev      2.071453e+03   NA   NA             NA           NA           NA
## coef.var     7.646964e-01   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 1.400000e+01   NA 14.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 7.020000e+02   NA  1.8084700                   NA
## max             NA 1.657100e+04   NA  6.4593000                   NA
## range           NA 1.586900e+04   NA  4.6508300                   NA
## sum             NA 3.749000e+04   NA 54.1062700                   NA
## median          NA 1.374000e+03   NA  3.8871500                   NA
## mean            NA 2.677857e+03   NA  3.8647336                   NA
## SE.mean         NA 1.099107e+03   NA  0.3969319                   NA
## CI.mean.0.95    NA 2.374475e+03   NA  0.8575192                   NA
## var             NA 1.691249e+07   NA  2.2057691                   NA
## std.dev         NA 4.112480e+03   NA  1.4851832                   NA
## coef.var        NA 1.535735e+00   NA  0.3842912                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val      1.400000e+01  1.400000e+01          NA          NA             NA
## nbr.null     0.000000e+00  0.000000e+00          NA          NA             NA
## nbr.na       0.000000e+00  0.000000e+00          NA          NA             NA
## min          1.521740e+01 -6.144100e+01          NA          NA             NA
## max          1.563080e+01 -6.126420e+01          NA          NA             NA
## range        4.134000e-01  1.768000e-01          NA          NA             NA
## sum          2.149263e+02 -8.587772e+02          NA          NA             NA
## median       1.532495e+01 -6.134550e+01          NA          NA             NA
## mean         1.535188e+01 -6.134123e+01          NA          NA             NA
## SE.mean      3.318744e-02  1.426224e-02          NA          NA             NA
## CI.mean.0.95 7.169709e-02  3.081170e-02          NA          NA             NA
## var          1.541968e-02  2.847761e-03          NA          NA             NA
## std.dev      1.241760e-01  5.336441e-02          NA          NA             NA
## coef.var     8.088653e-03 -8.699600e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1 13.0000000          NA
## nbr.null                 NA      NA         NA        1 12.0000000          NA
## nbr.na                   NA      NA         NA       13  1.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  2.0000000          NA
## range                    NA      NA         NA        0  2.0000000          NA
## sum                      NA      NA         NA        0  2.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.1538462          NA
## SE.mean                  NA      NA         NA       NA  0.1538462          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.3352020          NA
## var                      NA      NA         NA       NA  0.3076923          NA
## std.dev                  NA      NA         NA       NA  0.5547002          NA
## coef.var                 NA      NA         NA       NA  3.6055513          NA
##              source_link        prop       ypos
## nbr.val               NA  14.0000000  14.000000
## nbr.null              NA   0.0000000   0.000000
## nbr.na                NA   0.0000000   0.000000
## min                   NA   3.3424407   3.137492
## max                   NA  11.9381728  97.552289
## range                 NA   8.5957321  94.414797
## sum                   NA 100.0000000 732.865128
## median                NA   7.1842875  52.464802
## mean                  NA   7.1428571  52.347509
## SE.mean               NA   0.7336153   8.288438
## CI.mean.0.95          NA   1.5848796  17.906082
## var                   NA   7.5346807 961.774900
## std.dev               NA   2.7449373  31.012496
## coef.var              NA   0.3842912   0.592435
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Patrick (Dominica)

library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl  (7): id, population, distance, latitude, longitude, injuries, fatalities
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
# Give columns 7 and 9 the short names used by the rest of the report.
names(df)[c(7, 9)] <- c("state", "city")
# Keep only the rows whose country is Dominica and preview them.
df_DO <- subset(df, country_name %in% "Dominica")
knitr::kable(head(df_DO))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
186 8/17/07 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.39516 NA 15.3379 -61.3610 (15.337899999999999, -61.360999999999997) Landslide Mudslide Small Tropical cyclone Hurricane Dean NA 2 Tribune India http://www.tribuneindia.com/2007/20070817/himachal.htm
250 9/9/07 NA NA Dominica DM Saint George 16571 Roseau 2.59849 NA 15.3055 -61.3642 (15.3055, -61.364199999999997) Landslide Landslide Medium Rain Tropical Wave NA NA RadioJamaica http://www.radiojamaica.com/content/view/1156/88/
1552 3/11/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.98646 NA 15.3356 -61.3312 (15.335599999999999, -61.331200000000003) Landslide Landslide Medium Rain NA NA 0 NA http://stormcarib.com/reports/current/report.php?id=1268397271_8827
1743 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 2.08997 NA 15.2454 -61.3017 (15.2454, -61.301699999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1744 4/12/10 NA NA Dominica DM Saint Paul 702 Pont Cassé 3.78784 NA 15.4004 -61.3440 (15.400399999999999, -61.344000000000001) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1745 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 4.08252 NA 15.2458 -61.2809 (15.245799999999999, -61.280900000000003) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
library(dplyr)
# Restrict to the Saint Patrick parish of Dominica. Filtering the whole
# catalogue on `state` alone would also pick up any same-named division that
# another country might have.
df_DO <- subset(df, country_name == "Dominica" & state == "Saint Patrick")
knitr::kable(head(df_DO))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
1743 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 2.08997 NA 15.2454 -61.3017 (15.2454, -61.301699999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1745 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 4.08252 NA 15.2458 -61.2809 (15.245799999999999, -61.280900000000003) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1746 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 5.61495 NA 15.2556 -61.2697 (15.255599999999999, -61.2697) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html
1747 4/12/10 NA NA Dominica DM Saint Patrick 1332 La Plaine 5.11600 NA 15.2892 -61.2642 (15.289199999999999, -61.264200000000002) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html

Gráfico de barras agrupadas

library(ggplot2)
library(dplyr)
# Side-by-side bars: one bar per record, coloured by city.
ggplot(df_DO, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "dodge")

Gráfico de barras apiladas

library(ggplot2)
library(dplyr)
# Same data as the grouped chart, with the bars stacked into a single column.
ggplot(df_DO, aes(x = state, y = distance, fill = city)) +
  geom_col(position = "stack")

Gráfico circular

library(ggplot2)
library(dplyr)
# A full-width stacked bar bent around the y axis becomes a pie chart.
ggplot(df_DO, aes(x = state, y = distance, fill = city)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

library(ggplot2)
library(dplyr)
library(scales)

# prop: share (in %) of the total distance contributed by each record.
# ypos: midpoint of each record's slice on the cumulative scale, used to
#       position the percentage labels.
df_DO <- df_DO %>%
  arrange(desc(city)) %>%
  mutate(
    prop = distance / sum(distance) * 100,
    ypos = cumsum(prop) - 0.5 * prop
  )

ggplot(df_DO, aes(x = state, y = prop, fill = city)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(
    aes(y = ypos, label = percent(prop / 100)),
    color = "black",
    size = 3
  ) +
  # "Set4" does not exist in ColorBrewer (it only triggered an
  # "Unknown palette" warning); "Set3" is a valid qualitative palette.
  scale_fill_brewer(palette = "Set3")

Gráfico de series temporales

library(forecast)
# Treat the distances, in their current row order, as a monthly series
# (12 observations per unit of time) starting in 2008.
data <- ts(df_DO[["distance"]], start = 2008, frequency = 12)
knitr::kable(head(data))
x
5.11600
2.08997
4.08252
5.61495
# labs(colour = ) only sets the title of a colour legend; it never colours
# the line. The colour is passed to autoplot(), which forwards it to the
# line layer.
autoplot(data, colour = "green") +
  labs(title = "Gráfico de series temporales", x = "date", y = "distancia") +
  theme_bw()

Diagrama de Pareto

library(qcc)
# Named numeric vector: every distance is labelled with the city of its record.
distance <- setNames(df_DO$distance, df_DO$city)
pareto.chart(
  distance,
  main = "Grafico de Pareto por ciudades",
  ylab = "distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(distance))
)

##            
## Pareto chart analysis for distance
##             Frequency Cum.Freq. Percentage Cum.Percent.
##   Berekua     5.61495   5.61495   33.21779     33.21779
##   La Plaine   5.11600  10.73095   30.26603     63.48382
##   Berekua     4.08252  14.81347   24.15201     87.63583
##   Berekua     2.08997  16.90344   12.36417    100.00000
stem(df_DO$"distance")
## 
##   The decimal point is at the |
## 
##   2 | 1
##   3 | 
##   4 | 1
##   5 | 16
head(df_DO)
## # A tibble: 4 x 25
##      id date    time  continent_code country_name country_code state  population
##   <dbl> <chr>   <chr> <chr>          <chr>        <chr>        <chr>       <dbl>
## 1  1747 4/12/10 <NA>  <NA>           Dominica     DM           Saint~       1332
## 2  1743 4/12/10 <NA>  <NA>           Dominica     DM           Saint~       2608
## 3  1745 4/12/10 <NA>  <NA>           Dominica     DM           Saint~       2608
## 4  1746 4/12/10 <NA>  <NA>           Dominica     DM           Saint~       2608
## # ... with 17 more variables: city <chr>, distance <dbl>,
## #   location_description <chr>, latitude <dbl>, longitude <dbl>,
## #   geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## #   landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## #   fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## #   ypos <dbl>
knitr::kable(head(df_DO))
id date time continent_code country_name country_code state population city distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1747 4/12/10 NA NA Dominica DM Saint Patrick 1332 La Plaine 5.11600 NA 15.2892 -61.2642 (15.289199999999999, -61.264200000000002) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 30.26603 15.13301
1743 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 2.08997 NA 15.2454 -61.3017 (15.2454, -61.301699999999997) Landslide Landslide Medium Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 12.36417 36.44811
1745 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 4.08252 NA 15.2458 -61.2809 (15.245799999999999, -61.280900000000003) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 24.15201 54.70620
1746 4/12/10 NA NA Dominica DM Saint Patrick 2608 Berekua 5.61495 NA 15.2556 -61.2697 (15.255599999999999, -61.2697) Landslide Landslide Small Downpour NA NA 0 NA http://www.dominicacentral.com/general/community/heavy-overnight-rains-cause-landslides-across-island.html 33.21779 83.39110
stem(df_DO$"distance")
## 
##   The decimal point is at the |
## 
##   2 | 1
##   3 | 
##   4 | 1
##   5 | 16
stem(df_DO$"distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   2 | 1
##   2 | 
##   3 | 
##   3 | 
##   4 | 1
##   4 | 
##   5 | 1
##   5 | 6

Tablas de frecuencia

library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.08997 1 25 25 25 25
4.08252 1 25 25 50 50
5.116 1 25 25 75 75
5.61495 1 25 25 100 100
Total 4 100 100 100 100
str(table)
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
# Row labels and counts of the frequency table. The last row is the "Total"
# line and is dropped with head(., -1); unlike 1:(length(x) - 1), this also
# yields an empty result when the table holds nothing but the Total row.
x <- row.names(table)
y <- table$n
names <- head(x, -1)
freqs <- head(y, -1)
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.08997 1
4.08252 1
5.116 1
5.61495 1
library(ggplot2)
# One bar per distinct distance value; tick labels are rotated 90 degrees.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Distancia", y = "Frecuencia de deslizamientos") +
  theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada

# Sturges' rule: 1 + log2(n), kept with its upper and lower integer neighbours.
n_sturges <- 1 + log(length(distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)

# Use the rounded-up count unless it is even, in which case round down.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc

# Data range, integer class width, and break points starting at the minimum.
R <- max(distance) - min(distance)
w <- ceiling(R / n_clases)
bins <- seq(from = min(distance), to = max(distance) + w, by = w)
bins
## [1] 2.08997 4.08997 6.08997
# The first break equals min(distance). cut() builds (a, b] intervals by
# default, so without include.lowest = TRUE the smallest observation falls
# outside every class and is silently dropped from the table.
distance <- cut(distance, bins, include.lowest = TRUE)
# Absolute, relative and cumulative frequency per class.
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
distance Freq Rel_Freq Cum_Freq
[2.09,4.09] 2 0.5 2
(4.09,6.09] 2 0.5 4
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ distance: Factor w/ 2 levels "[2.09,4.09]",..: 1 2
##  $ Freq    : int  2 2
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  2 4
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
x y
[2.09,4.09] 2
(4.09,6.09] 2
library(ggplot2)

# Bar chart of the grouped frequency table: one bar per distance class.
ggplot(df, aes(x = x, y = y)) +
  geom_col(colour = "green", fill = "aquamarine") +
  labs(x = "Rango de deslizamiento", y = "Frecuencia")

library(pastecs)
# Descriptive statistics for the numeric columns that hold at least one
# observed value. Character columns only produce all-NA output, and a numeric
# column that is entirely NA makes min()/max()/qt() emit warnings, so both are
# left out of the summary.
stat.desc(
  Filter(function(col) is.numeric(col) && !all(is.na(col)), df_DO)
)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time continent_code country_name country_code
## nbr.val      4.000000e+00   NA   NA             NA           NA           NA
## nbr.null     0.000000e+00   NA   NA             NA           NA           NA
## nbr.na       0.000000e+00   NA   NA             NA           NA           NA
## min          1.743000e+03   NA   NA             NA           NA           NA
## max          1.747000e+03   NA   NA             NA           NA           NA
## range        4.000000e+00   NA   NA             NA           NA           NA
## sum          6.981000e+03   NA   NA             NA           NA           NA
## median       1.745500e+03   NA   NA             NA           NA           NA
## mean         1.745250e+03   NA   NA             NA           NA           NA
## SE.mean      8.539126e-01   NA   NA             NA           NA           NA
## CI.mean.0.95 2.717531e+00   NA   NA             NA           NA           NA
## var          2.916667e+00   NA   NA             NA           NA           NA
## std.dev      1.707825e+00   NA   NA             NA           NA           NA
## coef.var     9.785562e-04   NA   NA             NA           NA           NA
##              state   population city   distance location_description
## nbr.val         NA 4.000000e+00   NA  4.0000000                   NA
## nbr.null        NA 0.000000e+00   NA  0.0000000                   NA
## nbr.na          NA 0.000000e+00   NA  0.0000000                   NA
## min             NA 1.332000e+03   NA  2.0899700                   NA
## max             NA 2.608000e+03   NA  5.6149500                   NA
## range           NA 1.276000e+03   NA  3.5249800                   NA
## sum             NA 9.156000e+03   NA 16.9034400                   NA
## median          NA 2.608000e+03   NA  4.5992600                   NA
## mean            NA 2.289000e+03   NA  4.2258600                   NA
## SE.mean         NA 3.190000e+02   NA  0.7801972                   NA
## CI.mean.0.95    NA 1.015200e+03   NA  2.4829358                   NA
## var             NA 4.070440e+05   NA  2.4348309                   NA
## std.dev         NA 6.380000e+02   NA  1.5603945                   NA
## coef.var        NA 2.787243e-01   NA  0.3692490                   NA
##                  latitude     longitude geolocation hazard_type landslide_type
## nbr.val       4.000000000  4.000000e+00          NA          NA             NA
## nbr.null      0.000000000  0.000000e+00          NA          NA             NA
## nbr.na        0.000000000  0.000000e+00          NA          NA             NA
## min          15.245400000 -6.130170e+01          NA          NA             NA
## max          15.289200000 -6.126420e+01          NA          NA             NA
## range         0.043800000  3.750000e-02          NA          NA             NA
## sum          61.036000000 -2.451165e+02          NA          NA             NA
## median       15.250700000 -6.127530e+01          NA          NA             NA
## mean         15.259000000 -6.127913e+01          NA          NA             NA
## SE.mean       0.010339246  8.288383e-03          NA          NA             NA
## CI.mean.0.95  0.032904094  2.637733e-02          NA          NA             NA
## var           0.000427600  2.747892e-04          NA          NA             NA
## std.dev       0.020678491  1.657677e-02          NA          NA             NA
## coef.var      0.001355167 -2.705124e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0          4          NA
## nbr.null                 NA      NA         NA        0          4          NA
## nbr.na                   NA      NA         NA        4          0          NA
## min                      NA      NA         NA      Inf          0          NA
## max                      NA      NA         NA     -Inf          0          NA
## range                    NA      NA         NA     -Inf          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA       NA          0          NA
## mean                     NA      NA         NA      NaN          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link       prop        ypos
## nbr.val               NA   4.000000   4.0000000
## nbr.null              NA   0.000000   0.0000000
## nbr.na                NA   0.000000   0.0000000
## min                   NA  12.364170  15.1330143
## max                   NA  33.217795  83.3911026
## range                 NA  20.853625  68.2580883
## sum                   NA 100.000000 189.6784323
## median                NA  27.209018  45.5771577
## mean                  NA  25.000000  47.4196081
## SE.mean               NA   4.615612  14.4621365
## CI.mean.0.95          NA  14.688938  46.0249728
## var                   NA  85.215503 836.6135657
## std.dev               NA   9.231224  28.9242729
## coef.var              NA   0.369249   0.6099644
# Horizontal box plot of `data`, drawn in green.
# NOTE(review): `data` is not defined in this part of the file; presumably it
# is created earlier - confirm it is not the base function utils::data().
boxplot(
  data,
  horizontal = TRUE,
  col = "green"
)

Conclusión