Introduccion

En el siguiente informe, se analizarán los datos de deslizamiento para los países suramericanos, teniendo en cuenta estados y ciudades muestra . Dentro de este análisis estadístico se utilizaron diferentes tipos de métodos estadísticos tales como: Gráficos de barras y circulares, gráficos apilados, diagramas de tallo y hoja, gráficos de series temporales, tablas de frecuencias,datos estadísticos y diagramas de caja y extensión. Estos diferentes gráficos y diagramas, nos brindarán un panorama general pero en detalle, del desarrollo de los deslizamientos en cada país y la manera en cómo pueden llegar a afectar la población de dichas zonas. El objetivo de este informe es demostrar la utilidad de las estadísticas, para categorizar, organizar y presentar de manera clara datos; para que así estos tengan un significado entendible.

Base de datos

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")

Declaracion de variables

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
library(ggplot2)
df_SA <- subset (df, America == "SA")
df_Ca <- subset (df, State == "Cauca")
df_C <- subset (df, State == "Córdoba")
df_Mag <- subset (df, State == "Magdalena")
df_Nar <- subset (df, State == "Nariño")
df_Meta <- subset (df, State == "Meta")
df_Put <- subset (df, State == "Putumayo")
df_San <- subset (df, State == "Santander")
df_Ant <- subset (df, State == "Antioquia")
df_Nor <- subset (df, State == "Norte de Santander")
df_CostaRica <- subset (df, Country == "Costa Rica")
df_Alajuela <- subset (df, State == "Alajuela")
df_Cartago <- subset (df, State == "Cartago")
df_Guanacaste <- subset (df, State == "Guanacaste")
df_Heredia <- subset (df, State == "Heredia")
df_Mir <- subset (df, State == "Miranda")
df_pi <- subset (df, State == "Pichincha")
df_za <- subset (df, State == "Zamora-Chinchipe")
df_Venezuela <- subset (df, Country == "Venezuela")
df_var <- subset (df, State == "Vargas")
df_DF <- subset (df, State == "Distrito Federal")
df_Peru <- subset (df, Country == "Peru")
df_Ansc <- subset (df, State == "Ancash")
df_La  <- subset (df, State == "La Libertad")

knitr::kable(head(df_SA))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
8 77 5/21/07 SA Colombia CO Risaralda 440118 Pereira 0.62022 4.8081 -75.6941 (4.8080999999999996, -75.694100000000006) Landslide Mudslide Large Rain NA 13 Reuters - AlertNet.org http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
9 105 6/27/07 SA Ecuador EC Zamora-Chinchipe 15276 Zamora 0.47714 -4.0650 -78.9510 (-4.0650000000000004, -78.950999999999993) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
10 106 6/27/07 SA Ecuador EC Loja 117796 Loja 0.35649 -3.9900 -79.2050 (-3.99, -79.204999999999998) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
11 107 6/27/07 SA Ecuador EC Pichincha 5114 Sangolquí 33.94603 -0.3560 -78.1480 (-0.35599999999999998, -78.147999999999996) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
49 307 10/13/07 SA Colombia CO Cauca 9985 Suárez 8.46579 2.9437 -76.7719 (2.9437000000000002, -76.771900000000002) Landslide Mudslide Large Continuous rain NA 24 Reuters - AlertNet.org http://www.reuters.com/article/newsOne/idUSN1329387220071013
70 397 12/19/07 SA Colombia CO Tolima 4892 Ambalema 6.96130 4.8470 -74.7631 (4.8470000000000004, -74.763099999999994) Landslide Landslide Large Rain NA NA Indiamuslims.info http://www.indiamuslims.info/news/2007/dec/20/eight_people_rescued_colombian_landslide.html
head(df_SA %>% 
  select(Country, State, City, Distance, date))
##     Country            State       City Distance     date
## 8  Colombia        Risaralda    Pereira  0.62022  5/21/07
## 9   Ecuador Zamora-Chinchipe     Zamora  0.47714  6/27/07
## 10  Ecuador             Loja       Loja  0.35649  6/27/07
## 11  Ecuador        Pichincha Sangolquí 33.94603  6/27/07
## 49 Colombia            Cauca    Suárez  8.46579 10/13/07
## 70 Colombia           Tolima   Ambalema  6.96130 12/19/07

Deslizamientos en el continente sur americano.

Pais colombia

library(ggplot2)
library(readr)
library(knitr)
df_Col <- subset (df, Country == "Colombia")
knitr::kable(head(df_Col,n=4))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
8 77 5/21/07 SA Colombia CO Risaralda 440118 Pereira 0.62022 4.8081 -75.6941 (4.8080999999999996, -75.694100000000006) Landslide Mudslide Large Rain NA 13 Reuters - AlertNet.org http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
49 307 10/13/07 SA Colombia CO Cauca 9985 Suárez 8.46579 2.9437 -76.7719 (2.9437000000000002, -76.771900000000002) Landslide Mudslide Large Continuous rain NA 24 Reuters - AlertNet.org http://www.reuters.com/article/newsOne/idUSN1329387220071013
70 397 12/19/07 SA Colombia CO Tolima 4892 Ambalema 6.96130 4.8470 -74.7631 (4.8470000000000004, -74.763099999999994) Landslide Landslide Large Rain NA NA Indiamuslims.info http://www.indiamuslims.info/news/2007/dec/20/eight_people_rescued_colombian_landslide.html
103 562 5/31/08 SA Colombia CO Antioquia 1999979 Medellín 5.12170 6.2746 -75.6039 (6.2746000000000004, -75.603899999999996) Landslide Complex Large Downpour NA 27 http://english.people.com.cn/90001/90777/90852/6422291.html
head(df_Col %>% 
  select(Country, State, City, Distance, date))
##      Country              State      City Distance     date
## 8   Colombia          Risaralda   Pereira  0.62022  5/21/07
## 49  Colombia              Cauca   Suárez  8.46579 10/13/07
## 70  Colombia             Tolima  Ambalema  6.96130 12/19/07
## 103 Colombia          Antioquia Medellín  5.12170  5/31/08
## 110 Colombia Norte de Santander  Hacarí   0.38844  6/24/08
## 117 Colombia       Cundinamarca   Quetame  8.58891  7/14/08
ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
  geom_bar(position="dodge", stat="identity")

Deslizamiento por estado o departamento.

ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
  geom_bar(position="dodge", stat="identity")

ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
  geom_bar(position="stack", stat="identity")

RISARALDA:

Deslizamientos de las ciudades de Risalda
library(readr)
library(knitr)
df_ri <- subset (df, State == "Risaralda")
df_ri %>% 
  select(Country, State, City, Distance, date) 
##      Country     State          City Distance     date
## 8   Colombia Risaralda       Pereira  0.62022  5/21/07
## 454 Colombia Risaralda         Apía  8.18229  9/27/10
## 517 Colombia Risaralda     Quinchía  0.11421 11/13/10
## 855 Colombia Risaralda Dos Quebradas  0.74201 12/23/11
head(df_ri)
##       id     date    time America  Country country_code     State population
## 8     77  5/21/07              SA Colombia           CO Risaralda     440118
## 454 2507  9/27/10 3:00:00      SA Colombia           CO Risaralda       6940
## 517 2720 11/13/10              SA Colombia           CO Risaralda      10895
## 855 4104 12/23/11              SA Colombia           CO Risaralda     179583
##              City Distance location_description latitude longitude
## 8         Pereira  0.62022                        4.8081  -75.6941
## 454         Apía  8.18229                        5.1749  -75.9712
## 517     Quinchía  0.11421                        5.3391  -75.7311
## 855 Dos Quebradas  0.74201                        4.8349  -75.6621
##                                   geolocation hazard_type landslide_type
## 8   (4.8080999999999996, -75.694100000000006)   Landslide       Mudslide
## 454 (5.1749000000000001, -75.971199999999996)   Landslide      Landslide
## 517 (5.3391000000000002, -75.731099999999998)   Landslide       Mudslide
## 855 (4.8349000000000002, -75.662099999999995)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities
## 8            Large     Rain                  NA         13
## 454         Medium Downpour                  NA          4
## 517         Medium Downpour                  NA          4
## 855         Medium Downpour                  NA          0
##                source_name
## 8   Reuters - AlertNet.org
## 454                       
## 517                       
## 855                       
##                                                                              source_link
## 8         http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
## 454                    http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393
## 517                    http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
## 855 http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224
ggplot(data=df_ri, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_ri, aes(x = "Risaralda", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_ri <- df_ri %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_ri$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)

df_ri <- df_ri %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_ri$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
ggplot(df_ri, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Lugares con mayor concentracion de desizamientos.
library(qcc)
## Warning: package 'qcc' was built under R version 4.1.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
Distance <- df_ri$Distance
names(Distance) <- df_ri$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                
## Pareto chart analysis for Distance
##                  Frequency  Cum.Freq. Percentage Cum.Percent.
##   Apía           8.182290   8.182290  84.713932    84.713932
##   Dos Quebradas   0.742010   8.924300   7.682273    92.396205
##   Pereira         0.620220   9.544520   6.421341    98.817546
##   Quinchía       0.114210   9.658730   1.182454   100.000000
Diagrama de tallo y hojas
stem(df_ri$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 167
##   2 | 
##   4 | 
##   6 | 
##   8 | 2
head(df_ri)
##     id     date    time America  Country country_code     State population
## 1 2720 11/13/10              SA Colombia           CO Risaralda      10895
## 2   77  5/21/07              SA Colombia           CO Risaralda     440118
## 3 4104 12/23/11              SA Colombia           CO Risaralda     179583
## 4 2507  9/27/10 3:00:00      SA Colombia           CO Risaralda       6940
##            City Distance location_description latitude longitude
## 1     Quinchía  0.11421                        5.3391  -75.7311
## 2       Pereira  0.62022                        4.8081  -75.6941
## 3 Dos Quebradas  0.74201                        4.8349  -75.6621
## 4         Apía  8.18229                        5.1749  -75.9712
##                                 geolocation hazard_type landslide_type
## 1 (5.3391000000000002, -75.731099999999998)   Landslide       Mudslide
## 2 (4.8080999999999996, -75.694100000000006)   Landslide       Mudslide
## 3 (4.8349000000000002, -75.662099999999995)   Landslide      Landslide
## 4 (5.1749000000000001, -75.971199999999996)   Landslide      Landslide
##   landslide_size  trigger storm_name injuries fatalities            source_name
## 1         Medium Downpour                  NA          4                       
## 2          Large     Rain                  NA         13 Reuters - AlertNet.org
## 3         Medium Downpour                  NA          0                       
## 4         Medium Downpour                  NA          4                       
##                                                                            source_link
## 1                    http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
## 2       http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
## 3 http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224
## 4                    http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393
##        prop       ypos
## 1  1.182454  0.5912268
## 2  6.421341  4.3931241
## 3  7.682273 11.4449312
## 4 84.713932 57.6430338
knitr::kable(head(df_ri))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2720 11/13/10 SA Colombia CO Risaralda 10895 Quinchía 0.11421 5.3391 -75.7311 (5.3391000000000002, -75.731099999999998) Landslide Mudslide Medium Downpour NA 4 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393 1.182454 0.5912268
77 5/21/07 SA Colombia CO Risaralda 440118 Pereira 0.62022 4.8081 -75.6941 (4.8080999999999996, -75.694100000000006) Landslide Mudslide Large Rain NA 13 Reuters - AlertNet.org http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23 6.421341 4.3931241
4104 12/23/11 SA Colombia CO Risaralda 179583 Dos Quebradas 0.74201 4.8349 -75.6621 (4.8349000000000002, -75.662099999999995) Landslide Landslide Medium Downpour NA 0 http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224 7.682273 11.4449312
2507 9/27/10 3:00:00 SA Colombia CO Risaralda 6940 Apía 8.18229 5.1749 -75.9712 (5.1749000000000001, -75.971199999999996) Landslide Landslide Medium Downpour NA 4 http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393 84.713932 57.6430338
stem(df_ri$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 167
##   2 | 
##   4 | 
##   6 | 
##   8 | 2
stem(df_ri$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 167
##   1 | 
##   2 | 
##   3 | 
##   4 | 
##   5 | 
##   6 | 
##   7 | 
##   8 | 2
Gráfico de series temporales
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
data_serie<- ts(df_ri$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr
## 2007 0.11421 0.62022 0.74201 8.18229
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)
## Warning: package 'questionr' was built under R version 4.1.1
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.11421 1 25 25 25 25
0.62022 1 25 25 50 50
0.74201 1 25 25 75 75
8.18229 1 25 25 100 100
Total 4 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.11421 1
0.62022 1
0.74201 1
8.18229 1
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.11421 3.11421 6.11421 9.11421
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.11421 1 0.25 1
0.62022 1 0.25 2
0.74201 1 0.25 3
8.18229 1 0.25 4
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ Distance: Factor w/ 4 levels "0.11421","0.62022",..: 1 2 3 4
##  $ Freq    : int  1 1 1 1
##  $ Rel_Freq: num  0.25 0.25 0.25 0.25
##  $ Cum_Freq: int  1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.11421 1
0.62022 1
0.74201 1
8.18229 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

###### Estadísticos - Personas Afectadas por Deslizamiento

summary(df_ri$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1142  0.4937  0.6811  2.4147  2.6021  8.1823
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
stat.desc(df_ri)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      4.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          7.700000e+01   NA   NA      NA      NA           NA    NA
## max          4.104000e+03   NA   NA      NA      NA           NA    NA
## range        4.027000e+03   NA   NA      NA      NA           NA    NA
## sum          9.408000e+03   NA   NA      NA      NA           NA    NA
## median       2.613500e+03   NA   NA      NA      NA           NA    NA
## mean         2.352000e+03   NA   NA      NA      NA           NA    NA
## SE.mean      8.368880e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 2.663351e+03   NA   NA      NA      NA           NA    NA
## var          2.801526e+06   NA   NA      NA      NA           NA    NA
## std.dev      1.673776e+03   NA   NA      NA      NA           NA    NA
## coef.var     7.116394e-01   NA   NA      NA      NA           NA    NA
##                population City  Distance location_description    latitude
## nbr.val      4.000000e+00   NA  4.000000                   NA  4.00000000
## nbr.null     0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na       0.000000e+00   NA  0.000000                   NA  0.00000000
## min          6.940000e+03   NA  0.114210                   NA  4.80810000
## max          4.401180e+05   NA  8.182290                   NA  5.33910000
## range        4.331780e+05   NA  8.068080                   NA  0.53100000
## sum          6.375360e+05   NA  9.658730                   NA 20.15700000
## median       9.523900e+04   NA  0.681115                   NA  5.00490000
## mean         1.593840e+05   NA  2.414683                   NA  5.03925000
## SE.mean      1.018609e+05   NA  1.927334                   NA  0.13022423
## CI.mean.0.95 3.241669e+05   NA  6.133637                   NA  0.41443161
## var          4.150258e+10   NA 14.858466                   NA  0.06783340
## std.dev      2.037218e+05   NA  3.854668                   NA  0.26044845
## coef.var     1.278182e+00   NA  1.596346                   NA  0.05168397
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.597120e+01          NA          NA             NA
## max          -7.566210e+01          NA          NA             NA
## range         3.091000e-01          NA          NA             NA
## sum          -3.030585e+02          NA          NA             NA
## median       -7.571260e+01          NA          NA             NA
## mean         -7.576462e+01          NA          NA             NA
## SE.mean       7.028650e-02          NA          NA             NA
## CI.mean.0.95  2.236830e-01          NA          NA             NA
## var           1.976077e-02          NA          NA             NA
## std.dev       1.405730e-01          NA          NA             NA
## coef.var     -1.855391e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   4.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        4   0.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf  13.000000          NA
## range                    NA      NA         NA     -Inf  13.000000          NA
## sum                      NA      NA         NA        0  21.000000          NA
## median                   NA      NA         NA       NA   4.000000          NA
## mean                     NA      NA         NA      NaN   5.250000          NA
## SE.mean                  NA      NA         NA       NA   2.750000          NA
## CI.mean.0.95             NA      NA         NA      NaN   8.751727          NA
## var                      NA      NA         NA       NA  30.250000          NA
## std.dev                  NA      NA         NA       NA   5.500000          NA
## coef.var                 NA      NA         NA       NA   1.047619          NA
##              source_link        prop        ypos
## nbr.val               NA    4.000000   4.0000000
## nbr.null              NA    0.000000   0.0000000
## nbr.na                NA    0.000000   0.0000000
## min                   NA    1.182454   0.5912268
## max                   NA   84.713932  57.6430338
## range                 NA   83.531479  57.0518070
## sum                   NA  100.000000  74.0723159
## median                NA    7.051807   7.9190277
## mean                  NA   25.000000  18.5180790
## SE.mean               NA   19.954322  13.2340404
## CI.mean.0.95          NA   63.503557  42.1166228
## var                   NA 1592.699824 700.5592963
## std.dev               NA   39.908643  26.4680807
## coef.var              NA    1.596346   1.4293103
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.3     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v purrr   0.3.4
## Warning: package 'tidyr' was built under R version 4.1.1
## Warning: package 'forcats' was built under R version 4.1.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x scales::col_factor() masks readr::col_factor()
## x purrr::discard()     masks scales::discard()
## x tidyr::extract()     masks pastecs::extract()
## x dplyr::filter()      masks stats::filter()
## x pastecs::first()     masks dplyr::first()
## x dplyr::lag()         masks stats::lag()
## x pastecs::last()      masks dplyr::last()
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.1.1
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.1
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Deslizamientos en Antioquia_colombia

df_Ant %>% 
  select(Country, State, City, Distance, date) 
##       Country     State                    City Distance     date
## 103  Colombia Antioquia               Medellín  5.12170  5/31/08
## 119  Colombia Antioquia                Briceño  6.44532  7/17/08
## 133  Colombia Antioquia                  Caldas  1.27637  8/18/08
## 163  Colombia Antioquia               Medellín  4.09028 11/16/08
## 455  Colombia Antioquia                 Giraldo  2.55282  9/27/10
## 518  Colombia Antioquia               Medellín  2.18776 11/13/10
## 528  Colombia Antioquia                Cáceres  4.29197 11/20/10
## 553  Colombia Antioquia                   Bello  2.04898  12/5/10
## 629  Colombia Antioquia               Medellín  4.19867  4/10/11
## 640  Colombia Antioquia      Santa Rosa de Osos 18.91189  4/13/11
## 658  Colombia Antioquia Municipio de Copacabana  7.98838  4/22/11
## 659  Colombia Antioquia                Frontino  5.06960  4/23/11
## 821  Colombia Antioquia               Medellín  1.73101  9/27/11
## 826  Colombia Antioquia         Ciudad Bolívar 24.48479 10/15/11
## 859  Colombia Antioquia                Envigado  2.07081 12/30/11
## 876  Colombia Antioquia                  Amalfi 11.11685  4/12/12
## 1362 Colombia Antioquia                 Barbosa  7.78677 11/14/14
## 1457 Colombia Antioquia                  Salgar  3.09014  5/18/15
## 1669 Colombia Antioquia                 Mutatá 15.04256   5/9/15
head(df_Ant)
##       id     date time America  Country country_code     State population
## 103  562  5/31/08           SA Colombia           CO Antioquia    1999979
## 119  650  7/17/08           SA Colombia           CO Antioquia       2214
## 133  728  8/18/08           SA Colombia           CO Antioquia      65565
## 163  889 11/16/08           SA Colombia           CO Antioquia    1999979
## 455 2508  9/27/10           SA Colombia           CO Antioquia       1464
## 518 2721 11/13/10           SA Colombia           CO Antioquia    1999979
##          City Distance location_description latitude longitude
## 103 Medellín  5.12170                        6.2746  -75.6039
## 119  Briceño  6.44532          Burned area   7.1600  -75.5200
## 133    Caldas  1.27637                        6.0800  -75.6390
## 163 Medellín  4.09028                        6.2170  -75.5760
## 455   Giraldo  2.55282                        6.7060  -75.9917
## 518 Medellín  2.18776                        6.2674  -75.5758
##                                   geolocation hazard_type landslide_type
## 103 (6.2746000000000004, -75.603899999999996)   Landslide        Complex
## 119                            (7.16, -75.52)   Landslide      Landslide
## 133               (6.08, -75.638999999999996)   Landslide      Landslide
## 163 (6.2169999999999996, -75.575999999999993)   Landslide       Mudslide
## 455 (6.7060000000000004, -75.991699999999994)   Landslide      Landslide
## 518 (6.2674000000000003, -75.575800000000001)   Landslide       Mudslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 103          Large Downpour                  NA         27            
## 119         Medium     Rain                  NA          8            
## 133         Medium     Rain                  NA         NA            
## 163          Large Downpour                  NA          8            
## 455          Large Downpour                  NA          9            
## 518         Medium Downpour                  NA          2            
##                                                                                source_link
## 103                            http://english.people.com.cn/90001/90777/90852/6422291.html
## 119        http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/
## 133 http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/
## 163 http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm
## 455                             http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related
## 518                      http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
ggplot(data=df_Ant, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Ant, aes(x = "Antioquia", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Ant,aes(x="Antioquia",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=6)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Ant$Distance
names(Distance) <- df_Ant$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                          
## Pareto chart analysis for Distance
##                             Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Ciudad Bolívar          24.4847900  24.4847900  18.9062000   18.9062000
##   Santa Rosa de Osos       18.9118900  43.3966800  14.6030239   33.5092239
##   Mutatá                  15.0425600  58.4392400  11.6152782   45.1245021
##   Amalfi                   11.1168500  69.5560900   8.5839980   53.7085001
##   Municipio de Copacabana   7.9883800  77.5444700   6.1683155   59.8768156
##   Barbosa                   7.7867700  85.3312400   6.0126401   65.8894557
##   Briceño                  6.4453200  91.7765600   4.9768247   70.8662805
##   Medellín                 5.1217000  96.8982600   3.9547770   74.8210575
##   Frontino                  5.0696000 101.9678600   3.9145474   78.7356049
##   Cáceres                  4.2919700 106.2598300   3.3140919   82.0496967
##   Medellín                 4.1986700 110.4585000   3.2420492   85.2917460
##   Medellín                 4.0902800 114.5487800   3.1583547   88.4501007
##   Salgar                    3.0901400 117.6389200   2.3860856   90.8361863
##   Giraldo                   2.5528200 120.1917400   1.9711880   92.8073743
##   Medellín                 2.1877600 122.3795000   1.6893030   94.4966773
##   Envigado                  2.0708100 124.4503100   1.5989987   96.0956760
##   Bello                     2.0489800 126.4992900   1.5821424   97.6778184
##   Medellín                 1.7310100 128.2303000   1.3366184   99.0144369
##   Caldas                    1.2763700 129.5066700   0.9855631  100.0000000
Diagrama de tallo y hojas
stem(df_Ant$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1222233444
##   0 | 55688
##   1 | 1
##   1 | 59
##   2 | 4
head(df_Ant)
##       id     date time America  Country country_code     State population
## 103  562  5/31/08           SA Colombia           CO Antioquia    1999979
## 119  650  7/17/08           SA Colombia           CO Antioquia       2214
## 133  728  8/18/08           SA Colombia           CO Antioquia      65565
## 163  889 11/16/08           SA Colombia           CO Antioquia    1999979
## 455 2508  9/27/10           SA Colombia           CO Antioquia       1464
## 518 2721 11/13/10           SA Colombia           CO Antioquia    1999979
##          City Distance location_description latitude longitude
## 103 Medellín  5.12170                        6.2746  -75.6039
## 119  Briceño  6.44532          Burned area   7.1600  -75.5200
## 133    Caldas  1.27637                        6.0800  -75.6390
## 163 Medellín  4.09028                        6.2170  -75.5760
## 455   Giraldo  2.55282                        6.7060  -75.9917
## 518 Medellín  2.18776                        6.2674  -75.5758
##                                   geolocation hazard_type landslide_type
## 103 (6.2746000000000004, -75.603899999999996)   Landslide        Complex
## 119                            (7.16, -75.52)   Landslide      Landslide
## 133               (6.08, -75.638999999999996)   Landslide      Landslide
## 163 (6.2169999999999996, -75.575999999999993)   Landslide       Mudslide
## 455 (6.7060000000000004, -75.991699999999994)   Landslide      Landslide
## 518 (6.2674000000000003, -75.575800000000001)   Landslide       Mudslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 103          Large Downpour                  NA         27            
## 119         Medium     Rain                  NA          8            
## 133         Medium     Rain                  NA         NA            
## 163          Large Downpour                  NA          8            
## 455          Large Downpour                  NA          9            
## 518         Medium Downpour                  NA          2            
##                                                                                source_link
## 103                            http://english.people.com.cn/90001/90777/90852/6422291.html
## 119        http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/
## 133 http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/
## 163 http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm
## 455                             http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related
## 518                      http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
knitr::kable(head(df_Ant))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
103 562 5/31/08 SA Colombia CO Antioquia 1999979 Medellín 5.12170 6.2746 -75.6039 (6.2746000000000004, -75.603899999999996) Landslide Complex Large Downpour NA 27 http://english.people.com.cn/90001/90777/90852/6422291.html
119 650 7/17/08 SA Colombia CO Antioquia 2214 Briceño 6.44532 Burned area 7.1600 -75.5200 (7.16, -75.52) Landslide Landslide Medium Rain NA 8 http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/
133 728 8/18/08 SA Colombia CO Antioquia 65565 Caldas 1.27637 6.0800 -75.6390 (6.08, -75.638999999999996) Landslide Landslide Medium Rain NA NA http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/
163 889 11/16/08 SA Colombia CO Antioquia 1999979 Medellín 4.09028 6.2170 -75.5760 (6.2169999999999996, -75.575999999999993) Landslide Mudslide Large Downpour NA 8 http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm
455 2508 9/27/10 SA Colombia CO Antioquia 1464 Giraldo 2.55282 6.7060 -75.9917 (6.7060000000000004, -75.991699999999994) Landslide Landslide Large Downpour NA 9 http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related
518 2721 11/13/10 SA Colombia CO Antioquia 1999979 Medellín 2.18776 6.2674 -75.5758 (6.2674000000000003, -75.575800000000001) Landslide Mudslide Medium Downpour NA 2 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
stem(df_Ant$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1222233444
##   0 | 55688
##   1 | 1
##   1 | 59
##   2 | 4
stem(df_Ant$"Distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1222233444
##   0 | 55688
##   1 | 1
##   1 | 59
##   2 | 4
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Ant$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr     May     Jun
## 2007 5.12170 6.44532 1.27637 4.09028 2.55282 2.18776
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.27637 1 5.3 5.3 5.3 5.3
1.73101 1 5.3 5.3 10.5 10.5
2.04898 1 5.3 5.3 15.8 15.8
2.07081 1 5.3 5.3 21.1 21.1
2.18776 1 5.3 5.3 26.3 26.3
2.55282 1 5.3 5.3 31.6 31.6
3.09014 1 5.3 5.3 36.8 36.8
4.09028 1 5.3 5.3 42.1 42.1
4.19867 1 5.3 5.3 47.4 47.4
4.29197 1 5.3 5.3 52.6 52.6
5.0696 1 5.3 5.3 57.9 57.9
5.1217 1 5.3 5.3 63.2 63.2
6.44532 1 5.3 5.3 68.4 68.4
7.78677 1 5.3 5.3 73.7 73.7
7.98838 1 5.3 5.3 78.9 78.9
11.11685 1 5.3 5.3 84.2 84.2
15.04256 1 5.3 5.3 89.5 89.5
18.91189 1 5.3 5.3 94.7 94.7
24.48479 1 5.3 5.3 100.0 100.0
Total 19 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  20 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 ...
##  $ val%   : num  5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 ...
##  $ %cum   : num  5.3 10.5 15.8 21.1 26.3 31.6 36.8 42.1 47.4 52.6 ...
##  $ val%cum: num  5.3 10.5 15.8 21.1 26.3 31.6 36.8 42.1 47.4 52.6 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.27637 1
1.73101 1
2.04898 1
2.07081 1
2.18776 1
2.55282 1
3.09014 1
4.09028 1
4.19867 1
4.29197 1
5.0696 1
5.1217 1
6.44532 1
7.78677 1
7.98838 1
11.11685 1
15.04256 1
18.91189 1
24.48479 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  1.27637  6.27637 11.27637 16.27637 21.27637 26.27637
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
1.27637 1 0.0526316 1
1.73101 1 0.0526316 2
2.04898 1 0.0526316 3
2.07081 1 0.0526316 4
2.18776 1 0.0526316 5
2.55282 1 0.0526316 6
3.09014 1 0.0526316 7
4.09028 1 0.0526316 8
4.19867 1 0.0526316 9
4.29197 1 0.0526316 10
5.0696 1 0.0526316 11
5.1217 1 0.0526316 12
6.44532 1 0.0526316 13
7.78677 1 0.0526316 14
7.98838 1 0.0526316 15
11.11685 1 0.0526316 16
15.04256 1 0.0526316 17
18.91189 1 0.0526316 18
24.48479 1 0.0526316 19
str(Freq_table)
## 'data.frame':    19 obs. of  4 variables:
##  $ Distance: Factor w/ 19 levels "1.27637","1.73101",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Freq    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Rel_Freq: num  0.0526 0.0526 0.0526 0.0526 0.0526 ...
##  $ Cum_Freq: int  1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
1.27637 1
1.73101 1
2.04898 1
2.07081 1
2.18776 1
2.55282 1
3.09014 1
4.09028 1
4.19867 1
4.29197 1
5.0696 1
5.1217 1
6.44532 1
7.78677 1
7.98838 1
11.11685 1
15.04256 1
18.91189 1
24.48479 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Ant$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.276   2.370   4.292   6.816   7.888  24.485
library(pastecs)
stat.desc(df_Ant)
##                        id date time America Country country_code State
## nbr.val      1.900000e+01   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          5.620000e+02   NA   NA      NA      NA           NA    NA
## max          7.513000e+03   NA   NA      NA      NA           NA    NA
## range        6.951000e+03   NA   NA      NA      NA           NA    NA
## sum          6.463000e+04   NA   NA      NA      NA           NA    NA
## median       3.362000e+03   NA   NA      NA      NA           NA    NA
## mean         3.401579e+03   NA   NA      NA      NA           NA    NA
## SE.mean      4.626486e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 9.719887e+02   NA   NA      NA      NA           NA    NA
## var          4.066831e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.016639e+03   NA   NA      NA      NA           NA    NA
## coef.var     5.928537e-01   NA   NA      NA      NA           NA    NA
##                population City    Distance location_description    latitude
## nbr.val      1.900000e+01   NA  19.0000000                   NA  19.0000000
## nbr.null     0.000000e+00   NA   0.0000000                   NA   0.0000000
## nbr.na       0.000000e+00   NA   0.0000000                   NA   0.0000000
## min          1.464000e+03   NA   1.2763700                   NA   5.5139000
## max          1.999979e+06   NA  24.4847900                   NA   7.6167000
## range        1.998515e+06   NA  23.2084200                   NA   2.1028000
## sum          1.075194e+07   NA 129.5066700                   NA 122.7047000
## median       1.670700e+04   NA   4.2919700                   NA   6.3236000
## mean         5.658915e+05   NA   6.8161405                   NA   6.4581421
## SE.mean      2.030740e+05   NA   1.4599518                   NA   0.1145878
## CI.mean.0.95 4.266426e+05   NA   3.0672450                   NA   0.2407400
## var          7.835417e+11   NA  40.4977278                   NA   0.2494769
## std.dev      8.851789e+05   NA   6.3637825                   NA   0.4994766
## coef.var     1.564220e+00   NA   0.9336343                   NA   0.0773406
##                  longitude geolocation hazard_type landslide_type
## nbr.val       1.900000e+01          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.640890e+01          NA          NA             NA
## max          -7.494110e+01          NA          NA             NA
## range         1.467800e+00          NA          NA             NA
## sum          -1.436556e+03          NA          NA             NA
## median       -7.557580e+01          NA          NA             NA
## mean         -7.560821e+01          NA          NA             NA
## SE.mean       9.157725e-02          NA          NA             NA
## CI.mean.0.95  1.923967e-01          NA          NA             NA
## var           1.593415e-01          NA          NA             NA
## std.dev       3.991760e-01          NA          NA             NA
## coef.var     -5.279532e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA   4.000000  18.000000
## nbr.null                 NA      NA         NA   1.000000   3.000000
## nbr.na                   NA      NA         NA  15.000000   1.000000
## min                      NA      NA         NA   0.000000   0.000000
## max                      NA      NA         NA  40.000000  92.000000
## range                    NA      NA         NA  40.000000  92.000000
## sum                      NA      NA         NA  42.000000 266.000000
## median                   NA      NA         NA   1.000000   4.000000
## mean                     NA      NA         NA  10.500000  14.777778
## SE.mean                  NA      NA         NA   9.836158   6.733758
## CI.mean.0.95             NA      NA         NA  31.303044  14.206988
## var                      NA      NA         NA 387.000000 816.183007
## std.dev                  NA      NA         NA  19.672316  28.568917
## coef.var                 NA      NA         NA   1.873554   1.933235
##              source_name source_link
## nbr.val               NA          NA
## nbr.null              NA          NA
## nbr.na                NA          NA
## min                   NA          NA
## max                   NA          NA
## range                 NA          NA
## sum                   NA          NA
## median                NA          NA
## mean                  NA          NA
## SE.mean               NA          NA
## CI.mean.0.95          NA          NA
## var                   NA          NA
## std.dev               NA          NA
## coef.var              NA          NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Cauca

Deslizamientos de las ciudades de Cauca
df_Ca %>% 
  select(Country, State, City, Distance, date) 
##       Country State     City Distance     date
## 49   Colombia Cauca  Suárez  8.46579 10/13/07
## 166  Colombia Cauca  Miranda  0.70558 11/24/08
## 850  Colombia Cauca  La Cruz  0.64469 12/13/11
## 1344 Colombia Cauca Almaguer 17.31514 12/21/14
## 1471 Colombia Cauca Jambaló  8.81287  6/13/15
head(df_Ca)
##        id     date  time America  Country country_code State population
## 49    307 10/13/07            SA Colombia           CO Cauca       9985
## 166   904 11/24/08            SA Colombia           CO Cauca      13223
## 850  4091 12/13/11            SA Colombia           CO Cauca       8751
## 1344 6579 12/21/14            SA Colombia           CO Cauca       3120
## 1471 6986  6/13/15 13:00      SA Colombia           CO Cauca       1972
##          City Distance location_description latitude longitude
## 49    Suárez  8.46579                        2.9437  -76.7719
## 166   Miranda  0.70558                        3.2468  -76.2226
## 850   La Cruz  0.64469                        1.6056  -76.9742
## 1344 Almaguer 17.31514              Unknown   1.8362  -76.9857
## 1471 Jambaló  8.81287           Above road   2.8696  -76.3034
##                                    geolocation hazard_type landslide_type
## 49   (2.9437000000000002, -76.771900000000002)   Landslide       Mudslide
## 166             (3.2467999999999999, -76.2226)   Landslide       Mudslide
## 850  (1.6055999999999999, -76.974199999999996)   Landslide      Landslide
## 1344 (1.8362000000000001, -76.985699999999994)   Landslide       Mudslide
## 1471 (2.8696000000000002, -76.303399999999996)   Landslide      Landslide
##      landslide_size         trigger storm_name injuries fatalities
## 49            Large Continuous rain                  NA         24
## 166          Medium        Downpour                  NA         10
## 850          Medium        Downpour                  NA          1
## 1344          Large            Rain                   0          6
## 1471         Medium            Rain                   0          0
##                 source_name
## 49   Reuters - AlertNet.org
## 166                        
## 850                        
## 1344                TeleSUR
## 1471              RSOE EDIS
##                                                                                           source_link
## 49                                       http://www.reuters.com/article/newsOne/idUSN1329387220071013
## 166                                 http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393
## 850  http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4
## 1344   http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html
## 1471   http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL
ggplot(data=df_Ca, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Ca, aes(x = "Cauca", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Ca <- df_Ca %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Ca$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Ca, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8") 
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Cuidades con mayor concentracion de deslizamiento
library(qcc)

Distance <- df_Ca$Distance
names(Distance) <- df_Ca$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##           
## Pareto chart analysis for Distance
##             Frequency  Cum.Freq. Percentage Cum.Percent.
##   Almaguer  17.315140  17.315140  48.172452    48.172452
##   Jambaló   8.812870  26.128010  24.518286    72.690739
##   Suárez    8.465790  34.593800  23.552675    96.243414
##   Miranda    0.705580  35.299380   1.962994    98.206408
##   La Cruz    0.644690  35.944070   1.793592   100.000000
Diagrama de tallo y hojas
stem(df_Ca$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 11
##   0 | 89
##   1 | 
##   1 | 7
head(df_Ca)
##     id     date  time America  Country country_code State population     City
## 1  307 10/13/07            SA Colombia           CO Cauca       9985  Suárez
## 2  904 11/24/08            SA Colombia           CO Cauca      13223  Miranda
## 3 4091 12/13/11            SA Colombia           CO Cauca       8751  La Cruz
## 4 6986  6/13/15 13:00      SA Colombia           CO Cauca       1972 Jambaló
## 5 6579 12/21/14            SA Colombia           CO Cauca       3120 Almaguer
##   Distance location_description latitude longitude
## 1  8.46579                        2.9437  -76.7719
## 2  0.70558                        3.2468  -76.2226
## 3  0.64469                        1.6056  -76.9742
## 4  8.81287           Above road   2.8696  -76.3034
## 5 17.31514              Unknown   1.8362  -76.9857
##                                 geolocation hazard_type landslide_type
## 1 (2.9437000000000002, -76.771900000000002)   Landslide       Mudslide
## 2            (3.2467999999999999, -76.2226)   Landslide       Mudslide
## 3 (1.6055999999999999, -76.974199999999996)   Landslide      Landslide
## 4 (2.8696000000000002, -76.303399999999996)   Landslide      Landslide
## 5 (1.8362000000000001, -76.985699999999994)   Landslide       Mudslide
##   landslide_size         trigger storm_name injuries fatalities
## 1          Large Continuous rain                  NA         24
## 2         Medium        Downpour                  NA         10
## 3         Medium        Downpour                  NA          1
## 4         Medium            Rain                   0          0
## 5          Large            Rain                   0          6
##              source_name
## 1 Reuters - AlertNet.org
## 2                       
## 3                       
## 4              RSOE EDIS
## 5                TeleSUR
##                                                                                        source_link
## 1                                     http://www.reuters.com/article/newsOne/idUSN1329387220071013
## 2                                http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393
## 3 http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4
## 4   http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL
## 5   http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html
##        prop     ypos
## 1 23.552675 11.77634
## 2  1.962994 24.53417
## 3  1.793592 26.41247
## 4 24.518286 39.56840
## 5 48.172452 75.91377
knitr::kable(head(df_Ca))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
307 10/13/07 SA Colombia CO Cauca 9985 Suárez 8.46579 2.9437 -76.7719 (2.9437000000000002, -76.771900000000002) Landslide Mudslide Large Continuous rain NA 24 Reuters - AlertNet.org http://www.reuters.com/article/newsOne/idUSN1329387220071013 23.552675 11.77634
904 11/24/08 SA Colombia CO Cauca 13223 Miranda 0.70558 3.2468 -76.2226 (3.2467999999999999, -76.2226) Landslide Mudslide Medium Downpour NA 10 http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393 1.962994 24.53417
4091 12/13/11 SA Colombia CO Cauca 8751 La Cruz 0.64469 1.6056 -76.9742 (1.6055999999999999, -76.974199999999996) Landslide Landslide Medium Downpour NA 1 http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4 1.793592 26.41247
6986 6/13/15 13:00 SA Colombia CO Cauca 1972 Jambaló 8.81287 Above road 2.8696 -76.3034 (2.8696000000000002, -76.303399999999996) Landslide Landslide Medium Rain 0 0 RSOE EDIS http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL 24.518286 39.56840
6579 12/21/14 SA Colombia CO Cauca 3120 Almaguer 17.31514 Unknown 1.8362 -76.9857 (1.8362000000000001, -76.985699999999994) Landslide Mudslide Large Rain 0 6 TeleSUR http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html 48.172452 75.91377
stem(df_Ca$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 11
##   0 | 89
##   1 | 
##   1 | 7
stem(df_Ca$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 67
##    2 | 
##    4 | 
##    6 | 
##    8 | 58
##   10 | 
##   12 | 
##   14 | 
##   16 | 3
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Ca$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May
## 2007  8.46579  0.70558  0.64469  8.81287 17.31514
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.64469 1 20 20 20 20
0.70558 1 20 20 40 40
8.46579 1 20 20 60 60
8.81287 1 20 20 80 80
17.31514 1 20 20 100 100
Total 5 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.64469 1
0.70558 1
8.46579 1
8.81287 1
17.31514 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.64469  6.64469 12.64469 18.64469
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.64469 1 0.2 1
0.70558 1 0.2 2
8.46579 1 0.2 3
8.81287 1 0.2 4
17.31514 1 0.2 5
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ Distance: Factor w/ 5 levels "0.64469","0.70558",..: 1 2 3 4 5
##  $ Freq    : int  1 1 1 1 1
##  $ Rel_Freq: num  0.2 0.2 0.2 0.2 0.2
##  $ Cum_Freq: int  1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.64469 1
0.70558 1
8.46579 1
8.81287 1
17.31514 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Ca$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6447  0.7056  8.4658  7.1888  8.8129 17.3151
library(pastecs)
stat.desc(df_Ca)
##                        id date time America Country country_code State
## nbr.val      5.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          3.070000e+02   NA   NA      NA      NA           NA    NA
## max          6.986000e+03   NA   NA      NA      NA           NA    NA
## range        6.679000e+03   NA   NA      NA      NA           NA    NA
## sum          1.886700e+04   NA   NA      NA      NA           NA    NA
## median       4.091000e+03   NA   NA      NA      NA           NA    NA
## mean         3.773400e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.388208e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 3.854284e+03   NA   NA      NA      NA           NA    NA
## var          9.635611e+06   NA   NA      NA      NA           NA    NA
## std.dev      3.104128e+03   NA   NA      NA      NA           NA    NA
## coef.var     8.226343e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description   latitude
## nbr.val      5.000000e+00   NA  5.0000000                   NA  5.0000000
## nbr.null     0.000000e+00   NA  0.0000000                   NA  0.0000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA  0.0000000
## min          1.972000e+03   NA  0.6446900                   NA  1.6056000
## max          1.322300e+04   NA 17.3151400                   NA  3.2468000
## range        1.125100e+04   NA 16.6704500                   NA  1.6412000
## sum          3.705100e+04   NA 35.9440700                   NA 12.5019000
## median       8.751000e+03   NA  8.4657900                   NA  2.8696000
## mean         7.410200e+03   NA  7.1888140                   NA  2.5003800
## SE.mean      2.123627e+03   NA  3.0957114                   NA  0.3264786
## CI.mean.0.95 5.896133e+03   NA  8.5950729                   NA  0.9064498
## var          2.254895e+07   NA 47.9171465                   NA  0.5329413
## std.dev      4.748574e+03   NA  6.9222212                   NA  0.7300283
## coef.var     6.408159e-01   NA  0.9629156                   NA  0.2919669
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.698570e+01          NA          NA             NA
## max          -7.622260e+01          NA          NA             NA
## range         7.631000e-01          NA          NA             NA
## sum          -3.832578e+02          NA          NA             NA
## median       -7.677190e+01          NA          NA             NA
## mean         -7.665156e+01          NA          NA             NA
## SE.mean       1.636230e-01          NA          NA             NA
## CI.mean.0.95  4.542903e-01          NA          NA             NA
## var           1.338625e-01          NA          NA             NA
## std.dev       3.658722e-01          NA          NA             NA
## coef.var     -4.773187e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2   5.000000          NA
## nbr.null                 NA      NA         NA        2   1.000000          NA
## nbr.na                   NA      NA         NA        3   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0  24.000000          NA
## range                    NA      NA         NA        0  24.000000          NA
## sum                      NA      NA         NA        0  41.000000          NA
## median                   NA      NA         NA        0   6.000000          NA
## mean                     NA      NA         NA        0   8.200000          NA
## SE.mean                  NA      NA         NA        0   4.340507          NA
## CI.mean.0.95             NA      NA         NA        0  12.051179          NA
## var                      NA      NA         NA        0  94.200000          NA
## std.dev                  NA      NA         NA        0   9.705668          NA
## coef.var                 NA      NA         NA      NaN   1.183618          NA
##              source_link        prop       ypos
## nbr.val               NA   5.0000000   5.000000
## nbr.null              NA   0.0000000   0.000000
## nbr.na                NA   0.0000000   0.000000
## min                   NA   1.7935921  11.776338
## max                   NA  48.1724524  75.913774
## range                 NA  46.3788603  64.137436
## sum                   NA 100.0000000 178.205153
## median                NA  23.5526750  26.412465
## mean                  NA  20.0000000  35.641031
## SE.mean               NA   8.6125790  10.989473
## CI.mean.0.95          NA  23.9123529  30.511669
## var                   NA 370.8825879 603.842603
## std.dev               NA  19.2583122  24.573209
## coef.var              NA   0.9629156   0.689464
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Magdalena:

Deslizamientos de las ciudades de Magdalena
df_Mag %>% 
  select(Country, State, City, Distance, date) 
##       Country     State        City Distance     date
## 396  Colombia Magdalena    Ciénaga 51.84125  7/30/10
## 853  Colombia Magdalena Santa Marta  0.79694 12/16/11
## 1290 Colombia Magdalena   Ariguaní 28.50569  10/7/14
head(df_Mag)
##        id     date time America  Country country_code     State population
## 396  2175  7/30/10           SA Colombia           CO Magdalena      88311
## 853  4096 12/16/11           SA Colombia           CO Magdalena     431781
## 1290 6219  10/7/14           SA Colombia           CO Magdalena      26246
##             City Distance location_description latitude longitude
## 396     Ciénaga 51.84125                       11.0029  -73.7733
## 853  Santa Marta  0.79694                       11.2475  -74.2017
## 1290   Ariguaní 28.50569              Unknown  10.4741  -73.8715
##                         geolocation hazard_type landslide_type landslide_size
## 396  (11.0029, -73.773300000000006)   Landslide       Mudslide         Medium
## 853  (11.2475, -74.201700000000002)   Landslide      Landslide          Large
## 1290 (10.4741, -73.871499999999997)   Landslide      Landslide         Medium
##       trigger storm_name injuries fatalities source_name
## 396  Downpour                  NA          0            
## 853  Downpour                  NA          1            
## 1290     Rain                   0          6  EL HERALDO
##                                                                                                          source_link
## 396                                                                                                                 
## 853  http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html
## 1290         http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129
ggplot(data=df_Mag, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Mag, aes(x = "Magdalena", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Mag <- df_Mag %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Mag$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Mag, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Concentracion de ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Mag$Distance
names(Distance) <- df_Mag$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##              
## Pareto chart analysis for Distance
##                Frequency  Cum.Freq. Percentage Cum.Percent.
##   Ciénaga     51.841250  51.841250  63.888059    63.888059
##   Ariguaní    28.505690  80.346940  35.129809    99.017868
##   Santa Marta   0.796940  81.143880   0.982132   100.000000
Diagrama de tallo y hojas
stem(df_Mag$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1
##   2 | 9
##   4 | 2
head(df_Mag)
##     id     date time America  Country country_code     State population
## 1 4096 12/16/11           SA Colombia           CO Magdalena     431781
## 2 2175  7/30/10           SA Colombia           CO Magdalena      88311
## 3 6219  10/7/14           SA Colombia           CO Magdalena      26246
##          City Distance location_description latitude longitude
## 1 Santa Marta  0.79694                       11.2475  -74.2017
## 2    Ciénaga 51.84125                       11.0029  -73.7733
## 3   Ariguaní 28.50569              Unknown  10.4741  -73.8715
##                      geolocation hazard_type landslide_type landslide_size
## 1 (11.2475, -74.201700000000002)   Landslide      Landslide          Large
## 2 (11.0029, -73.773300000000006)   Landslide       Mudslide         Medium
## 3 (10.4741, -73.871499999999997)   Landslide      Landslide         Medium
##    trigger storm_name injuries fatalities source_name
## 1 Downpour                  NA          1            
## 2 Downpour                  NA          0            
## 3     Rain                   0          6  EL HERALDO
##                                                                                                       source_link
## 1 http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html
## 2                                                                                                                
## 3         http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129
##        prop      ypos
## 1  0.982132  0.491066
## 2 63.888059 32.926162
## 3 35.129809 82.435096
knitr::kable(head(df_Mag))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4096 12/16/11 SA Colombia CO Magdalena 431781 Santa Marta 0.79694 11.2475 -74.2017 (11.2475, -74.201700000000002) Landslide Landslide Large Downpour NA 1 http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html 0.982132 0.491066
2175 7/30/10 SA Colombia CO Magdalena 88311 Ciénaga 51.84125 11.0029 -73.7733 (11.0029, -73.773300000000006) Landslide Mudslide Medium Downpour NA 0 63.888059 32.926161
6219 10/7/14 SA Colombia CO Magdalena 26246 Ariguaní 28.50569 Unknown 10.4741 -73.8715 (10.4741, -73.871499999999997) Landslide Landslide Medium Rain 0 6 EL HERALDO http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129 35.129809 82.435096
stem(df_Mag$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1
##   2 | 9
##   4 | 2
stem(df_Mag$"Distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1
##   1 | 
##   2 | 9
##   3 | 
##   4 | 
##   5 | 2
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Mag$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar
## 2007  0.79694 51.84125 28.50569
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.79694 1 33.3 33.3 33.3 33.3
28.50569 1 33.3 33.3 66.7 66.7
51.84125 1 33.3 33.3 100.0 100.0
Total 3 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  4 obs. of  5 variables:
##  $ n      : num  1 1 1 3
##  $ %      : num  33.3 33.3 33.3 100
##  $ val%   : num  33.3 33.3 33.3 100
##  $ %cum   : num  33.3 66.7 100 100
##  $ val%cum: num  33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.79694 1
28.50569 1
51.84125 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.79694 18.79694 36.79694 54.79694
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.79694 1 0.3333333 1
28.50569 1 0.3333333 2
51.84125 1 0.3333333 3
str(Freq_table)
## 'data.frame':    3 obs. of  4 variables:
##  $ Distance: Factor w/ 3 levels "0.79694","28.50569",..: 1 2 3
##  $ Freq    : int  1 1 1
##  $ Rel_Freq: num  0.333 0.333 0.333
##  $ Cum_Freq: int  1 2 3
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.79694 1
28.50569 1
51.84125 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Mag$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7969 14.6513 28.5057 27.0480 40.1735 51.8413
library(pastecs)
stat.desc(df_Mag)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      3.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.175000e+03   NA   NA      NA      NA           NA    NA
## max          6.219000e+03   NA   NA      NA      NA           NA    NA
## range        4.044000e+03   NA   NA      NA      NA           NA    NA
## sum          1.249000e+04   NA   NA      NA      NA           NA    NA
## median       4.096000e+03   NA   NA      NA      NA           NA    NA
## mean         4.163333e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.167888e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 5.025015e+03   NA   NA      NA      NA           NA    NA
## var          4.091884e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.022841e+03   NA   NA      NA      NA           NA    NA
## coef.var     4.858705e-01   NA   NA      NA      NA           NA    NA
##                population City    Distance location_description    latitude
## nbr.val      3.000000e+00   NA   3.0000000                   NA  3.00000000
## nbr.null     0.000000e+00   NA   0.0000000                   NA  0.00000000
## nbr.na       0.000000e+00   NA   0.0000000                   NA  0.00000000
## min          2.624600e+04   NA   0.7969400                   NA 10.47410000
## max          4.317810e+05   NA  51.8412500                   NA 11.24750000
## range        4.055350e+05   NA  51.0443100                   NA  0.77340000
## sum          5.463380e+05   NA  81.1438800                   NA 32.72450000
## median       8.831100e+04   NA  28.5056900                   NA 11.00290000
## mean         1.821127e+05   NA  27.0479600                   NA 10.90816667
## SE.mean      1.261133e+05   NA  14.7532384                   NA  0.22823065
## CI.mean.0.95 5.426219e+05   NA  63.4780614                   NA  0.98199724
## var          4.771372e+10   NA 652.9741284                   NA  0.15626769
## std.dev      2.184347e+05   NA  25.5533585                   NA  0.39530709
## coef.var     1.199448e+00   NA   0.9447425                   NA  0.03623955
##                  longitude geolocation hazard_type landslide_type
## nbr.val       3.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.420170e+01          NA          NA             NA
## max          -7.377330e+01          NA          NA             NA
## range         4.284000e-01          NA          NA             NA
## sum          -2.218465e+02          NA          NA             NA
## median       -7.387150e+01          NA          NA             NA
## mean         -7.394883e+01          NA          NA             NA
## SE.mean       1.295723e-01          NA          NA             NA
## CI.mean.0.95  5.575048e-01          NA          NA             NA
## var           5.036697e-02          NA          NA             NA
## std.dev       2.244259e-01          NA          NA             NA
## coef.var     -3.034881e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   3.000000          NA
## nbr.null                 NA      NA         NA        1   1.000000          NA
## nbr.na                   NA      NA         NA        2   0.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   6.000000          NA
## range                    NA      NA         NA        0   6.000000          NA
## sum                      NA      NA         NA        0   7.000000          NA
## median                   NA      NA         NA        0   1.000000          NA
## mean                     NA      NA         NA        0   2.333333          NA
## SE.mean                  NA      NA         NA       NA   1.855921          NA
## CI.mean.0.95             NA      NA         NA      NaN   7.985386          NA
## var                      NA      NA         NA       NA  10.333333          NA
## std.dev                  NA      NA         NA       NA   3.214550          NA
## coef.var                 NA      NA         NA       NA   1.377664          NA
##              source_link        prop        ypos
## nbr.val               NA   3.0000000    3.000000
## nbr.null              NA   0.0000000    0.000000
## nbr.na                NA   0.0000000    0.000000
## min                   NA   0.9821320    0.491066
## max                   NA  63.8880591   82.435096
## range                 NA  62.9059271   81.944030
## sum                   NA 100.0000000  115.852323
## median                NA  35.1298089   32.926162
## mean                  NA  33.3333333   38.617441
## SE.mean               NA  18.1815787   23.825749
## CI.mean.0.95          NA  78.2290191  102.513925
## var                   NA 991.7094095 1702.998991
## std.dev               NA  31.4914180   41.267408
## coef.var              NA   0.9447425    1.068621
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Meta:

Deslizamientos de las ciudades de Meta
df_Meta %>% 
  select(Country, State, City, Distance, date) 
##      Country State          City Distance    date
## 349 Colombia  Meta Puerto López 22.53724 5/23/10
head(df_Meta)
##       id    date time America  Country country_code State population
## 349 1892 5/23/10           SA Colombia           CO  Meta      16678
##              City Distance location_description latitude longitude
## 349 Puerto López 22.53724                        3.8844  -72.9175
##                                   geolocation hazard_type landslide_type
## 349 (3.8843999999999999, -72.917500000000004)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 349         Medium Downpour                  NA          0            
##                                                                                                               source_link
## 349 http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN
ggplot(data=df_Meta, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Meta, aes(x = "Meta", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Meta <- df_Meta %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Meta$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Meta, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Meta$Distance
names(Distance) <- df_Meta$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                
## Pareto chart analysis for Distance
##                 Frequency Cum.Freq. Percentage Cum.Percent.
##   Puerto López  22.53724  22.53724  100.00000    100.00000
Diagrama de tallo y hojas
stem(df_Meta$"Distance")
head(df_Meta)
##     id    date time America  Country country_code State population
## 1 1892 5/23/10           SA Colombia           CO  Meta      16678
##            City Distance location_description latitude longitude
## 1 Puerto López 22.53724                        3.8844  -72.9175
##                                 geolocation hazard_type landslide_type
## 1 (3.8843999999999999, -72.917500000000004)   Landslide      Landslide
##   landslide_size  trigger storm_name injuries fatalities source_name
## 1         Medium Downpour                  NA          0            
##                                                                                                             source_link
## 1 http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN
##   prop ypos
## 1  100   50
knitr::kable(head(df_Meta))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
1892 5/23/10 SA Colombia CO Meta 16678 Puerto López 22.53724 3.8844 -72.9175 (3.8843999999999999, -72.917500000000004) Landslide Landslide Medium Downpour NA 0 http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN 100 50
stem(df_Meta$"Distance")
stem(df_Meta$"Distance", scale = 2)
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Meta$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan
## 2007 22.53724
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
22.53724 1 100 100 100 100
Total 1 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  2 obs. of  5 variables:
##  $ n      : num  1 1
##  $ %      : num  100 100
##  $ val%   : num  100 100
##  $ %cum   : num  100 100
##  $ val%cum: num  100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
22.53724 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 22.53724
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
22.53724 1 1 1
str(Freq_table)
## 'data.frame':    1 obs. of  4 variables:
##  $ Distance: Factor w/ 1 level "22.53724": 1
##  $ Freq    : int 1
##  $ Rel_Freq: num 1
##  $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
22.53724 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Meta$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   22.54   22.54   22.54   22.54   22.54   22.54
library(pastecs)
stat.desc(df_Meta)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                id date time America Country country_code State population City
## nbr.val         1   NA   NA      NA      NA           NA    NA          1   NA
## nbr.null        0   NA   NA      NA      NA           NA    NA          0   NA
## nbr.na          0   NA   NA      NA      NA           NA    NA          0   NA
## min          1892   NA   NA      NA      NA           NA    NA      16678   NA
## max          1892   NA   NA      NA      NA           NA    NA      16678   NA
## range           0   NA   NA      NA      NA           NA    NA          0   NA
## sum          1892   NA   NA      NA      NA           NA    NA      16678   NA
## median       1892   NA   NA      NA      NA           NA    NA      16678   NA
## mean         1892   NA   NA      NA      NA           NA    NA      16678   NA
## SE.mean        NA   NA   NA      NA      NA           NA    NA         NA   NA
## CI.mean.0.95  NaN   NA   NA      NA      NA           NA    NA        NaN   NA
## var            NA   NA   NA      NA      NA           NA    NA         NA   NA
## std.dev        NA   NA   NA      NA      NA           NA    NA         NA   NA
## coef.var       NA   NA   NA      NA      NA           NA    NA         NA   NA
##              Distance location_description latitude longitude geolocation
## nbr.val       1.00000                   NA   1.0000    1.0000          NA
## nbr.null      0.00000                   NA   0.0000    0.0000          NA
## nbr.na        0.00000                   NA   0.0000    0.0000          NA
## min          22.53724                   NA   3.8844  -72.9175          NA
## max          22.53724                   NA   3.8844  -72.9175          NA
## range         0.00000                   NA   0.0000    0.0000          NA
## sum          22.53724                   NA   3.8844  -72.9175          NA
## median       22.53724                   NA   3.8844  -72.9175          NA
## mean         22.53724                   NA   3.8844  -72.9175          NA
## SE.mean            NA                   NA       NA        NA          NA
## CI.mean.0.95      NaN                   NA      NaN       NaN          NA
## var                NA                   NA       NA        NA          NA
## std.dev            NA                   NA       NA        NA          NA
## coef.var           NA                   NA       NA        NA          NA
##              hazard_type landslide_type landslide_size trigger storm_name
## nbr.val               NA             NA             NA      NA         NA
## nbr.null              NA             NA             NA      NA         NA
## nbr.na                NA             NA             NA      NA         NA
## min                   NA             NA             NA      NA         NA
## max                   NA             NA             NA      NA         NA
## range                 NA             NA             NA      NA         NA
## sum                   NA             NA             NA      NA         NA
## median                NA             NA             NA      NA         NA
## mean                  NA             NA             NA      NA         NA
## SE.mean               NA             NA             NA      NA         NA
## CI.mean.0.95          NA             NA             NA      NA         NA
## var                   NA             NA             NA      NA         NA
## std.dev               NA             NA             NA      NA         NA
## coef.var              NA             NA             NA      NA         NA
##              injuries fatalities source_name source_link prop ypos
## nbr.val             0          1          NA          NA    1    1
## nbr.null            0          1          NA          NA    0    0
## nbr.na              1          0          NA          NA    0    0
## min               Inf          0          NA          NA  100   50
## max              -Inf          0          NA          NA  100   50
## range            -Inf          0          NA          NA    0    0
## sum                 0          0          NA          NA  100   50
## median             NA          0          NA          NA  100   50
## mean              NaN          0          NA          NA  100   50
## SE.mean            NA         NA          NA          NA   NA   NA
## CI.mean.0.95      NaN        NaN          NA          NA  NaN  NaN
## var                NA         NA          NA          NA   NA   NA
## std.dev            NA         NA          NA          NA   NA   NA
## coef.var           NA         NA          NA          NA   NA   NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Putumayo:

Deslizamientos de las ciudades de Putumayo
df_Put %>% 
  select(Country, State, City, Distance, date) 
##       Country    State          City Distance    date
## 1456 Colombia Putumayo San Francisco   1.4794 3/21/15
head(df_Put)
##        id    date time America  Country country_code    State population
## 1456 6914 3/21/15           SA Colombia           CO Putumayo       4350
##               City Distance location_description latitude longitude
## 1456 San Francisco   1.4794        Natural slope   1.1656  -76.8755
##                        geolocation hazard_type landslide_type landslide_size
## 1456 (1.1656, -76.875500000000002)   Landslide      Landslide         Medium
##      trigger storm_name injuries fatalities source_name
## 1456 Unknown                   0          0  AOL Travel
##                                                                                  source_link
## 1456 http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/
ggplot(data=df_Put, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Put, aes(x = "Putumayo", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Put <- df_Put %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Put$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Put, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Put$Distance
names(Distance) <- df_Put$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                
## Pareto chart analysis for Distance
##                 Frequency Cum.Freq. Percentage Cum.Percent.
##   San Francisco    1.4794    1.4794   100.0000     100.0000
Diagrama de tallo y hojas
stem(df_Put$"Distance")
head(df_Put)
##     id    date time America  Country country_code    State population
## 1 6914 3/21/15           SA Colombia           CO Putumayo       4350
##            City Distance location_description latitude longitude
## 1 San Francisco   1.4794        Natural slope   1.1656  -76.8755
##                     geolocation hazard_type landslide_type landslide_size
## 1 (1.1656, -76.875500000000002)   Landslide      Landslide         Medium
##   trigger storm_name injuries fatalities source_name
## 1 Unknown                   0          0  AOL Travel
##                                                                               source_link
## 1 http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/
##   prop ypos
## 1  100   50
knitr::kable(head(df_Put))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
6914 3/21/15 SA Colombia CO Putumayo 4350 San Francisco 1.4794 Natural slope 1.1656 -76.8755 (1.1656, -76.875500000000002) Landslide Landslide Medium Unknown 0 0 AOL Travel http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/ 100 50
stem(df_Put$"Distance")
stem(df_Put$"Distance", scale = 2)
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Put$Distance, frequency=12, start=2007)
head(data_serie)
##         Jan
## 2007 1.4794
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.4794 1 100 100 100 100
Total 1 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  2 obs. of  5 variables:
##  $ n      : num  1 1
##  $ %      : num  100 100
##  $ val%   : num  100 100
##  $ %cum   : num  100 100
##  $ val%cum: num  100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.4794 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Put$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.479   1.479   1.479   1.479   1.479   1.479
library(pastecs)
stat.desc(df_Put)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                id date time America Country country_code State population City
## nbr.val         1   NA   NA      NA      NA           NA    NA          1   NA
## nbr.null        0   NA   NA      NA      NA           NA    NA          0   NA
## nbr.na          0   NA   NA      NA      NA           NA    NA          0   NA
## min          6914   NA   NA      NA      NA           NA    NA       4350   NA
## max          6914   NA   NA      NA      NA           NA    NA       4350   NA
## range           0   NA   NA      NA      NA           NA    NA          0   NA
## sum          6914   NA   NA      NA      NA           NA    NA       4350   NA
## median       6914   NA   NA      NA      NA           NA    NA       4350   NA
## mean         6914   NA   NA      NA      NA           NA    NA       4350   NA
## SE.mean        NA   NA   NA      NA      NA           NA    NA         NA   NA
## CI.mean.0.95  NaN   NA   NA      NA      NA           NA    NA        NaN   NA
## var            NA   NA   NA      NA      NA           NA    NA         NA   NA
## std.dev        NA   NA   NA      NA      NA           NA    NA         NA   NA
## coef.var       NA   NA   NA      NA      NA           NA    NA         NA   NA
##              Distance location_description latitude longitude geolocation
## nbr.val        1.0000                   NA   1.0000    1.0000          NA
## nbr.null       0.0000                   NA   0.0000    0.0000          NA
## nbr.na         0.0000                   NA   0.0000    0.0000          NA
## min            1.4794                   NA   1.1656  -76.8755          NA
## max            1.4794                   NA   1.1656  -76.8755          NA
## range          0.0000                   NA   0.0000    0.0000          NA
## sum            1.4794                   NA   1.1656  -76.8755          NA
## median         1.4794                   NA   1.1656  -76.8755          NA
## mean           1.4794                   NA   1.1656  -76.8755          NA
## SE.mean            NA                   NA       NA        NA          NA
## CI.mean.0.95      NaN                   NA      NaN       NaN          NA
## var                NA                   NA       NA        NA          NA
## std.dev            NA                   NA       NA        NA          NA
## coef.var           NA                   NA       NA        NA          NA
##              hazard_type landslide_type landslide_size trigger storm_name
## nbr.val               NA             NA             NA      NA         NA
## nbr.null              NA             NA             NA      NA         NA
## nbr.na                NA             NA             NA      NA         NA
## min                   NA             NA             NA      NA         NA
## max                   NA             NA             NA      NA         NA
## range                 NA             NA             NA      NA         NA
## sum                   NA             NA             NA      NA         NA
## median                NA             NA             NA      NA         NA
## mean                  NA             NA             NA      NA         NA
## SE.mean               NA             NA             NA      NA         NA
## CI.mean.0.95          NA             NA             NA      NA         NA
## var                   NA             NA             NA      NA         NA
## std.dev               NA             NA             NA      NA         NA
## coef.var              NA             NA             NA      NA         NA
##              injuries fatalities source_name source_link prop ypos
## nbr.val             1          1          NA          NA    1    1
## nbr.null            1          1          NA          NA    0    0
## nbr.na              0          0          NA          NA    0    0
## min                 0          0          NA          NA  100   50
## max                 0          0          NA          NA  100   50
## range               0          0          NA          NA    0    0
## sum                 0          0          NA          NA  100   50
## median              0          0          NA          NA  100   50
## mean                0          0          NA          NA  100   50
## SE.mean            NA         NA          NA          NA   NA   NA
## CI.mean.0.95      NaN        NaN          NA          NA  NaN  NaN
## var                NA         NA          NA          NA   NA   NA
## std.dev            NA         NA          NA          NA   NA   NA
## coef.var           NA         NA          NA          NA   NA   NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Santander:

Deslizamientos de las ciudades de Santander
df_San %>% 
  select(Country, State, City, Distance, date) 
##       Country     State                    City Distance     date
## 425  Colombia Santander                Málaga   2.99929  8/25/10
## 434  Colombia Santander                Málaga   2.99929  8/28/10
## 527  Colombia Santander             San Andrés  3.06383 11/20/10
## 594  Colombia Santander                 Matanza  6.16385   3/3/11
## 643  Colombia Santander                Florián  0.44753  4/15/11
## 715  Colombia Santander                 Lebrija  9.21217  5/16/11
## 716  Colombia Santander             Piedecuesta 10.55986  5/17/11
## 718  Colombia Santander San Vicente de Chucurí  0.80432  5/18/11
## 848  Colombia Santander              Los Santos 12.61362  12/7/11
## 1222 Colombia Santander             Bucaramanga  1.33829  3/16/14
## 1305 Colombia Santander San Vicente de Chucurí  1.08964 10/21/14
head(df_San)
##       id     date      time America  Country country_code     State population
## 425 2331  8/25/10                SA Colombia           CO Santander      18739
## 434 2356  8/28/10                SA Colombia           CO Santander      18739
## 527 2740 11/20/10                SA Colombia           CO Santander       3032
## 594 3170   3/3/11 Afternoon      SA Colombia           CO Santander       1669
## 643 3365  4/15/11                SA Colombia           CO Santander       1227
## 715 3500  5/16/11   5:45:00      SA Colombia           CO Santander       8949
##            City Distance location_description latitude longitude
## 425    Málaga   2.99929                        6.7254  -72.7260
## 434    Málaga   2.99929                        6.7254  -72.7260
## 527 San Andrés  3.06383                        6.8202  -72.8756
## 594     Matanza  6.16385                        7.3500  -72.9667
## 643    Florián  0.44753                        5.8044  -73.9743
## 715     Lebrija  9.21217                        7.0960  -73.2994
##                                   geolocation hazard_type landslide_type
## 425 (6.7253999999999996, -72.725999999999999)   Landslide      Landslide
## 434 (6.7253999999999996, -72.725999999999999)   Landslide      Landslide
## 527 (6.8201999999999998, -72.875600000000006)   Landslide      Landslide
## 594               (7.35, -72.966700000000003)   Landslide       Mudslide
## 643 (5.8044000000000002, -73.974299999999999)   Landslide       Mudslide
## 715 (7.0960000000000001, -73.299400000000006)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 425         Medium Downpour                  NA          0            
## 434         Medium Downpour                  NA          0            
## 527         Medium Downpour                  NA          0            
## 594         Medium Downpour                  NA          3            
## 643          Large Downpour                  NA         11            
## 715         Medium Downpour                  NA          2            
##                                                                                                 source_link
## 425                                                                                                        
## 434                                                                                                        
## 527                           http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 594 http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html
## 643                                       http://www.laht.com/article.asp?ArticleId=391822&CategoryId=12393
## 715     http://colombiareports.com/colombia-news/news/16289-2-die-and-1-missing-in-santander-landslide.html
ggplot(data=df_San, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_San, aes(x = "Santander", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_San <- df_San %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_San$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_San, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_San$Distance
names(Distance) <- df_San$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                          
## Pareto chart analysis for Distance
##                             Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Los Santos               12.6136200  12.6136200  24.5919368   24.5919368
##   Piedecuesta              10.5598600  23.1734800  20.5878574   45.1797942
##   Lebrija                   9.2121700  32.3856500  17.9603558   63.1401500
##   Matanza                   6.1638500  38.5495000  12.0172488   75.1573988
##   San Andrés               3.0638300  41.6133300   5.9733458   81.1307446
##   Málaga                   2.9992900  44.6126200   5.8475164   86.9782610
##   Málaga                   2.9992900  47.6119100   5.8475164   92.8257774
##   Bucaramanga               1.3382900  48.9502000   2.6091751   95.4349525
##   San Vicente de Chucurí   1.0896400  50.0398400   2.1243987   97.5593512
##   San Vicente de Chucurí   0.8043200  50.8441600   1.5681293   99.1274805
##   Florián                  0.4475300  51.2916900   0.8725195  100.0000000
Diagrama de tallo y hojas
stem(df_San$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0111333
##   0 | 69
##   1 | 13
head(df_San)
##     id     date      time America  Country country_code     State population
## 1 3503  5/18/11   3:30:00      SA Colombia           CO Santander      11265
## 2 6309 10/21/14      3:00      SA Colombia           CO Santander      11265
## 3 2740 11/20/10                SA Colombia           CO Santander       3032
## 4 3501  5/17/11   Evening      SA Colombia           CO Santander      86387
## 5 3170   3/3/11 Afternoon      SA Colombia           CO Santander       1669
## 6 2331  8/25/10                SA Colombia           CO Santander      18739
##                      City Distance location_description latitude longitude
## 1 San Vicente de Chucurí  0.80432                        6.8835  -73.4166
## 2 San Vicente de Chucurí  1.08964           Above road   6.8907  -73.4081
## 3             San Andrés  3.06383                        6.8202  -72.8756
## 4             Piedecuesta 10.55986                        6.9050  -73.0021
## 5                 Matanza  6.16385                        7.3500  -72.9667
## 6                Málaga   2.99929                        6.7254  -72.7260
##                                 geolocation hazard_type landslide_type
## 1 (6.8834999999999997, -73.416600000000003)   Landslide        Complex
## 2 (6.8906999999999998, -73.408100000000005)   Landslide       Rockfall
## 3 (6.8201999999999998, -72.875600000000006)   Landslide      Landslide
## 4 (6.9050000000000002, -73.002099999999999)   Landslide      Landslide
## 5               (7.35, -72.966700000000003)   Landslide       Mudslide
## 6 (6.7253999999999996, -72.725999999999999)   Landslide      Landslide
##   landslide_size  trigger storm_name injuries fatalities source_name
## 1         Medium Downpour                  NA          1            
## 2          Small     Rain                   0          0  Vanguardia
## 3         Medium Downpour                  NA          0            
## 4         Medium Downpour                  NA          0            
## 5         Medium Downpour                  NA          3            
## 6         Medium Downpour                  NA          0            
##                                                                                                source_link
## 1   http://colombiareports.com/colombia-news/news/16350-north-colombia-landslide-kills-one-injures-10.html
## 2 http://www.vanguardia.com/economia/local/283826-derrumbe-cerro-por-10-horas-via-a-san-vicente-de-chucuri
## 3                            http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 4          http://colombiareports.com/colombia-news/news/16343-bogota-cut-off-from-northeast-colombia.html
## 5  http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html
## 6                                                                                                         
##        prop       ypos
## 1  1.568129  0.7840646
## 2  2.124399  2.6303286
## 3  5.973346  6.6792009
## 4 20.587857 19.9598025
## 5 12.017249 36.2623556
## 6  5.847516 45.1947382
knitr::kable(head(df_San))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3503 5/18/11 3:30:00 SA Colombia CO Santander 11265 San Vicente de Chucurí 0.80432 6.8835 -73.4166 (6.8834999999999997, -73.416600000000003) Landslide Complex Medium Downpour NA 1 http://colombiareports.com/colombia-news/news/16350-north-colombia-landslide-kills-one-injures-10.html 1.568129 0.7840646
6309 10/21/14 3:00 SA Colombia CO Santander 11265 San Vicente de Chucurí 1.08964 Above road 6.8907 -73.4081 (6.8906999999999998, -73.408100000000005) Landslide Rockfall Small Rain 0 0 Vanguardia http://www.vanguardia.com/economia/local/283826-derrumbe-cerro-por-10-horas-via-a-san-vicente-de-chucuri 2.124399 2.6303286
2740 11/20/10 SA Colombia CO Santander 3032 San Andrés 3.06383 6.8202 -72.8756 (6.8201999999999998, -72.875600000000006) Landslide Landslide Medium Downpour NA 0 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html 5.973346 6.6792009
3501 5/17/11 Evening SA Colombia CO Santander 86387 Piedecuesta 10.55986 6.9050 -73.0021 (6.9050000000000002, -73.002099999999999) Landslide Landslide Medium Downpour NA 0 http://colombiareports.com/colombia-news/news/16343-bogota-cut-off-from-northeast-colombia.html 20.587857 19.9598025
3170 3/3/11 Afternoon SA Colombia CO Santander 1669 Matanza 6.16385 7.3500 -72.9667 (7.35, -72.966700000000003) Landslide Mudslide Medium Downpour NA 3 http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html 12.017249 36.2623556
2331 8/25/10 SA Colombia CO Santander 18739 Málaga 2.99929 6.7254 -72.7260 (6.7253999999999996, -72.725999999999999) Landslide Landslide Medium Downpour NA 0 5.847516 45.1947382
stem(df_San$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0111333
##   0 | 69
##   1 | 13
stem(df_San$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 4813
##    2 | 001
##    4 | 
##    6 | 2
##    8 | 2
##   10 | 6
##   12 | 6
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_San$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May      Jun
## 2007  0.80432  1.08964  3.06383 10.55986  6.16385  2.99929
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.99929 2 18.2 18.2 18.2 18.2
0.44753 1 9.1 9.1 27.3 27.3
0.80432 1 9.1 9.1 36.4 36.4
1.08964 1 9.1 9.1 45.5 45.5
1.33829 1 9.1 9.1 54.5 54.5
3.06383 1 9.1 9.1 63.6 63.6
6.16385 1 9.1 9.1 72.7 72.7
9.21217 1 9.1 9.1 81.8 81.8
10.55986 1 9.1 9.1 90.9 90.9
12.61362 1 9.1 9.1 100.0 100.0
Total 11 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  11 obs. of  5 variables:
##  $ n      : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  18.2 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
##  $ val%   : num  18.2 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
##  $ %cum   : num  18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 100 ...
##  $ val%cum: num  18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 100 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.99929 2
0.44753 1
0.80432 1
1.08964 1
1.33829 1
3.06383 1
6.16385 1
9.21217 1
10.55986 1
12.61362 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.44753  3.44753  6.44753  9.44753 12.44753 15.44753
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.44753 1 0.0909091 1
0.80432 1 0.0909091 2
1.08964 1 0.0909091 3
1.33829 1 0.0909091 4
2.99929 2 0.1818182 6
3.06383 1 0.0909091 7
6.16385 1 0.0909091 8
9.21217 1 0.0909091 9
10.55986 1 0.0909091 10
12.61362 1 0.0909091 11
str(Freq_table)
## 'data.frame':    10 obs. of  4 variables:
##  $ Distance: Factor w/ 10 levels "0.44753","0.80432",..: 1 2 3 4 5 6 7 8 9 10
##  $ Freq    : int  1 1 1 1 2 1 1 1 1 1
##  $ Rel_Freq: num  0.0909 0.0909 0.0909 0.0909 0.1818 ...
##  $ Cum_Freq: int  1 2 3 4 6 7 8 9 10 11
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.44753 1
0.80432 1
1.08964 1
1.33829 1
2.99929 2
3.06383 1
6.16385 1
9.21217 1
10.55986 1
12.61362 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_San$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4475  1.2140  2.9993  4.6629  7.6880 12.6136
library(pastecs)
stat.desc(df_San)
##                        id date time America Country country_code State
## nbr.val      1.100000e+01   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.331000e+03   NA   NA      NA      NA           NA    NA
## max          6.309000e+03   NA   NA      NA      NA           NA    NA
## range        3.978000e+03   NA   NA      NA      NA           NA    NA
## sum          4.082300e+04   NA   NA      NA      NA           NA    NA
## median       3.500000e+03   NA   NA      NA      NA           NA    NA
## mean         3.711182e+03   NA   NA      NA      NA           NA    NA
## SE.mean      3.951672e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 8.804874e+02   NA   NA      NA      NA           NA    NA
## var          1.717728e+06   NA   NA      NA      NA           NA    NA
## std.dev      1.310621e+03   NA   NA      NA      NA           NA    NA
## coef.var     3.531547e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description    latitude
## nbr.val      1.100000e+01   NA 11.0000000                   NA 11.00000000
## nbr.null     0.000000e+00   NA  0.0000000                   NA  0.00000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA  0.00000000
## min          1.227000e+03   NA  0.4475300                   NA  5.80440000
## max          5.718200e+05   NA 12.6136200                   NA  7.35000000
## range        5.705930e+05   NA 12.1660900                   NA  1.54560000
## sum          7.344020e+05   NA 51.2916900                   NA 75.51610000
## median       1.126500e+04   NA  2.9992900                   NA  6.89070000
## mean         6.676382e+04   NA  4.6628809                   NA  6.86510000
## SE.mean      5.103125e+04   NA  1.2986294                   NA  0.12181958
## CI.mean.0.95 1.137047e+05   NA  2.8935266                   NA  0.27143094
## var          2.864607e+10   NA 18.5508218                   NA  0.16324011
## std.dev      1.692515e+05   NA  4.3070665                   NA  0.40402984
## coef.var     2.535078e+00   NA  0.9236922                   NA  0.05885272
##                  longitude geolocation hazard_type landslide_type
## nbr.val       1.100000e+01          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.397430e+01          NA          NA             NA
## max          -7.272600e+01          NA          NA             NA
## range         1.248300e+00          NA          NA             NA
## sum          -8.044988e+02          NA          NA             NA
## median       -7.300210e+01          NA          NA             NA
## mean         -7.313625e+01          NA          NA             NA
## SE.mean       1.110412e-01          NA          NA             NA
## CI.mean.0.95  2.474153e-01          NA          NA             NA
## var           1.356317e-01          NA          NA             NA
## std.dev       3.682821e-01          NA          NA             NA
## coef.var     -5.035561e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        2 11.0000000          NA
## nbr.null                 NA      NA         NA        2  5.0000000          NA
## nbr.na                   NA      NA         NA        9  0.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0 11.0000000          NA
## range                    NA      NA         NA        0 11.0000000          NA
## sum                      NA      NA         NA        0 21.0000000          NA
## median                   NA      NA         NA        0  1.0000000          NA
## mean                     NA      NA         NA        0  1.9090909          NA
## SE.mean                  NA      NA         NA        0  0.9672317          NA
## CI.mean.0.95             NA      NA         NA        0  2.1551266          NA
## var                      NA      NA         NA        0 10.2909091          NA
## std.dev                  NA      NA         NA        0  3.2079447          NA
## coef.var                 NA      NA         NA      NaN  1.6803520          NA
##              source_link        prop         ypos
## nbr.val               NA  11.0000000   11.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.8725195    0.7840646
## max                   NA  24.5919368   98.6954125
## range                 NA  23.7194173   97.9113478
## sum                   NA 100.0000000  512.0028313
## median                NA   5.8475164   45.1947382
## mean                  NA   9.0909091   46.5457119
## SE.mean               NA   2.5318515   11.1910610
## CI.mean.0.95          NA   5.6413166   24.9352379
## var                   NA  70.5129909 1377.6383146
## std.dev               NA   8.3972014   37.1165504
## coef.var              NA   0.9236922    0.7974215
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Deslizamiento Norte de Santander_Colombia

df_Nor %>% 
  select(Country, State, City, Distance, date) 
##      Country              State        City Distance     date
## 110 Colombia Norte de Santander    Hacarí   0.38844  6/24/08
## 403 Colombia Norte de Santander San Calixto  7.85369   8/3/10
## 526 Colombia Norte de Santander   Arboledas 14.62503 11/20/10
## 621 Colombia Norte de Santander     Cúcuta  3.60520  3/26/11
## 921 Colombia Norte de Santander San Calixto 11.19714 11/28/12
head(df_Nor)
##       id     date time America  Country country_code              State
## 110  605  6/24/08           SA Colombia           CO Norte de Santander
## 403 2194   8/3/10           SA Colombia           CO Norte de Santander
## 526 2739 11/20/10           SA Colombia           CO Norte de Santander
## 621 3300  3/26/11           SA Colombia           CO Norte de Santander
## 921 4637 11/28/12           SA Colombia           CO Norte de Santander
##     population        City Distance location_description latitude longitude
## 110       1502    Hacarí   0.38844                        8.3200  -73.1500
## 403       2080 San Calixto  7.85369                        8.4418  -73.2665
## 526       2702   Arboledas 14.62503                        7.6213  -72.9303
## 621     721398     Cúcuta  3.60520              Unknown   7.9467  -72.4908
## 921       2080 San Calixto 11.19714                        8.4683  -73.2843
##                                   geolocation hazard_type landslide_type
## 110               (8.32, -73.150000000000006)   Landslide      Landslide
## 403 (8.4418000000000006, -73.266499999999994)   Landslide        Complex
## 526 (7.6212999999999997, -72.930300000000003)   Landslide      Landslide
## 621 (7.9466999999999999, -72.490799999999993)   Landslide       Mudslide
## 921 (8.4682999999999993, -73.284300000000002)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities   source_name
## 110         Medium Downpour                  NA         10              
## 403         Medium Downpour                  NA          4              
## 526         Medium Downpour                  NA          0              
## 621         Medium Downpour                  NA          0 Caracol Radio
## 921          Large     Rain                  NA         NA              
##                                                                                                   source_link
## 110                                          http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm
## 403                                                                                                          
## 526                             http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 621 http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html
## 921                  http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/
ggplot(data=df_Nor, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Nor, aes(x = "Norte de Santander", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Nor,aes(x="Norte de Santander",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=6)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Nor$Distance
names(Distance) <- df_Nor$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##              
## Pareto chart analysis for Distance
##                Frequency  Cum.Freq. Percentage Cum.Percent.
##   Arboledas    14.625030  14.625030  38.824593    38.824593
##   San Calixto  11.197140  25.822170  29.724684    68.549277
##   San Calixto   7.853690  33.675860  20.848936    89.398213
##   Cúcuta       3.605200  37.281060   9.570608    98.968821
##   Hacarí       0.388440  37.669500   1.031179   100.000000
Diagrama de tallo y hojas
stem(df_Nor$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 8
##   1 | 1
##   1 | 5
head(df_Nor)
##       id     date time America  Country country_code              State
## 110  605  6/24/08           SA Colombia           CO Norte de Santander
## 403 2194   8/3/10           SA Colombia           CO Norte de Santander
## 526 2739 11/20/10           SA Colombia           CO Norte de Santander
## 621 3300  3/26/11           SA Colombia           CO Norte de Santander
## 921 4637 11/28/12           SA Colombia           CO Norte de Santander
##     population        City Distance location_description latitude longitude
## 110       1502    Hacarí   0.38844                        8.3200  -73.1500
## 403       2080 San Calixto  7.85369                        8.4418  -73.2665
## 526       2702   Arboledas 14.62503                        7.6213  -72.9303
## 621     721398     Cúcuta  3.60520              Unknown   7.9467  -72.4908
## 921       2080 San Calixto 11.19714                        8.4683  -73.2843
##                                   geolocation hazard_type landslide_type
## 110               (8.32, -73.150000000000006)   Landslide      Landslide
## 403 (8.4418000000000006, -73.266499999999994)   Landslide        Complex
## 526 (7.6212999999999997, -72.930300000000003)   Landslide      Landslide
## 621 (7.9466999999999999, -72.490799999999993)   Landslide       Mudslide
## 921 (8.4682999999999993, -73.284300000000002)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities   source_name
## 110         Medium Downpour                  NA         10              
## 403         Medium Downpour                  NA          4              
## 526         Medium Downpour                  NA          0              
## 621         Medium Downpour                  NA          0 Caracol Radio
## 921          Large     Rain                  NA         NA              
##                                                                                                   source_link
## 110                                          http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm
## 403                                                                                                          
## 526                             http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 621 http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html
## 921                  http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/
knitr::kable(head(df_Nor))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
110 605 6/24/08 SA Colombia CO Norte de Santander 1502 Hacarí 0.38844 8.3200 -73.1500 (8.32, -73.150000000000006) Landslide Landslide Medium Downpour NA 10 http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm
403 2194 8/3/10 SA Colombia CO Norte de Santander 2080 San Calixto 7.85369 8.4418 -73.2665 (8.4418000000000006, -73.266499999999994) Landslide Complex Medium Downpour NA 4
526 2739 11/20/10 SA Colombia CO Norte de Santander 2702 Arboledas 14.62503 7.6213 -72.9303 (7.6212999999999997, -72.930300000000003) Landslide Landslide Medium Downpour NA 0 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
621 3300 3/26/11 SA Colombia CO Norte de Santander 721398 Cúcuta 3.60520 Unknown 7.9467 -72.4908 (7.9466999999999999, -72.490799999999993) Landslide Mudslide Medium Downpour NA 0 Caracol Radio http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html
921 4637 11/28/12 SA Colombia CO Norte de Santander 2080 San Calixto 11.19714 8.4683 -73.2843 (8.4682999999999993, -73.284300000000002) Landslide Landslide Large Rain NA NA http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/
stem(df_Nor$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 04
##   0 | 8
##   1 | 1
##   1 | 5
stem(df_Nor$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 4
##    2 | 6
##    4 | 
##    6 | 9
##    8 | 
##   10 | 2
##   12 | 
##   14 | 6
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Nor$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May
## 2007  0.38844  7.85369 14.62503  3.60520 11.19714
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.38844 1 20 20 20 20
3.6052 1 20 20 40 40
7.85369 1 20 20 60 60
11.19714 1 20 20 80 80
14.62503 1 20 20 100 100
Total 5 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.38844 1
3.6052 1
7.85369 1
11.19714 1
14.62503 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.38844  5.38844 10.38844 15.38844
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.38844 1 0.2 1
3.6052 1 0.2 2
7.85369 1 0.2 3
11.19714 1 0.2 4
14.62503 1 0.2 5
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ Distance: Factor w/ 5 levels "0.38844","3.6052",..: 1 2 3 4 5
##  $ Freq    : int  1 1 1 1 1
##  $ Rel_Freq: num  0.2 0.2 0.2 0.2 0.2
##  $ Cum_Freq: int  1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.38844 1
3.6052 1
7.85369 1
11.19714 1
14.62503 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Nor$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3884  3.6052  7.8537  7.5339 11.1971 14.6250
library(pastecs)
stat.desc(df_Nor)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      5.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          6.050000e+02   NA   NA      NA      NA           NA    NA
## max          4.637000e+03   NA   NA      NA      NA           NA    NA
## range        4.032000e+03   NA   NA      NA      NA           NA    NA
## sum          1.347500e+04   NA   NA      NA      NA           NA    NA
## median       2.739000e+03   NA   NA      NA      NA           NA    NA
## mean         2.695000e+03   NA   NA      NA      NA           NA    NA
## SE.mean      6.617562e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.837330e+03   NA   NA      NA      NA           NA    NA
## var          2.189607e+06   NA   NA      NA      NA           NA    NA
## std.dev      1.479732e+03   NA   NA      NA      NA           NA    NA
## coef.var     5.490656e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description   latitude
## nbr.val      5.000000e+00   NA  5.0000000                   NA  5.0000000
## nbr.null     0.000000e+00   NA  0.0000000                   NA  0.0000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA  0.0000000
## min          1.502000e+03   NA  0.3884400                   NA  7.6213000
## max          7.213980e+05   NA 14.6250300                   NA  8.4683000
## range        7.198960e+05   NA 14.2365900                   NA  0.8470000
## sum          7.297620e+05   NA 37.6695000                   NA 40.7981000
## median       2.080000e+03   NA  7.8536900                   NA  8.3200000
## mean         1.459524e+05   NA  7.5339000                   NA  8.1596200
## SE.mean      1.438615e+05   NA  2.5524302                   NA  0.1636695
## CI.mean.0.95 3.994236e+05   NA  7.0866824                   NA  0.4544193
## var          1.034807e+11   NA 32.5745000                   NA  0.1339385
## std.dev      3.216841e+05   NA  5.7074075                   NA  0.3659761
## coef.var     2.204035e+00   NA  0.7575635                   NA  0.0448521
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.328430e+01          NA          NA             NA
## max          -7.249080e+01          NA          NA             NA
## range         7.935000e-01          NA          NA             NA
## sum          -3.651219e+02          NA          NA             NA
## median       -7.315000e+01          NA          NA             NA
## mean         -7.302438e+01          NA          NA             NA
## SE.mean       1.475669e-01          NA          NA             NA
## CI.mean.0.95  4.097113e-01          NA          NA             NA
## var           1.088799e-01          NA          NA             NA
## std.dev       3.299695e-01          NA          NA             NA
## coef.var     -4.518621e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   4.000000          NA
## nbr.null                 NA      NA         NA        0   2.000000          NA
## nbr.na                   NA      NA         NA        5   1.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf  10.000000          NA
## range                    NA      NA         NA     -Inf  10.000000          NA
## sum                      NA      NA         NA        0  14.000000          NA
## median                   NA      NA         NA       NA   2.000000          NA
## mean                     NA      NA         NA      NaN   3.500000          NA
## SE.mean                  NA      NA         NA       NA   2.362908          NA
## CI.mean.0.95             NA      NA         NA      NaN   7.519827          NA
## var                      NA      NA         NA       NA  22.333333          NA
## std.dev                  NA      NA         NA       NA   4.725816          NA
## coef.var                 NA      NA         NA       NA   1.350233          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

CostaRica

df_CostaRica %>% 
  select(Country, State, City, Distance, date)
##         Country      State                  City Distance     date
## 38   Costa Rica    Heredia               Heredia  0.26208   9/9/07
## 44   Costa Rica  San José           San Ignacio  4.57763  10/9/07
## 45   Costa Rica   Alajuela                Atenas  3.08459 10/11/07
## 46   Costa Rica  San José                        9.56251 10/11/07
## 51   Costa Rica Puntarenas               Miramar  3.82425 10/24/07
## 102  Costa Rica Guanacaste               Bagaces 17.65521  5/29/08
## 147  Costa Rica  San José         Daniel Flores  1.85787   9/6/08
## 153  Costa Rica  San José            San Isidro 16.24937 10/12/08
## 154  Costa Rica  San José              Santiago 12.85801 10/12/08
## 156  Costa Rica Puntarenas               Golfito 11.74074 10/15/08
## 157  Costa Rica Puntarenas               Miramar  8.92048 10/16/08
## 229  Costa Rica Puntarenas              San Vito 18.00524 11/13/09
## 302  Costa Rica   Alajuela          Desamparados  6.88715  4/14/10
## 311  Costa Rica    Heredia              Ã\201ngeles 19.51432  4/27/10
## 347  Costa Rica   Alajuela          Desamparados  6.92174  5/22/10
## 395  Costa Rica   Alajuela          Desamparados  4.24199  7/30/10
## 459  Costa Rica   Alajuela            San Rafael  1.47396  9/29/10
## 469  Costa Rica  San José              Salitral  0.25254  10/1/10
## 470  Costa Rica  San José              Salitral  0.25254  10/1/10
## 480  Costa Rica    Heredia              Ã\201ngeles 14.81614 10/15/10
## 501  Costa Rica  San José               Escazú  3.67691  11/4/10
## 502  Costa Rica  San José            San Marcos  0.55804  11/4/10
## 503  Costa Rica   Alajuela            San Rafael  9.61692  11/4/10
## 504  Costa Rica Guanacaste              Tilarán 10.21631  11/4/10
## 505  Costa Rica    Cartago                Orosí 19.28722  11/4/10
## 506  Costa Rica Puntarenas               Golfito  7.87044  11/4/10
## 507  Costa Rica  San José                 Tejar  6.49523  11/4/10
## 508  Costa Rica  San José            San Isidro 15.64997  11/4/10
## 509  Costa Rica Puntarenas              Corredor  4.93053  11/4/10
## 510  Costa Rica Puntarenas               Parrita 13.48919  11/4/10
## 511  Costa Rica Puntarenas        Ciudad Cortés 20.06633  11/4/10
## 512  Costa Rica  San José            San Isidro 11.31047  11/4/10
## 513  Costa Rica  San José              Mercedes  8.21372  11/4/10
## 514  Costa Rica   Alajuela              Santiago  5.43516  11/5/10
## 529  Costa Rica    Heredia              Ã\201ngeles 19.54581 11/21/10
## 579  Costa Rica     Limón             Guápiles 17.23264  1/11/11
## 702  Costa Rica    Heredia              Ã\201ngeles 15.05161   5/8/11
## 780  Costa Rica   Alajuela                 Upala  0.70048  7/12/11
## 819  Costa Rica  San José            San Isidro 21.67452  9/25/11
## 828  Costa Rica    Cartago                   Cot  9.63616 10/31/11
## 884  Costa Rica    Heredia         Santo Domingo 21.95470  5/13/12
## 888  Costa Rica Guanacaste              Tilarán 12.33807  5/31/12
## 889  Costa Rica     Limón             Siquirres  5.36500  6/14/12
## 913  Costa Rica  San José         Daniel Flores  4.89954 10/23/12
## 1098 Costa Rica   Alajuela             Sabanilla  4.87432  8/27/13
## 1156 Costa Rica   Alajuela             Sabanilla 10.32968  9/16/13
## 1157 Costa Rica    Heredia         Santo Domingo  9.85736  9/16/13
## 1169 Costa Rica Guanacaste              Tilarán 12.21952  10/3/13
## 1173 Costa Rica Guanacaste              Tilarán 12.18115  10/8/13
## 1289 Costa Rica   Alajuela            La Fortuna  9.84213  10/4/14
## 1301 Costa Rica   Alajuela                        5.57523  9/19/14
## 1308 Costa Rica   Alajuela          Desamparados  5.95519  11/1/14
## 1342 Costa Rica   Alajuela           Rio Segundo 11.96524  8/21/14
## 1364 Costa Rica   Alajuela          Desamparados  5.12667  8/10/14
## 1383 Costa Rica    Cartago               Cartago  3.07297  9/13/14
## 1384 Costa Rica    Heredia Dulce Nombre de Jesus 10.01310 12/13/14
## 1385 Costa Rica  San José Dulce Nombre de Jesus  2.92605  11/3/14
## 1386 Costa Rica  San José            San Isidro 10.73752  9/19/14
## 1404 Costa Rica  San José            San Isidro 22.32368  1/28/15
## 1406 Costa Rica  San José Dulce Nombre de Jesus  8.39161   2/6/15
## 1461 Costa Rica   Alajuela            La Fortuna  5.96634  6/17/15
## 1475 Costa Rica   Alajuela                Atenas  6.80061   6/3/15
## 1528 Costa Rica  San José              Ã\201ngeles  9.53611   7/6/15
## 1529 Costa Rica  San José Dulce Nombre de Jesus  3.71407   7/6/15
## 1600 Costa Rica  San José              San Juan  0.72957 10/29/15
## 1642 Costa Rica   Alajuela         Santo Domingo  3.21979 10/27/15
## 1643 Costa Rica   Alajuela              Alajuela  3.08916 11/18/15
## 1644 Costa Rica   Alajuela               Naranjo  2.08469 10/29/15
## 1646 Costa Rica    Cartago                        5.15142 10/15/15
## 1647 Costa Rica    Cartago                   Cot  9.53493  3/20/15
## 1648 Costa Rica    Cartago               Cartago  2.94804  3/18/15
## 1649 Costa Rica Puntarenas          Buenos Aires  0.35225 11/23/15
## 1650 Costa Rica  San José             San José  1.16705  9/25/15
## 1651 Costa Rica  San José              Mercedes 10.01198  11/5/15
## 1652 Costa Rica  San José              Santiago  8.27042 11/11/15

Deslizamiento por estado o departamento

ggplot(data=df_CostaRica, aes(fill=State, y=Distance, x="Costa Rica")) +
  geom_bar(position="dodge", stat="identity")

ggplot(data=df_CostaRica, aes(fill=State, y=Distance, x="Costa Rica")) +
  geom_bar(position="stack", stat="identity")

Alajuela:

  • Deslizamientos de las ciudades de Alajuela
head(df_Alajuela %>% 
  select(Country, State, City, Distance, date))
##        Country    State         City Distance     date
## 45  Costa Rica Alajuela       Atenas  3.08459 10/11/07
## 302 Costa Rica Alajuela Desamparados  6.88715  4/14/10
## 347 Costa Rica Alajuela Desamparados  6.92174  5/22/10
## 395 Costa Rica Alajuela Desamparados  4.24199  7/30/10
## 459 Costa Rica Alajuela   San Rafael  1.47396  9/29/10
## 503 Costa Rica Alajuela   San Rafael  9.61692  11/4/10
head(df_Alajuela)
##       id     date     time America    Country country_code    State population
## 45   301 10/11/07             <NA> Costa Rica           CR Alajuela       7014
## 302 1749  4/14/10             <NA> Costa Rica           CR Alajuela      14448
## 347 1886  5/22/10 18:00:00    <NA> Costa Rica           CR Alajuela      14448
## 395 2174  7/30/10  9:30:00    <NA> Costa Rica           CR Alajuela      14448
## 459 2516  9/29/10             <NA> Costa Rica           CR Alajuela       3624
## 503 2682  11/4/10             <NA> Costa Rica           CR Alajuela       3624
##             City Distance location_description latitude longitude
## 45        Atenas  3.08459                        9.9869  -84.4070
## 302 Desamparados  6.88715           Above road   9.9323  -84.4453
## 347 Desamparados  6.92174           Above road   9.9290  -84.4428
## 395 Desamparados  4.24199           Above road   9.9271  -84.4568
## 459   San Rafael  1.47396                       10.0757  -84.4793
## 503   San Rafael  9.61692                       10.0421  -84.5577
##                                   geolocation hazard_type landslide_type
## 45  (9.9869000000000003, -84.406999999999996)   Landslide       Mudslide
## 302 (9.9322999999999997, -84.445300000000003)   Landslide      Landslide
## 347 (9.9290000000000003, -84.442800000000005)   Landslide      Landslide
## 395 (9.9270999999999994, -84.456800000000001)   Landslide      Landslide
## 459 (10.075699999999999, -84.479299999999995)   Landslide       Mudslide
## 503            (10.0421, -84.557699999999997)   Landslide      Landslide
##     landslide_size          trigger           storm_name injuries fatalities
## 45           Large             Rain                            NA         14
## 302         Medium         Downpour                            NA          0
## 347         Medium         Downpour                             3          0
## 395         Medium             Rain                            NA          0
## 459         Medium         Downpour                            NA          0
## 503         Medium Tropical cyclone Tropical Storm Tomas       NA          0
##                              source_name
## 45  Agence France-Presse, afp.google.com
## 302                                     
## 347                      Costa Rica News
## 395                           La Fortuna
## 459                                     
## 503                                     
##                                                                                                                     source_link
## 45                                                             http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
## 302                                                http://www.insidecostarica.com/dailynews/2010/april/16/costarica10041602.htm
## 347                                       http://thecostaricanews.com/rains-cause-landslides-and-road-accidents-on-caldera/3255
## 395    https://lafortunatimes.wordpress.com/2010/07/30/landslide-caused-closure-of-san-jose-caldera-for-most-of-the-day-friday/
## 459 http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010
## 503                                             http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
ggplot(data=df_Alajuela, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Alajuela, aes(x = "Alajuela", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Alajuela <- df_Alajuela %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Alajuela$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Alajuela, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Alajuela$Distance
names(Distance) <- df_Alajuela$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                
## Pareto chart analysis for Distance
##                   Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Rio Segundo    11.9652400  11.9652400  10.5708367   10.5708367
##   Sabanilla      10.3296800  22.2949200   9.1258813   19.6967180
##   La Fortuna      9.8421300  32.1370500   8.6951494   28.3918674
##   San Rafael      9.6169200  41.7539700   8.4961849   36.8880523
##   Desamparados    6.9217400  48.6757100   6.1150953   43.0031476
##   Desamparados    6.8871500  55.5628600   6.0845364   49.0876840
##   Atenas          6.8006100  62.3634700   6.0080816   55.0957655
##   La Fortuna      5.9663400  68.3298100   5.2710356   60.3668011
##   Desamparados    5.9551900  74.2850000   5.2611850   65.6279861
##                   5.5752300  79.8602300   4.9255047   70.5534908
##   Santiago        5.4351600  85.2953900   4.8017582   75.3552490
##   Desamparados    5.1266700  90.4220600   4.5292189   79.8844679
##   Sabanilla       4.8743200  95.2963800   4.3062772   84.1907451
##   Desamparados    4.2419900  99.5383700   3.7476376   87.9383828
##   Santo Domingo   3.2197900 102.7581600   2.8445626   90.7829454
##   Alajuela        3.0891600 105.8473200   2.7291559   93.5121013
##   Atenas          3.0845900 108.9319100   2.7251185   96.2372198
##   Naranjo         2.0846900 111.0166000   1.8417447   98.0789646
##   San Rafael      1.4739600 112.4905600   1.3021879   99.3811524
##   Upala           0.7004800 113.1910400   0.6188476  100.0000000
Diagrama de tallo y hojas
stem(df_Alajuela$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1123334
##   0 | 555666777
##   1 | 0002
head(df_Alajuela)
##     id     date time America    Country country_code    State population
## 1 3762  7/12/11         <NA> Costa Rica           CR Alajuela       4185
## 2 7486 10/27/15         <NA> Costa Rica           CR Alajuela       5745
## 3 2703  11/5/10         <NA> Costa Rica           CR Alajuela       2107
## 4 2516  9/29/10         <NA> Costa Rica           CR Alajuela       3624
## 5 2682  11/4/10         <NA> Costa Rica           CR Alajuela       3624
## 6 5408  8/27/13         <NA> Costa Rica           CR Alajuela       1015
##            City Distance location_description latitude longitude
## 1         Upala  0.70048                       10.8916  -85.0141
## 2 Santo Domingo  3.21979           Above road  10.0600  -84.1841
## 3      Santiago  5.43516                        9.9990  -84.4876
## 4    San Rafael  1.47396                       10.0757  -84.4793
## 5    San Rafael  9.61692                       10.0421  -84.5577
## 6     Sabanilla  4.87432                       10.1181  -84.2146
##                                 geolocation hazard_type landslide_type
## 1            (10.8916, -85.014099999999999)   Landslide      Landslide
## 2              (10.06, -84.184100000000001)   Landslide       Mudslide
## 3            (9.9990000000000006, -84.4876)   Landslide      Landslide
## 4 (10.075699999999999, -84.479299999999995)   Landslide       Mudslide
## 5            (10.0421, -84.557699999999997)   Landslide      Landslide
## 6            (10.1181, -84.214600000000004)   Landslide      Landslide
##   landslide_size          trigger           storm_name injuries fatalities
## 1         Medium         Downpour                            NA          0
## 2          Small             Rain                             0          0
## 3         Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 4         Medium         Downpour                            NA          0
## 5         Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 6         Medium         Downpour                            NA         NA
##           source_name
## 1                    
## 2     La Prensa Libre
## 3                    
## 4                    
## 5                    
## 6 insidecostarica.com
##                                                                                                                      source_link
## 1 http://www.ticotimes.net/Current-Edition/News-Briefs/Upala-on-yellow-alert-after-earthquake-heavy-rains_Wednesday-July-13-2011
## 2                             http://www.laprensalibre.cr/Noticias/detalle/45060/430/deslizamiento-deja-bus-atrapado-en-alajuela
## 3                                                http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 4    http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010
## 5                                                http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 6                        http://insidecostarica.com/2013/08/28/flooding-and-landslides-after-heavy-downpours-across-the-country/
##        prop       ypos
## 1 0.6188476  0.3094238
## 2 2.8445626  2.0411289
## 3 4.8017582  5.8642893
## 4 1.3021879  8.9162623
## 5 8.4961849 13.8154486
## 6 4.3062772 20.2166797
knitr::kable(head(df_Alajuela))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
3762 7/12/11 NA Costa Rica CR Alajuela 4185 Upala 0.70048 10.8916 -85.0141 (10.8916, -85.014099999999999) Landslide Landslide Medium Downpour NA 0 http://www.ticotimes.net/Current-Edition/News-Briefs/Upala-on-yellow-alert-after-earthquake-heavy-rains_Wednesday-July-13-2011 0.6188476 0.3094238
7486 10/27/15 NA Costa Rica CR Alajuela 5745 Santo Domingo 3.21979 Above road 10.0600 -84.1841 (10.06, -84.184100000000001) Landslide Mudslide Small Rain 0 0 La Prensa Libre http://www.laprensalibre.cr/Noticias/detalle/45060/430/deslizamiento-deja-bus-atrapado-en-alajuela 2.8445626 2.0411289
2703 11/5/10 NA Costa Rica CR Alajuela 2107 Santiago 5.43516 9.9990 -84.4876 (9.9990000000000006, -84.4876) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 4.8017582 5.8642893
2516 9/29/10 NA Costa Rica CR Alajuela 3624 San Rafael 1.47396 10.0757 -84.4793 (10.075699999999999, -84.479299999999995) Landslide Mudslide Medium Downpour NA 0 http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010 1.3021879 8.9162623
2682 11/4/10 NA Costa Rica CR Alajuela 3624 San Rafael 9.61692 10.0421 -84.5577 (10.0421, -84.557699999999997) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 8.4961849 13.8154486
5408 8/27/13 NA Costa Rica CR Alajuela 1015 Sabanilla 4.87432 10.1181 -84.2146 (10.1181, -84.214600000000004) Landslide Landslide Medium Downpour NA NA insidecostarica.com http://insidecostarica.com/2013/08/28/flooding-and-landslides-after-heavy-downpours-across-the-country/ 4.3062772 20.2166797
stem(df_Alajuela$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 1123334
##   0 | 555666777
##   1 | 0002
stem(df_Alajuela$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 75
##    2 | 1112
##    4 | 29146
##    6 | 00899
##    8 | 68
##   10 | 3
##   12 | 0
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Alajuela$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr     May     Jun
## 2007 0.70048 3.21979 5.43516 1.47396 9.61692 4.87432
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.70048 1 5 5 5 5
1.47396 1 5 5 10 10
2.08469 1 5 5 15 15
3.08459 1 5 5 20 20
3.08916 1 5 5 25 25
3.21979 1 5 5 30 30
4.24199 1 5 5 35 35
4.87432 1 5 5 40 40
5.12667 1 5 5 45 45
5.43516 1 5 5 50 50
5.57523 1 5 5 55 55
5.95519 1 5 5 60 60
5.96634 1 5 5 65 65
6.80061 1 5 5 70 70
6.88715 1 5 5 75 75
6.92174 1 5 5 80 80
9.61692 1 5 5 85 85
9.84213 1 5 5 90 90
10.32968 1 5 5 95 95
11.96524 1 5 5 100 100
Total 20 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  21 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ val%   : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ %cum   : num  5 10 15 20 25 30 35 40 45 50 ...
##  $ val%cum: num  5 10 15 20 25 30 35 40 45 50 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.70048 1
1.47396 1
2.08469 1
3.08459 1
3.08916 1
3.21979 1
4.24199 1
4.87432 1
5.12667 1
5.43516 1
5.57523 1
5.95519 1
5.96634 1
6.80061 1
6.88715 1
6.92174 1
9.61692 1
9.84213 1
10.32968 1
11.96524 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.70048  3.70048  6.70048  9.70048 12.70048
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.70048 1 0.05 1
1.47396 1 0.05 2
2.08469 1 0.05 3
3.08459 1 0.05 4
3.08916 1 0.05 5
3.21979 1 0.05 6
4.24199 1 0.05 7
4.87432 1 0.05 8
5.12667 1 0.05 9
5.43516 1 0.05 10
5.57523 1 0.05 11
5.95519 1 0.05 12
5.96634 1 0.05 13
6.80061 1 0.05 14
6.88715 1 0.05 15
6.92174 1 0.05 16
9.61692 1 0.05 17
9.84213 1 0.05 18
10.32968 1 0.05 19
11.96524 1 0.05 20
str(Freq_table)
## 'data.frame':    20 obs. of  4 variables:
##  $ Distance: Factor w/ 20 levels "0.70048","1.47396",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Freq    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Rel_Freq: num  0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 ...
##  $ Cum_Freq: int  1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.70048 1
1.47396 1
2.08469 1
3.08459 1
3.08916 1
3.21979 1
4.24199 1
4.87432 1
5.12667 1
5.43516 1
5.57523 1
5.95519 1
5.96634 1
6.80061 1
6.88715 1
6.92174 1
9.61692 1
9.84213 1
10.32968 1
11.96524 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Alajuela$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7005  3.1871  5.5052  5.6596  6.8958 11.9652
library(pastecs)
stat.desc(df_Alajuela)
##                        id date time America Country country_code State
## nbr.val      2.000000e+01   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          3.010000e+02   NA   NA      NA      NA           NA    NA
## max          7.488000e+03   NA   NA      NA      NA           NA    NA
## range        7.187000e+03   NA   NA      NA      NA           NA    NA
## sum          9.718800e+04   NA   NA      NA      NA           NA    NA
## median       5.878000e+03   NA   NA      NA      NA           NA    NA
## mean         4.859400e+03   NA   NA      NA      NA           NA    NA
## SE.mean      5.261514e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.101248e+03   NA   NA      NA      NA           NA    NA
## var          5.536707e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.353021e+03   NA   NA      NA      NA           NA    NA
## coef.var     4.842204e-01   NA   NA      NA      NA           NA    NA
##                population City    Distance location_description     latitude
## nbr.val      2.000000e+01   NA  20.0000000                   NA  20.00000000
## nbr.null     0.000000e+00   NA   0.0000000                   NA   0.00000000
## nbr.na       0.000000e+00   NA   0.0000000                   NA   0.00000000
## min          1.015000e+03   NA   0.7004800                   NA   9.91890000
## max          4.749400e+04   NA  11.9652400                   NA  10.89160000
## range        4.647900e+04   NA  11.2647600                   NA   0.97270000
## sum          1.924900e+05   NA 113.1910400                   NA 202.24760000
## median       7.014000e+03   NA   5.5051950                   NA  10.04315000
## mean         9.624500e+03   NA   5.6595520                   NA  10.11238000
## SE.mean      2.281502e+03   NA   0.6812501                   NA   0.05493583
## CI.mean.0.95 4.775238e+03   NA   1.4258729                   NA   0.11498201
## var          1.041050e+08   NA   9.2820347                   NA   0.06035891
## std.dev      1.020319e+04   NA   3.0466432                   NA   0.24568050
## coef.var     1.060126e+00   NA   0.5383188                   NA   0.02429502
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+01          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.501410e+01          NA          NA             NA
## max          -8.418070e+01          NA          NA             NA
## range         8.334000e-01          NA          NA             NA
## sum          -1.688552e+03          NA          NA             NA
## median       -8.444405e+01          NA          NA             NA
## mean         -8.442758e+01          NA          NA             NA
## SE.mean       4.594981e-02          NA          NA             NA
## CI.mean.0.95  9.617405e-02          NA          NA             NA
## var           4.222770e-02          NA          NA             NA
## std.dev       2.054938e-01          NA          NA             NA
## coef.var     -2.433965e-03          NA          NA             NA
##              landslide_size trigger storm_name   injuries fatalities
## nbr.val                  NA      NA         NA 11.0000000 18.0000000
## nbr.null                 NA      NA         NA 10.0000000 15.0000000
## nbr.na                   NA      NA         NA  9.0000000  2.0000000
## min                      NA      NA         NA  0.0000000  0.0000000
## max                      NA      NA         NA  3.0000000 14.0000000
## range                    NA      NA         NA  3.0000000 14.0000000
## sum                      NA      NA         NA  3.0000000 16.0000000
## median                   NA      NA         NA  0.0000000  0.0000000
## mean                     NA      NA         NA  0.2727273  0.8888889
## SE.mean                  NA      NA         NA  0.2727273  0.7749716
## CI.mean.0.95             NA      NA         NA  0.6076742  1.6350471
## var                      NA      NA         NA  0.8181818 10.8104575
## std.dev                  NA      NA         NA  0.9045340  3.2879260
## coef.var                 NA      NA         NA  3.3166248  3.6989168
##              source_name source_link        prop         ypos
## nbr.val               NA          NA  20.0000000   20.0000000
## nbr.null              NA          NA   0.0000000    0.0000000
## nbr.na                NA          NA   0.0000000    0.0000000
## min                   NA          NA   0.6188476    0.3094238
## max                   NA          NA  10.5708367   97.5372476
## range                 NA          NA   9.9519891   97.2278239
## sum                   NA          NA 100.0000000  984.6035428
## median                NA          NA   4.8636314   51.7474020
## mean                  NA          NA   5.0000000   49.2301771
## SE.mean               NA          NA   0.6018587    7.4144621
## CI.mean.0.95          NA          NA   1.2597047   15.5186476
## var                   NA          NA   7.2446780 1099.4849742
## std.dev               NA          NA   2.6915940   33.1584827
## coef.var              NA          NA   0.5383188    0.6735398
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Cartago:

  • Deslizamientos de las ciudades de Cartago
df_Cartago %>% 
  select(Country, State, City, Distance, date) 
##         Country   State    City Distance     date
## 505  Costa Rica Cartago  Orosí 19.28722  11/4/10
## 828  Costa Rica Cartago     Cot  9.63616 10/31/11
## 1383 Costa Rica Cartago Cartago  3.07297  9/13/14
## 1646 Costa Rica Cartago          5.15142 10/15/15
## 1647 Costa Rica Cartago     Cot  9.53493  3/20/15
## 1648 Costa Rica Cartago Cartago  2.94804  3/18/15
head(df_Cartago)
##        id     date time America    Country country_code   State population
## 505  2684  11/4/10         <NA> Costa Rica           CR Cartago       4350
## 828  4031 10/31/11         <NA> Costa Rica           CR Cartago       6784
## 1383 6695  9/13/14         <NA> Costa Rica           CR Cartago      26594
## 1646 7490 10/15/15         <NA> Costa Rica           CR Cartago       4060
## 1647 7491  3/20/15 8:00    <NA> Costa Rica           CR Cartago       6784
## 1648 7492  3/18/15         <NA> Costa Rica           CR Cartago      26594
##         City Distance location_description latitude longitude
## 505   Orosí 19.28722                        9.6227  -83.8359
## 828      Cot  9.63616        Natural slope   9.9792  -83.8525
## 1383 Cartago  3.07297           Below road   9.8895  -83.9316
## 1646          5.15142           Above road   9.7917  -83.9815
## 1647     Cot  9.53493        Natural slope   9.9786  -83.8542
## 1648 Cartago  2.94804           Urban area   9.8815  -83.9401
##                                    geolocation hazard_type landslide_type
## 505              (9.6227, -83.835899999999995)   Landslide      Landslide
## 828  (9.9792000000000005, -83.852500000000006)   Landslide      Landslide
## 1383             (9.8895, -83.931600000000003)   Landslide      Landslide
## 1646 (9.7917000000000005, -83.981499999999997)   Landslide      Landslide
## 1647 (9.9786000000000001, -83.854200000000006)   Landslide          Other
## 1648 (9.8815000000000008, -83.940100000000001)   Landslide      Landslide
##      landslide_size          trigger           storm_name injuries fatalities
## 505          Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 828          Medium         Downpour                            NA          0
## 1383          Small             Rain                             0          0
## 1646         Medium         Downpour                             0          0
## 1647         Medium          Volcano                             0          0
## 1648         Medium         Downpour                             0          0
##            source_name
## 505                   
## 828  Inside Costa Rica
## 1383             Ahora
## 1646             crhoy
## 1647   Costa Rica Star
## 1648      CIUDADREGION
##                                                                                                         source_link
## 505                                 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 828                                  http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm
## 1383                            http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html
## 1646                                http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/
## 1647                                      http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/
## 1648 http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745
ggplot(data=df_Cartago, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Cartago, aes(x = "Cartago", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Cartago <- df_Cartago %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Cartago$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Cartago, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Cartago$Distance
names(Distance) <- df_Cartago$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##          
## Pareto chart analysis for Distance
##            Frequency  Cum.Freq. Percentage Cum.Percent.
##   Orosí   19.287220  19.287220  38.861440    38.861440
##   Cot       9.636160  28.923380  19.415709    58.277148
##   Cot       9.534930  38.458310  19.211743    77.488891
##             5.151420  43.609730  10.379495    87.868386
##   Cartago   3.072970  46.682700   6.191667    94.060052
##   Cartago   2.948040  49.630740   5.939948   100.000000
Diagrama de tallo y hojas
stem(df_Cartago$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 33
##   0 | 5
##   1 | 00
##   1 | 9
head(df_Cartago)
##     id     date time America    Country country_code   State population    City
## 1 2684  11/4/10         <NA> Costa Rica           CR Cartago       4350  Orosí
## 2 4031 10/31/11         <NA> Costa Rica           CR Cartago       6784     Cot
## 3 7491  3/20/15 8:00    <NA> Costa Rica           CR Cartago       6784     Cot
## 4 6695  9/13/14         <NA> Costa Rica           CR Cartago      26594 Cartago
## 5 7492  3/18/15         <NA> Costa Rica           CR Cartago      26594 Cartago
## 6 7490 10/15/15         <NA> Costa Rica           CR Cartago       4060        
##   Distance location_description latitude longitude
## 1 19.28722                        9.6227  -83.8359
## 2  9.63616        Natural slope   9.9792  -83.8525
## 3  9.53493        Natural slope   9.9786  -83.8542
## 4  3.07297           Below road   9.8895  -83.9316
## 5  2.94804           Urban area   9.8815  -83.9401
## 6  5.15142           Above road   9.7917  -83.9815
##                                 geolocation hazard_type landslide_type
## 1             (9.6227, -83.835899999999995)   Landslide      Landslide
## 2 (9.9792000000000005, -83.852500000000006)   Landslide      Landslide
## 3 (9.9786000000000001, -83.854200000000006)   Landslide          Other
## 4             (9.8895, -83.931600000000003)   Landslide      Landslide
## 5 (9.8815000000000008, -83.940100000000001)   Landslide      Landslide
## 6 (9.7917000000000005, -83.981499999999997)   Landslide      Landslide
##   landslide_size          trigger           storm_name injuries fatalities
## 1         Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 2         Medium         Downpour                            NA          0
## 3         Medium          Volcano                             0          0
## 4          Small             Rain                             0          0
## 5         Medium         Downpour                             0          0
## 6         Medium         Downpour                             0          0
##         source_name
## 1                  
## 2 Inside Costa Rica
## 3   Costa Rica Star
## 4             Ahora
## 5      CIUDADREGION
## 6             crhoy
##                                                                                                      source_link
## 1                                http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 2                                 http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm
## 3                                      http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/
## 4                            http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html
## 5 http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745
## 6                                http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/
##        prop     ypos
## 1 38.861440 19.43072
## 2 19.415709 48.56929
## 3 19.211743 67.88302
## 4  6.191667 80.58472
## 5  5.939948 86.65053
## 6 10.379495 94.81025
knitr::kable(head(df_Cartago))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2684 11/4/10 NA Costa Rica CR Cartago 4350 Orosí 19.28722 9.6227 -83.8359 (9.6227, -83.835899999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 38.861440 19.43072
4031 10/31/11 NA Costa Rica CR Cartago 6784 Cot 9.63616 Natural slope 9.9792 -83.8525 (9.9792000000000005, -83.852500000000006) Landslide Landslide Medium Downpour NA 0 Inside Costa Rica http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm 19.415709 48.56929
7491 3/20/15 8:00 NA Costa Rica CR Cartago 6784 Cot 9.53493 Natural slope 9.9786 -83.8542 (9.9786000000000001, -83.854200000000006) Landslide Other Medium Volcano 0 0 Costa Rica Star http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/ 19.211743 67.88302
6695 9/13/14 NA Costa Rica CR Cartago 26594 Cartago 3.07297 Below road 9.8895 -83.9316 (9.8895, -83.931600000000003) Landslide Landslide Small Rain 0 0 Ahora http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html 6.191667 80.58472
7492 3/18/15 NA Costa Rica CR Cartago 26594 Cartago 2.94804 Urban area 9.8815 -83.9401 (9.8815000000000008, -83.940100000000001) Landslide Landslide Medium Downpour 0 0 CIUDADREGION http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745 5.939948 86.65053
7490 10/15/15 NA Costa Rica CR Cartago 4060 5.15142 Above road 9.7917 -83.9815 (9.7917000000000005, -83.981499999999997) Landslide Landslide Medium Downpour 0 0 crhoy http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/ 10.379495 94.81025
stem(df_Cartago$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 33
##   0 | 5
##   1 | 00
##   1 | 9
stem(df_Cartago$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    2 | 91
##    4 | 2
##    6 | 
##    8 | 56
##   10 | 
##   12 | 
##   14 | 
##   16 | 
##   18 | 3
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Cartago$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May      Jun
## 2007 19.28722  9.63616  9.53493  3.07297  2.94804  5.15142
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.94804 1 16.7 16.7 16.7 16.7
3.07297 1 16.7 16.7 33.3 33.3
5.15142 1 16.7 16.7 50.0 50.0
9.53493 1 16.7 16.7 66.7 66.7
9.63616 1 16.7 16.7 83.3 83.3
19.28722 1 16.7 16.7 100.0 100.0
Total 6 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  7 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 6
##  $ %      : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ val%   : num  16.7 16.7 16.7 16.7 16.7 16.7 100
##  $ %cum   : num  16.7 33.3 50 66.7 83.3 100 100
##  $ val%cum: num  16.7 33.3 50 66.7 83.3 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.94804 1
3.07297 1
5.15142 1
9.53493 1
9.63616 1
19.28722 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  2.94804  8.94804 14.94804 20.94804
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
2.94804 1 0.1666667 1
3.07297 1 0.1666667 2
5.15142 1 0.1666667 3
9.53493 1 0.1666667 4
9.63616 1 0.1666667 5
19.28722 1 0.1666667 6
str(Freq_table)
## 'data.frame':    6 obs. of  4 variables:
##  $ Distance: Factor w/ 6 levels "2.94804","3.07297",..: 1 2 3 4 5 6
##  $ Freq    : int  1 1 1 1 1 1
##  $ Rel_Freq: num  0.167 0.167 0.167 0.167 0.167 ...
##  $ Cum_Freq: int  1 2 3 4 5 6
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
2.94804 1
3.07297 1
5.15142 1
9.53493 1
9.63616 1
19.28722 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Cartago$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.948   3.593   7.343   8.272   9.611  19.287
library(pastecs)
stat.desc(df_Cartago)
##                        id date time America Country country_code State
## nbr.val      6.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.684000e+03   NA   NA      NA      NA           NA    NA
## max          7.492000e+03   NA   NA      NA      NA           NA    NA
## range        4.808000e+03   NA   NA      NA      NA           NA    NA
## sum          3.588300e+04   NA   NA      NA      NA           NA    NA
## median       7.092500e+03   NA   NA      NA      NA           NA    NA
## mean         5.980500e+03   NA   NA      NA      NA           NA    NA
## SE.mean      8.567926e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 2.202455e+03   NA   NA      NA      NA           NA    NA
## var          4.404561e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.098705e+03   NA   NA      NA      NA           NA    NA
## coef.var     3.509246e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description    latitude
## nbr.val      6.000000e+00   NA  6.0000000                   NA  6.00000000
## nbr.null     0.000000e+00   NA  0.0000000                   NA  0.00000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA  0.00000000
## min          4.060000e+03   NA  2.9480400                   NA  9.62270000
## max          2.659400e+04   NA 19.2872200                   NA  9.97920000
## range        2.253400e+04   NA 16.3391800                   NA  0.35650000
## sum          7.516600e+04   NA 49.6307400                   NA 59.14320000
## median       6.784000e+03   NA  7.3431750                   NA  9.88550000
## mean         1.252767e+04   NA  8.2717900                   NA  9.85720000
## SE.mean      4.473174e+03   NA  2.5159722                   NA  0.05493519
## CI.mean.0.95 1.149866e+04   NA  6.4675124                   NA  0.14121539
## var          1.200557e+08   NA 37.9806957                   NA  0.01810725
## std.dev      1.095699e+04   NA  6.1628480                   NA  0.13456317
## coef.var     8.746236e-01   NA  0.7450441                   NA  0.01365126
##                  longitude geolocation hazard_type landslide_type
## nbr.val       6.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.398150e+01          NA          NA             NA
## max          -8.383590e+01          NA          NA             NA
## range         1.456000e-01          NA          NA             NA
## sum          -5.033958e+02          NA          NA             NA
## median       -8.389290e+01          NA          NA             NA
## mean         -8.389930e+01          NA          NA             NA
## SE.mean       2.429580e-02          NA          NA             NA
## CI.mean.0.95  6.245435e-02          NA          NA             NA
## var           3.541716e-03          NA          NA             NA
## std.dev       5.951232e-02          NA          NA             NA
## coef.var     -7.093303e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        4          6          NA
## nbr.null                 NA      NA         NA        4          6          NA
## nbr.na                   NA      NA         NA        2          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link        prop        ypos
## nbr.val               NA   6.0000000   6.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   5.9399477  19.4307198
## max                   NA  38.8614395  94.8102527
## range                 NA  32.9214918  75.3795329
## sum                   NA 100.0000000 397.9285419
## median                NA  14.7956186  74.2338720
## mean                  NA  16.6666667  66.3214236
## SE.mean               NA   5.0693827  11.4668254
## CI.mean.0.95          NA  13.0312632  29.4764131
## var                   NA 154.1918484 788.9285109
## std.dev               NA  12.4174010  28.0878712
## coef.var              NA   0.7450441   0.4235113
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Guanacaste:

  • Deslizamientos de las ciudades de Guanacaste
df_Guanacaste %>% 
  select(Country, State, City, Distance, date) 
##         Country      State     City Distance    date
## 102  Costa Rica Guanacaste  Bagaces 17.65521 5/29/08
## 504  Costa Rica Guanacaste Tilarán 10.21631 11/4/10
## 888  Costa Rica Guanacaste Tilarán 12.33807 5/31/12
## 1169 Costa Rica Guanacaste Tilarán 12.21952 10/3/13
## 1173 Costa Rica Guanacaste Tilarán 12.18115 10/8/13
head(df_Guanacaste)
##        id    date    time America    Country country_code      State population
## 102   556 5/29/08            <NA> Costa Rica           CR Guanacaste       4108
## 504  2683 11/4/10            <NA> Costa Rica           CR Guanacaste       7301
## 888  4375 5/31/12            <NA> Costa Rica           CR Guanacaste       7301
## 1169 5571 10/3/13            <NA> Costa Rica           CR Guanacaste       7301
## 1173 5591 10/8/13 Morning    <NA> Costa Rica           CR Guanacaste       7301
##          City Distance location_description latitude longitude
## 102   Bagaces 17.65521                       10.4024  -85.3555
## 504  Tilarán 10.21631                       10.4548  -84.8751
## 888  Tilarán 12.33807                       10.5562  -84.8952
## 1169 Tilarán 12.21952                       10.5543  -84.8946
## 1173 Tilarán 12.18115                       10.5546  -84.8955
##                                    geolocation hazard_type landslide_type
## 102             (10.4024, -85.355500000000006)   Landslide      Landslide
## 504  (10.454800000000001, -84.875100000000003)   Landslide      Landslide
## 888             (10.5562, -84.895200000000003)   Landslide      Landslide
## 1169            (10.5543, -84.894599999999997)   Landslide      Landslide
## 1173 (10.554600000000001, -84.895499999999998)   Landslide      Landslide
##      landslide_size          trigger           storm_name injuries fatalities
## 102          Medium Tropical cyclone  Tropical Storm Alma       NA         NA
## 504          Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 888           Large         Downpour                            NA         NA
## 1169         Medium   Mining digging                            NA         NA
## 1173         Medium             Rain                            NA          2
##              source_name
## 102                     
## 504                     
## 888                     
## 1169   www.ticotimes.net
## 1173 insidecostarica.com
##                                                                                                                                          source_link
## 102                                                                            http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
## 504                                                                  http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 888                                     http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129
## 1169 http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013
## 1173                                                     http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/
ggplot(data=df_Guanacaste, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Guanacaste, aes(x = "Guanacaste", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Guanacaste <- df_Guanacaste %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Guanacaste$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Guanacaste, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Guanacaste$Distance
names(Distance) <- df_Guanacaste$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##           
## Pareto chart analysis for Distance
##            Frequency Cum.Freq. Percentage Cum.Percent.
##   Bagaces   17.65521  17.65521   27.32571     27.32571
##   Tilarán  12.33807  29.99328   19.09615     46.42185
##   Tilarán  12.21952  42.21280   18.91266     65.33451
##   Tilarán  12.18115  54.39395   18.85328     84.18779
##   Tilarán  10.21631  64.61026   15.81221    100.00000
Diagrama de tallo y hojas
stem(df_Guanacaste$"Distance")
## 
##   The decimal point is at the |
## 
##   10 | 2
##   12 | 223
##   14 | 
##   16 | 7
head(df_Guanacaste)
##     id    date    time America    Country country_code      State population
## 1 2683 11/4/10            <NA> Costa Rica           CR Guanacaste       7301
## 2 4375 5/31/12            <NA> Costa Rica           CR Guanacaste       7301
## 3 5571 10/3/13            <NA> Costa Rica           CR Guanacaste       7301
## 4 5591 10/8/13 Morning    <NA> Costa Rica           CR Guanacaste       7301
## 5  556 5/29/08            <NA> Costa Rica           CR Guanacaste       4108
##       City Distance location_description latitude longitude
## 1 Tilarán 10.21631                       10.4548  -84.8751
## 2 Tilarán 12.33807                       10.5562  -84.8952
## 3 Tilarán 12.21952                       10.5543  -84.8946
## 4 Tilarán 12.18115                       10.5546  -84.8955
## 5  Bagaces 17.65521                       10.4024  -85.3555
##                                 geolocation hazard_type landslide_type
## 1 (10.454800000000001, -84.875100000000003)   Landslide      Landslide
## 2            (10.5562, -84.895200000000003)   Landslide      Landslide
## 3            (10.5543, -84.894599999999997)   Landslide      Landslide
## 4 (10.554600000000001, -84.895499999999998)   Landslide      Landslide
## 5            (10.4024, -85.355500000000006)   Landslide      Landslide
##   landslide_size          trigger           storm_name injuries fatalities
## 1         Medium Tropical cyclone Tropical Storm Tomas       NA          0
## 2          Large         Downpour                            NA         NA
## 3         Medium   Mining digging                            NA         NA
## 4         Medium             Rain                            NA          2
## 5         Medium Tropical cyclone  Tropical Storm Alma       NA         NA
##           source_name
## 1                    
## 2                    
## 3   www.ticotimes.net
## 4 insidecostarica.com
## 5                    
##                                                                                                                                       source_link
## 1                                                                 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 2                                    http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129
## 3 http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013
## 4                                                     http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/
## 5                                                                           http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
##       prop      ypos
## 1 15.81221  7.906105
## 2 19.09615 25.360283
## 3 18.91266 44.364688
## 4 18.85328 63.247656
## 5 27.32571 86.337147
knitr::kable(head(df_Guanacaste))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2683 11/4/10 NA Costa Rica CR Guanacaste 7301 Tilarán 10.21631 10.4548 -84.8751 (10.454800000000001, -84.875100000000003) Landslide Landslide Medium Tropical cyclone Tropical Storm Tomas NA 0 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ 15.81221 7.906105
4375 5/31/12 NA Costa Rica CR Guanacaste 7301 Tilarán 12.33807 10.5562 -84.8952 (10.5562, -84.895200000000003) Landslide Landslide Large Downpour NA NA http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129 19.09615 25.360283
5571 10/3/13 NA Costa Rica CR Guanacaste 7301 Tilarán 12.21952 10.5543 -84.8946 (10.5543, -84.894599999999997) Landslide Landslide Medium Mining digging NA NA www.ticotimes.net http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013 18.91266 44.364688
5591 10/8/13 Morning NA Costa Rica CR Guanacaste 7301 Tilarán 12.18115 10.5546 -84.8955 (10.554600000000001, -84.895499999999998) Landslide Landslide Medium Rain NA 2 insidecostarica.com http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/ 18.85328 63.247656
556 5/29/08 NA Costa Rica CR Guanacaste 4108 Bagaces 17.65521 10.4024 -85.3555 (10.4024, -85.355500000000006) Landslide Landslide Medium Tropical cyclone Tropical Storm Alma NA NA http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument 27.32571 86.337147
stem(df_Guanacaste$"Distance")
## 
##   The decimal point is at the |
## 
##   10 | 2
##   12 | 223
##   14 | 
##   16 | 7
stem(df_Guanacaste$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   10 | 2
##   11 | 
##   12 | 223
##   13 | 
##   14 | 
##   15 | 
##   16 | 
##   17 | 7
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Guanacaste$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May
## 2007 10.21631 12.33807 12.21952 12.18115 17.65521
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
10.21631 1 20 20 20 20
12.18115 1 20 20 40 40
12.21952 1 20 20 60 60
12.33807 1 20 20 80 80
17.65521 1 20 20 100 100
Total 5 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
10.21631 1
12.18115 1
12.21952 1
12.33807 1
17.65521 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 10.21631 13.21631 16.21631 19.21631
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
10.21631 1 0.2 1
12.18115 1 0.2 2
12.21952 1 0.2 3
12.33807 1 0.2 4
17.65521 1 0.2 5
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ Distance: Factor w/ 5 levels "10.21631","12.18115",..: 1 2 3 4 5
##  $ Freq    : int  1 1 1 1 1
##  $ Rel_Freq: num  0.2 0.2 0.2 0.2 0.2
##  $ Cum_Freq: int  1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
10.21631 1
12.18115 1
12.21952 1
12.33807 1
17.65521 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Guanacaste$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.22   12.18   12.22   12.92   12.34   17.66
library(pastecs)
stat.desc(df_Guanacaste)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      5.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          5.560000e+02   NA   NA      NA      NA           NA    NA
## max          5.591000e+03   NA   NA      NA      NA           NA    NA
## range        5.035000e+03   NA   NA      NA      NA           NA    NA
## sum          1.877600e+04   NA   NA      NA      NA           NA    NA
## median       4.375000e+03   NA   NA      NA      NA           NA    NA
## mean         3.755200e+03   NA   NA      NA      NA           NA    NA
## SE.mean      9.601025e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 2.665672e+03   NA   NA      NA      NA           NA    NA
## var          4.608984e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.146854e+03   NA   NA      NA      NA           NA    NA
## coef.var     5.717018e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description     latitude
## nbr.val      5.000000e+00   NA  5.0000000                   NA  5.000000000
## nbr.null     0.000000e+00   NA  0.0000000                   NA  0.000000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA  0.000000000
## min          4.108000e+03   NA 10.2163100                   NA 10.402400000
## max          7.301000e+03   NA 17.6552100                   NA 10.556200000
## range        3.193000e+03   NA  7.4389000                   NA  0.153800000
## sum          3.331200e+04   NA 64.6102600                   NA 52.522300000
## median       7.301000e+03   NA 12.2195200                   NA 10.554300000
## mean         6.662400e+03   NA 12.9220520                   NA 10.504460000
## SE.mean      6.386000e+02   NA  1.2471437                   NA  0.032060437
## CI.mean.0.95 1.773038e+03   NA  3.4626259                   NA  0.089014042
## var          2.039050e+06   NA  7.7768366                   NA  0.005139358
## std.dev      1.427953e+03   NA  2.7886980                   NA  0.071689316
## coef.var     2.143301e-01   NA  0.2158092                   NA  0.006824655
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.535550e+01          NA          NA             NA
## max          -8.487510e+01          NA          NA             NA
## range         4.804000e-01          NA          NA             NA
## sum          -4.249159e+02          NA          NA             NA
## median       -8.489520e+01          NA          NA             NA
## mean         -8.498318e+01          NA          NA             NA
## SE.mean       9.316065e-02          NA          NA             NA
## CI.mean.0.95  2.586554e-01          NA          NA             NA
## var           4.339454e-02          NA          NA             NA
## std.dev       2.083136e-01          NA          NA             NA
## coef.var     -2.451233e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        5   3.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf   2.000000          NA
## range                    NA      NA         NA     -Inf   2.000000          NA
## sum                      NA      NA         NA        0   2.000000          NA
## median                   NA      NA         NA       NA   1.000000          NA
## mean                     NA      NA         NA      NaN   1.000000          NA
## SE.mean                  NA      NA         NA       NA   1.000000          NA
## CI.mean.0.95             NA      NA         NA      NaN  12.706205          NA
## var                      NA      NA         NA       NA   2.000000          NA
## std.dev                  NA      NA         NA       NA   1.414214          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop       ypos
## nbr.val               NA   5.0000000   5.000000
## nbr.null              NA   0.0000000   0.000000
## nbr.na                NA   0.0000000   0.000000
## min                   NA  15.8122100   7.906105
## max                   NA  27.3257065  86.337147
## range                 NA  11.5134965  78.431042
## sum                   NA 100.0000000 227.215879
## median                NA  18.9126619  44.364688
## mean                  NA  20.0000000  45.443176
## SE.mean               NA   1.9302564  13.789187
## CI.mean.0.95          NA   5.3592509  38.284922
## var                   NA  18.6294484 950.708442
## std.dev               NA   4.3161845  30.833560
## coef.var              NA   0.2158092   0.678508
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Heredia:

  • Deslizamientos de las ciudades de Heredia
df_Heredia %>% 
  select(Country, State, City, Distance, date) 
##         Country   State                  City Distance     date
## 38   Costa Rica Heredia               Heredia  0.26208   9/9/07
## 311  Costa Rica Heredia              Ã\201ngeles 19.51432  4/27/10
## 480  Costa Rica Heredia              Ã\201ngeles 14.81614 10/15/10
## 529  Costa Rica Heredia              Ã\201ngeles 19.54581 11/21/10
## 702  Costa Rica Heredia              Ã\201ngeles 15.05161   5/8/11
## 884  Costa Rica Heredia         Santo Domingo 21.95470  5/13/12
## 1157 Costa Rica Heredia         Santo Domingo  9.85736  9/16/13
## 1384 Costa Rica Heredia Dulce Nombre de Jesus 10.01310 12/13/14
head(df_Heredia)
##       id     date          time America    Country country_code   State
## 38   249   9/9/07                  <NA> Costa Rica           CR Heredia
## 311 1786  4/27/10 Early morning    <NA> Costa Rica           CR Heredia
## 480 2598 10/15/10                  <NA> Costa Rica           CR Heredia
## 529 2742 11/21/10                  <NA> Costa Rica           CR Heredia
## 702 3472   5/8/11         Night    <NA> Costa Rica           CR Heredia
## 884 4358  5/13/12                  <NA> Costa Rica           CR Heredia
##     population          City Distance location_description latitude longitude
## 38       21947       Heredia  0.26208                       10.0000  -84.1167
## 311       1355      Ã\201ngeles 19.51432                       10.1452  -83.9564
## 480       1355      Ã\201ngeles 14.81614                       10.1067  -83.9753
## 529       1355      Ã\201ngeles 19.54581                       10.1433  -83.9529
## 702       1355      Ã\201ngeles 15.05161                       10.1118  -83.9793
## 884       5745 Santo Domingo 21.95470                       10.1981  -84.0074
##                                   geolocation hazard_type landslide_type
## 38                  (10, -84.116699999999994)   Landslide      Landslide
## 311 (10.145200000000001, -83.956400000000002)   Landslide      Landslide
## 480            (10.1067, -83.975300000000004)   Landslide       Rockfall
## 529                       (10.1433, -83.9529)   Landslide      Landslide
## 702 (10.111800000000001, -83.979299999999995)   Landslide      Landslide
## 884            (10.1981, -84.007400000000004)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities   source_name
## 38          Medium     Rain                  NA         NA ticotimes.net
## 311         Medium Downpour                  NA          0              
## 480         Medium Downpour                  NA          2              
## 529         Medium Downpour                  NA          0              
## 702         Medium     Rain                  NA          0              
## 884         Medium Downpour                  NA         NA              
##                                                                                                        source_link
## 38                                                       http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
## 311                                                                  http://en.trend.az/news/incident/1678592.html
## 480 http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010
## 529                                    http://insidecostarica.com/dailynews/2010/november/22/costarica10112204.htm
## 702                                         http://insidecostarica.com/dailynews/2011/may/10/costarica11051010.htm
## 884                                     http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm
ggplot(data=df_Heredia, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Heredia, aes(x = "Heredia", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Heredia <- df_Heredia %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Heredia$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Heredia, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Heredia$Distance
names(Distance) <- df_Heredia$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                        
## Pareto chart analysis for Distance
##                          Frequency  Cum.Freq. Percentage Cum.Percent.
##   Santo Domingo          21.954700  21.954700  19.776315    19.776315
##   Ã\201ngeles               19.545810  41.500510  17.606440    37.382755
##   Ã\201ngeles               19.514320  61.014830  17.578074    54.960829
##   Ã\201ngeles               15.051610  76.066440  13.558162    68.518991
##   Ã\201ngeles               14.816140  90.882580  13.346056    81.865047
##   Dulce Nombre de Jesus  10.013100 100.895680   9.019582    90.884629
##   Santo Domingo           9.857360 110.753040   8.879295    99.763924
##   Heredia                 0.262080 111.015120   0.236076   100.000000
Diagrama de tallo y hojas
stem(df_Heredia$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002
head(df_Heredia)
##     id     date          time America    Country country_code   State
## 1 4358  5/13/12                  <NA> Costa Rica           CR Heredia
## 2 5541  9/16/13                  <NA> Costa Rica           CR Heredia
## 3  249   9/9/07                  <NA> Costa Rica           CR Heredia
## 4 6696 12/13/14         Night    <NA> Costa Rica           CR Heredia
## 5 1786  4/27/10 Early morning    <NA> Costa Rica           CR Heredia
## 6 2598 10/15/10                  <NA> Costa Rica           CR Heredia
##   population                  City Distance location_description latitude
## 1       5745         Santo Domingo 21.95470                       10.1981
## 2       5745         Santo Domingo  9.85736                       10.1528
## 3      21947               Heredia  0.26208                       10.0000
## 4          0 Dulce Nombre de Jesus 10.01310              Unknown  10.2054
## 5       1355              Ã\201ngeles 19.51432                       10.1452
## 6       1355              Ã\201ngeles 14.81614                       10.1067
##   longitude                               geolocation hazard_type
## 1  -84.0074            (10.1981, -84.007400000000004)   Landslide
## 2  -84.1489 (10.152799999999999, -84.148899999999998)   Landslide
## 3  -84.1167                 (10, -84.116699999999994)   Landslide
## 4  -83.9041            (10.205399999999999, -83.9041)   Landslide
## 5  -83.9564 (10.145200000000001, -83.956400000000002)   Landslide
## 6  -83.9753            (10.1067, -83.975300000000004)   Landslide
##   landslide_type landslide_size          trigger storm_name injuries fatalities
## 1      Landslide         Medium         Downpour                  NA         NA
## 2      Landslide         Medium Tropical cyclone     Manuel       NA          0
## 3      Landslide         Medium             Rain                  NA         NA
## 4      Landslide         Medium          Unknown                   0          0
## 5      Landslide         Medium         Downpour                  NA          0
## 6       Rockfall         Medium         Downpour                  NA          2
##           source_name
## 1                    
## 2 insidecostarica.com
## 3       ticotimes.net
## 4            Columbia
## 5                    
## 6                    
##                                                                                                                                                 source_link
## 1                                                                                http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm
## 2                                            http://insidecostarica.com/2013/09/17/torrential-rains-flooding-washed-out-bridges-and-landslides-wreak-havoc/
## 3                                                                                                 http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
## 4 http://www.columbia.co.cr/index.php/nacionales/transporte/6953-la-ruta-32-se-mantiene-cerrada-desde-anoche-por-un-deslizamiento-en-el-sector-de-rio-sucio
## 5                                                                                                             http://en.trend.az/news/incident/1678592.html
## 6                                            http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010
##        prop      ypos
## 1 19.776315  9.888158
## 2  8.879295 24.215963
## 3  0.236076 28.773648
## 4  9.019582 33.401477
## 5 17.578074 46.700305
## 6 13.346056 62.162370
knitr::kable(head(df_Heredia))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
4358 5/13/12 NA Costa Rica CR Heredia 5745 Santo Domingo 21.95470 10.1981 -84.0074 (10.1981, -84.007400000000004) Landslide Landslide Medium Downpour NA NA http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm 19.776315 9.888158
5541 9/16/13 NA Costa Rica CR Heredia 5745 Santo Domingo 9.85736 10.1528 -84.1489 (10.152799999999999, -84.148899999999998) Landslide Landslide Medium Tropical cyclone Manuel NA 0 insidecostarica.com http://insidecostarica.com/2013/09/17/torrential-rains-flooding-washed-out-bridges-and-landslides-wreak-havoc/ 8.879295 24.215963
249 9/9/07 NA Costa Rica CR Heredia 21947 Heredia 0.26208 10.0000 -84.1167 (10, -84.116699999999994) Landslide Landslide Medium Rain NA NA ticotimes.net http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm 0.236076 28.773648
6696 12/13/14 Night NA Costa Rica CR Heredia 0 Dulce Nombre de Jesus 10.01310 Unknown 10.2054 -83.9041 (10.205399999999999, -83.9041) Landslide Landslide Medium Unknown 0 0 Columbia http://www.columbia.co.cr/index.php/nacionales/transporte/6953-la-ruta-32-se-mantiene-cerrada-desde-anoche-por-un-deslizamiento-en-el-sector-de-rio-sucio 9.019582 33.401477
1786 4/27/10 Early morning NA Costa Rica CR Heredia 1355 Ángeles 19.51432 10.1452 -83.9564 (10.145200000000001, -83.956400000000002) Landslide Landslide Medium Downpour NA 0 http://en.trend.az/news/incident/1678592.html 17.578074 46.700305
2598 10/15/10 NA Costa Rica CR Heredia 1355 Ángeles 14.81614 10.1067 -83.9753 (10.1067, -83.975300000000004) Landslide Rockfall Medium Downpour NA 2 http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010 13.346056 62.162370
stem(df_Heredia$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002
stem(df_Heredia$"Distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 0
##   0 | 
##   1 | 00
##   1 | 55
##   2 | 002
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Heredia$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr      May      Jun
## 2007 21.95470  9.85736  0.26208 10.01310 19.51432 14.81614
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.26208 1 12.5 12.5 12.5 12.5
9.85736 1 12.5 12.5 25.0 25.0
10.0131 1 12.5 12.5 37.5 37.5
14.81614 1 12.5 12.5 50.0 50.0
15.05161 1 12.5 12.5 62.5 62.5
19.51432 1 12.5 12.5 75.0 75.0
19.54581 1 12.5 12.5 87.5 87.5
21.9547 1 12.5 12.5 100.0 100.0
Total 8 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  9 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 8
##  $ %      : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ val%   : num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
##  $ %cum   : num  12.5 25 37.5 50 62.5 75 87.5 100 100
##  $ val%cum: num  12.5 25 37.5 50 62.5 75 87.5 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.26208 1
9.85736 1
10.0131 1
14.81614 1
15.05161 1
19.51432 1
19.54581 1
21.9547 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.26208  6.26208 12.26208 18.26208 24.26208
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.26208 1 0.125 1
9.85736 1 0.125 2
10.0131 1 0.125 3
14.81614 1 0.125 4
15.05161 1 0.125 5
19.51432 1 0.125 6
19.54581 1 0.125 7
21.9547 1 0.125 8
str(Freq_table)
## 'data.frame':    8 obs. of  4 variables:
##  $ Distance: Factor w/ 8 levels "0.26208","9.85736",..: 1 2 3 4 5 6 7 8
##  $ Freq    : int  1 1 1 1 1 1 1 1
##  $ Rel_Freq: num  0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125
##  $ Cum_Freq: int  1 2 3 4 5 6 7 8
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.26208 1
9.85736 1
10.0131 1
14.81614 1
15.05161 1
19.51432 1
19.54581 1
21.9547 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Heredia$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2621  9.9742 14.9339 13.8769 19.5222 21.9547
library(pastecs)
stat.desc(df_Heredia)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      8.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.490000e+02   NA   NA      NA      NA           NA    NA
## max          6.696000e+03   NA   NA      NA      NA           NA    NA
## range        6.447000e+03   NA   NA      NA      NA           NA    NA
## sum          2.744200e+04   NA   NA      NA      NA           NA    NA
## median       3.107000e+03   NA   NA      NA      NA           NA    NA
## mean         3.430250e+03   NA   NA      NA      NA           NA    NA
## SE.mean      7.315967e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.729951e+03   NA   NA      NA      NA           NA    NA
## var          4.281870e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.069268e+03   NA   NA      NA      NA           NA    NA
## coef.var     6.032412e-01   NA   NA      NA      NA           NA    NA
##                population City    Distance location_description     latitude
## nbr.val      8.000000e+00   NA   8.0000000                   NA  8.000000000
## nbr.null     1.000000e+00   NA   0.0000000                   NA  0.000000000
## nbr.na       0.000000e+00   NA   0.0000000                   NA  0.000000000
## min          0.000000e+00   NA   0.2620800                   NA 10.000000000
## max          2.194700e+04   NA  21.9547000                   NA 10.205400000
## range        2.194700e+04   NA  21.6926200                   NA  0.205400000
## sum          3.885700e+04   NA 111.0151200                   NA 81.063300000
## median       1.355000e+03   NA  14.9338750                   NA 10.144250000
## mean         4.857125e+03   NA  13.8768900                   NA 10.132912500
## SE.mean      2.557523e+03   NA   2.4924134                   NA  0.022739522
## CI.mean.0.95 6.047580e+03   NA   5.8936213                   NA  0.053770426
## var          5.232738e+07   NA  49.6969984                   NA  0.004136687
## std.dev      7.233767e+03   NA   7.0496098                   NA  0.064317081
## coef.var     1.489310e+00   NA   0.5080108                   NA  0.006347344
##                  longitude geolocation hazard_type landslide_type
## nbr.val       8.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -8.414890e+01          NA          NA             NA
## max          -8.390410e+01          NA          NA             NA
## range         2.448000e-01          NA          NA             NA
## sum          -6.720410e+02          NA          NA             NA
## median       -8.397730e+01          NA          NA             NA
## mean         -8.400512e+01          NA          NA             NA
## SE.mean       2.987758e-02          NA          NA             NA
## CI.mean.0.95  7.064924e-02          NA          NA             NA
## var           7.141356e-03          NA          NA             NA
## std.dev       8.450655e-02          NA          NA             NA
## coef.var     -1.005969e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1  6.0000000          NA
## nbr.null                 NA      NA         NA        1  5.0000000          NA
## nbr.na                   NA      NA         NA        7  2.0000000          NA
## min                      NA      NA         NA        0  0.0000000          NA
## max                      NA      NA         NA        0  2.0000000          NA
## range                    NA      NA         NA        0  2.0000000          NA
## sum                      NA      NA         NA        0  2.0000000          NA
## median                   NA      NA         NA        0  0.0000000          NA
## mean                     NA      NA         NA        0  0.3333333          NA
## SE.mean                  NA      NA         NA       NA  0.3333333          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.8568606          NA
## var                      NA      NA         NA       NA  0.6666667          NA
## std.dev                  NA      NA         NA       NA  0.8164966          NA
## coef.var                 NA      NA         NA       NA  2.4494897          NA
##              source_link        prop        ypos
## nbr.val               NA   8.0000000   8.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   0.2360760   9.8881576
## max                   NA  19.7763152  93.2209189
## range                 NA  19.5402392  83.3327613
## sum                   NA 100.0000000 376.0014582
## median                NA  13.4521090  40.0508913
## mean                  NA  12.5000000  47.0001823
## SE.mean               NA   2.2451117  10.1053438
## CI.mean.0.95          NA   5.3088456  23.8953410
## var                   NA  40.3242124 816.9437832
## std.dev               NA   6.3501348  28.5822285
## coef.var              NA   0.5080108   0.6081302
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Venezuela

Deslizamiento Miranda_Venezuela

df_Mir %>% 
  select(Country, State, City, Distance, date) 
##       Country   State         City Distance     date
## 530 Venezuela Miranda Santa Teresa  8.45736 11/25/10
## 542 Venezuela Miranda       Baruta  2.69644 11/27/10
## 543 Venezuela Miranda   El Hatillo  1.04263 11/27/10
## 546 Venezuela Miranda      Guatire 15.84114 11/29/10
head(df_Mir)
##       id     date  time America   Country country_code   State population
## 530 2749 11/25/10            SA Venezuela           VE Miranda     278890
## 542 2765 11/27/10 Night      SA Venezuela           VE Miranda     244216
## 543 2766 11/27/10 Night      SA Venezuela           VE Miranda      57591
## 546 2771 11/29/10            SA Venezuela           VE Miranda     191903
##             City Distance location_description latitude longitude
## 530 Santa Teresa  8.45736                       10.2452  -66.5867
## 542       Baruta  2.69644                       10.4447  -66.8545
## 543   El Hatillo  1.04263                       10.4393  -66.8150
## 546      Guatire 15.84114                       10.6144  -66.5806
##                                   geolocation hazard_type landslide_type
## 530 (10.245200000000001, -66.586699999999993)   Landslide       Mudslide
## 542 (10.444699999999999, -66.854500000000002)   Landslide      Landslide
## 543 (10.439299999999999, -66.814999999999998)   Landslide      Landslide
## 546            (10.6144, -66.580600000000004)   Landslide       Mudslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 530         Medium Downpour                  NA          1            
## 542         Medium Downpour                  NA          0            
## 543         Medium Downpour                  NA          0            
## 546         Medium Downpour                  NA          0            
##                                                                                                               source_link
## 530                                                     http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 542                            http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 543                            http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 546 http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a
ggplot(data=df_Mir, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Mir, aes(x = "Miranda", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Mir,aes(x="Miranda",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=3)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Mir$Distance
names(Distance) <- df_Mir$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##               
## Pareto chart analysis for Distance
##                 Frequency  Cum.Freq. Percentage Cum.Percent.
##   Guatire       15.841140  15.841140  56.499690    56.499690
##   Santa Teresa   8.457360  24.298500  30.164383    86.664073
##   Baruta         2.696440  26.994940   9.617239    96.281311
##   El Hatillo     1.042630  28.037570   3.718689   100.000000
Diagrama de tallo y hojas
stem(df_Mir$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 13
##   0 | 8
##   1 | 
##   1 | 6
head(df_Mir)
##       id     date  time America   Country country_code   State population
## 530 2749 11/25/10            SA Venezuela           VE Miranda     278890
## 542 2765 11/27/10 Night      SA Venezuela           VE Miranda     244216
## 543 2766 11/27/10 Night      SA Venezuela           VE Miranda      57591
## 546 2771 11/29/10            SA Venezuela           VE Miranda     191903
##             City Distance location_description latitude longitude
## 530 Santa Teresa  8.45736                       10.2452  -66.5867
## 542       Baruta  2.69644                       10.4447  -66.8545
## 543   El Hatillo  1.04263                       10.4393  -66.8150
## 546      Guatire 15.84114                       10.6144  -66.5806
##                                   geolocation hazard_type landslide_type
## 530 (10.245200000000001, -66.586699999999993)   Landslide       Mudslide
## 542 (10.444699999999999, -66.854500000000002)   Landslide      Landslide
## 543 (10.439299999999999, -66.814999999999998)   Landslide      Landslide
## 546            (10.6144, -66.580600000000004)   Landslide       Mudslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 530         Medium Downpour                  NA          1            
## 542         Medium Downpour                  NA          0            
## 543         Medium Downpour                  NA          0            
## 546         Medium Downpour                  NA          0            
##                                                                                                               source_link
## 530                                                     http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 542                            http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 543                            http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 546 http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a
knitr::kable(head(df_Mir))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
530 2749 11/25/10 SA Venezuela VE Miranda 278890 Santa Teresa 8.45736 10.2452 -66.5867 (10.245200000000001, -66.586699999999993) Landslide Mudslide Medium Downpour NA 1 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
542 2765 11/27/10 Night SA Venezuela VE Miranda 244216 Baruta 2.69644 10.4447 -66.8545 (10.444699999999999, -66.854500000000002) Landslide Landslide Medium Downpour NA 0 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
543 2766 11/27/10 Night SA Venezuela VE Miranda 57591 El Hatillo 1.04263 10.4393 -66.8150 (10.439299999999999, -66.814999999999998) Landslide Landslide Medium Downpour NA 0 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
546 2771 11/29/10 SA Venezuela VE Miranda 191903 Guatire 15.84114 10.6144 -66.5806 (10.6144, -66.580600000000004) Landslide Mudslide Medium Downpour NA 0 http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a
stem(df_Mir$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 13
##   0 | 8
##   1 | 
##   1 | 6
stem(df_Mir$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 0
##    2 | 7
##    4 | 
##    6 | 
##    8 | 5
##   10 | 
##   12 | 
##   14 | 8
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Mir$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr
## 2007  8.45736  2.69644  1.04263 15.84114
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
1.04263 1 25 25 25 25
2.69644 1 25 25 50 50
8.45736 1 25 25 75 75
15.84114 1 25 25 100 100
Total 4 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
1.04263 1
2.69644 1
8.45736 1
15.84114 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  1.04263  6.04263 11.04263 16.04263
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
1.04263 1 0.25 1
2.69644 1 0.25 2
8.45736 1 0.25 3
15.84114 1 0.25 4
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ Distance: Factor w/ 4 levels "1.04263","2.69644",..: 1 2 3 4
##  $ Freq    : int  1 1 1 1
##  $ Rel_Freq: num  0.25 0.25 0.25 0.25
##  $ Cum_Freq: int  1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
1.04263 1
2.69644 1
8.45736 1
15.84114 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Mir$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.043   2.283   5.577   7.009  10.303  15.841
library(pastecs)
stat.desc(df_Mir)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      4.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.749000e+03   NA   NA      NA      NA           NA    NA
## max          2.771000e+03   NA   NA      NA      NA           NA    NA
## range        2.200000e+01   NA   NA      NA      NA           NA    NA
## sum          1.105100e+04   NA   NA      NA      NA           NA    NA
## median       2.765500e+03   NA   NA      NA      NA           NA    NA
## mean         2.762750e+03   NA   NA      NA      NA           NA    NA
## SE.mean      4.767512e+00   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.517235e+01   NA   NA      NA      NA           NA    NA
## var          9.091667e+01   NA   NA      NA      NA           NA    NA
## std.dev      9.535023e+00   NA   NA      NA      NA           NA    NA
## coef.var     3.451280e-03   NA   NA      NA      NA           NA    NA
##                population City  Distance location_description    latitude
## nbr.val      4.000000e+00   NA  4.000000                   NA  4.00000000
## nbr.null     0.000000e+00   NA  0.000000                   NA  0.00000000
## nbr.na       0.000000e+00   NA  0.000000                   NA  0.00000000
## min          5.759100e+04   NA  1.042630                   NA 10.24520000
## max          2.788900e+05   NA 15.841140                   NA 10.61440000
## range        2.212990e+05   NA 14.798510                   NA  0.36920000
## sum          7.726000e+05   NA 28.037570                   NA 41.74360000
## median       2.180595e+05   NA  5.576900                   NA 10.44200000
## mean         1.931500e+05   NA  7.009392                   NA 10.43590000
## SE.mean      4.859431e+04   NA  3.345397                   NA  0.07545293
## CI.mean.0.95 1.546488e+05   NA 10.646547                   NA  0.24012491
## var          9.445627e+09   NA 44.766729                   NA  0.02277258
## std.dev      9.718862e+04   NA  6.690794                   NA  0.15090586
## coef.var     5.031769e-01   NA  0.954547                   NA  0.01446026
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.685450e+01          NA          NA             NA
## max          -6.658060e+01          NA          NA             NA
## range         2.739000e-01          NA          NA             NA
## sum          -2.668368e+02          NA          NA             NA
## median       -6.670085e+01          NA          NA             NA
## mean         -6.670920e+01          NA          NA             NA
## SE.mean       7.294401e-02          NA          NA             NA
## CI.mean.0.95  2.321404e-01          NA          NA             NA
## var           2.128331e-02          NA          NA             NA
## std.dev       1.458880e-01          NA          NA             NA
## coef.var     -2.186925e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0  4.0000000          NA
## nbr.null                 NA      NA         NA        0  3.0000000          NA
## nbr.na                   NA      NA         NA        4  0.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  1.0000000          NA
## range                    NA      NA         NA     -Inf  1.0000000          NA
## sum                      NA      NA         NA        0  1.0000000          NA
## median                   NA      NA         NA       NA  0.0000000          NA
## mean                     NA      NA         NA      NaN  0.2500000          NA
## SE.mean                  NA      NA         NA       NA  0.2500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  0.7956116          NA
## var                      NA      NA         NA       NA  0.2500000          NA
## std.dev                  NA      NA         NA       NA  0.5000000          NA
## coef.var                 NA      NA         NA       NA  2.0000000          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Deslizamientos por estado o departamentos

ggplot(data=df_Venezuela, aes(fill=State, y=Distance, x="Venezuela")) +
  geom_bar(position="dodge", stat="identity")

ggplot(data=df_Venezuela, aes(fill=State, y=Distance, x="Venezuela")) +
  geom_bar(position="stack", stat="identity")

Distrito Federal :

  • Deslizamientos de las ciudades de Distrito Federal
df_DF %>% 
  select(Country, State, City, Distance, date) 
##       Country            State     City Distance     date
## 165 Venezuela Distrito Federal  Caracas  2.92493 11/20/08
## 531 Venezuela Distrito Federal  Caracas  2.95706 11/26/10
## 538 Venezuela Distrito Federal Caricuao  7.90754 11/27/10
## 539 Venezuela Distrito Federal Caricuao  5.64050 11/27/10
## 540 Venezuela Distrito Federal Caricuao  5.74106 11/27/10
## 541 Venezuela Distrito Federal  Caracas  4.58994 11/27/10
## 544 Venezuela Distrito Federal  Caracas  3.14060 11/29/10
## 545 Venezuela Distrito Federal  Caracas  6.04235 11/29/10
## 547 Venezuela Distrito Federal  Caracas  3.87793 11/29/10
## 548 Venezuela Distrito Federal  Caracas  3.65044 11/29/10
## 549 Venezuela Distrito Federal  Caracas  8.11953 11/30/10
## 847 Venezuela Distrito Federal  Caracas  2.55507  12/6/11
head(df_DF)
##       id     date        time America   Country country_code            State
## 165  896 11/20/08                  SA Venezuela           VE Distrito Federal
## 531 2753 11/26/10 Before dawn      SA Venezuela           VE Distrito Federal
## 538 2761 11/27/10                  SA Venezuela           VE Distrito Federal
## 539 2762 11/27/10       Night      SA Venezuela           VE Distrito Federal
## 540 2763 11/27/10       Night      SA Venezuela           VE Distrito Federal
## 541 2764 11/27/10       Night      SA Venezuela           VE Distrito Federal
##     population     City Distance location_description latitude longitude
## 165    3000000  Caracas  2.92493                       10.4660  -66.8940
## 531    3000000  Caracas  2.95706                       10.4913  -66.9060
## 538          0 Caricuao  7.90754                       10.5030  -66.9995
## 539          0 Caricuao  5.64050                       10.4267  -67.0342
## 540          0 Caricuao  5.74106                       10.4669  -66.9431
## 541    3000000  Caracas  4.58994                       10.4867  -66.9211
##                                   geolocation hazard_type landslide_type
## 165 (10.465999999999999, -66.894000000000005)   Landslide       Mudslide
## 531 (10.491300000000001, -66.906000000000006)   Landslide       Mudslide
## 538             (10.503, -66.999499999999998)   Landslide       Mudslide
## 539            (10.4267, -67.034199999999998)   Landslide      Landslide
## 540 (10.466900000000001, -66.943100000000001)   Landslide      Landslide
## 541 (10.486700000000001, -66.921099999999996)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 165         Medium Downpour                  NA          8            
## 531         Medium Downpour                  NA          3            
## 538         Medium Downpour                  NA          1            
## 539         Medium Downpour                  NA          0            
## 540         Medium Downpour                  NA          2            
## 541         Medium Downpour                  NA          0            
##                                                                                    source_link
## 165                                         http://www.foxnews.com/story/0,2933,456304,00.html
## 531                          http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 538                          http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717
## 539 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 540 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 541 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
ggplot(data=df_DF, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_DF, aes(x = "Distrito Federal    ", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_DF <- df_DF %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_DF$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)

df_DF <- df_DF %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_DF$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DF, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_DF$Distance
names(Distance) <- df_DF$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##           
## Pareto chart analysis for Distance
##             Frequency  Cum.Freq. Percentage Cum.Percent.
##   Caracas    8.119530   8.119530  14.208160    14.208160
##   Caricuao   7.907540  16.027070  13.837204    28.045364
##   Caracas    6.042350  22.069420  10.573355    38.618719
##   Caricuao   5.741060  27.810480  10.046135    48.664854
##   Caricuao   5.640500  33.450980   9.870168    58.535022
##   Caracas    4.589940  38.040920   8.031820    66.566842
##   Caracas    3.877930  41.918850   6.785891    73.352734
##   Caracas    3.650440  45.569290   6.387812    79.740546
##   Caracas    3.140600  48.709890   5.495656    85.236202
##   Caracas    2.957060  51.666950   5.174484    90.410687
##   Caracas    2.924930  54.591880   5.118261    95.528948
##   Caracas    2.555070  57.146950   4.471052   100.000000
Diagrama de tallo y hojas
stem(df_DF$"Distance")
## 
##   The decimal point is at the |
## 
##   2 | 690179
##   4 | 667
##   6 | 09
##   8 | 1
head(df_DF)
##     id     date        time America   Country country_code            State
## 1 2761 11/27/10                  SA Venezuela           VE Distrito Federal
## 2 2762 11/27/10       Night      SA Venezuela           VE Distrito Federal
## 3 2763 11/27/10       Night      SA Venezuela           VE Distrito Federal
## 4  896 11/20/08                  SA Venezuela           VE Distrito Federal
## 5 2753 11/26/10 Before dawn      SA Venezuela           VE Distrito Federal
## 6 2764 11/27/10       Night      SA Venezuela           VE Distrito Federal
##   population     City Distance location_description latitude longitude
## 1          0 Caricuao  7.90754                       10.5030  -66.9995
## 2          0 Caricuao  5.64050                       10.4267  -67.0342
## 3          0 Caricuao  5.74106                       10.4669  -66.9431
## 4    3000000  Caracas  2.92493                       10.4660  -66.8940
## 5    3000000  Caracas  2.95706                       10.4913  -66.9060
## 6    3000000  Caracas  4.58994                       10.4867  -66.9211
##                                 geolocation hazard_type landslide_type
## 1             (10.503, -66.999499999999998)   Landslide       Mudslide
## 2            (10.4267, -67.034199999999998)   Landslide      Landslide
## 3 (10.466900000000001, -66.943100000000001)   Landslide      Landslide
## 4 (10.465999999999999, -66.894000000000005)   Landslide       Mudslide
## 5 (10.491300000000001, -66.906000000000006)   Landslide       Mudslide
## 6 (10.486700000000001, -66.921099999999996)   Landslide      Landslide
##   landslide_size  trigger storm_name injuries fatalities source_name
## 1         Medium Downpour                  NA          1            
## 2         Medium Downpour                  NA          0            
## 3         Medium Downpour                  NA          2            
## 4         Medium Downpour                  NA          8            
## 5         Medium Downpour                  NA          3            
## 6         Medium Downpour                  NA          0            
##                                                                                  source_link
## 1                          http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717
## 2 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 3 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 4                                         http://www.foxnews.com/story/0,2933,456304,00.html
## 5                          http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 6 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
##        prop      ypos
## 1 13.837204  6.918602
## 2  9.870168 18.772288
## 3 10.046135 28.730440
## 4  5.118261 36.312638
## 5  5.174484 41.459010
## 6  8.031820 48.062163
knitr::kable(head(df_DF))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2761 11/27/10 SA Venezuela VE Distrito Federal 0e+00 Caricuao 7.90754 10.5030 -66.9995 (10.503, -66.999499999999998) Landslide Mudslide Medium Downpour NA 1 http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717 13.837204 6.918602
2762 11/27/10 Night SA Venezuela VE Distrito Federal 0e+00 Caricuao 5.64050 10.4267 -67.0342 (10.4267, -67.034199999999998) Landslide Landslide Medium Downpour NA 0 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml 9.870168 18.772288
2763 11/27/10 Night SA Venezuela VE Distrito Federal 0e+00 Caricuao 5.74106 10.4669 -66.9431 (10.466900000000001, -66.943100000000001) Landslide Landslide Medium Downpour NA 2 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml 10.046135 28.730440
896 11/20/08 SA Venezuela VE Distrito Federal 3e+06 Caracas 2.92493 10.4660 -66.8940 (10.465999999999999, -66.894000000000005) Landslide Mudslide Medium Downpour NA 8 http://www.foxnews.com/story/0,2933,456304,00.html 5.118261 36.312638
2753 11/26/10 Before dawn SA Venezuela VE Distrito Federal 3e+06 Caracas 2.95706 10.4913 -66.9060 (10.491300000000001, -66.906000000000006) Landslide Mudslide Medium Downpour NA 3 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717 5.174484 41.459010
2764 11/27/10 Night SA Venezuela VE Distrito Federal 3e+06 Caracas 4.58994 10.4867 -66.9211 (10.486700000000001, -66.921099999999996) Landslide Landslide Medium Downpour NA 0 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml 8.031820 48.062162
stem(df_DF$"Distance")
## 
##   The decimal point is at the |
## 
##   2 | 690179
##   4 | 667
##   6 | 09
##   8 | 1
stem(df_DF$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   2 | 69
##   3 | 0179
##   4 | 6
##   5 | 67
##   6 | 0
##   7 | 9
##   8 | 1
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_DF$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr     May     Jun
## 2007 7.90754 5.64050 5.74106 2.92493 2.95706 4.58994
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
2.55507 1 8.3 8.3 8.3 8.3
2.92493 1 8.3 8.3 16.7 16.7
2.95706 1 8.3 8.3 25.0 25.0
3.1406 1 8.3 8.3 33.3 33.3
3.65044 1 8.3 8.3 41.7 41.7
3.87793 1 8.3 8.3 50.0 50.0
4.58994 1 8.3 8.3 58.3 58.3
5.6405 1 8.3 8.3 66.7 66.7
5.74106 1 8.3 8.3 75.0 75.0
6.04235 1 8.3 8.3 83.3 83.3
7.90754 1 8.3 8.3 91.7 91.7
8.11953 1 8.3 8.3 100.0 100.0
Total 12 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  13 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ %      : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ val%   : num  8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
##  $ %cum   : num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
##  $ val%cum: num  8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
2.55507 1
2.92493 1
2.95706 1
3.1406 1
3.65044 1
3.87793 1
4.58994 1
5.6405 1
5.74106 1
6.04235 1
7.90754 1
8.11953 1
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 2.55507 4.55507 6.55507 8.55507
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
2.55507 1 0.0833333 1
2.92493 1 0.0833333 2
2.95706 1 0.0833333 3
3.1406 1 0.0833333 4
3.65044 1 0.0833333 5
3.87793 1 0.0833333 6
4.58994 1 0.0833333 7
5.6405 1 0.0833333 8
5.74106 1 0.0833333 9
6.04235 1 0.0833333 10
7.90754 1 0.0833333 11
8.11953 1 0.0833333 12
str(Freq_table)
## 'data.frame':    12 obs. of  4 variables:
##  $ Distance: Factor w/ 12 levels "2.55507","2.92493",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Freq    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Rel_Freq: num  0.0833 0.0833 0.0833 0.0833 0.0833 ...
##  $ Cum_Freq: int  1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
2.55507 1
2.92493 1
2.95706 1
3.1406 1
3.65044 1
3.87793 1
4.58994 1
5.6405 1
5.74106 1
6.04235 1
7.90754 1
8.11953 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_DF$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.555   3.095   4.234   4.762   5.816   8.120
library(pastecs)
stat.desc(df_DF)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      1.200000e+01   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          8.960000e+02   NA   NA      NA      NA           NA    NA
## max          4.087000e+03   NA   NA      NA      NA           NA    NA
## range        3.191000e+03   NA   NA      NA      NA           NA    NA
## sum          3.264400e+04   NA   NA      NA      NA           NA    NA
## median       2.766500e+03   NA   NA      NA      NA           NA    NA
## mean         2.720333e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.988092e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 4.375761e+02   NA   NA      NA      NA           NA    NA
## var          4.743012e+05   NA   NA      NA      NA           NA    NA
## std.dev      6.886953e+02   NA   NA      NA      NA           NA    NA
## coef.var     2.531658e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description     latitude
## nbr.val      1.200000e+01   NA 12.0000000                   NA 1.200000e+01
## nbr.null     3.000000e+00   NA  0.0000000                   NA 0.000000e+00
## nbr.na       0.000000e+00   NA  0.0000000                   NA 0.000000e+00
## min          0.000000e+00   NA  2.5550700                   NA 1.042670e+01
## max          3.000000e+06   NA  8.1195300                   NA 1.051390e+01
## range        3.000000e+06   NA  5.5644600                   NA 8.720000e-02
## sum          2.700000e+07   NA 57.1469500                   NA 1.258214e+02
## median       3.000000e+06   NA  4.2339350                   NA 1.049020e+01
## mean         2.250000e+06   NA  4.7622458                   NA 1.048512e+01
## SE.mean      3.916747e+05   NA  0.5553862                   NA 7.110799e-03
## CI.mean.0.95 8.620703e+05   NA  1.2223968                   NA 1.565076e-02
## var          1.840909e+12   NA  3.7014462                   NA 6.067615e-04
## std.dev      1.356801e+06   NA  1.9239143                   NA 2.463253e-02
## coef.var     6.030227e-01   NA  0.4039931                   NA 2.349285e-03
##                  longitude geolocation hazard_type landslide_type
## nbr.val       1.200000e+01          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.703420e+01          NA          NA             NA
## max          -6.689300e+01          NA          NA             NA
## range         1.412000e-01          NA          NA             NA
## sum          -8.031958e+02          NA          NA             NA
## median       -6.691785e+01          NA          NA             NA
## mean         -6.693298e+01          NA          NA             NA
## SE.mean       1.259930e-02          NA          NA             NA
## CI.mean.0.95  2.773088e-02          NA          NA             NA
## var           1.904909e-03          NA          NA             NA
## std.dev       4.364526e-02          NA          NA             NA
## coef.var     -6.520740e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0 12.0000000          NA
## nbr.null                 NA      NA         NA        0  4.0000000          NA
## nbr.na                   NA      NA         NA       12  0.0000000          NA
## min                      NA      NA         NA      Inf  0.0000000          NA
## max                      NA      NA         NA     -Inf  8.0000000          NA
## range                    NA      NA         NA     -Inf  8.0000000          NA
## sum                      NA      NA         NA        0 20.0000000          NA
## median                   NA      NA         NA       NA  1.0000000          NA
## mean                     NA      NA         NA      NaN  1.6666667          NA
## SE.mean                  NA      NA         NA       NA  0.6435382          NA
## CI.mean.0.95             NA      NA         NA      NaN  1.4164180          NA
## var                      NA      NA         NA       NA  4.9696970          NA
## std.dev                  NA      NA         NA       NA  2.2292817          NA
## coef.var                 NA      NA         NA       NA  1.3375690          NA
##              source_link        prop        ypos
## nbr.val               NA  12.0000000  12.0000000
## nbr.null              NA   0.0000000   0.0000000
## nbr.na                NA   0.0000000   0.0000000
## min                   NA   4.4710523   6.9186020
## max                   NA  14.2081598  97.7644739
## range                 NA   9.7371076  90.8458719
## sum                   NA 100.0000000 633.7977005
## median                NA   7.4088556  51.4440316
## mean                  NA   8.3333333  52.8164750
## SE.mean               NA   0.9718563   8.0970053
## CI.mean.0.95          NA   2.1390412  17.8213886
## var                   NA  11.3340554 786.7379476
## std.dev               NA   3.3666089  28.0488493
## coef.var              NA   0.4039931   0.5310625
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Vargas:

  • Deslizamientos de las ciudades de Vargas
df_var %>% 
  select(Country, State, City, Distance, date) 
##       Country  State       City Distance    date
## 448 Venezuela Vargas Maiquetía  6.65506 9/20/10
## 599 Venezuela Vargas Maiquetía  7.89319  3/7/11
head(df_var)
##       id    date time America   Country country_code  State population
## 448 2473 9/20/10           SA Venezuela           VE Vargas      52564
## 599 3180  3/7/11           SA Venezuela           VE Vargas      52564
##           City Distance location_description latitude longitude
## 448 Maiquetía  6.65506                       10.5363  -66.9492
## 599 Maiquetía  7.89319                       10.5259  -66.9425
##                                   geolocation hazard_type landslide_type
## 448 (10.536300000000001, -66.949200000000005)   Landslide      Landslide
## 599            (10.5259, -66.942499999999995)   Landslide       Mudslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 448         Medium Downpour                  NA          7            
## 599         Medium Downpour                  NA          0            
##                                                           source_link
## 448 http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091
## 599                   http://www.vheadline.com/readnews.asp?id=101578
ggplot(data=df_var, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_var, aes(x = "Vargas", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_var <- df_var %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_var$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_var, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
  scale_fill_brewer(palette="Set8") 
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_var$Distance
names(Distance) <- df_var$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##             
## Pareto chart analysis for Distance
##              Frequency Cum.Freq. Percentage Cum.Percent.
##   Maiquetía   7.89319   7.89319   54.25525     54.25525
##   Maiquetía   6.65506  14.54825   45.74475    100.00000
Diagrama de tallo y hojas
stem(df_var$"Distance")
## 
##   The decimal point is at the |
## 
##   6 | 7
##   7 | 
##   7 | 9
head(df_var)
##     id    date time America   Country country_code  State population       City
## 1 2473 9/20/10           SA Venezuela           VE Vargas      52564 Maiquetía
## 2 3180  3/7/11           SA Venezuela           VE Vargas      52564 Maiquetía
##   Distance location_description latitude longitude
## 1  6.65506                       10.5363  -66.9492
## 2  7.89319                       10.5259  -66.9425
##                                 geolocation hazard_type landslide_type
## 1 (10.536300000000001, -66.949200000000005)   Landslide      Landslide
## 2            (10.5259, -66.942499999999995)   Landslide       Mudslide
##   landslide_size  trigger storm_name injuries fatalities source_name
## 1         Medium Downpour                  NA          7            
## 2         Medium Downpour                  NA          0            
##                                                         source_link     prop
## 1 http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091 45.74475
## 2                   http://www.vheadline.com/readnews.asp?id=101578 54.25525
##       ypos
## 1 22.87237
## 2 72.87237
knitr::kable(head(df_var))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2473 9/20/10 SA Venezuela VE Vargas 52564 Maiquetía 6.65506 10.5363 -66.9492 (10.536300000000001, -66.949200000000005) Landslide Landslide Medium Downpour NA 7 http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091 45.74475 22.87237
3180 3/7/11 SA Venezuela VE Vargas 52564 Maiquetía 7.89319 10.5259 -66.9425 (10.5259, -66.942499999999995) Landslide Mudslide Medium Downpour NA 0 http://www.vheadline.com/readnews.asp?id=101578 54.25525 72.87237
stem(df_var$"Distance")
## 
##   The decimal point is at the |
## 
##   6 | 7
##   7 | 
##   7 | 9
stem(df_var$"Distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##   66 | 6
##   68 | 
##   70 | 
##   72 | 
##   74 | 
##   76 | 
##   78 | 9
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_var$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb
## 2007 6.65506 7.89319
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
6.65506 1 50 50 50 50
7.89319 1 50 50 100 100
Total 2 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
6.65506 1
7.89319 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 6.65506 7.65506 8.65506
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
6.65506 1 0.5 1
7.89319 1 0.5 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ Distance: Factor w/ 2 levels "6.65506","7.89319": 1 2
##  $ Freq    : int  1 1
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
6.65506 1
7.89319 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_var$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.655   6.965   7.274   7.274   7.584   7.893
library(pastecs)
stat.desc(df_var)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      2.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          2.473000e+03   NA   NA      NA      NA           NA    NA
## max          3.180000e+03   NA   NA      NA      NA           NA    NA
## range        7.070000e+02   NA   NA      NA      NA           NA    NA
## sum          5.653000e+03   NA   NA      NA      NA           NA    NA
## median       2.826500e+03   NA   NA      NA      NA           NA    NA
## mean         2.826500e+03   NA   NA      NA      NA           NA    NA
## SE.mean      3.535000e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 4.491643e+03   NA   NA      NA      NA           NA    NA
## var          2.499245e+05   NA   NA      NA      NA           NA    NA
## std.dev      4.999245e+02   NA   NA      NA      NA           NA    NA
## coef.var     1.768705e-01   NA   NA      NA      NA           NA    NA
##              population City   Distance location_description     latitude
## nbr.val               2   NA  2.0000000                   NA 2.000000e+00
## nbr.null              0   NA  0.0000000                   NA 0.000000e+00
## nbr.na                0   NA  0.0000000                   NA 0.000000e+00
## min               52564   NA  6.6550600                   NA 1.052590e+01
## max               52564   NA  7.8931900                   NA 1.053630e+01
## range                 0   NA  1.2381300                   NA 1.040000e-02
## sum              105128   NA 14.5482500                   NA 2.106220e+01
## median            52564   NA  7.2741250                   NA 1.053110e+01
## mean              52564   NA  7.2741250                   NA 1.053110e+01
## SE.mean               0   NA  0.6190650                   NA 5.200000e-03
## CI.mean.0.95          0   NA  7.8659666                   NA 6.607226e-02
## var                   0   NA  0.7664829                   NA 5.408000e-05
## std.dev               0   NA  0.8754901                   NA 7.353911e-03
## coef.var              0   NA  0.1203568                   NA 6.983041e-04
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -6.694920e+01          NA          NA             NA
## max          -6.694250e+01          NA          NA             NA
## range         6.700000e-03          NA          NA             NA
## sum          -1.338917e+02          NA          NA             NA
## median       -6.694585e+01          NA          NA             NA
## mean         -6.694585e+01          NA          NA             NA
## SE.mean       3.350000e-03          NA          NA             NA
## CI.mean.0.95  4.256579e-02          NA          NA             NA
## var           2.244500e-05          NA          NA             NA
## std.dev       4.737615e-03          NA          NA             NA
## coef.var     -7.076787e-05          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        0   2.000000          NA
## nbr.null                 NA      NA         NA        0   1.000000          NA
## nbr.na                   NA      NA         NA        2   0.000000          NA
## min                      NA      NA         NA      Inf   0.000000          NA
## max                      NA      NA         NA     -Inf   7.000000          NA
## range                    NA      NA         NA     -Inf   7.000000          NA
## sum                      NA      NA         NA        0   7.000000          NA
## median                   NA      NA         NA       NA   3.500000          NA
## mean                     NA      NA         NA      NaN   3.500000          NA
## SE.mean                  NA      NA         NA       NA   3.500000          NA
## CI.mean.0.95             NA      NA         NA      NaN  44.471717          NA
## var                      NA      NA         NA       NA  24.500000          NA
## std.dev                  NA      NA         NA       NA   4.949747          NA
## coef.var                 NA      NA         NA       NA   1.414214          NA
##              source_link        prop         ypos
## nbr.val               NA   2.0000000    2.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA  45.7447459   22.8723730
## max                   NA  54.2552541   72.8723730
## range                 NA   8.5105081   50.0000000
## sum                   NA 100.0000000   95.7447459
## median                NA  50.0000000   47.8723730
## mean                  NA  50.0000000   47.8723730
## SE.mean               NA   4.2552541   25.0000000
## CI.mean.0.95          NA  54.0681294  317.6551184
## var                   NA  36.2143744 1250.0000000
## std.dev               NA   6.0178380   35.3553391
## coef.var              NA   0.1203568    0.7385332
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

DESLIZAMIENTOS EN PERÚ

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Peru <- subset (df, Country == "Peru")
knitr::kable(head(df_Peru,n=4))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
80 472 3/5/08 SA Peru PE Huanuco 53177 Tingo María 17.34318 -9.2114 -76.1311 (-9.2113999999999994, -76.131100000000004) Landslide Landslide Medium Rain NA 7 http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
90 497 4/11/08 SA Peru PE Ancash 5044 Carhuaz 2.43089 -9.2965 -77.6613 (-9.2965, -77.661299999999997) Landslide Landslide Medium Rain NA NA http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm
184 1004 4/14/09 SA Peru PE La Libertad 0 Parcoy 9.64894 -7.9589 -77.5239 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide Medium Downpour NA 12 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
185 1005 4/14/09 SA Peru PE La Libertad 0 Aricapampa 0.00442 -7.8058 -77.7172 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide Medium Downpour NA 0 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
df_Peru %>% 
  select(Country, State, City, Distance, date)
##      Country       State         City Distance     date
## 80      Peru     Huanuco Tingo María 17.34318   3/5/08
## 90      Peru      Ancash      Carhuaz  2.43089  4/11/08
## 184     Peru La Libertad       Parcoy  9.64894  4/14/09
## 185     Peru La Libertad   Aricapampa  0.00442  4/14/09
## 252     Peru      Ancash         Anta  0.13147  1/28/10
## 294     Peru     Huanuco      Acomayo  3.49890   4/1/10
## 295     Peru     Huanuco         Ambo  0.55865   4/2/10
## 516     Peru     Huanuco     Huánuco  1.01932  11/7/10
## 578     Peru      Ancash        Yanac  3.70678   1/9/11
## 864     Peru      Ancash        Tauca  6.77672  2/15/12
## 912     Peru San Martín    Yuracyacu  8.89799 10/17/12
## 1378    Peru      Ancash      Huachis  3.26788 11/19/14
## 1424    Peru San Martín     Naranjos 12.70296   3/3/15
## 1425    Peru San Martín     Tarapoto  2.89809   3/4/15

Deslizamientos por estado o departamentos

ggplot(data=df_Peru, aes(x = "Peru", y = Distance, fill=State)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(data=df_Peru, aes(fill=State, y=Distance, x="Peru")) +
  geom_bar(position="dodge", stat="identity")

ggplot(data=df_Peru, aes(fill=State, y=Distance, x="Peru")) +
  geom_bar(position="stack", stat="identity")

Ancash:

  • Deslizamientos de las ciudades de Ancash
df_Ansc %>% 
  select(Country, State, City, Distance, date) 
##      Country  State    City Distance     date
## 90      Peru Ancash Carhuaz  2.43089  4/11/08
## 252     Peru Ancash    Anta  0.13147  1/28/10
## 578     Peru Ancash   Yanac  3.70678   1/9/11
## 864     Peru Ancash   Tauca  6.77672  2/15/12
## 1378    Peru Ancash Huachis  3.26788 11/19/14
head(df_Ansc)
##        id     date          time America Country country_code  State population
## 90    497  4/11/08                    SA    Peru           PE Ancash       5044
## 252  1459  1/28/10      15:00:00      SA    Peru           PE Ancash          0
## 578  2948   1/9/11                    SA    Peru           PE Ancash          0
## 864  4200  2/15/12 Early morning      SA    Peru           PE Ancash          0
## 1378 6690 11/19/14                    SA    Peru           PE Ancash          0
##         City Distance location_description latitude longitude
## 90   Carhuaz  2.43089                       -9.2965  -77.6613
## 252     Anta  0.13147                       -9.3584  -77.5984
## 578    Yanac  3.70678                       -8.6252  -77.8341
## 864    Tauca  6.77672                       -8.5348  -78.0834
## 1378 Huachis  3.26788           Above road  -9.3884  -77.1255
##                                     geolocation hazard_type landslide_type
## 90               (-9.2965, -77.661299999999997)   Landslide      Landslide
## 252  (-9.3583999999999996, -77.598399999999998)   Landslide        Complex
## 578  (-8.6251999999999995, -77.834100000000007)   Landslide       Mudslide
## 864  (-8.5348000000000006, -78.083399999999997)   Landslide       Mudslide
## 1378 (-9.3884000000000007, -77.125500000000002)   Landslide      Landslide
##      landslide_size  trigger storm_name injuries fatalities source_name
## 90           Medium     Rain                  NA         NA            
## 252          Medium Downpour                  NA          0            
## 578          Medium Downpour                  NA          0            
## 864          Medium Downpour                  NA          3            
## 1378         Medium Downpour                   0          0         RPP
##                                                                                                            source_link
## 90                                                    http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm
## 252                          http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html
## 578                                                  http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095
## 864                                http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/
## 1378 http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html
ggplot(data=df_Ansc, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Ansc, aes(x = "Ancash", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_Ansc <- df_Ansc %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_Ansc$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Ansc, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8") 
## Warning in pal_name(palette, type): Unknown palette Set8

Ancash

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Ansc$Distance
names(Distance) <- df_Ansc$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##          
## Pareto chart analysis for Distance
##             Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Tauca     6.7767200   6.7767200  41.5399534   41.5399534
##   Yanac     3.7067800  10.4835000  22.7218283   64.2617818
##   Huachis   3.2678800  13.7513800  20.0314581   84.2932399
##   Carhuaz   2.4308900  16.1822700  14.9008750   99.1941149
##   Anta      0.1314700  16.3137400   0.8058851  100.0000000
Diagrama de tallo y hojas
stem(df_Ansc$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 1
##   2 | 437
##   4 | 
##   6 | 8
head(df_Ansc)
##     id     date          time America Country country_code  State population
## 1 2948   1/9/11                    SA    Peru           PE Ancash          0
## 2 4200  2/15/12 Early morning      SA    Peru           PE Ancash          0
## 3 6690 11/19/14                    SA    Peru           PE Ancash          0
## 4  497  4/11/08                    SA    Peru           PE Ancash       5044
## 5 1459  1/28/10      15:00:00      SA    Peru           PE Ancash          0
##      City Distance location_description latitude longitude
## 1   Yanac  3.70678                       -8.6252  -77.8341
## 2   Tauca  6.77672                       -8.5348  -78.0834
## 3 Huachis  3.26788           Above road  -9.3884  -77.1255
## 4 Carhuaz  2.43089                       -9.2965  -77.6613
## 5    Anta  0.13147                       -9.3584  -77.5984
##                                  geolocation hazard_type landslide_type
## 1 (-8.6251999999999995, -77.834100000000007)   Landslide       Mudslide
## 2 (-8.5348000000000006, -78.083399999999997)   Landslide       Mudslide
## 3 (-9.3884000000000007, -77.125500000000002)   Landslide      Landslide
## 4             (-9.2965, -77.661299999999997)   Landslide      Landslide
## 5 (-9.3583999999999996, -77.598399999999998)   Landslide        Complex
##   landslide_size  trigger storm_name injuries fatalities source_name
## 1         Medium Downpour                  NA          0            
## 2         Medium Downpour                  NA          3            
## 3         Medium Downpour                   0          0         RPP
## 4         Medium     Rain                  NA         NA            
## 5         Medium Downpour                  NA          0            
##                                                                                                         source_link
## 1                                                 http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095
## 2                               http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/
## 3 http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html
## 4                                                  http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm
## 5                         http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html
##         prop     ypos
## 1 22.7218283 11.36091
## 2 41.5399534 43.49181
## 3 20.0314581 74.27751
## 4 14.9008750 91.74368
## 5  0.8058851 99.59706
knitr::kable(head(df_Ansc))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
2948 1/9/11 SA Peru PE Ancash 0 Yanac 3.70678 -8.6252 -77.8341 (-8.6251999999999995, -77.834100000000007) Landslide Mudslide Medium Downpour NA 0 http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095 22.7218283 11.36091
4200 2/15/12 Early morning SA Peru PE Ancash 0 Tauca 6.77672 -8.5348 -78.0834 (-8.5348000000000006, -78.083399999999997) Landslide Mudslide Medium Downpour NA 3 http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/ 41.5399534 43.49181
6690 11/19/14 SA Peru PE Ancash 0 Huachis 3.26788 Above road -9.3884 -77.1255 (-9.3884000000000007, -77.125500000000002) Landslide Landslide Medium Downpour 0 0 RPP http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html 20.0314581 74.27751
497 4/11/08 SA Peru PE Ancash 5044 Carhuaz 2.43089 -9.2965 -77.6613 (-9.2965, -77.661299999999997) Landslide Landslide Medium Rain NA NA http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm 14.9008750 91.74368
1459 1/28/10 15:00:00 SA Peru PE Ancash 0 Anta 0.13147 -9.3584 -77.5984 (-9.3583999999999996, -77.598399999999998) Landslide Complex Medium Downpour NA 0 http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html 0.8058851 99.59706
stem(df_Ansc$"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 1
##   2 | 437
##   4 | 
##   6 | 8
stem(df_Ansc$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 1
##   1 | 
##   2 | 4
##   3 | 37
##   4 | 
##   5 | 
##   6 | 8
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Ansc$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr     May
## 2007 3.70678 6.77672 3.26788 2.43089 0.13147
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.13147 1 20 20 20 20
2.43089 1 20 20 40 40
3.26788 1 20 20 60 60
3.70678 1 20 20 80 80
6.77672 1 20 20 100 100
Total 5 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  6 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 5
##  $ %      : num  20 20 20 20 20 100
##  $ val%   : num  20 20 20 20 20 100
##  $ %cum   : num  20 40 60 80 100 100
##  $ val%cum: num  20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.13147 1
2.43089 1
3.26788 1
3.70678 1
6.77672 1
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.13147 3.13147 6.13147 9.13147
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.13147 1 0.2 1
2.43089 1 0.2 2
3.26788 1 0.2 3
3.70678 1 0.2 4
6.77672 1 0.2 5
str(Freq_table)
## 'data.frame':    5 obs. of  4 variables:
##  $ Distance: Factor w/ 5 levels "0.13147","2.43089",..: 1 2 3 4 5
##  $ Freq    : int  1 1 1 1 1
##  $ Rel_Freq: num  0.2 0.2 0.2 0.2 0.2
##  $ Cum_Freq: int  1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.13147 1
2.43089 1
3.26788 1
3.70678 1
6.77672 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Ansc$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1315  2.4309  3.2679  3.2627  3.7068  6.7767
library(pastecs)
stat.desc(df_Ansc)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      5.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          4.970000e+02   NA   NA      NA      NA           NA    NA
## max          6.690000e+03   NA   NA      NA      NA           NA    NA
## range        6.193000e+03   NA   NA      NA      NA           NA    NA
## sum          1.579400e+04   NA   NA      NA      NA           NA    NA
## median       2.948000e+03   NA   NA      NA      NA           NA    NA
## mean         3.158800e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.085643e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 3.014228e+03   NA   NA      NA      NA           NA    NA
## var          5.893102e+06   NA   NA      NA      NA           NA    NA
## std.dev      2.427571e+03   NA   NA      NA      NA           NA    NA
## coef.var     7.685106e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description     latitude
## nbr.val      5.000000e+00   NA  5.0000000                   NA   5.00000000
## nbr.null     4.000000e+00   NA  0.0000000                   NA   0.00000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA   0.00000000
## min          0.000000e+00   NA  0.1314700                   NA  -9.38840000
## max          5.044000e+03   NA  6.7767200                   NA  -8.53480000
## range        5.044000e+03   NA  6.6452500                   NA   0.85360000
## sum          5.044000e+03   NA 16.3137400                   NA -45.20330000
## median       0.000000e+00   NA  3.2678800                   NA  -9.29650000
## mean         1.008800e+03   NA  3.2627480                   NA  -9.04066000
## SE.mean      1.008800e+03   NA  1.0733611                   NA   0.18918738
## CI.mean.0.95 2.800878e+03   NA  2.9801281                   NA   0.52526837
## var          5.088387e+06   NA  5.7605199                   NA   0.17895932
## std.dev      2.255745e+03   NA  2.4001083                   NA   0.42303584
## coef.var     2.236068e+00   NA  0.7356095                   NA  -0.04679258
##                  longitude geolocation hazard_type landslide_type
## nbr.val       5.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.808340e+01          NA          NA             NA
## max          -7.712550e+01          NA          NA             NA
## range         9.579000e-01          NA          NA             NA
## sum          -3.883027e+02          NA          NA             NA
## median       -7.766130e+01          NA          NA             NA
## mean         -7.766054e+01          NA          NA             NA
## SE.mean       1.579657e-01          NA          NA             NA
## CI.mean.0.95  4.385832e-01          NA          NA             NA
## var           1.247659e-01          NA          NA             NA
## std.dev       3.532221e-01          NA          NA             NA
## coef.var     -4.548283e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1   4.000000          NA
## nbr.null                 NA      NA         NA        1   3.000000          NA
## nbr.na                   NA      NA         NA        4   1.000000          NA
## min                      NA      NA         NA        0   0.000000          NA
## max                      NA      NA         NA        0   3.000000          NA
## range                    NA      NA         NA        0   3.000000          NA
## sum                      NA      NA         NA        0   3.000000          NA
## median                   NA      NA         NA        0   0.000000          NA
## mean                     NA      NA         NA        0   0.750000          NA
## SE.mean                  NA      NA         NA       NA   0.750000          NA
## CI.mean.0.95             NA      NA         NA      NaN   2.386835          NA
## var                      NA      NA         NA       NA   2.250000          NA
## std.dev                  NA      NA         NA       NA   1.500000          NA
## coef.var                 NA      NA         NA       NA   2.000000          NA
##              source_link        prop         ypos
## nbr.val               NA   5.0000000    5.0000000
## nbr.null              NA   0.0000000    0.0000000
## nbr.na                NA   0.0000000    0.0000000
## min                   NA   0.8058851   11.3609142
## max                   NA  41.5399534   99.5970574
## range                 NA  40.7340683   88.2361433
## sum                   NA 100.0000000  320.4709650
## median                NA  20.0314581   74.2775109
## mean                  NA  20.0000000   64.0941930
## SE.mean               NA   6.5794911   16.3308059
## CI.mean.0.95          NA  18.2675958   45.3415861
## var                   NA 216.4485140 1333.4761049
## std.dev               NA  14.7121893   36.5167921
## coef.var              NA   0.7356095    0.5697364
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Deslizamientp en Libertad_Peru

df_La  %>% 
  select(Country, State, City, Distance, date) 
##          Country       State               City Distance     date
## 105  El Salvador La Libertad        Santa Tecla  4.96416   6/2/08
## 184         Peru La Libertad             Parcoy  9.64894  4/14/09
## 185         Peru La Libertad         Aricapampa  0.00442  4/14/09
## 225  El Salvador La Libertad Antiguo Cuscatlán  4.86219  11/8/09
## 1374 El Salvador La Libertad        Santa Tecla  4.60655 10/12/14
## 1596 El Salvador La Libertad        Santa Tecla  4.67722  11/3/15
## 1597 El Salvador La Libertad        Santa Tecla  9.87553  11/4/15
head(df_La )
##        id     date time America     Country country_code       State population
## 105   564   6/2/08         <NA> El Salvador           SV La Libertad     124694
## 184  1004  4/14/09           SA        Peru           PE La Libertad          0
## 185  1005  4/14/09           SA        Peru           PE La Libertad          0
## 225  1286  11/8/09         <NA> El Salvador           SV La Libertad      33767
## 1374 6686 10/12/14         <NA> El Salvador           SV La Libertad     124694
## 1596 7440  11/3/15 1:00    <NA> El Salvador           SV La Libertad     124694
##                    City Distance location_description latitude longitude
## 105         Santa Tecla  4.96416                       13.7205  -89.2687
## 184              Parcoy  9.64894                       -7.9589  -77.5239
## 185          Aricapampa  0.00442                       -7.8058  -77.7172
## 225  Antiguo Cuscatlán  4.86219                       13.7156  -89.2521
## 1374        Santa Tecla  4.60655           Above road  13.6905  -89.3200
## 1596        Santa Tecla  4.67722              Unknown  13.7178  -89.2685
##                                     geolocation hazard_type landslide_type
## 105   (13.720499999999999, -89.268699999999995)   Landslide      Landslide
## 184  (-7.9588999999999999, -77.523899999999998)   Landslide       Mudslide
## 185  (-7.8057999999999996, -77.717200000000005)   Landslide       Mudslide
## 225              (13.7156, -89.252099999999999)   Landslide       Mudslide
## 1374                          (13.6905, -89.32)   Landslide      Landslide
## 1596             (13.7178, -89.268500000000003)   Landslide       Mudslide
##      landslide_size          trigger            storm_name injuries fatalities
## 105          Medium Tropical cyclone Tropical Storm Arthur       NA         NA
## 184          Medium         Downpour                             NA         12
## 185          Medium         Downpour                             NA          0
## 225          Medium Tropical cyclone  Tropical Cyclone Ida       NA          4
## 1374         Medium             Rain                              0          0
## 1596         Medium             Rain                              0          0
##               source_name
## 105                      
## 184                      
## 185                      
## 225                      
## 1374             Mexicano
## 1596 El Salvador Noticias
##                                                                                                   source_link
## 105                                          http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
## 184                                    http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 185                                    http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 225                   http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
## 1374                                                      http://www.oem.com.mx/elmexicano/notas/n3569793.htm
## 1596 http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/
ggplot(data=df_La , aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_La , aes(x = "La Libertad", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_La ,aes(x="La Libertad",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=3)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_La $Distance
names(Distance) <- df_La $City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##                     
## Pareto chart analysis for Distance
##                         Frequency    Cum.Freq.   Percentage Cum.Percent.
##   Santa Tecla          9.87553000   9.87553000  25.55844469  25.55844469
##   Parcoy               9.64894000  19.52447000  24.97201662  50.53046131
##   Santa Tecla          4.96416000  24.48863000  12.84753414  63.37799545
##   Antiguo Cuscatlán   4.86219000  29.35082000  12.58362986  75.96162531
##   Santa Tecla          4.67722000  34.02804000  12.10491677  88.06654208
##   Santa Tecla          4.60655000  38.63459000  11.92201871  99.98856078
##   Aricapampa           0.00442000  38.63901000   0.01143922 100.00000000
Diagrama de tallo y hojas
stem(df_La $"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 0
##   2 | 
##   4 | 6790
##   6 | 
##   8 | 69
head(df_La )
##        id     date time America     Country country_code       State population
## 105   564   6/2/08         <NA> El Salvador           SV La Libertad     124694
## 184  1004  4/14/09           SA        Peru           PE La Libertad          0
## 185  1005  4/14/09           SA        Peru           PE La Libertad          0
## 225  1286  11/8/09         <NA> El Salvador           SV La Libertad      33767
## 1374 6686 10/12/14         <NA> El Salvador           SV La Libertad     124694
## 1596 7440  11/3/15 1:00    <NA> El Salvador           SV La Libertad     124694
##                    City Distance location_description latitude longitude
## 105         Santa Tecla  4.96416                       13.7205  -89.2687
## 184              Parcoy  9.64894                       -7.9589  -77.5239
## 185          Aricapampa  0.00442                       -7.8058  -77.7172
## 225  Antiguo Cuscatlán  4.86219                       13.7156  -89.2521
## 1374        Santa Tecla  4.60655           Above road  13.6905  -89.3200
## 1596        Santa Tecla  4.67722              Unknown  13.7178  -89.2685
##                                     geolocation hazard_type landslide_type
## 105   (13.720499999999999, -89.268699999999995)   Landslide      Landslide
## 184  (-7.9588999999999999, -77.523899999999998)   Landslide       Mudslide
## 185  (-7.8057999999999996, -77.717200000000005)   Landslide       Mudslide
## 225              (13.7156, -89.252099999999999)   Landslide       Mudslide
## 1374                          (13.6905, -89.32)   Landslide      Landslide
## 1596             (13.7178, -89.268500000000003)   Landslide       Mudslide
##      landslide_size          trigger            storm_name injuries fatalities
## 105          Medium Tropical cyclone Tropical Storm Arthur       NA         NA
## 184          Medium         Downpour                             NA         12
## 185          Medium         Downpour                             NA          0
## 225          Medium Tropical cyclone  Tropical Cyclone Ida       NA          4
## 1374         Medium             Rain                              0          0
## 1596         Medium             Rain                              0          0
##               source_name
## 105                      
## 184                      
## 185                      
## 225                      
## 1374             Mexicano
## 1596 El Salvador Noticias
##                                                                                                   source_link
## 105                                          http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
## 184                                    http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 185                                    http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 225                   http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
## 1374                                                      http://www.oem.com.mx/elmexicano/notas/n3569793.htm
## 1596 http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/
knitr::kable(head(df_La ))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
105 564 6/2/08 NA El Salvador SV La Libertad 124694 Santa Tecla 4.96416 13.7205 -89.2687 (13.720499999999999, -89.268699999999995) Landslide Landslide Medium Tropical cyclone Tropical Storm Arthur NA NA http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
184 1004 4/14/09 SA Peru PE La Libertad 0 Parcoy 9.64894 -7.9589 -77.5239 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide Medium Downpour NA 12 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
185 1005 4/14/09 SA Peru PE La Libertad 0 Aricapampa 0.00442 -7.8058 -77.7172 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide Medium Downpour NA 0 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
225 1286 11/8/09 NA El Salvador SV La Libertad 33767 Antiguo Cuscatlán 4.86219 13.7156 -89.2521 (13.7156, -89.252099999999999) Landslide Mudslide Medium Tropical cyclone Tropical Cyclone Ida NA 4 http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
1374 6686 10/12/14 NA El Salvador SV La Libertad 124694 Santa Tecla 4.60655 Above road 13.6905 -89.3200 (13.6905, -89.32) Landslide Landslide Medium Rain 0 0 Mexicano http://www.oem.com.mx/elmexicano/notas/n3569793.htm
1596 7440 11/3/15 1:00 NA El Salvador SV La Libertad 124694 Santa Tecla 4.67722 Unknown 13.7178 -89.2685 (13.7178, -89.268500000000003) Landslide Mudslide Medium Rain 0 0 El Salvador Noticias http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/
stem(df_La $"Distance")
## 
##   The decimal point is at the |
## 
##   0 | 0
##   2 | 
##   4 | 6790
##   6 | 
##   8 | 69
stem(df_La $"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   0 | 0
##   1 | 
##   2 | 
##   3 | 
##   4 | 679
##   5 | 0
##   6 | 
##   7 | 
##   8 | 
##   9 | 69
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_La $Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb     Mar     Apr     May     Jun
## 2007 4.96416 9.64894 0.00442 4.86219 4.60655 4.67722
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.00442 1 14.3 14.3 14.3 14.3
4.60655 1 14.3 14.3 28.6 28.6
4.67722 1 14.3 14.3 42.9 42.9
4.86219 1 14.3 14.3 57.1 57.1
4.96416 1 14.3 14.3 71.4 71.4
9.64894 1 14.3 14.3 85.7 85.7
9.87553 1 14.3 14.3 100.0 100.0
Total 7 100.0 100.0 100.0 100.0
str(table) 
## Classes 'freqtab' and 'data.frame':  8 obs. of  5 variables:
##  $ n      : num  1 1 1 1 1 1 1 7
##  $ %      : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ val%   : num  14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
##  $ %cum   : num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
##  $ val%cum: num  14.3 28.6 42.9 57.1 71.4 85.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.00442 1
4.60655 1
4.67722 1
4.86219 1
4.96416 1
9.64894 1
9.87553 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.00442  4.00442  8.00442 12.00442
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.00442 1 0.1428571 1
4.60655 1 0.1428571 2
4.67722 1 0.1428571 3
4.86219 1 0.1428571 4
4.96416 1 0.1428571 5
9.64894 1 0.1428571 6
9.87553 1 0.1428571 7
str(Freq_table)
## 'data.frame':    7 obs. of  4 variables:
##  $ Distance: Factor w/ 7 levels "0.00442","4.60655",..: 1 2 3 4 5 6 7
##  $ Freq    : int  1 1 1 1 1 1 1
##  $ Rel_Freq: num  0.143 0.143 0.143 0.143 0.143 ...
##  $ Cum_Freq: int  1 2 3 4 5 6 7
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.00442 1
4.60655 1
4.67722 1
4.86219 1
4.96416 1
9.64894 1
9.87553 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_La $Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00442 4.64189 4.86219 5.51986 7.30655 9.87553
library(pastecs)
stat.desc(df_La )
##                        id date time America Country country_code State
## nbr.val      7.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          5.640000e+02   NA   NA      NA      NA           NA    NA
## max          7.441000e+03   NA   NA      NA      NA           NA    NA
## range        6.877000e+03   NA   NA      NA      NA           NA    NA
## sum          2.542600e+04   NA   NA      NA      NA           NA    NA
## median       1.286000e+03   NA   NA      NA      NA           NA    NA
## mean         3.632286e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.263594e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 3.091903e+03   NA   NA      NA      NA           NA    NA
## var          1.117669e+07   NA   NA      NA      NA           NA    NA
## std.dev      3.343155e+03   NA   NA      NA      NA           NA    NA
## coef.var     9.203998e-01   NA   NA      NA      NA           NA    NA
##                population City   Distance location_description   latitude
## nbr.val      7.000000e+00   NA  7.0000000                   NA   7.000000
## nbr.null     2.000000e+00   NA  0.0000000                   NA   0.000000
## nbr.na       0.000000e+00   NA  0.0000000                   NA   0.000000
## min          0.000000e+00   NA  0.0044200                   NA  -7.958900
## max          1.246940e+05   NA  9.8755300                   NA  13.720500
## range        1.246940e+05   NA  9.8711100                   NA  21.679400
## sum          5.325430e+05   NA 38.6390100                   NA  52.794400
## median       1.246940e+05   NA  4.8621900                   NA  13.714700
## mean         7.607757e+04   NA  5.5198586                   NA   7.542057
## SE.mean      2.330952e+04   NA  1.2791994                   NA   3.982602
## CI.mean.0.95 5.703633e+04   NA  3.1300883                   NA   9.745075
## var          3.803334e+09   NA 11.4544585                   NA 111.027808
## std.dev      6.167118e+04   NA  3.3844436                   NA  10.536973
## coef.var     8.106355e-01   NA  0.6131395                   NA   1.397095
##                  longitude geolocation hazard_type landslide_type
## nbr.val         7.00000000          NA          NA             NA
## nbr.null        0.00000000          NA          NA             NA
## nbr.na          0.00000000          NA          NA             NA
## min           -89.36250000          NA          NA             NA
## max           -77.52390000          NA          NA             NA
## range          11.83860000          NA          NA             NA
## sum          -601.71290000          NA          NA             NA
## median        -89.26850000          NA          NA             NA
## mean          -85.95898571          NA          NA             NA
## SE.mean         2.15312466          NA          NA             NA
## CI.mean.0.95    5.26850626          NA          NA             NA
## var            32.45162074          NA          NA             NA
## std.dev         5.69663240          NA          NA             NA
## coef.var       -0.06627152          NA          NA             NA
##              landslide_size trigger storm_name  injuries fatalities source_name
## nbr.val                  NA      NA         NA 3.0000000   6.000000          NA
## nbr.null                 NA      NA         NA 2.0000000   4.000000          NA
## nbr.na                   NA      NA         NA 4.0000000   1.000000          NA
## min                      NA      NA         NA 0.0000000   0.000000          NA
## max                      NA      NA         NA 1.0000000  12.000000          NA
## range                    NA      NA         NA 1.0000000  12.000000          NA
## sum                      NA      NA         NA 1.0000000  16.000000          NA
## median                   NA      NA         NA 0.0000000   0.000000          NA
## mean                     NA      NA         NA 0.3333333   2.666667          NA
## SE.mean                  NA      NA         NA 0.3333333   1.977653          NA
## CI.mean.0.95             NA      NA         NA 1.4342176   5.083719          NA
## var                      NA      NA         NA 0.3333333  23.466667          NA
## std.dev                  NA      NA         NA 0.5773503   4.844241          NA
## coef.var                 NA      NA         NA 1.7320508   1.816590          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"

DESLIZAMIENTOS EN HUANUCO-PERÚ

library(readr)
library(knitr)
df_Hua <- subset (df, State == "Huanuco")
df_Hua %>% 
  select(Country, State, City, Distance, date) 
##     Country   State         City Distance    date
## 80     Peru Huanuco Tingo María 17.34318  3/5/08
## 294    Peru Huanuco      Acomayo  3.49890  4/1/10
## 295    Peru Huanuco         Ambo  0.55865  4/2/10
## 516    Peru Huanuco     Huánuco  1.01932 11/7/10
head(df_Hua)
##       id    date          time America Country country_code   State population
## 80   472  3/5/08                    SA    Peru           PE Huanuco      53177
## 294 1612  4/1/10 Early morning      SA    Peru           PE Huanuco          0
## 295 1614  4/2/10                    SA    Peru           PE Huanuco       6865
## 516 2707 11/7/10                    SA    Peru           PE Huanuco     147959
##             City Distance location_description latitude longitude
## 80  Tingo María 17.34318                       -9.2114  -76.1311
## 294      Acomayo  3.49890                       -9.7996  -76.1038
## 295         Ambo  0.55865                      -10.1258  -76.2043
## 516     Huánuco  1.01932                       -9.9241  -76.2488
##                                    geolocation hazard_type landslide_type
## 80  (-9.2113999999999994, -76.131100000000004)   Landslide      Landslide
## 294 (-9.7995999999999999, -76.103800000000007)   Landslide          Lahar
## 295            (-10.1258, -76.204300000000003)   Landslide      Landslide
## 516 (-9.9240999999999993, -76.248800000000003)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 80          Medium     Rain                  NA          7            
## 294          Large Downpour                  NA         12            
## 295          Large Downpour                  NA         28            
## 516         Medium Downpour                  NA          2            
##                                                                                   source_link
## 80                           http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
## 294 http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html
## 295                              http://www.thedailytimes.com/article/20100405/NEWS/304059983
## 516                          http://english.peopledaily.com.cn/90001/90777/90852/7191360.html
ggplot(data=df_Hua, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Hua, aes(x = "Huanuco", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Hua,aes(x="Huanuco",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=3)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamientOS
library(qcc)

Distance <- df_Hua$Distance
names(Distance) <- df_Hua$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##               
## Pareto chart analysis for Distance
##                 Frequency  Cum.Freq. Percentage Cum.Percent.
##   Tingo María  17.343180  17.343180  77.355670    77.355670
##   Acomayo        3.498900  20.842080  15.606120    92.961791
##   Huánuco       1.019320  21.861400   4.546466    97.508257
##   Ambo           0.558650  22.420050   2.491743   100.000000
Diagrama de tallo y hojas
stem(df_Hua$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 113
##   0 | 
##   1 | 
##   1 | 7
head(df_Hua)
##       id    date          time America Country country_code   State population
## 80   472  3/5/08                    SA    Peru           PE Huanuco      53177
## 294 1612  4/1/10 Early morning      SA    Peru           PE Huanuco          0
## 295 1614  4/2/10                    SA    Peru           PE Huanuco       6865
## 516 2707 11/7/10                    SA    Peru           PE Huanuco     147959
##             City Distance location_description latitude longitude
## 80  Tingo María 17.34318                       -9.2114  -76.1311
## 294      Acomayo  3.49890                       -9.7996  -76.1038
## 295         Ambo  0.55865                      -10.1258  -76.2043
## 516     Huánuco  1.01932                       -9.9241  -76.2488
##                                    geolocation hazard_type landslide_type
## 80  (-9.2113999999999994, -76.131100000000004)   Landslide      Landslide
## 294 (-9.7995999999999999, -76.103800000000007)   Landslide          Lahar
## 295            (-10.1258, -76.204300000000003)   Landslide      Landslide
## 516 (-9.9240999999999993, -76.248800000000003)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities source_name
## 80          Medium     Rain                  NA          7            
## 294          Large Downpour                  NA         12            
## 295          Large Downpour                  NA         28            
## 516         Medium Downpour                  NA          2            
##                                                                                   source_link
## 80                           http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
## 294 http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html
## 295                              http://www.thedailytimes.com/article/20100405/NEWS/304059983
## 516                          http://english.peopledaily.com.cn/90001/90777/90852/7191360.html
knitr::kable(head(df_Hua))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
80 472 3/5/08 SA Peru PE Huanuco 53177 Tingo María 17.34318 -9.2114 -76.1311 (-9.2113999999999994, -76.131100000000004) Landslide Landslide Medium Rain NA 7 http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
294 1612 4/1/10 Early morning SA Peru PE Huanuco 0 Acomayo 3.49890 -9.7996 -76.1038 (-9.7995999999999999, -76.103800000000007) Landslide Lahar Large Downpour NA 12 http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html
295 1614 4/2/10 SA Peru PE Huanuco 6865 Ambo 0.55865 -10.1258 -76.2043 (-10.1258, -76.204300000000003) Landslide Landslide Large Downpour NA 28 http://www.thedailytimes.com/article/20100405/NEWS/304059983
516 2707 11/7/10 SA Peru PE Huanuco 147959 Huánuco 1.01932 -9.9241 -76.2488 (-9.9240999999999993, -76.248800000000003) Landslide Landslide Medium Downpour NA 2 http://english.peopledaily.com.cn/90001/90777/90852/7191360.html
stem(df_Hua$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 113
##   0 | 
##   1 | 
##   1 | 7
stem(df_Hua$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 60
##    2 | 5
##    4 | 
##    6 | 
##    8 | 
##   10 | 
##   12 | 
##   14 | 
##   16 | 3
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Hua$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr
## 2007 17.34318  3.49890  0.55865  1.01932
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.55865 1 25 25 25 25
1.01932 1 25 25 50 50
3.4989 1 25 25 75 75
17.34318 1 25 25 100 100
Total 4 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.55865 1
1.01932 1
3.4989 1
17.34318 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.55865  6.55865 12.55865 18.55865
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.55865 1 0.25 1
1.01932 1 0.25 2
3.4989 1 0.25 3
17.34318 1 0.25 4
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ Distance: Factor w/ 4 levels "0.55865","1.01932",..: 1 2 3 4
##  $ Freq    : int  1 1 1 1
##  $ Rel_Freq: num  0.25 0.25 0.25 0.25
##  $ Cum_Freq: int  1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.55865 1
1.01932 1
3.4989 1
17.34318 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Hua$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5586  0.9042  2.2591  5.6050  6.9600 17.3432
library(pastecs)
stat.desc(df_Hua)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      4.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          4.720000e+02   NA   NA      NA      NA           NA    NA
## max          2.707000e+03   NA   NA      NA      NA           NA    NA
## range        2.235000e+03   NA   NA      NA      NA           NA    NA
## sum          6.405000e+03   NA   NA      NA      NA           NA    NA
## median       1.613000e+03   NA   NA      NA      NA           NA    NA
## mean         1.601250e+03   NA   NA      NA      NA           NA    NA
## SE.mean      4.562681e+02   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.452049e+03   NA   NA      NA      NA           NA    NA
## var          8.327223e+05   NA   NA      NA      NA           NA    NA
## std.dev      9.125362e+02   NA   NA      NA      NA           NA    NA
## coef.var     5.698899e-01   NA   NA      NA      NA           NA    NA
##                population City  Distance location_description    latitude
## nbr.val      4.000000e+00   NA  4.000000                   NA   4.0000000
## nbr.null     1.000000e+00   NA  0.000000                   NA   0.0000000
## nbr.na       0.000000e+00   NA  0.000000                   NA   0.0000000
## min          0.000000e+00   NA  0.558650                   NA -10.1258000
## max          1.479590e+05   NA 17.343180                   NA  -9.2114000
## range        1.479590e+05   NA 16.784530                   NA   0.9144000
## sum          2.080010e+05   NA 22.420050                   NA -39.0609000
## median       3.002100e+04   NA  2.259110                   NA  -9.8618500
## mean         5.200025e+04   NA  5.605012                   NA  -9.7652250
## SE.mean      3.409629e+04   NA  3.965630                   NA   0.1964602
## CI.mean.0.95 1.085096e+05   NA 12.620404                   NA   0.6252241
## var          4.650228e+09   NA 62.904879                   NA   0.1543865
## std.dev      6.819258e+04   NA  7.931260                   NA   0.3929204
## coef.var     1.311389e+00   NA  1.415030                   NA  -0.0402367
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.624880e+01          NA          NA             NA
## max          -7.610380e+01          NA          NA             NA
## range         1.450000e-01          NA          NA             NA
## sum          -3.046880e+02          NA          NA             NA
## median       -7.616770e+01          NA          NA             NA
## mean         -7.617200e+01          NA          NA             NA
## SE.mean       3.324853e-02          NA          NA             NA
## CI.mean.0.95  1.058117e-01          NA          NA             NA
## var           4.421860e-03          NA          NA             NA
## std.dev       6.649707e-02          NA          NA             NA
## coef.var     -8.729857e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries  fatalities source_name
## nbr.val                  NA      NA         NA        0   4.0000000          NA
## nbr.null                 NA      NA         NA        0   0.0000000          NA
## nbr.na                   NA      NA         NA        4   0.0000000          NA
## min                      NA      NA         NA      Inf   2.0000000          NA
## max                      NA      NA         NA     -Inf  28.0000000          NA
## range                    NA      NA         NA     -Inf  26.0000000          NA
## sum                      NA      NA         NA        0  49.0000000          NA
## median                   NA      NA         NA       NA   9.5000000          NA
## mean                     NA      NA         NA      NaN  12.2500000          NA
## SE.mean                  NA      NA         NA       NA   5.6328649          NA
## CI.mean.0.95             NA      NA         NA      NaN  17.9262900          NA
## var                      NA      NA         NA       NA 126.9166667          NA
## std.dev                  NA      NA         NA       NA  11.2657297          NA
## coef.var                 NA      NA         NA       NA   0.9196514          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

DESLIZAMIENTOS EN ECUADOR

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Ecuador <- subset (df, Country == "Ecuador")
knitr::kable(head(df_Ecuador,n=4))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
9 105 6/27/07 SA Ecuador EC Zamora-Chinchipe 15276 Zamora 0.47714 -4.0650 -78.9510 (-4.0650000000000004, -78.950999999999993) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
10 106 6/27/07 SA Ecuador EC Loja 117796 Loja 0.35649 -3.9900 -79.2050 (-3.99, -79.204999999999998) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
11 107 6/27/07 SA Ecuador EC Pichincha 5114 Sangolquí 33.94603 -0.3560 -78.1480 (-0.35599999999999998, -78.147999999999996) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
78 468 2/28/08 SA Ecuador EC Napo 7309 Archidona 50.21741 -0.4635 -77.8928 (-0.46350000000000002, -77.892799999999994) Landslide Landslide Medium Rain NA 7 http://english.aljazeera.net/NR/exeres/868843D7-B211-4DE5-AFBE-31C29CF79C5A.htm
df_Ecuador %>% 
  select(Country, State, City, Distance, date)
##      Country                           State                           City
## 9    Ecuador                Zamora-Chinchipe                         Zamora
## 10   Ecuador                            Loja                           Loja
## 11   Ecuador                       Pichincha                     Sangolquí
## 78   Ecuador                            Napo                      Archidona
## 87   Ecuador                       Pichincha                          Quito
## 218  Ecuador Santo Domingo de los Tsáchilas Santo Domingo de los Colorados
## 238  Ecuador                       Pichincha                       Machachi
## 253  Ecuador                      Tungurahua                         Baños
## 339  Ecuador                      Esmeraldas                     Esmeraldas
## 586  Ecuador                       Pichincha                       Machachi
## 587  Ecuador                        Cotopaxi                     Saquisilí
## 660  Ecuador                 Morona-Santiago                          Macas
## 697  Ecuador                       Pichincha                          Quito
## 751  Ecuador                          Carchi                      El Ã\201ngel
## 868  Ecuador                         Manabi                           Chone
## 872  Ecuador                         Manabi                         Tosagua
## 942  Ecuador                          Guayas                          Balao
## 956  Ecuador                      Esmeraldas                     Esmeraldas
## 984  Ecuador                       Pichincha                        Cayambe
## 1304 Ecuador                          Carchi                    San Gabriel
## 1332 Ecuador                       Pichincha                          Quito
## 1333 Ecuador                       Pichincha                          Quito
## 1335 Ecuador                       Sucumbios                Gonzalo Pizarro
## 1365 Ecuador                           Azuay                         Cuenca
## 1368 Ecuador                Zamora-Chinchipe                         Zamora
## 1451 Ecuador                            Loja                        Macará
## 1452 Ecuador                            Loja                           Loja
## 1453 Ecuador                            Loja                       Catamayo
## 1454 Ecuador                           Azuay                         Cuenca
## 1566 Ecuador                       Pichincha                          Quito
##      Distance     date
## 9     0.47714  6/27/07
## 10    0.35649  6/27/07
## 11   33.94603  6/27/07
## 78   50.21741  2/28/08
## 87    1.56942   4/1/08
## 218   1.16036 12/28/09
## 238  26.18676  1/10/10
## 253  11.91442   2/3/10
## 339   2.81891   5/3/10
## 586  25.82923  2/14/11
## 587  30.81169  2/14/11
## 660  46.77007  4/24/11
## 697   4.39517   5/2/11
## 751  28.29459   6/5/11
## 868  19.85816  3/13/12
## 872   7.67919  3/24/12
## 942  16.34404  1/24/13
## 956  21.26652  4/23/13
## 984  45.69792  5/31/13
## 1304 10.47204 10/20/14
## 1332 26.72137  8/12/14
## 1333 23.97854  8/12/14
## 1335 11.55916 12/13/14
## 1365 13.21139  9/29/14
## 1368  1.23724  4/30/14
## 1451 18.88784  3/18/15
## 1452  1.82885  3/18/15
## 1453 17.57187  3/18/15
## 1454 10.16196  3/18/15
## 1566  4.25486  4/29/11

Deslizamientos por estados o departamentos

ggplot(data=df_Ecuador, aes(x = "Ecuador", y = Distance, fill=State)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(data=df_Ecuador, aes(fill=State, y=Distance, x="Ecuador")) +
  geom_bar(position="dodge", stat="identity")

ggplot(data=df_Ecuador, aes(fill=State, y=Distance, x="Ecuador")) +
  geom_bar(position="stack", stat="identity")

DESLIZAMIENTOS EN CARCHI-ECUADOR
library(readr)
library(knitr)
df_Car <- subset (df, State == "Carchi")
df_Car %>% 
  select(Country, State, City, Distance, date) 
##      Country  State        City Distance     date
## 751  Ecuador Carchi   El Ã\201ngel 28.29459   6/5/11
## 1304 Ecuador Carchi San Gabriel 10.47204 10/20/14
head(df_Car)
##        id     date  time America Country country_code  State population
## 751  3572   6/5/11            SA Ecuador           EC Carchi       3983
## 1304 6308 10/20/14 19:33      SA Ecuador           EC Carchi      15112
##             City Distance location_description latitude longitude
## 751    El Ã\201ngel 28.29459                        0.8479  -78.0609
## 1304 San Gabriel 10.47204           Above road   0.6194  -77.7404
##                                     geolocation hazard_type landslide_type
## 751  (0.84789999999999999, -78.060900000000004)   Landslide      Landslide
## 1304 (0.61939999999999995, -77.740399999999994)   Landslide       Rockfall
##      landslide_size    trigger storm_name injuries fatalities       source_name
## 751          Medium   Downpour                  NA          0                  
## 1304          Small Earthquake                   0          0 Earthquake Report
##                                                                                                       source_link
## 751                                                            http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271
## 1304 http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/
ggplot(data=df_Car, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Car, aes(x = "Carchi", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Car,aes(x="Carchi",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=3)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Car$Distance
names(Distance) <- df_Car$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##              
## Pareto chart analysis for Distance
##               Frequency Cum.Freq. Percentage Cum.Percent.
##   El Ã\201ngel    28.29459  28.29459   72.98697     72.98697
##   San Gabriel  10.47204  38.76663   27.01303    100.00000
Diagrama de tallo y hojas
stem(df_Car$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   1 | 0
##   1 | 
##   2 | 
##   2 | 8
head(df_Car)
##        id     date  time America Country country_code  State population
## 751  3572   6/5/11            SA Ecuador           EC Carchi       3983
## 1304 6308 10/20/14 19:33      SA Ecuador           EC Carchi      15112
##             City Distance location_description latitude longitude
## 751    El Ã\201ngel 28.29459                        0.8479  -78.0609
## 1304 San Gabriel 10.47204           Above road   0.6194  -77.7404
##                                     geolocation hazard_type landslide_type
## 751  (0.84789999999999999, -78.060900000000004)   Landslide      Landslide
## 1304 (0.61939999999999995, -77.740399999999994)   Landslide       Rockfall
##      landslide_size    trigger storm_name injuries fatalities       source_name
## 751          Medium   Downpour                  NA          0                  
## 1304          Small Earthquake                   0          0 Earthquake Report
##                                                                                                       source_link
## 751                                                            http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271
## 1304 http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/
knitr::kable(head(df_Car))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
751 3572 6/5/11 SA Ecuador EC Carchi 3983 El Ángel 28.29459 0.8479 -78.0609 (0.84789999999999999, -78.060900000000004) Landslide Landslide Medium Downpour NA 0 http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271
1304 6308 10/20/14 19:33 SA Ecuador EC Carchi 15112 San Gabriel 10.47204 Above road 0.6194 -77.7404 (0.61939999999999995, -77.740399999999994) Landslide Rockfall Small Earthquake 0 0 Earthquake Report http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/
stem(df_Car$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   1 | 0
##   1 | 
##   2 | 
##   2 | 8
stem(df_Car$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##   10 | 5
##   12 | 
##   14 | 
##   16 | 
##   18 | 
##   20 | 
##   22 | 
##   24 | 
##   26 | 
##   28 | 3
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Car$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb
## 2007 28.29459 10.47204
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
10.47204 1 50 50 50 50
28.29459 1 50 50 100 100
Total 2 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
10.47204 1
28.29459 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 10.47204 19.47204 28.47204
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
10.47204 1 0.5 1
28.29459 1 0.5 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ Distance: Factor w/ 2 levels "10.47204","28.29459": 1 2
##  $ Freq    : int  1 1
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
10.47204 1
28.29459 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Car$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.47   14.93   19.38   19.38   23.84   28.29
library(pastecs)
stat.desc(df_Car)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      2.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          3.572000e+03   NA   NA      NA      NA           NA    NA
## max          6.308000e+03   NA   NA      NA      NA           NA    NA
## range        2.736000e+03   NA   NA      NA      NA           NA    NA
## sum          9.880000e+03   NA   NA      NA      NA           NA    NA
## median       4.940000e+03   NA   NA      NA      NA           NA    NA
## mean         4.940000e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.368000e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 1.738209e+04   NA   NA      NA      NA           NA    NA
## var          3.742848e+06   NA   NA      NA      NA           NA    NA
## std.dev      1.934644e+03   NA   NA      NA      NA           NA    NA
## coef.var     3.916284e-01   NA   NA      NA      NA           NA    NA
##                population City    Distance location_description   latitude
## nbr.val      2.000000e+00   NA   2.0000000                   NA 2.00000000
## nbr.null     0.000000e+00   NA   0.0000000                   NA 0.00000000
## nbr.na       0.000000e+00   NA   0.0000000                   NA 0.00000000
## min          3.983000e+03   NA  10.4720400                   NA 0.61940000
## max          1.511200e+04   NA  28.2945900                   NA 0.84790000
## range        1.112900e+04   NA  17.8225500                   NA 0.22850000
## sum          1.909500e+04   NA  38.7666300                   NA 1.46730000
## median       9.547500e+03   NA  19.3833150                   NA 0.73365000
## mean         9.547500e+03   NA  19.3833150                   NA 0.73365000
## SE.mean      5.564500e+03   NA   8.9112750                   NA 0.11425000
## CI.mean.0.95 7.070368e+04   NA 113.2284846                   NA 1.45168389
## var          6.192732e+07   NA 158.8216443                   NA 0.02610613
## std.dev      7.869391e+03   NA  12.6024460                   NA 0.16157390
## coef.var     8.242358e-01   NA   0.6501698                   NA 0.22023294
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.806090e+01          NA          NA             NA
## max          -7.774040e+01          NA          NA             NA
## range         3.205000e-01          NA          NA             NA
## sum          -1.558013e+02          NA          NA             NA
## median       -7.790065e+01          NA          NA             NA
## mean         -7.790065e+01          NA          NA             NA
## SE.mean       1.602500e-01          NA          NA             NA
## CI.mean.0.95  2.036169e+00          NA          NA             NA
## var           5.136013e-02          NA          NA             NA
## std.dev       2.266277e-01          NA          NA             NA
## coef.var     -2.909189e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1          2          NA
## nbr.null                 NA      NA         NA        1          2          NA
## nbr.na                   NA      NA         NA        1          0          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA       NA          0          NA
## CI.mean.0.95             NA      NA         NA      NaN          0          NA
## var                      NA      NA         NA       NA          0          NA
## std.dev                  NA      NA         NA       NA          0          NA
## coef.var                 NA      NA         NA       NA        NaN          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"

DESLIZAMIENTOS EN LOJA-ECUADOR

library(readr)
library(knitr)
df_Loj <- subset (df, State == "Loja")
df_Loj %>% 
  select(Country, State, City, Distance, date) 
##      Country State     City Distance    date
## 10   Ecuador  Loja     Loja  0.35649 6/27/07
## 1451 Ecuador  Loja  Macará 18.88784 3/18/15
## 1452 Ecuador  Loja     Loja  1.82885 3/18/15
## 1453 Ecuador  Loja Catamayo 17.57187 3/18/15
head(df_Loj)
##        id    date  time America Country country_code State population     City
## 10    106 6/27/07            SA Ecuador           EC  Loja     117796     Loja
## 1451 6893 3/18/15 Night      SA Ecuador           EC  Loja      13035  Macará
## 1452 6900 3/18/15            SA Ecuador           EC  Loja     117796     Loja
## 1453 6901 3/18/15            SA Ecuador           EC  Loja      18565 Catamayo
##      Distance location_description latitude longitude
## 10    0.35649                       -3.9900  -79.2050
## 1451 18.88784           Above road  -4.3313  -79.7811
## 1452  1.82885              Unknown  -4.0094  -79.2073
## 1453 17.57187           Above road  -4.1380  -79.4069
##                                     geolocation hazard_type landslide_type
## 10                 (-3.99, -79.204999999999998)   Landslide      Landslide
## 1451 (-4.3312999999999997, -79.781099999999995)   Landslide      Landslide
## 1452 (-4.0094000000000003, -79.207300000000004)   Landslide      Landslide
## 1453 (-4.1379999999999999, -79.406899999999993)   Landslide      Landslide
##      landslide_size         trigger storm_name injuries fatalities
## 10           Medium        Downpour                  NA         NA
## 1451          Small Continuous rain                   0          0
## 1452         Medium Continuous rain                   0          0
## 1453         Medium Continuous rain                   0          0
##                    source_name
## 10   Red Cross - Field reports
## 1451               El Comercio
## 1452               El Comercio
## 1453               El Comercio
##                                                                            source_link
## 10              https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1451 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1452 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1453 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
ggplot(data=df_Loj, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_Loj, aes(x = "Loja", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

ggplot(df_Loj,aes(x="Loja",y=Distance, fill=City))+
  geom_bar(stat = "identity",
           color="white")+
    geom_text(aes(label=(Distance*10)),
              position=position_stack(vjust=0.5),color="white",size=3)+
  coord_polar(theta = "y")+
    labs(title="Gráfico de Deslizamiento")

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_Loj$Distance
names(Distance) <- df_Loj$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##           
## Pareto chart analysis for Distance
##              Frequency   Cum.Freq.  Percentage Cum.Percent.
##   Macará   18.8878400  18.8878400  48.8751858   48.8751858
##   Catamayo  17.5718700  36.4597100  45.4699114   94.3450972
##   Loja       1.8288500  38.2885600   4.7324302   99.0775274
##   Loja       0.3564900  38.6450500   0.9224726  100.0000000
Diagrama de tallo y hojas
stem(df_Loj$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 02
##   0 | 
##   1 | 
##   1 | 89
head(df_Loj)
##        id    date  time America Country country_code State population     City
## 10    106 6/27/07            SA Ecuador           EC  Loja     117796     Loja
## 1451 6893 3/18/15 Night      SA Ecuador           EC  Loja      13035  Macará
## 1452 6900 3/18/15            SA Ecuador           EC  Loja     117796     Loja
## 1453 6901 3/18/15            SA Ecuador           EC  Loja      18565 Catamayo
##      Distance location_description latitude longitude
## 10    0.35649                       -3.9900  -79.2050
## 1451 18.88784           Above road  -4.3313  -79.7811
## 1452  1.82885              Unknown  -4.0094  -79.2073
## 1453 17.57187           Above road  -4.1380  -79.4069
##                                     geolocation hazard_type landslide_type
## 10                 (-3.99, -79.204999999999998)   Landslide      Landslide
## 1451 (-4.3312999999999997, -79.781099999999995)   Landslide      Landslide
## 1452 (-4.0094000000000003, -79.207300000000004)   Landslide      Landslide
## 1453 (-4.1379999999999999, -79.406899999999993)   Landslide      Landslide
##      landslide_size         trigger storm_name injuries fatalities
## 10           Medium        Downpour                  NA         NA
## 1451          Small Continuous rain                   0          0
## 1452         Medium Continuous rain                   0          0
## 1453         Medium Continuous rain                   0          0
##                    source_name
## 10   Red Cross - Field reports
## 1451               El Comercio
## 1452               El Comercio
## 1453               El Comercio
##                                                                            source_link
## 10              https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1451 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1452 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1453 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
knitr::kable(head(df_Loj))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link
10 106 6/27/07 SA Ecuador EC Loja 117796 Loja 0.35649 -3.9900 -79.2050 (-3.99, -79.204999999999998) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
1451 6893 3/18/15 Night SA Ecuador EC Loja 13035 Macará 18.88784 Above road -4.3313 -79.7811 (-4.3312999999999997, -79.781099999999995) Landslide Landslide Small Continuous rain 0 0 El Comercio http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
1452 6900 3/18/15 SA Ecuador EC Loja 117796 Loja 1.82885 Unknown -4.0094 -79.2073 (-4.0094000000000003, -79.207300000000004) Landslide Landslide Medium Continuous rain 0 0 El Comercio http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
1453 6901 3/18/15 SA Ecuador EC Loja 18565 Catamayo 17.57187 Above road -4.1380 -79.4069 (-4.1379999999999999, -79.406899999999993) Landslide Landslide Medium Continuous rain 0 0 El Comercio http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
stem(df_Loj$"Distance")
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   0 | 02
##   0 | 
##   1 | 
##   1 | 89
stem(df_Loj$"Distance", scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 48
##    2 | 
##    4 | 
##    6 | 
##    8 | 
##   10 | 
##   12 | 
##   14 | 
##   16 | 6
##   18 | 9
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_Loj$Distance, frequency=12, start=2007)
head(data_serie)
##           Jan      Feb      Mar      Apr
## 2007  0.35649 18.88784  1.82885 17.57187
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.35649 1 25 25 25 25
1.82885 1 25 25 50 50
17.57187 1 25 25 75 75
18.88784 1 25 25 100 100
Total 4 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  5 obs. of  5 variables:
##  $ n      : num  1 1 1 1 4
##  $ %      : num  25 25 25 25 100
##  $ val%   : num  25 25 25 25 100
##  $ %cum   : num  25 50 75 100 100
##  $ val%cum: num  25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.35649 1
1.82885 1
17.57187 1
18.88784 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1]  0.35649  7.35649 14.35649 21.35649
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.35649 1 0.25 1
1.82885 1 0.25 2
17.57187 1 0.25 3
18.88784 1 0.25 4
str(Freq_table)
## 'data.frame':    4 obs. of  4 variables:
##  $ Distance: Factor w/ 4 levels "0.35649","1.82885",..: 1 2 3 4
##  $ Freq    : int  1 1 1 1
##  $ Rel_Freq: num  0.25 0.25 0.25 0.25
##  $ Cum_Freq: int  1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.35649 1
1.82885 1
17.57187 1
18.88784 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_Loj$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3565  1.4608  9.7004  9.6613 17.9009 18.8878
library(pastecs)
stat.desc(df_Loj)
##                        id date time America Country country_code State
## nbr.val      4.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          1.060000e+02   NA   NA      NA      NA           NA    NA
## max          6.901000e+03   NA   NA      NA      NA           NA    NA
## range        6.795000e+03   NA   NA      NA      NA           NA    NA
## sum          2.080000e+04   NA   NA      NA      NA           NA    NA
## median       6.896500e+03   NA   NA      NA      NA           NA    NA
## mean         5.200000e+03   NA   NA      NA      NA           NA    NA
## SE.mean      1.698001e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 5.403797e+03   NA   NA      NA      NA           NA    NA
## var          1.153283e+07   NA   NA      NA      NA           NA    NA
## std.dev      3.396002e+03   NA   NA      NA      NA           NA    NA
## coef.var     6.530773e-01   NA   NA      NA      NA           NA    NA
##                population City  Distance location_description     latitude
## nbr.val      4.000000e+00   NA  4.000000                   NA   4.00000000
## nbr.null     0.000000e+00   NA  0.000000                   NA   0.00000000
## nbr.na       0.000000e+00   NA  0.000000                   NA   0.00000000
## min          1.303500e+04   NA  0.356490                   NA  -4.33130000
## max          1.177960e+05   NA 18.887840                   NA  -3.99000000
## range        1.047610e+05   NA 18.531350                   NA   0.34130000
## sum          2.671920e+05   NA 38.645050                   NA -16.46870000
## median       6.818050e+04   NA  9.700360                   NA  -4.07370000
## mean         6.679800e+04   NA  9.661262                   NA  -4.11717500
## SE.mean      2.946534e+04   NA  4.963474                   NA   0.07856639
## CI.mean.0.95 9.377186e+04   NA 15.795990                   NA   0.25003331
## var          3.472825e+09   NA 98.544307                   NA   0.02469071
## std.dev      5.893068e+04   NA  9.926949                   NA   0.15713278
## coef.var     8.822222e-01   NA  1.027500                   NA  -0.03816519
##                  longitude geolocation hazard_type landslide_type
## nbr.val       4.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.978110e+01          NA          NA             NA
## max          -7.920500e+01          NA          NA             NA
## range         5.761000e-01          NA          NA             NA
## sum          -3.176003e+02          NA          NA             NA
## median       -7.930710e+01          NA          NA             NA
## mean         -7.940008e+01          NA          NA             NA
## SE.mean       1.355369e-01          NA          NA             NA
## CI.mean.0.95  4.313390e-01          NA          NA             NA
## var           7.348103e-02          NA          NA             NA
## std.dev       2.710738e-01          NA          NA             NA
## coef.var     -3.414025e-03          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        3          3          NA
## nbr.null                 NA      NA         NA        3          3          NA
## nbr.na                   NA      NA         NA        1          1          NA
## min                      NA      NA         NA        0          0          NA
## max                      NA      NA         NA        0          0          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          0          NA
## median                   NA      NA         NA        0          0          NA
## mean                     NA      NA         NA        0          0          NA
## SE.mean                  NA      NA         NA        0          0          NA
## CI.mean.0.95             NA      NA         NA        0          0          NA
## var                      NA      NA         NA        0          0          NA
## std.dev                  NA      NA         NA        0          0          NA
## coef.var                 NA      NA         NA      NaN        NaN          NA
##              source_link
## nbr.val               NA
## nbr.null              NA
## nbr.na                NA
## min                   NA
## max                   NA
## range                 NA
## sum                   NA
## median                NA
## mean                  NA
## SE.mean               NA
## CI.mean.0.95          NA
## var                   NA
## std.dev               NA
## coef.var              NA
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Zamora-Chinchipe:

Deslizamientos de las ciudades de Zamora-Chinchipe
df_za %>% 
  select(Country, State, City, Distance, date) 
##      Country            State   City Distance    date
## 9    Ecuador Zamora-Chinchipe Zamora  0.47714 6/27/07
## 1368 Ecuador Zamora-Chinchipe Zamora  1.23724 4/30/14
head(df_za)
##        id    date time America Country country_code            State population
## 9     105 6/27/07           SA Ecuador           EC Zamora-Chinchipe      15276
## 1368 6680 4/30/14 4:30      SA Ecuador           EC Zamora-Chinchipe      15276
##        City Distance location_description latitude longitude
## 9    Zamora  0.47714                       -4.0650  -78.9510
## 1368 Zamora  1.23724           Urban area  -4.0602  -78.9638
##                                     geolocation hazard_type landslide_type
## 9    (-4.0650000000000004, -78.950999999999993)   Landslide      Landslide
## 1368             (-4.0602, -78.963800000000006)   Landslide      Landslide
##      landslide_size  trigger storm_name injuries fatalities
## 9            Medium Downpour                  NA         NA
## 1368          Small Downpour                   0          3
##                    source_name
## 9    Red Cross - Field reports
## 1368                notimerica
##                                                                                                                         source_link
## 9                                                            https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1368 http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html
ggplot(data=df_za, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_za, aes(x = "Zamora-Chinchipe", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_za <- df_za %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_za$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)

df_za <- df_za %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_za$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_za, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
  scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8

Diagrama de pareto
  • Donde se concentran las ciudades con mayor deslizamiento
library(qcc)

Distance <- df_za$Distance
names(Distance) <- df_za$City 

pareto.chart(Distance, 
             ylab="Distance",
             col = heat.colors(length(Distance)),
             cumperc = seq(0, 100, by = 10),
             ylab2 = "Porcentaje acumulado",
             main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)

##         
## Pareto chart analysis for Distance
##          Frequency Cum.Freq. Percentage Cum.Percent.
##   Zamora   1.23724   1.23724   72.16836     72.16836
##   Zamora   0.47714   1.71438   27.83164    100.00000
Diagrama de tallo y hojas
stem(df_za$"Distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    4 | 8
##    6 | 
##    8 | 
##   10 | 
##   12 | 4
head(df_za)
##     id    date time America Country country_code            State population
## 1  105 6/27/07           SA Ecuador           EC Zamora-Chinchipe      15276
## 2 6680 4/30/14 4:30      SA Ecuador           EC Zamora-Chinchipe      15276
##     City Distance location_description latitude longitude
## 1 Zamora  0.47714                       -4.0650  -78.9510
## 2 Zamora  1.23724           Urban area  -4.0602  -78.9638
##                                  geolocation hazard_type landslide_type
## 1 (-4.0650000000000004, -78.950999999999993)   Landslide      Landslide
## 2             (-4.0602, -78.963800000000006)   Landslide      Landslide
##   landslide_size  trigger storm_name injuries fatalities
## 1         Medium Downpour                  NA         NA
## 2          Small Downpour                   0          3
##                 source_name
## 1 Red Cross - Field reports
## 2                notimerica
##                                                                                                                      source_link
## 1                                                         https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 2 http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html
##       prop     ypos
## 1 27.83164 13.91582
## 2 72.16836 63.91582
knitr::kable(head(df_za))
id date time America Country country_code State population City Distance location_description latitude longitude geolocation hazard_type landslide_type landslide_size trigger storm_name injuries fatalities source_name source_link prop ypos
105 6/27/07 SA Ecuador EC Zamora-Chinchipe 15276 Zamora 0.47714 -4.0650 -78.9510 (-4.0650000000000004, -78.950999999999993) Landslide Landslide Medium Downpour NA NA Red Cross - Field reports https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 27.83164 13.91582
6680 4/30/14 4:30 SA Ecuador EC Zamora-Chinchipe 15276 Zamora 1.23724 Urban area -4.0602 -78.9638 (-4.0602, -78.963800000000006) Landslide Landslide Small Downpour 0 3 notimerica http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html 72.16836 63.91582
stem(df_za$"Distance")
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    4 | 8
##    6 | 
##    8 | 
##   10 | 
##   12 | 4
stem(df_za$"Distance", scale = 2)
## 
##   The decimal point is 1 digit(s) to the left of the |
## 
##    4 | 8
##    5 | 
##    6 | 
##    7 | 
##    8 | 
##    9 | 
##   10 | 
##   11 | 
##   12 | 4
Gráfico de series temporales
library(forecast)
data_serie<- ts(df_za$Distance, frequency=12, start=2007)
head(data_serie)
##          Jan     Feb
## 2007 0.47714 1.23724
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()

Tablas de frecuencia
library(questionr)

table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n % val% %cum val%cum
0.47714 1 50 50 50 50
1.23724 1 50 50 100 100
Total 2 100 100 100 100
str(table) 
## Classes 'freqtab' and 'data.frame':  3 obs. of  5 variables:
##  $ n      : num  1 1 2
##  $ %      : num  50 50 100
##  $ val%   : num  50 50 100
##  $ %cum   : num  50 100 100
##  $ val%cum: num  50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x y
0.47714 1
1.23724 1
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) + 
  geom_bar(stat="identity", color="white", fill="blue") +
  xlab("Número de asistencias") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)

n_clases = 0
if (n_sturgesc%%2 == 0) {
  n_clases = n_sturgesf
} else {
  n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.47714 1.47714
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance Freq Rel_Freq Cum_Freq
0.47714 1 0.5 1
1.23724 1 0.5 2
str(Freq_table)
## 'data.frame':    2 obs. of  4 variables:
##  $ Distance: Factor w/ 2 levels "0.47714","1.23724": 1 2
##  $ Freq    : int  1 1
##  $ Rel_Freq: num  0.5 0.5
##  $ Cum_Freq: int  1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x y
0.47714 1
1.23724 1
library(ggplot2)

ggplot(data=df, aes(x=x, y=y)) +
  geom_bar(stat="identity", color="blue", fill="green") +
  xlab("Rango de Distance") +
  ylab("Frecuencia")

Estadísticos
  • Personas Afectadas por Deslizamiento
summary(df_za$Distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4771  0.6672  0.8572  0.8572  1.0472  1.2372
library(pastecs)
stat.desc(df_za)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced

## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
##                        id date time America Country country_code State
## nbr.val      2.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.null     0.000000e+00   NA   NA      NA      NA           NA    NA
## nbr.na       0.000000e+00   NA   NA      NA      NA           NA    NA
## min          1.050000e+02   NA   NA      NA      NA           NA    NA
## max          6.680000e+03   NA   NA      NA      NA           NA    NA
## range        6.575000e+03   NA   NA      NA      NA           NA    NA
## sum          6.785000e+03   NA   NA      NA      NA           NA    NA
## median       3.392500e+03   NA   NA      NA      NA           NA    NA
## mean         3.392500e+03   NA   NA      NA      NA           NA    NA
## SE.mean      3.287500e+03   NA   NA      NA      NA           NA    NA
## CI.mean.0.95 4.177165e+04   NA   NA      NA      NA           NA    NA
## var          2.161531e+07   NA   NA      NA      NA           NA    NA
## std.dev      4.649227e+03   NA   NA      NA      NA           NA    NA
## coef.var     1.370443e+00   NA   NA      NA      NA           NA    NA
##              population City  Distance location_description      latitude
## nbr.val               2   NA 2.0000000                   NA  2.0000000000
## nbr.null              0   NA 0.0000000                   NA  0.0000000000
## nbr.na                0   NA 0.0000000                   NA  0.0000000000
## min               15276   NA 0.4771400                   NA -4.0650000000
## max               15276   NA 1.2372400                   NA -4.0602000000
## range                 0   NA 0.7601000                   NA  0.0048000000
## sum               30552   NA 1.7143800                   NA -8.1252000000
## median            15276   NA 0.8571900                   NA -4.0626000000
## mean              15276   NA 0.8571900                   NA -4.0626000000
## SE.mean               0   NA 0.3800500                   NA  0.0024000000
## CI.mean.0.95          0   NA 4.8289931                   NA  0.0304948914
## var                   0   NA 0.2888760                   NA  0.0000115200
## std.dev               0   NA 0.5374719                   NA  0.0033941125
## coef.var              0   NA 0.6270160                   NA -0.0008354533
##                  longitude geolocation hazard_type landslide_type
## nbr.val       2.000000e+00          NA          NA             NA
## nbr.null      0.000000e+00          NA          NA             NA
## nbr.na        0.000000e+00          NA          NA             NA
## min          -7.896380e+01          NA          NA             NA
## max          -7.895100e+01          NA          NA             NA
## range         1.280000e-02          NA          NA             NA
## sum          -1.579148e+02          NA          NA             NA
## median       -7.895740e+01          NA          NA             NA
## mean         -7.895740e+01          NA          NA             NA
## SE.mean       6.400000e-03          NA          NA             NA
## CI.mean.0.95  8.131971e-02          NA          NA             NA
## var           8.192000e-05          NA          NA             NA
## std.dev       9.050967e-03          NA          NA             NA
## coef.var     -1.146310e-04          NA          NA             NA
##              landslide_size trigger storm_name injuries fatalities source_name
## nbr.val                  NA      NA         NA        1          1          NA
## nbr.null                 NA      NA         NA        1          0          NA
## nbr.na                   NA      NA         NA        1          1          NA
## min                      NA      NA         NA        0          3          NA
## max                      NA      NA         NA        0          3          NA
## range                    NA      NA         NA        0          0          NA
## sum                      NA      NA         NA        0          3          NA
## median                   NA      NA         NA        0          3          NA
## mean                     NA      NA         NA        0          3          NA
## SE.mean                  NA      NA         NA       NA         NA          NA
## CI.mean.0.95             NA      NA         NA      NaN        NaN          NA
## var                      NA      NA         NA       NA         NA          NA
## std.dev                  NA      NA         NA       NA         NA          NA
## coef.var                 NA      NA         NA       NA         NA          NA
##              source_link       prop         ypos
## nbr.val               NA   2.000000    2.0000000
## nbr.null              NA   0.000000    0.0000000
## nbr.na                NA   0.000000    0.0000000
## min                   NA  27.831636   13.9158180
## max                   NA  72.168364   63.9158180
## range                 NA  44.336728   50.0000000
## sum                   NA 100.000000   77.8316359
## median                NA  50.000000   38.9158180
## mean                  NA  50.000000   38.9158180
## SE.mean               NA  22.168364   25.0000000
## CI.mean.0.95          NA 281.675773  317.6551184
## var                   NA 982.872731 1250.0000000
## std.dev               NA  31.350801   35.3553391
## coef.var              NA   0.627016    0.9085082
Caja y extensión
boxplot(Distance, horizontal=TRUE, col='steelblue')

library(tidyverse)
library(hrbrthemes)
library(viridis)

df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
  geom_boxplot(color="red", fill="orange", alpha=0.5) +
  theme_ipsum() +
  theme(legend.position="none", plot.title = element_text(size=11)) +
  ggtitle("Deslizamientos  ") +
  coord_flip() +
  xlab("") +
  ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Pichincha:

Deslizamientos de las ciudades de Pichincha
df_pi %>% 
  select(Country, State, City, Distance, date) 
##      Country     State       City Distance    date
## 11   Ecuador Pichincha Sangolquí 33.94603 6/27/07
## 87   Ecuador Pichincha      Quito  1.56942  4/1/08
## 238  Ecuador Pichincha   Machachi 26.18676 1/10/10
## 586  Ecuador Pichincha   Machachi 25.82923 2/14/11
## 697  Ecuador Pichincha      Quito  4.39517  5/2/11
## 984  Ecuador Pichincha    Cayambe 45.69792 5/31/13
## 1332 Ecuador Pichincha      Quito 26.72137 8/12/14
## 1333 Ecuador Pichincha      Quito 23.97854 8/12/14
## 1566 Ecuador Pichincha      Quito  4.25486 4/29/11
head(df_pi)
##       id    date  time America Country country_code     State population
## 11   107 6/27/07            SA Ecuador           EC Pichincha       5114
## 87   489  4/1/08            SA Ecuador           EC Pichincha    1399814
## 238 1393 1/10/10 Night      SA Ecuador           EC Pichincha      25742
## 586 3119 2/14/11            SA Ecuador           EC Pichincha      25742
## 697 3460  5/2/11 12:15      SA Ecuador           EC Pichincha    1399814
## 984 4890 5/31/13            SA Ecuador           EC Pichincha      26582
##           City Distance location_description latitude longitude
## 11  Sangolquí 33.94603                       -0.3560  -78.1480
## 87       Quito  1.56942                       -0.2196  -78.5347
## 238   Machachi 26.18676                       -0.4167  -78.7833
## 586   Machachi 25.82923                       -0.6273  -78.3664
## 697      Quito  4.39517           Urban area  -0.2648  -78.5074
## 984    Cayambe 45.69792                       -0.1193  -77.7668
##                                     geolocation hazard_type landslide_type
## 11  (-0.35599999999999998, -78.147999999999996)   Landslide      Landslide
## 87  (-0.21959999999999999, -78.534700000000001)   Landslide      Landslide
## 238 (-0.41670000000000001, -78.783299999999997)   Landslide       Mudslide
## 586 (-0.62729999999999997, -78.366399999999999)   Landslide       Mudslide
## 697 (-0.26479999999999998, -78.507400000000004)   Landslide       Mudslide
## 984              (-0.1193, -77.766800000000003)   Landslide      Landslide
##     landslide_size  trigger storm_name injuries fatalities
## 11          Medium Downpour                  NA         NA
## 87          Medium     Rain                  NA         NA
## 238         Medium     Rain                  NA          3
## 586         Medium Downpour                  NA          0
## 697         Medium  Unknown                   7          5
## 984         Medium Downpour                  NA          0
##                       source_name
## 11      Red Cross - Field reports
## 87                               
## 238                              
## 586                              
## 697 Latin American Herald Tribune
## 984              ens-newswire.com
##                                                                                                  source_link
## 11                                    https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 87            http://www.plenglish.com/Article.asp?ID=%7B39BA85A0-5900-4A4A-A329-818B3FC61EA1%7D&language=EN
## 238                                        http://www.laht.com/article.asp?ArticleId=349500&CategoryId=14089
## 586 http://www.theweathernetwork.com/news/storm_watch_stories3&stormfile=mudslide_in_ecuador_destroys_150211
## 697                                        http://www.laht.com/article.asp?ArticleId=393028&CategoryId=14089
## 984                        http://ens-newswire.com/2013/06/11/oil-spilled-into-ecuadors-rivers-reaches-peru/
ggplot(data=df_pi, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")

Gráfico circular
ggplot(data=df_pi, aes(x = "Pichincha", y = Distance, fill=City)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)

df_pi <- df_pi %>% 
  arrange(desc(City)) %>%
  mutate(prop = Distance / sum(df_pi$Distance) *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_pi, aes(x=State, y = prop, fill=City)) +
  geom_bar(stat="identity", width=1, color="black") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="none") +
  
  geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
  scale_fill_brewer(palette="Set8") 
## Warning in pal_name(palette, type): Unknown palette Set8