Datos de Deslizamientos de Centroamérica
library(readr)
library(knitr)
# Download the catalog CSV from GitHub into the data frame `df`.
# NOTE(review): further down, the column renamed to `America` prints as <NA>
# for the Dominican Republic rows. If the raw file stores that code as the
# literal text "NA", read.csv's default na.strings = "NA" turns it into a
# missing value — confirm against the raw file before relying on that column.
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Give short names to the six columns used in the rest of the analysis.
# Columns are addressed by position (2, 4, 5, 7, 9 and 10).
names(df)[c(2, 4, 5, 7, 9, 10)] <- c(
  "Date", "America", "Country", "State", "City", "Distance"
)
library(ggplot2)
library(dplyr)
# Stacked bars of Distance for each value of the America column (narrow bars).
ggplot(df, aes(x = America, y = Distance)) +
  geom_col(width = 0.5)
## Warning: Removed 1 rows containing missing values (position_stack).

# Same stacked bars as above, drawn horizontally.
ggplot(df, aes(x = America, y = Distance)) +
  geom_col() +
  coord_flip()
## Warning: Removed 1 rows containing missing values (position_stack).

República Dominicana
library(readr)
library(knitr)
# Rows recorded for the Dominican Republic. which() discards rows whose
# Country is NA, which is also what subset() does.
df_DO <- df[which(df$Country == "Dominican Republic"), ]
knitr::kable(head(df_DO))
# Show only the location, distance and date columns.
select(df_DO, Country, State, City, Distance, Date)
## Country State City Distance
## 15 Dominican Republic Distrito Nacional San Carlos 1.70298
## 52 Dominican Republic San Cristóbal Bajos de Haina 1.72138
## 58 Dominican Republic La Vega Río Verde Abajo 3.72637
## 64 Dominican Republic Santiago Santiago de los Caballeros 1.10868
## 132 Dominican Republic Hato Mayor Sabana de La Mar 0.75284
## 138 Dominican Republic Distrito Nacional La Agustina 5.71058
## 178 Dominican Republic Santiago Pedro García 4.86398
## 211 Dominican Republic Puerto Plata Altamira 0.88500
## 212 Dominican Republic Santiago Tamboril 4.31327
## 750 Dominican Republic Santiago San José de Las Matas 2.72462
## 774 Dominican Republic Distrito Nacional Santo Domingo 0.55721
## 833 Dominican Republic La Vega Constanza 0.52969
## 923 Dominican Republic Puerto Plata Puerto Plata 1.19636
## 1394 Dominican Republic Santo Domingo Santo Domingo Este 3.98059
## 1395 Dominican Republic Puerto Plata Luperón 1.54885
## Date
## 15 7/13/07
## 52 10/29/07
## 58 11/1/07
## 64 12/11/07
## 132 8/17/08
## 138 8/26/08
## 178 2/12/09
## 211 9/20/09
## 212 9/20/09
## 750 6/3/11
## 774 7/6/11
## 833 11/18/11
## 923 12/5/12
## 1394 8/3/14
## 1395 11/7/14
Distrito Nacional:
Deslizamientos de las ciudades de Distrito Nacional
library(readr)
library(knitr)
# Rows whose State is "Distrito Nacional". which() discards rows with a
# missing State, matching subset().
df_DN <- df[which(df$State == "Distrito Nacional"), ]
# Show only the location, distance and date columns.
select(df_DN, Country, State, City, Distance, Date)
## Country State City Distance Date
## 15 Dominican Republic Distrito Nacional San Carlos 1.70298 7/13/07
## 138 Dominican Republic Distrito Nacional La Agustina 5.71058 8/26/08
## 774 Dominican Republic Distrito Nacional Santo Domingo 0.55721 7/6/11
head(df_DN)
## id Date time America Country country_code
## 15 124 7/13/07 Night <NA> Dominican Republic DO
## 138 746 8/26/08 <NA> Dominican Republic DO
## 774 3736 7/6/11 <NA> Dominican Republic DO
## State population City Distance location_description
## 15 Distrito Nacional 13456 San Carlos 1.70298
## 138 Distrito Nacional 10457 La Agustina 5.71058
## 774 Distrito Nacional 2201941 Santo Domingo 0.55721
## latitude longitude geolocation hazard_type
## 15 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide
## 138 18.5500 -69.9200 (18.55, -69.92) Landslide
## 774 18.5000 -69.9833 (18.5, -69.9833) Landslide
## landslide_type landslide_size trigger storm_name injuries
## 15 Landslide Small Unknown NA
## 138 Mudslide Medium Tropical cyclone Hurricane Gustav NA
## 774 Landslide Medium Downpour NA
## fatalities source_name
## 15 NA Dominican Today
## 138 8
## 774 1
## source_link
## 15 http://www.dominicantoday.com/app/article.aspx?id=24682
## 138 http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
## 774 http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52
# One bar per city in Distrito Nacional, bar height = Distance.
ggplot(df_DN, aes(x = City, y = Distance)) +
  geom_col(color = "blue", fill = "white")

Gráfico circular
# Pie chart: a single stacked bar (one slice per city) bent into a circle
# with polar coordinates. Each slice is labelled with its Distance value,
# centred inside the slice.
ggplot(df_DN, aes(x = "Distrito Nacional", y = Distance, fill = City)) +
  geom_col(color = "white") +
  geom_text(
    aes(label = Distance),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 6
  ) +
  coord_polar(theta = "y") +
  labs(title = "Gráfico de Deslizamiento")

Diagrama de Pareto
- Ciudades con mayor deslizamiento
library(qcc)
## Warning: package 'qcc' was built under R version 4.1.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Named numeric vector: one Distance per city, labelled with the city name.
Distance <- setNames(df_DN$Distance, df_DN$City)
# Pareto chart of the distances, with the cumulative percentage axis marked
# every 10 % and one heat colour per bar.
pareto.chart(
  Distance,
  main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS",
  ylab = "Distance",
  ylab2 = "Porcentaje acumulado",
  cumperc = seq(0, 100, by = 10),
  col = heat.colors(length(Distance))
)

##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Agustina 5.710580 5.710580 71.644019 71.644019
## San Carlos 1.702980 7.413560 21.365314 93.009333
## Santo Domingo 0.557210 7.970770 6.990667 100.000000
Diagrama de tallo y hojas
# Stem-and-leaf display of the Distrito Nacional distances (default scale).
stem(df_DN[["Distance"]])
##
## The decimal point is at the |
##
## 0 | 67
## 2 |
## 4 | 7
head(df_DN)
## id Date time America Country country_code
## 15 124 7/13/07 Night <NA> Dominican Republic DO
## 138 746 8/26/08 <NA> Dominican Republic DO
## 774 3736 7/6/11 <NA> Dominican Republic DO
## State population City Distance location_description
## 15 Distrito Nacional 13456 San Carlos 1.70298
## 138 Distrito Nacional 10457 La Agustina 5.71058
## 774 Distrito Nacional 2201941 Santo Domingo 0.55721
## latitude longitude geolocation hazard_type
## 15 18.4757 -69.9140 (18.4757, -69.914000000000001) Landslide
## 138 18.5500 -69.9200 (18.55, -69.92) Landslide
## 774 18.5000 -69.9833 (18.5, -69.9833) Landslide
## landslide_type landslide_size trigger storm_name injuries
## 15 Landslide Small Unknown NA
## 138 Mudslide Medium Tropical cyclone Hurricane Gustav NA
## 774 Landslide Medium Downpour NA
## fatalities source_name
## 15 NA Dominican Today
## 138 8
## 774 1
## source_link
## 15 http://www.dominicantoday.com/app/article.aspx?id=24682
## 138 http://www.reuters.com/article/worldNews/idUSN2541891320080827?pageNumber=1&virtualBrandChannel=0
## 774 http://www.google.com/hostednews/ap/article/ALeqM5jKexw046ZtYaNWiMsOVfydb2LttA?docId=1ef9771813d64e66919fb5a9e3633a52
# Render the first rows of the Distrito Nacional subset as a table, then
# repeat the default stem-and-leaf display of its distances.
knitr::kable(head(df_DN))
stem(df_DN[["Distance"]])
##
## The decimal point is at the |
##
## 0 | 67
## 2 |
## 4 | 7
# Same stem-and-leaf display, stretched to twice the default length.
stem(df_DN[["Distance"]], scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 1 | 7
## 2 |
## 3 |
## 4 |
## 5 | 7
Gráfico de series temporales
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Wrap the Distrito Nacional distances in a ts object declared as monthly
# data (frequency = 12) starting in 2007.
# NOTE(review): ts() numbers observations consecutively, so the three values
# are labelled Jan, Feb and Mar 2007 in the printed series, whereas the Date
# column lists the events on 7/13/07, 8/26/08 and 7/6/11 — confirm that a
# regular monthly series is really what is intended here.
data_serie<- ts(df_DN$Distance, frequency=12, start=2007)
# Print the first observations of the series.
head(data_serie)
## Jan Feb Mar
## 2007 1.70298 5.71058 0.55721
# Line plot of the series with a title, axis labels and the black-and-white theme.
# NOTE(review): inside labs(), `colour =` names a colour legend; it does not
# set the colour of the line — confirm that this is the intent.
autoplot(data_serie) +
  labs(
    title = "Serie de Deslizamiento",
    x = "Años",
    y = "Distancia",
    colour = "#00a0dc"
  ) +
  theme_bw()

Tablas de frecuencia
library(questionr)
## Warning: package 'questionr' was built under R version 4.1.1
# Frequency table of the distances with cumulative percentages and a final
# "Total" row. The object shares its name with base::table(); calls such as
# table(x) still reach the function because R skips non-function objects
# when resolving a call.
table <- questionr::freq(
  Distance,
  cum = TRUE,
  sort = "dec",
  total = TRUE
)
knitr::kable(table)
| 0.55721 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 1.70298 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 5.71058 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
# Category labels (row names) and counts from the frequency table.
x <- row.names(table)
y <- table$n
# Drop the trailing "Total" row. head(v, -1) returns an empty vector when the
# table holds nothing but the Total row, whereas 1:(length(v) - 1) would
# become c(1, 0) and keep that row as if it were a category.
names <- head(x, -1)
freqs <- head(y, -1)
# Two-column frame (x = category, y = count) consumed by the bar chart below.
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.55721 |
1 |
| 1.70298 |
1 |
| 5.71058 |
1 |
library(ggplot2)
# Bar chart of how often each distance value occurs. The x axis holds the
# distance values, so it is labelled accordingly (the previous label,
# "Número de asistencias", described a different data set).
ggplot(data = df, aes(x = x, y = y)) +
  geom_bar(stat = "identity", color = "white", fill = "blue") +
  xlab("Distancia") +
  ylab("Frecuencia")

Tabla de frecuencias agrupada
# Number of classes from Sturges' formula, 1 + log2(n), kept unrounded
# together with its rounded-up and rounded-down versions.
n_sturges <- 1 + log(length(Distance)) / log(2)
n_sturgesc <- ceiling(n_sturges)
n_sturgesf <- floor(n_sturges)
# Take the rounded-down count when the rounded-up one is even, otherwise the
# rounded-up one.
n_clases <- if (n_sturgesc %% 2 == 0) n_sturgesf else n_sturgesc
# Range of the data and class width (rounded up to a whole number).
R <- diff(range(Distance))
w <- ceiling(R / n_clases)
# Class limits: start at the minimum and step by w until the maximum is covered.
bins <- seq(from = min(Distance), to = max(Distance) + w, by = w)
bins
## [1] 0.55721 2.55721 4.55721 6.55721
# Assign every distance to its class interval. include.lowest = TRUE closes
# the first interval on the left: the class limits start exactly at
# min(Distance), so with the default half-open (a, b] intervals the smallest
# observation would fall outside every class and become NA.
Edades <- cut(Distance, bins, include.lowest = TRUE)
# Tabulate the class intervals rather than the raw values so the table is
# really grouped. Naming the table dimension keeps the first column called
# "Distance", as the code that reads Freq_table$Distance expects.
Freq_table <- transform(
  table(Distance = Edades),
  Rel_Freq = prop.table(Freq),
  Cum_Freq = cumsum(Freq)
)
knitr::kable(Freq_table)
| 0.55721 |
1 |
0.3333333 |
1 |
| 1.70298 |
1 |
0.3333333 |
2 |
| 5.71058 |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ Distance: Factor w/ 3 levels "0.55721","1.70298",..: 1 2 3
## $ Freq : int 1 1 1
## $ Rel_Freq: num 0.333 0.333 0.333
## $ Cum_Freq: int 1 2 3
# Keep the category and count columns of the frequency table and rename them
# to x and y for plotting.
df <- setNames(Freq_table[c("Distance", "Freq")], c("x", "y"))
knitr::kable(df)
| 0.55721 |
1 |
| 1.70298 |
1 |
| 5.71058 |
1 |
library(ggplot2)
# Bar chart of the frequency table: one bar per category, height = count.
ggplot(df, aes(x = x, y = y)) +
  geom_col(color = "blue", fill = "green") +
  labs(x = "Rango de Distance", y = "Frecuencia")

Estadísticos
Personas Afectadas por Deslizamiento
# Minimum, quartiles, mean and maximum of the Distrito Nacional distances.
summary(df_DN[["Distance"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5572 1.1301 1.7030 2.6569 3.7068 5.7106
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
# Descriptive statistics for the numeric columns only. Text columns yield
# nothing but NA in stat.desc(), and a numeric column with no observed value
# (every entry NA) triggers the "no non-missing arguments to min/max" and
# "NaNs produced" warnings, so both kinds are left out.
stat.desc(
  Filter(function(col) is.numeric(col) && !all(is.na(col)), df_DN)
)
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id Date time America Country country_code State
## nbr.val 3.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 1.240000e+02 NA NA NA NA NA NA
## max 3.736000e+03 NA NA NA NA NA NA
## range 3.612000e+03 NA NA NA NA NA NA
## sum 4.606000e+03 NA NA NA NA NA NA
## median 7.460000e+02 NA NA NA NA NA NA
## mean 1.535333e+03 NA NA NA NA NA NA
## SE.mean 1.114887e+03 NA NA NA NA NA NA
## CI.mean.0.95 4.796973e+03 NA NA NA NA NA NA
## var 3.728921e+06 NA NA NA NA NA NA
## std.dev 1.931042e+03 NA NA NA NA NA NA
## coef.var 1.257734e+00 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 3.000000e+00 NA 3.000000 NA 3.000000000
## nbr.null 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na 0.000000e+00 NA 0.000000 NA 0.000000000
## min 1.045700e+04 NA 0.557210 NA 18.475700000
## max 2.201941e+06 NA 5.710580 NA 18.550000000
## range 2.191484e+06 NA 5.153370 NA 0.074300000
## sum 2.225854e+06 NA 7.970770 NA 55.525700000
## median 1.345600e+04 NA 1.702980 NA 18.500000000
## mean 7.419513e+05 NA 2.656923 NA 18.508566667
## SE.mean 7.299953e+05 NA 1.562243 NA 0.021872078
## CI.mean.0.95 3.140916e+06 NA 6.721790 NA 0.094107954
## var 1.598680e+12 NA 7.321812 NA 0.001435163
## std.dev 1.264389e+06 NA 2.705885 NA 0.037883550
## coef.var 1.704140e+00 NA 1.018428 NA 0.002046812
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -6.998330e+01 NA NA NA
## max -6.991400e+01 NA NA NA
## range 6.930000e-02 NA NA NA
## sum -2.098173e+02 NA NA NA
## median -6.992000e+01 NA NA NA
## mean -6.993910e+01 NA NA NA
## SE.mean 2.216777e-02 NA NA NA
## CI.mean.0.95 9.538021e-02 NA NA NA
## var 1.474230e-03 NA NA NA
## std.dev 3.839570e-02 NA NA NA
## coef.var -5.489877e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 0.000000 NA
## nbr.na NA NA NA 3 1.000000 NA
## min NA NA NA Inf 1.000000 NA
## max NA NA NA -Inf 8.000000 NA
## range NA NA NA -Inf 7.000000 NA
## sum NA NA NA 0 9.000000 NA
## median NA NA NA NA 4.500000 NA
## mean NA NA NA NaN 4.500000 NA
## SE.mean NA NA NA NA 3.500000 NA
## CI.mean.0.95 NA NA NA NaN 44.471717 NA
## var NA NA NA NA 24.500000 NA
## std.dev NA NA NA NA 4.949747 NA
## coef.var NA NA NA NA 1.099944 NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
Caja y extensión
# Horizontal base-graphics box plot of the distances.
boxplot(Distance, col = "steelblue", horizontal = TRUE)

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.3 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter() masks stats::filter()
## x pastecs::first() masks dplyr::first()
## x dplyr::lag() masks stats::lag()
## x pastecs::last() masks dplyr::last()
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.1.1
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.1
## Loading required package: viridisLite
# One-column frame holding the distances, for ggplot.
df <- data.frame(Distance = Distance)
# Single horizontal box plot with the hrbrthemes look, no legend, a small
# title and blank axis titles.
ggplot(df, aes(x = "", y = Distance)) +
  geom_boxplot(color = "red", fill = "orange", alpha = 0.5) +
  theme_ipsum() +
  theme(legend.position = "none", plot.title = element_text(size = 11)) +
  labs(title = "Deslizamientos", x = "", y = "") +
  coord_flip()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

```