En el siguiente informe, se analizarán los datos de deslizamiento para los países suramericanos, teniendo en cuenta estados y ciudades muestra . Dentro de este análisis estadístico se utilizaron diferentes tipos de métodos estadísticos tales como: Gráficos de barras y circulares, gráficos apilados, diagramas de tallo y hoja, gráficos de series temporales, tablas de frecuencias,datos estadísticos y diagramas de caja y extensión. Estos diferentes gráficos y diagramas, nos brindarán un panorama general pero en detalle, del desarrollo de los deslizamientos en cada país y la manera en cómo pueden llegar a afectar la población de dichas zonas. El objetivo de este informe es demostrar la utilidad de las estadísticas, para categorizar, organizar y presentar de manera clara datos; para que así estos tengan un significado entendible.
library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
library(ggplot2)
df_SA <- subset (df, America == "SA")
df_Ca <- subset (df, State == "Cauca")
df_C <- subset (df, State == "Córdoba")
df_Mag <- subset (df, State == "Magdalena")
df_Nar <- subset (df, State == "Nariño")
df_Meta <- subset (df, State == "Meta")
df_Put <- subset (df, State == "Putumayo")
df_San <- subset (df, State == "Santander")
df_Ant <- subset (df, State == "Antioquia")
df_Nor <- subset (df, State == "Norte de Santander")
df_CostaRica <- subset (df, Country == "Costa Rica")
df_Alajuela <- subset (df, State == "Alajuela")
df_Cartago <- subset (df, State == "Cartago")
df_Guanacaste <- subset (df, State == "Guanacaste")
df_Heredia <- subset (df, State == "Heredia")
df_Mir <- subset (df, State == "Miranda")
df_pi <- subset (df, State == "Pichincha")
df_za <- subset (df, State == "Zamora-Chinchipe")
df_Venezuela <- subset (df, Country == "Venezuela")
df_var <- subset (df, State == "Vargas")
df_DF <- subset (df, State == "Distrito Federal")
df_Peru <- subset (df, Country == "Peru")
df_Ansc <- subset (df, State == "Ancash")
df_La <- subset (df, State == "La Libertad")
knitr::kable(head(df_SA))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | 77 | 5/21/07 | SA | Colombia | CO | Risaralda | 440118 | Pereira | 0.62022 | 4.8081 | -75.6941 | (4.8080999999999996, -75.694100000000006) | Landslide | Mudslide | Large | Rain | NA | 13 | Reuters - AlertNet.org | http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23 | |||
9 | 105 | 6/27/07 | SA | Ecuador | EC | Zamora-Chinchipe | 15276 | Zamora | 0.47714 | -4.0650 | -78.9510 | (-4.0650000000000004, -78.950999999999993) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
10 | 106 | 6/27/07 | SA | Ecuador | EC | Loja | 117796 | Loja | 0.35649 | -3.9900 | -79.2050 | (-3.99, -79.204999999999998) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
11 | 107 | 6/27/07 | SA | Ecuador | EC | Pichincha | 5114 | Sangolquà | 33.94603 | -0.3560 | -78.1480 | (-0.35599999999999998, -78.147999999999996) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
49 | 307 | 10/13/07 | SA | Colombia | CO | Cauca | 9985 | Suárez | 8.46579 | 2.9437 | -76.7719 | (2.9437000000000002, -76.771900000000002) | Landslide | Mudslide | Large | Continuous rain | NA | 24 | Reuters - AlertNet.org | http://www.reuters.com/article/newsOne/idUSN1329387220071013 | |||
70 | 397 | 12/19/07 | SA | Colombia | CO | Tolima | 4892 | Ambalema | 6.96130 | 4.8470 | -74.7631 | (4.8470000000000004, -74.763099999999994) | Landslide | Landslide | Large | Rain | NA | NA | Indiamuslims.info | http://www.indiamuslims.info/news/2007/dec/20/eight_people_rescued_colombian_landslide.html |
head(df_SA %>%
select(Country, State, City, Distance, date))
## Country State City Distance date
## 8 Colombia Risaralda Pereira 0.62022 5/21/07
## 9 Ecuador Zamora-Chinchipe Zamora 0.47714 6/27/07
## 10 Ecuador Loja Loja 0.35649 6/27/07
## 11 Ecuador Pichincha Sangolquà 33.94603 6/27/07
## 49 Colombia Cauca Suárez 8.46579 10/13/07
## 70 Colombia Tolima Ambalema 6.96130 12/19/07
library(ggplot2)
library(readr)
library(knitr)
df_Col <- subset (df, Country == "Colombia")
knitr::kable(head(df_Col,n=4))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | 77 | 5/21/07 | SA | Colombia | CO | Risaralda | 440118 | Pereira | 0.62022 | 4.8081 | -75.6941 | (4.8080999999999996, -75.694100000000006) | Landslide | Mudslide | Large | Rain | NA | 13 | Reuters - AlertNet.org | http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23 | |||
49 | 307 | 10/13/07 | SA | Colombia | CO | Cauca | 9985 | Suárez | 8.46579 | 2.9437 | -76.7719 | (2.9437000000000002, -76.771900000000002) | Landslide | Mudslide | Large | Continuous rain | NA | 24 | Reuters - AlertNet.org | http://www.reuters.com/article/newsOne/idUSN1329387220071013 | |||
70 | 397 | 12/19/07 | SA | Colombia | CO | Tolima | 4892 | Ambalema | 6.96130 | 4.8470 | -74.7631 | (4.8470000000000004, -74.763099999999994) | Landslide | Landslide | Large | Rain | NA | NA | Indiamuslims.info | http://www.indiamuslims.info/news/2007/dec/20/eight_people_rescued_colombian_landslide.html | |||
103 | 562 | 5/31/08 | SA | Colombia | CO | Antioquia | 1999979 | MedellÃn | 5.12170 | 6.2746 | -75.6039 | (6.2746000000000004, -75.603899999999996) | Landslide | Complex | Large | Downpour | NA | 27 | http://english.people.com.cn/90001/90777/90852/6422291.html |
head(df_Col %>%
select(Country, State, City, Distance, date))
## Country State City Distance date
## 8 Colombia Risaralda Pereira 0.62022 5/21/07
## 49 Colombia Cauca Suárez 8.46579 10/13/07
## 70 Colombia Tolima Ambalema 6.96130 12/19/07
## 103 Colombia Antioquia MedellÃn 5.12170 5/31/08
## 110 Colombia Norte de Santander Hacarà 0.38844 6/24/08
## 117 Colombia Cundinamarca Quetame 8.58891 7/14/08
ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_Col, aes(fill=State, y=Distance, x="Colombia")) +
geom_bar(position="stack", stat="identity")
library(readr)
library(knitr)
df_ri <- subset (df, State == "Risaralda")
df_ri %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 8 Colombia Risaralda Pereira 0.62022 5/21/07
## 454 Colombia Risaralda ApÃa 8.18229 9/27/10
## 517 Colombia Risaralda QuinchÃa 0.11421 11/13/10
## 855 Colombia Risaralda Dos Quebradas 0.74201 12/23/11
head(df_ri)
## id date time America Country country_code State population
## 8 77 5/21/07 SA Colombia CO Risaralda 440118
## 454 2507 9/27/10 3:00:00 SA Colombia CO Risaralda 6940
## 517 2720 11/13/10 SA Colombia CO Risaralda 10895
## 855 4104 12/23/11 SA Colombia CO Risaralda 179583
## City Distance location_description latitude longitude
## 8 Pereira 0.62022 4.8081 -75.6941
## 454 ApÃa 8.18229 5.1749 -75.9712
## 517 QuinchÃa 0.11421 5.3391 -75.7311
## 855 Dos Quebradas 0.74201 4.8349 -75.6621
## geolocation hazard_type landslide_type
## 8 (4.8080999999999996, -75.694100000000006) Landslide Mudslide
## 454 (5.1749000000000001, -75.971199999999996) Landslide Landslide
## 517 (5.3391000000000002, -75.731099999999998) Landslide Mudslide
## 855 (4.8349000000000002, -75.662099999999995) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 8 Large Rain NA 13
## 454 Medium Downpour NA 4
## 517 Medium Downpour NA 4
## 855 Medium Downpour NA 0
## source_name
## 8 Reuters - AlertNet.org
## 454
## 517
## 855
## source_link
## 8 http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
## 454 http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393
## 517 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
## 855 http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224
ggplot(data=df_ri, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_ri, aes(x = "Risaralda", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_ri <- df_ri %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_ri$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)
df_ri <- df_ri %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_ri$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
ggplot(df_ri, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
## Warning: package 'qcc' was built under R version 4.1.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
Distance <- df_ri$Distance
names(Distance) <- df_ri$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## ApÃa 8.182290 8.182290 84.713932 84.713932
## Dos Quebradas 0.742010 8.924300 7.682273 92.396205
## Pereira 0.620220 9.544520 6.421341 98.817546
## QuinchÃa 0.114210 9.658730 1.182454 100.000000
stem(df_ri$"Distance")
##
## The decimal point is at the |
##
## 0 | 167
## 2 |
## 4 |
## 6 |
## 8 | 2
head(df_ri)
## id date time America Country country_code State population
## 1 2720 11/13/10 SA Colombia CO Risaralda 10895
## 2 77 5/21/07 SA Colombia CO Risaralda 440118
## 3 4104 12/23/11 SA Colombia CO Risaralda 179583
## 4 2507 9/27/10 3:00:00 SA Colombia CO Risaralda 6940
## City Distance location_description latitude longitude
## 1 QuinchÃa 0.11421 5.3391 -75.7311
## 2 Pereira 0.62022 4.8081 -75.6941
## 3 Dos Quebradas 0.74201 4.8349 -75.6621
## 4 ApÃa 8.18229 5.1749 -75.9712
## geolocation hazard_type landslide_type
## 1 (5.3391000000000002, -75.731099999999998) Landslide Mudslide
## 2 (4.8080999999999996, -75.694100000000006) Landslide Mudslide
## 3 (4.8349000000000002, -75.662099999999995) Landslide Landslide
## 4 (5.1749000000000001, -75.971199999999996) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 4
## 2 Large Rain NA 13 Reuters - AlertNet.org
## 3 Medium Downpour NA 0
## 4 Medium Downpour NA 4
## source_link
## 1 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
## 2 http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23
## 3 http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224
## 4 http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393
## prop ypos
## 1 1.182454 0.5912268
## 2 6.421341 4.3931241
## 3 7.682273 11.4449312
## 4 84.713932 57.6430338
knitr::kable(head(df_ri))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2720 | 11/13/10 | SA | Colombia | CO | Risaralda | 10895 | QuinchÃa | 0.11421 | 5.3391 | -75.7311 | (5.3391000000000002, -75.731099999999998) | Landslide | Mudslide | Medium | Downpour | NA | 4 | http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393 | 1.182454 | 0.5912268 | ||||
77 | 5/21/07 | SA | Colombia | CO | Risaralda | 440118 | Pereira | 0.62022 | 4.8081 | -75.6941 | (4.8080999999999996, -75.694100000000006) | Landslide | Mudslide | Large | Rain | NA | 13 | Reuters - AlertNet.org | http://www.reuters.com/news/video/videoStory?videoId=53594&feedType=RSS&rpc=23 | 6.421341 | 4.3931241 | |||
4104 | 12/23/11 | SA | Colombia | CO | Risaralda | 179583 | Dos Quebradas | 0.74201 | 4.8349 | -75.6621 | (4.8349000000000002, -75.662099999999995) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://www.reuters.com/article/2011/12/24/us-colombia-pipeline-idUSTRE7BN01O20111224 | 7.682273 | 11.4449312 | ||||
2507 | 9/27/10 | 3:00:00 | SA | Colombia | CO | Risaralda | 6940 | ApÃa | 8.18229 | 5.1749 | -75.9712 | (5.1749000000000001, -75.971199999999996) | Landslide | Landslide | Medium | Downpour | NA | 4 | http://www.laht.com/article.asp?ArticleId=368646&CategoryId=12393 | 84.713932 | 57.6430338 |
stem(df_ri$"Distance")
##
## The decimal point is at the |
##
## 0 | 167
## 2 |
## 4 |
## 6 |
## 8 | 2
stem(df_ri$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 167
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 | 2
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
data_serie<- ts(df_ri$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr
## 2007 0.11421 0.62022 0.74201 8.18229
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
## Warning: package 'questionr' was built under R version 4.1.1
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.11421 | 1 | 25 | 25 | 25 | 25 |
0.62022 | 1 | 25 | 25 | 50 | 50 |
0.74201 | 1 | 25 | 25 | 75 | 75 |
8.18229 | 1 | 25 | 25 | 100 | 100 |
Total | 4 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.11421 | 1 |
0.62022 | 1 |
0.74201 | 1 |
8.18229 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.11421 3.11421 6.11421 9.11421
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.11421 | 1 | 0.25 | 1 |
0.62022 | 1 | 0.25 | 2 |
0.74201 | 1 | 0.25 | 3 |
8.18229 | 1 | 0.25 | 4 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ Distance: Factor w/ 4 levels "0.11421","0.62022",..: 1 2 3 4
## $ Freq : int 1 1 1 1
## $ Rel_Freq: num 0.25 0.25 0.25 0.25
## $ Cum_Freq: int 1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.11421 | 1 |
0.62022 | 1 |
0.74201 | 1 |
8.18229 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
###### Estadísticos - Personas Afectadas por Deslizamiento
summary(df_ri$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1142 0.4937 0.6811 2.4147 2.6021 8.1823
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
stat.desc(df_ri)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 4.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 7.700000e+01 NA NA NA NA NA NA
## max 4.104000e+03 NA NA NA NA NA NA
## range 4.027000e+03 NA NA NA NA NA NA
## sum 9.408000e+03 NA NA NA NA NA NA
## median 2.613500e+03 NA NA NA NA NA NA
## mean 2.352000e+03 NA NA NA NA NA NA
## SE.mean 8.368880e+02 NA NA NA NA NA NA
## CI.mean.0.95 2.663351e+03 NA NA NA NA NA NA
## var 2.801526e+06 NA NA NA NA NA NA
## std.dev 1.673776e+03 NA NA NA NA NA NA
## coef.var 7.116394e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 4.000000e+00 NA 4.000000 NA 4.00000000
## nbr.null 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.000000 NA 0.00000000
## min 6.940000e+03 NA 0.114210 NA 4.80810000
## max 4.401180e+05 NA 8.182290 NA 5.33910000
## range 4.331780e+05 NA 8.068080 NA 0.53100000
## sum 6.375360e+05 NA 9.658730 NA 20.15700000
## median 9.523900e+04 NA 0.681115 NA 5.00490000
## mean 1.593840e+05 NA 2.414683 NA 5.03925000
## SE.mean 1.018609e+05 NA 1.927334 NA 0.13022423
## CI.mean.0.95 3.241669e+05 NA 6.133637 NA 0.41443161
## var 4.150258e+10 NA 14.858466 NA 0.06783340
## std.dev 2.037218e+05 NA 3.854668 NA 0.26044845
## coef.var 1.278182e+00 NA 1.596346 NA 0.05168397
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.597120e+01 NA NA NA
## max -7.566210e+01 NA NA NA
## range 3.091000e-01 NA NA NA
## sum -3.030585e+02 NA NA NA
## median -7.571260e+01 NA NA NA
## mean -7.576462e+01 NA NA NA
## SE.mean 7.028650e-02 NA NA NA
## CI.mean.0.95 2.236830e-01 NA NA NA
## var 1.976077e-02 NA NA NA
## std.dev 1.405730e-01 NA NA NA
## coef.var -1.855391e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 4 0.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 13.000000 NA
## range NA NA NA -Inf 13.000000 NA
## sum NA NA NA 0 21.000000 NA
## median NA NA NA NA 4.000000 NA
## mean NA NA NA NaN 5.250000 NA
## SE.mean NA NA NA NA 2.750000 NA
## CI.mean.0.95 NA NA NA NaN 8.751727 NA
## var NA NA NA NA 30.250000 NA
## std.dev NA NA NA NA 5.500000 NA
## coef.var NA NA NA NA 1.047619 NA
## source_link prop ypos
## nbr.val NA 4.000000 4.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 1.182454 0.5912268
## max NA 84.713932 57.6430338
## range NA 83.531479 57.0518070
## sum NA 100.000000 74.0723159
## median NA 7.051807 7.9190277
## mean NA 25.000000 18.5180790
## SE.mean NA 19.954322 13.2340404
## CI.mean.0.95 NA 63.503557 42.1166228
## var NA 1592.699824 700.5592963
## std.dev NA 39.908643 26.4680807
## coef.var NA 1.596346 1.4293103
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.3 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v purrr 0.3.4
## Warning: package 'tidyr' was built under R version 4.1.1
## Warning: package 'forcats' was built under R version 4.1.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x scales::col_factor() masks readr::col_factor()
## x purrr::discard() masks scales::discard()
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter() masks stats::filter()
## x pastecs::first() masks dplyr::first()
## x dplyr::lag() masks stats::lag()
## x pastecs::last() masks dplyr::last()
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.1.1
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.1
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Ant %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 103 Colombia Antioquia MedellÃn 5.12170 5/31/08
## 119 Colombia Antioquia Briceño 6.44532 7/17/08
## 133 Colombia Antioquia Caldas 1.27637 8/18/08
## 163 Colombia Antioquia MedellÃn 4.09028 11/16/08
## 455 Colombia Antioquia Giraldo 2.55282 9/27/10
## 518 Colombia Antioquia MedellÃn 2.18776 11/13/10
## 528 Colombia Antioquia Cáceres 4.29197 11/20/10
## 553 Colombia Antioquia Bello 2.04898 12/5/10
## 629 Colombia Antioquia MedellÃn 4.19867 4/10/11
## 640 Colombia Antioquia Santa Rosa de Osos 18.91189 4/13/11
## 658 Colombia Antioquia Municipio de Copacabana 7.98838 4/22/11
## 659 Colombia Antioquia Frontino 5.06960 4/23/11
## 821 Colombia Antioquia MedellÃn 1.73101 9/27/11
## 826 Colombia Antioquia Ciudad BolÃvar 24.48479 10/15/11
## 859 Colombia Antioquia Envigado 2.07081 12/30/11
## 876 Colombia Antioquia Amalfi 11.11685 4/12/12
## 1362 Colombia Antioquia Barbosa 7.78677 11/14/14
## 1457 Colombia Antioquia Salgar 3.09014 5/18/15
## 1669 Colombia Antioquia Mutatá 15.04256 5/9/15
head(df_Ant)
## id date time America Country country_code State population
## 103 562 5/31/08 SA Colombia CO Antioquia 1999979
## 119 650 7/17/08 SA Colombia CO Antioquia 2214
## 133 728 8/18/08 SA Colombia CO Antioquia 65565
## 163 889 11/16/08 SA Colombia CO Antioquia 1999979
## 455 2508 9/27/10 SA Colombia CO Antioquia 1464
## 518 2721 11/13/10 SA Colombia CO Antioquia 1999979
## City Distance location_description latitude longitude
## 103 MedellÃn 5.12170 6.2746 -75.6039
## 119 Briceño 6.44532 Burned area 7.1600 -75.5200
## 133 Caldas 1.27637 6.0800 -75.6390
## 163 MedellÃn 4.09028 6.2170 -75.5760
## 455 Giraldo 2.55282 6.7060 -75.9917
## 518 MedellÃn 2.18776 6.2674 -75.5758
## geolocation hazard_type landslide_type
## 103 (6.2746000000000004, -75.603899999999996) Landslide Complex
## 119 (7.16, -75.52) Landslide Landslide
## 133 (6.08, -75.638999999999996) Landslide Landslide
## 163 (6.2169999999999996, -75.575999999999993) Landslide Mudslide
## 455 (6.7060000000000004, -75.991699999999994) Landslide Landslide
## 518 (6.2674000000000003, -75.575800000000001) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 103 Large Downpour NA 27
## 119 Medium Rain NA 8
## 133 Medium Rain NA NA
## 163 Large Downpour NA 8
## 455 Large Downpour NA 9
## 518 Medium Downpour NA 2
## source_link
## 103 http://english.people.com.cn/90001/90777/90852/6422291.html
## 119 http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/
## 133 http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/
## 163 http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm
## 455 http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related
## 518 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
ggplot(data=df_Ant, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Ant, aes(x = "Antioquia", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Ant,aes(x="Antioquia",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=6)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Ant$Distance
names(Distance) <- df_Ant$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Ciudad BolÃvar 24.4847900 24.4847900 18.9062000 18.9062000
## Santa Rosa de Osos 18.9118900 43.3966800 14.6030239 33.5092239
## Mutatá 15.0425600 58.4392400 11.6152782 45.1245021
## Amalfi 11.1168500 69.5560900 8.5839980 53.7085001
## Municipio de Copacabana 7.9883800 77.5444700 6.1683155 59.8768156
## Barbosa 7.7867700 85.3312400 6.0126401 65.8894557
## Briceño 6.4453200 91.7765600 4.9768247 70.8662805
## MedellÃn 5.1217000 96.8982600 3.9547770 74.8210575
## Frontino 5.0696000 101.9678600 3.9145474 78.7356049
## Cáceres 4.2919700 106.2598300 3.3140919 82.0496967
## MedellÃn 4.1986700 110.4585000 3.2420492 85.2917460
## MedellÃn 4.0902800 114.5487800 3.1583547 88.4501007
## Salgar 3.0901400 117.6389200 2.3860856 90.8361863
## Giraldo 2.5528200 120.1917400 1.9711880 92.8073743
## MedellÃn 2.1877600 122.3795000 1.6893030 94.4966773
## Envigado 2.0708100 124.4503100 1.5989987 96.0956760
## Bello 2.0489800 126.4992900 1.5821424 97.6778184
## MedellÃn 1.7310100 128.2303000 1.3366184 99.0144369
## Caldas 1.2763700 129.5066700 0.9855631 100.0000000
stem(df_Ant$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1222233444
## 0 | 55688
## 1 | 1
## 1 | 59
## 2 | 4
head(df_Ant)
## id date time America Country country_code State population
## 103 562 5/31/08 SA Colombia CO Antioquia 1999979
## 119 650 7/17/08 SA Colombia CO Antioquia 2214
## 133 728 8/18/08 SA Colombia CO Antioquia 65565
## 163 889 11/16/08 SA Colombia CO Antioquia 1999979
## 455 2508 9/27/10 SA Colombia CO Antioquia 1464
## 518 2721 11/13/10 SA Colombia CO Antioquia 1999979
## City Distance location_description latitude longitude
## 103 MedellÃn 5.12170 6.2746 -75.6039
## 119 Briceño 6.44532 Burned area 7.1600 -75.5200
## 133 Caldas 1.27637 6.0800 -75.6390
## 163 MedellÃn 4.09028 6.2170 -75.5760
## 455 Giraldo 2.55282 6.7060 -75.9917
## 518 MedellÃn 2.18776 6.2674 -75.5758
## geolocation hazard_type landslide_type
## 103 (6.2746000000000004, -75.603899999999996) Landslide Complex
## 119 (7.16, -75.52) Landslide Landslide
## 133 (6.08, -75.638999999999996) Landslide Landslide
## 163 (6.2169999999999996, -75.575999999999993) Landslide Mudslide
## 455 (6.7060000000000004, -75.991699999999994) Landslide Landslide
## 518 (6.2674000000000003, -75.575800000000001) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 103 Large Downpour NA 27
## 119 Medium Rain NA 8
## 133 Medium Rain NA NA
## 163 Large Downpour NA 8
## 455 Large Downpour NA 9
## 518 Medium Downpour NA 2
## source_link
## 103 http://english.people.com.cn/90001/90777/90852/6422291.html
## 119 http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/
## 133 http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/
## 163 http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm
## 455 http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related
## 518 http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393
knitr::kable(head(df_Ant))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
103 | 562 | 5/31/08 | SA | Colombia | CO | Antioquia | 1999979 | MedellÃn | 5.12170 | 6.2746 | -75.6039 | (6.2746000000000004, -75.603899999999996) | Landslide | Complex | Large | Downpour | NA | 27 | http://english.people.com.cn/90001/90777/90852/6422291.html | ||||
119 | 650 | 7/17/08 | SA | Colombia | CO | Antioquia | 2214 | Briceño | 6.44532 | Burned area | 7.1600 | -75.5200 | (7.16, -75.52) | Landslide | Landslide | Medium | Rain | NA | 8 | http://colombiareports.com/2008/07/18/eight-people-die-in-antioquia-landslides/ | |||
133 | 728 | 8/18/08 | SA | Colombia | CO | Antioquia | 65565 | Caldas | 1.27637 | 6.0800 | -75.6390 | (6.08, -75.638999999999996) | Landslide | Landslide | Medium | Rain | NA | NA | http://colombiareports.com/2008/08/20/landslides-and-floods-hit-antioquia-and-cordoba/ | ||||
163 | 889 | 11/16/08 | SA | Colombia | CO | Antioquia | 1999979 | MedellÃn | 4.09028 | 6.2170 | -75.5760 | (6.2169999999999996, -75.575999999999993) | Landslide | Mudslide | Large | Downpour | NA | 8 | http://www.chinapost.com.tw/international/americas/2008/11/19/183837/Eight-corpses.htm | ||||
455 | 2508 | 9/27/10 | SA | Colombia | CO | Antioquia | 1464 | Giraldo | 2.55282 | 6.7060 | -75.9917 | (6.7060000000000004, -75.991699999999994) | Landslide | Landslide | Large | Downpour | NA | 9 | http://www.youtube.com/watch?v=Atf9gmvLFWw&feature=related | ||||
518 | 2721 | 11/13/10 | SA | Colombia | CO | Antioquia | 1999979 | MedellÃn | 2.18776 | 6.2674 | -75.5758 | (6.2674000000000003, -75.575800000000001) | Landslide | Mudslide | Medium | Downpour | NA | 2 | http://www.laht.com/article.asp?ArticleId=377428&CategoryId=12393 |
stem(df_Ant$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1222233444
## 0 | 55688
## 1 | 1
## 1 | 59
## 2 | 4
stem(df_Ant$"Distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1222233444
## 0 | 55688
## 1 | 1
## 1 | 59
## 2 | 4
library(forecast)
data_serie<- ts(df_Ant$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 5.12170 6.44532 1.27637 4.09028 2.55282 2.18776
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
1.27637 | 1 | 5.3 | 5.3 | 5.3 | 5.3 |
1.73101 | 1 | 5.3 | 5.3 | 10.5 | 10.5 |
2.04898 | 1 | 5.3 | 5.3 | 15.8 | 15.8 |
2.07081 | 1 | 5.3 | 5.3 | 21.1 | 21.1 |
2.18776 | 1 | 5.3 | 5.3 | 26.3 | 26.3 |
2.55282 | 1 | 5.3 | 5.3 | 31.6 | 31.6 |
3.09014 | 1 | 5.3 | 5.3 | 36.8 | 36.8 |
4.09028 | 1 | 5.3 | 5.3 | 42.1 | 42.1 |
4.19867 | 1 | 5.3 | 5.3 | 47.4 | 47.4 |
4.29197 | 1 | 5.3 | 5.3 | 52.6 | 52.6 |
5.0696 | 1 | 5.3 | 5.3 | 57.9 | 57.9 |
5.1217 | 1 | 5.3 | 5.3 | 63.2 | 63.2 |
6.44532 | 1 | 5.3 | 5.3 | 68.4 | 68.4 |
7.78677 | 1 | 5.3 | 5.3 | 73.7 | 73.7 |
7.98838 | 1 | 5.3 | 5.3 | 78.9 | 78.9 |
11.11685 | 1 | 5.3 | 5.3 | 84.2 | 84.2 |
15.04256 | 1 | 5.3 | 5.3 | 89.5 | 89.5 |
18.91189 | 1 | 5.3 | 5.3 | 94.7 | 94.7 |
24.48479 | 1 | 5.3 | 5.3 | 100.0 | 100.0 |
Total | 19 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 20 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 ...
## $ val% : num 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 5.3 ...
## $ %cum : num 5.3 10.5 15.8 21.1 26.3 31.6 36.8 42.1 47.4 52.6 ...
## $ val%cum: num 5.3 10.5 15.8 21.1 26.3 31.6 36.8 42.1 47.4 52.6 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
1.27637 | 1 |
1.73101 | 1 |
2.04898 | 1 |
2.07081 | 1 |
2.18776 | 1 |
2.55282 | 1 |
3.09014 | 1 |
4.09028 | 1 |
4.19867 | 1 |
4.29197 | 1 |
5.0696 | 1 |
5.1217 | 1 |
6.44532 | 1 |
7.78677 | 1 |
7.98838 | 1 |
11.11685 | 1 |
15.04256 | 1 |
18.91189 | 1 |
24.48479 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 1.27637 6.27637 11.27637 16.27637 21.27637 26.27637
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
1.27637 | 1 | 0.0526316 | 1 |
1.73101 | 1 | 0.0526316 | 2 |
2.04898 | 1 | 0.0526316 | 3 |
2.07081 | 1 | 0.0526316 | 4 |
2.18776 | 1 | 0.0526316 | 5 |
2.55282 | 1 | 0.0526316 | 6 |
3.09014 | 1 | 0.0526316 | 7 |
4.09028 | 1 | 0.0526316 | 8 |
4.19867 | 1 | 0.0526316 | 9 |
4.29197 | 1 | 0.0526316 | 10 |
5.0696 | 1 | 0.0526316 | 11 |
5.1217 | 1 | 0.0526316 | 12 |
6.44532 | 1 | 0.0526316 | 13 |
7.78677 | 1 | 0.0526316 | 14 |
7.98838 | 1 | 0.0526316 | 15 |
11.11685 | 1 | 0.0526316 | 16 |
15.04256 | 1 | 0.0526316 | 17 |
18.91189 | 1 | 0.0526316 | 18 |
24.48479 | 1 | 0.0526316 | 19 |
str(Freq_table)
## 'data.frame': 19 obs. of 4 variables:
## $ Distance: Factor w/ 19 levels "1.27637","1.73101",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Freq : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Rel_Freq: num 0.0526 0.0526 0.0526 0.0526 0.0526 ...
## $ Cum_Freq: int 1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
1.27637 | 1 |
1.73101 | 1 |
2.04898 | 1 |
2.07081 | 1 |
2.18776 | 1 |
2.55282 | 1 |
3.09014 | 1 |
4.09028 | 1 |
4.19867 | 1 |
4.29197 | 1 |
5.0696 | 1 |
5.1217 | 1 |
6.44532 | 1 |
7.78677 | 1 |
7.98838 | 1 |
11.11685 | 1 |
15.04256 | 1 |
18.91189 | 1 |
24.48479 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Ant$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.276 2.370 4.292 6.816 7.888 24.485
library(pastecs)
stat.desc(df_Ant)
## id date time America Country country_code State
## nbr.val 1.900000e+01 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 5.620000e+02 NA NA NA NA NA NA
## max 7.513000e+03 NA NA NA NA NA NA
## range 6.951000e+03 NA NA NA NA NA NA
## sum 6.463000e+04 NA NA NA NA NA NA
## median 3.362000e+03 NA NA NA NA NA NA
## mean 3.401579e+03 NA NA NA NA NA NA
## SE.mean 4.626486e+02 NA NA NA NA NA NA
## CI.mean.0.95 9.719887e+02 NA NA NA NA NA NA
## var 4.066831e+06 NA NA NA NA NA NA
## std.dev 2.016639e+03 NA NA NA NA NA NA
## coef.var 5.928537e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 1.900000e+01 NA 19.0000000 NA 19.0000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.0000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.0000000
## min 1.464000e+03 NA 1.2763700 NA 5.5139000
## max 1.999979e+06 NA 24.4847900 NA 7.6167000
## range 1.998515e+06 NA 23.2084200 NA 2.1028000
## sum 1.075194e+07 NA 129.5066700 NA 122.7047000
## median 1.670700e+04 NA 4.2919700 NA 6.3236000
## mean 5.658915e+05 NA 6.8161405 NA 6.4581421
## SE.mean 2.030740e+05 NA 1.4599518 NA 0.1145878
## CI.mean.0.95 4.266426e+05 NA 3.0672450 NA 0.2407400
## var 7.835417e+11 NA 40.4977278 NA 0.2494769
## std.dev 8.851789e+05 NA 6.3637825 NA 0.4994766
## coef.var 1.564220e+00 NA 0.9336343 NA 0.0773406
## longitude geolocation hazard_type landslide_type
## nbr.val 1.900000e+01 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.640890e+01 NA NA NA
## max -7.494110e+01 NA NA NA
## range 1.467800e+00 NA NA NA
## sum -1.436556e+03 NA NA NA
## median -7.557580e+01 NA NA NA
## mean -7.560821e+01 NA NA NA
## SE.mean 9.157725e-02 NA NA NA
## CI.mean.0.95 1.923967e-01 NA NA NA
## var 1.593415e-01 NA NA NA
## std.dev 3.991760e-01 NA NA NA
## coef.var -5.279532e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 4.000000 18.000000
## nbr.null NA NA NA 1.000000 3.000000
## nbr.na NA NA NA 15.000000 1.000000
## min NA NA NA 0.000000 0.000000
## max NA NA NA 40.000000 92.000000
## range NA NA NA 40.000000 92.000000
## sum NA NA NA 42.000000 266.000000
## median NA NA NA 1.000000 4.000000
## mean NA NA NA 10.500000 14.777778
## SE.mean NA NA NA 9.836158 6.733758
## CI.mean.0.95 NA NA NA 31.303044 14.206988
## var NA NA NA 387.000000 816.183007
## std.dev NA NA NA 19.672316 28.568917
## coef.var NA NA NA 1.873554 1.933235
## source_name source_link
## nbr.val NA NA
## nbr.null NA NA
## nbr.na NA NA
## min NA NA
## max NA NA
## range NA NA
## sum NA NA
## median NA NA
## mean NA NA
## SE.mean NA NA
## CI.mean.0.95 NA NA
## var NA NA
## std.dev NA NA
## coef.var NA NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Ca %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 49 Colombia Cauca Suárez 8.46579 10/13/07
## 166 Colombia Cauca Miranda 0.70558 11/24/08
## 850 Colombia Cauca La Cruz 0.64469 12/13/11
## 1344 Colombia Cauca Almaguer 17.31514 12/21/14
## 1471 Colombia Cauca Jambaló 8.81287 6/13/15
head(df_Ca)
## id date time America Country country_code State population
## 49 307 10/13/07 SA Colombia CO Cauca 9985
## 166 904 11/24/08 SA Colombia CO Cauca 13223
## 850 4091 12/13/11 SA Colombia CO Cauca 8751
## 1344 6579 12/21/14 SA Colombia CO Cauca 3120
## 1471 6986 6/13/15 13:00 SA Colombia CO Cauca 1972
## City Distance location_description latitude longitude
## 49 Suárez 8.46579 2.9437 -76.7719
## 166 Miranda 0.70558 3.2468 -76.2226
## 850 La Cruz 0.64469 1.6056 -76.9742
## 1344 Almaguer 17.31514 Unknown 1.8362 -76.9857
## 1471 Jambaló 8.81287 Above road 2.8696 -76.3034
## geolocation hazard_type landslide_type
## 49 (2.9437000000000002, -76.771900000000002) Landslide Mudslide
## 166 (3.2467999999999999, -76.2226) Landslide Mudslide
## 850 (1.6055999999999999, -76.974199999999996) Landslide Landslide
## 1344 (1.8362000000000001, -76.985699999999994) Landslide Mudslide
## 1471 (2.8696000000000002, -76.303399999999996) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 49 Large Continuous rain NA 24
## 166 Medium Downpour NA 10
## 850 Medium Downpour NA 1
## 1344 Large Rain 0 6
## 1471 Medium Rain 0 0
## source_name
## 49 Reuters - AlertNet.org
## 166
## 850
## 1344 TeleSUR
## 1471 RSOE EDIS
## source_link
## 49 http://www.reuters.com/article/newsOne/idUSN1329387220071013
## 166 http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393
## 850 http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4
## 1344 http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html
## 1471 http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL
ggplot(data=df_Ca, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Ca, aes(x = "Cauca", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Ca <- df_Ca %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Ca$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Ca, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Ca$Distance
names(Distance) <- df_Ca$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Almaguer 17.315140 17.315140 48.172452 48.172452
## Jambaló 8.812870 26.128010 24.518286 72.690739
## Suárez 8.465790 34.593800 23.552675 96.243414
## Miranda 0.705580 35.299380 1.962994 98.206408
## La Cruz 0.644690 35.944070 1.793592 100.000000
stem(df_Ca$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 11
## 0 | 89
## 1 |
## 1 | 7
head(df_Ca)
## id date time America Country country_code State population City
## 1 307 10/13/07 SA Colombia CO Cauca 9985 Suárez
## 2 904 11/24/08 SA Colombia CO Cauca 13223 Miranda
## 3 4091 12/13/11 SA Colombia CO Cauca 8751 La Cruz
## 4 6986 6/13/15 13:00 SA Colombia CO Cauca 1972 Jambaló
## 5 6579 12/21/14 SA Colombia CO Cauca 3120 Almaguer
## Distance location_description latitude longitude
## 1 8.46579 2.9437 -76.7719
## 2 0.70558 3.2468 -76.2226
## 3 0.64469 1.6056 -76.9742
## 4 8.81287 Above road 2.8696 -76.3034
## 5 17.31514 Unknown 1.8362 -76.9857
## geolocation hazard_type landslide_type
## 1 (2.9437000000000002, -76.771900000000002) Landslide Mudslide
## 2 (3.2467999999999999, -76.2226) Landslide Mudslide
## 3 (1.6055999999999999, -76.974199999999996) Landslide Landslide
## 4 (2.8696000000000002, -76.303399999999996) Landslide Landslide
## 5 (1.8362000000000001, -76.985699999999994) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities
## 1 Large Continuous rain NA 24
## 2 Medium Downpour NA 10
## 3 Medium Downpour NA 1
## 4 Medium Rain 0 0
## 5 Large Rain 0 6
## source_name
## 1 Reuters - AlertNet.org
## 2
## 3
## 4 RSOE EDIS
## 5 TeleSUR
## source_link
## 1 http://www.reuters.com/article/newsOne/idUSN1329387220071013
## 2 http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393
## 3 http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4
## 4 http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL
## 5 http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html
## prop ypos
## 1 23.552675 11.77634
## 2 1.962994 24.53417
## 3 1.793592 26.41247
## 4 24.518286 39.56840
## 5 48.172452 75.91377
knitr::kable(head(df_Ca))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
307 | 10/13/07 | SA | Colombia | CO | Cauca | 9985 | Suárez | 8.46579 | 2.9437 | -76.7719 | (2.9437000000000002, -76.771900000000002) | Landslide | Mudslide | Large | Continuous rain | NA | 24 | Reuters - AlertNet.org | http://www.reuters.com/article/newsOne/idUSN1329387220071013 | 23.552675 | 11.77634 | |||
904 | 11/24/08 | SA | Colombia | CO | Cauca | 13223 | Miranda | 0.70558 | 3.2468 | -76.2226 | (3.2467999999999999, -76.2226) | Landslide | Mudslide | Medium | Downpour | NA | 10 | http://www.laht.com/article.asp?ArticleId=321599&CategoryId=12393 | 1.962994 | 24.53417 | ||||
4091 | 12/13/11 | SA | Colombia | CO | Cauca | 8751 | La Cruz | 0.64469 | 1.6056 | -76.9742 | (1.6055999999999999, -76.974199999999996) | Landslide | Landslide | Medium | Downpour | NA | 1 | http://www.iol.co.za/dailynews/news/16-missing-after-mudslide-in-colombia-1.1197978#.UKenfOQ81s4 | 1.793592 | 26.41247 | ||||
6986 | 6/13/15 | 13:00 | SA | Colombia | CO | Cauca | 1972 | Jambaló | 8.81287 | Above road | 2.8696 | -76.3034 | (2.8696000000000002, -76.303399999999996) | Landslide | Landslide | Medium | Rain | 0 | 0 | RSOE EDIS | http://hisz.rsoe.hu/alertmap/site/index.php?pageid=event_desc&edis_id=LS%20-20150613-48641-COL | 24.518286 | 39.56840 | |
6579 | 12/21/14 | SA | Colombia | CO | Cauca | 3120 | Almaguer | 17.31514 | Unknown | 1.8362 | -76.9857 | (1.8362000000000001, -76.985699999999994) | Landslide | Mudslide | Large | Rain | 0 | 6 | TeleSUR | http://www.telesurtv.net/english/news/Mudslide-Kills-Six-People-in-Colombia-20141222-0015.html | 48.172452 | 75.91377 |
stem(df_Ca$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 11
## 0 | 89
## 1 |
## 1 | 7
stem(df_Ca$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 67
## 2 |
## 4 |
## 6 |
## 8 | 58
## 10 |
## 12 |
## 14 |
## 16 | 3
library(forecast)
data_serie<- ts(df_Ca$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May
## 2007 8.46579 0.70558 0.64469 8.81287 17.31514
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.64469 | 1 | 20 | 20 | 20 | 20 |
0.70558 | 1 | 20 | 20 | 40 | 40 |
8.46579 | 1 | 20 | 20 | 60 | 60 |
8.81287 | 1 | 20 | 20 | 80 | 80 |
17.31514 | 1 | 20 | 20 | 100 | 100 |
Total | 5 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.64469 | 1 |
0.70558 | 1 |
8.46579 | 1 |
8.81287 | 1 |
17.31514 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.64469 6.64469 12.64469 18.64469
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.64469 | 1 | 0.2 | 1 |
0.70558 | 1 | 0.2 | 2 |
8.46579 | 1 | 0.2 | 3 |
8.81287 | 1 | 0.2 | 4 |
17.31514 | 1 | 0.2 | 5 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ Distance: Factor w/ 5 levels "0.64469","0.70558",..: 1 2 3 4 5
## $ Freq : int 1 1 1 1 1
## $ Rel_Freq: num 0.2 0.2 0.2 0.2 0.2
## $ Cum_Freq: int 1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.64469 | 1 |
0.70558 | 1 |
8.46579 | 1 |
8.81287 | 1 |
17.31514 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Ca$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6447 0.7056 8.4658 7.1888 8.8129 17.3151
library(pastecs)
stat.desc(df_Ca)
## id date time America Country country_code State
## nbr.val 5.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 3.070000e+02 NA NA NA NA NA NA
## max 6.986000e+03 NA NA NA NA NA NA
## range 6.679000e+03 NA NA NA NA NA NA
## sum 1.886700e+04 NA NA NA NA NA NA
## median 4.091000e+03 NA NA NA NA NA NA
## mean 3.773400e+03 NA NA NA NA NA NA
## SE.mean 1.388208e+03 NA NA NA NA NA NA
## CI.mean.0.95 3.854284e+03 NA NA NA NA NA NA
## var 9.635611e+06 NA NA NA NA NA NA
## std.dev 3.104128e+03 NA NA NA NA NA NA
## coef.var 8.226343e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 5.000000e+00 NA 5.0000000 NA 5.0000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.0000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.0000000
## min 1.972000e+03 NA 0.6446900 NA 1.6056000
## max 1.322300e+04 NA 17.3151400 NA 3.2468000
## range 1.125100e+04 NA 16.6704500 NA 1.6412000
## sum 3.705100e+04 NA 35.9440700 NA 12.5019000
## median 8.751000e+03 NA 8.4657900 NA 2.8696000
## mean 7.410200e+03 NA 7.1888140 NA 2.5003800
## SE.mean 2.123627e+03 NA 3.0957114 NA 0.3264786
## CI.mean.0.95 5.896133e+03 NA 8.5950729 NA 0.9064498
## var 2.254895e+07 NA 47.9171465 NA 0.5329413
## std.dev 4.748574e+03 NA 6.9222212 NA 0.7300283
## coef.var 6.408159e-01 NA 0.9629156 NA 0.2919669
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.698570e+01 NA NA NA
## max -7.622260e+01 NA NA NA
## range 7.631000e-01 NA NA NA
## sum -3.832578e+02 NA NA NA
## median -7.677190e+01 NA NA NA
## mean -7.665156e+01 NA NA NA
## SE.mean 1.636230e-01 NA NA NA
## CI.mean.0.95 4.542903e-01 NA NA NA
## var 1.338625e-01 NA NA NA
## std.dev 3.658722e-01 NA NA NA
## coef.var -4.773187e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 5.000000 NA
## nbr.null NA NA NA 2 1.000000 NA
## nbr.na NA NA NA 3 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 24.000000 NA
## range NA NA NA 0 24.000000 NA
## sum NA NA NA 0 41.000000 NA
## median NA NA NA 0 6.000000 NA
## mean NA NA NA 0 8.200000 NA
## SE.mean NA NA NA 0 4.340507 NA
## CI.mean.0.95 NA NA NA 0 12.051179 NA
## var NA NA NA 0 94.200000 NA
## std.dev NA NA NA 0 9.705668 NA
## coef.var NA NA NA NaN 1.183618 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 1.7935921 11.776338
## max NA 48.1724524 75.913774
## range NA 46.3788603 64.137436
## sum NA 100.0000000 178.205153
## median NA 23.5526750 26.412465
## mean NA 20.0000000 35.641031
## SE.mean NA 8.6125790 10.989473
## CI.mean.0.95 NA 23.9123529 30.511669
## var NA 370.8825879 603.842603
## std.dev NA 19.2583122 24.573209
## coef.var NA 0.9629156 0.689464
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Mag %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 396 Colombia Magdalena Ciénaga 51.84125 7/30/10
## 853 Colombia Magdalena Santa Marta 0.79694 12/16/11
## 1290 Colombia Magdalena Ariguanà 28.50569 10/7/14
head(df_Mag)
## id date time America Country country_code State population
## 396 2175 7/30/10 SA Colombia CO Magdalena 88311
## 853 4096 12/16/11 SA Colombia CO Magdalena 431781
## 1290 6219 10/7/14 SA Colombia CO Magdalena 26246
## City Distance location_description latitude longitude
## 396 Ciénaga 51.84125 11.0029 -73.7733
## 853 Santa Marta 0.79694 11.2475 -74.2017
## 1290 Ariguanà 28.50569 Unknown 10.4741 -73.8715
## geolocation hazard_type landslide_type landslide_size
## 396 (11.0029, -73.773300000000006) Landslide Mudslide Medium
## 853 (11.2475, -74.201700000000002) Landslide Landslide Large
## 1290 (10.4741, -73.871499999999997) Landslide Landslide Medium
## trigger storm_name injuries fatalities source_name
## 396 Downpour NA 0
## 853 Downpour NA 1
## 1290 Rain 0 6 EL HERALDO
## source_link
## 396
## 853 http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html
## 1290 http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129
ggplot(data=df_Mag, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Mag, aes(x = "Magdalena", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Mag <- df_Mag %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Mag$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Mag, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Mag$Distance
names(Distance) <- df_Mag$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Ciénaga 51.841250 51.841250 63.888059 63.888059
## Ariguanà 28.505690 80.346940 35.129809 99.017868
## Santa Marta 0.796940 81.143880 0.982132 100.000000
stem(df_Mag$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1
## 2 | 9
## 4 | 2
head(df_Mag)
## id date time America Country country_code State population
## 1 4096 12/16/11 SA Colombia CO Magdalena 431781
## 2 2175 7/30/10 SA Colombia CO Magdalena 88311
## 3 6219 10/7/14 SA Colombia CO Magdalena 26246
## City Distance location_description latitude longitude
## 1 Santa Marta 0.79694 11.2475 -74.2017
## 2 Ciénaga 51.84125 11.0029 -73.7733
## 3 Ariguanà 28.50569 Unknown 10.4741 -73.8715
## geolocation hazard_type landslide_type landslide_size
## 1 (11.2475, -74.201700000000002) Landslide Landslide Large
## 2 (11.0029, -73.773300000000006) Landslide Mudslide Medium
## 3 (10.4741, -73.871499999999997) Landslide Landslide Medium
## trigger storm_name injuries fatalities source_name
## 1 Downpour NA 1
## 2 Downpour NA 0
## 3 Rain 0 6 EL HERALDO
## source_link
## 1 http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html
## 2
## 3 http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129
## prop ypos
## 1 0.982132 0.491066
## 2 63.888059 32.926162
## 3 35.129809 82.435096
knitr::kable(head(df_Mag))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4096 | 12/16/11 | SA | Colombia | CO | Magdalena | 431781 | Santa Marta | 0.79694 | 11.2475 | -74.2017 | (11.2475, -74.201700000000002) | Landslide | Landslide | Large | Downpour | NA | 1 | http://colombiareports.com/colombia-news/news/21092-santos-calls-on-colombians-to-heed-evacuation-warnings.html | 0.982132 | 0.491066 | ||||
2175 | 7/30/10 | SA | Colombia | CO | Magdalena | 88311 | Ciénaga | 51.84125 | 11.0029 | -73.7733 | (11.0029, -73.773300000000006) | Landslide | Mudslide | Medium | Downpour | NA | 0 | 63.888059 | 32.926161 | |||||
6219 | 10/7/14 | SA | Colombia | CO | Magdalena | 26246 | Ariguanà | 28.50569 | Unknown | 10.4741 | -73.8715 | (10.4741, -73.871499999999997) | Landslide | Landslide | Medium | Rain | 0 | 6 | EL HERALDO | http://www.elheraldo.co/magdalena/seis-indigenas-murieron-por-alud-de-tierra-en-la-sierra-nevada-169129 | 35.129809 | 82.435096 |
stem(df_Mag$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1
## 2 | 9
## 4 | 2
stem(df_Mag$"Distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1
## 1 |
## 2 | 9
## 3 |
## 4 |
## 5 | 2
library(forecast)
data_serie<- ts(df_Mag$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar
## 2007 0.79694 51.84125 28.50569
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.79694 | 1 | 33.3 | 33.3 | 33.3 | 33.3 |
28.50569 | 1 | 33.3 | 33.3 | 66.7 | 66.7 |
51.84125 | 1 | 33.3 | 33.3 | 100.0 | 100.0 |
Total | 3 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.79694 | 1 |
28.50569 | 1 |
51.84125 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.79694 18.79694 36.79694 54.79694
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.79694 | 1 | 0.3333333 | 1 |
28.50569 | 1 | 0.3333333 | 2 |
51.84125 | 1 | 0.3333333 | 3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ Distance: Factor w/ 3 levels "0.79694","28.50569",..: 1 2 3
## $ Freq : int 1 1 1
## $ Rel_Freq: num 0.333 0.333 0.333
## $ Cum_Freq: int 1 2 3
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.79694 | 1 |
28.50569 | 1 |
51.84125 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Mag$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7969 14.6513 28.5057 27.0480 40.1735 51.8413
library(pastecs)
stat.desc(df_Mag)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 3.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.175000e+03 NA NA NA NA NA NA
## max 6.219000e+03 NA NA NA NA NA NA
## range 4.044000e+03 NA NA NA NA NA NA
## sum 1.249000e+04 NA NA NA NA NA NA
## median 4.096000e+03 NA NA NA NA NA NA
## mean 4.163333e+03 NA NA NA NA NA NA
## SE.mean 1.167888e+03 NA NA NA NA NA NA
## CI.mean.0.95 5.025015e+03 NA NA NA NA NA NA
## var 4.091884e+06 NA NA NA NA NA NA
## std.dev 2.022841e+03 NA NA NA NA NA NA
## coef.var 4.858705e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 3.000000e+00 NA 3.0000000 NA 3.00000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 2.624600e+04 NA 0.7969400 NA 10.47410000
## max 4.317810e+05 NA 51.8412500 NA 11.24750000
## range 4.055350e+05 NA 51.0443100 NA 0.77340000
## sum 5.463380e+05 NA 81.1438800 NA 32.72450000
## median 8.831100e+04 NA 28.5056900 NA 11.00290000
## mean 1.821127e+05 NA 27.0479600 NA 10.90816667
## SE.mean 1.261133e+05 NA 14.7532384 NA 0.22823065
## CI.mean.0.95 5.426219e+05 NA 63.4780614 NA 0.98199724
## var 4.771372e+10 NA 652.9741284 NA 0.15626769
## std.dev 2.184347e+05 NA 25.5533585 NA 0.39530709
## coef.var 1.199448e+00 NA 0.9447425 NA 0.03623955
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.420170e+01 NA NA NA
## max -7.377330e+01 NA NA NA
## range 4.284000e-01 NA NA NA
## sum -2.218465e+02 NA NA NA
## median -7.387150e+01 NA NA NA
## mean -7.394883e+01 NA NA NA
## SE.mean 1.295723e-01 NA NA NA
## CI.mean.0.95 5.575048e-01 NA NA NA
## var 5.036697e-02 NA NA NA
## std.dev 2.244259e-01 NA NA NA
## coef.var -3.034881e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 3.000000 NA
## nbr.null NA NA NA 1 1.000000 NA
## nbr.na NA NA NA 2 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 6.000000 NA
## range NA NA NA 0 6.000000 NA
## sum NA NA NA 0 7.000000 NA
## median NA NA NA 0 1.000000 NA
## mean NA NA NA 0 2.333333 NA
## SE.mean NA NA NA NA 1.855921 NA
## CI.mean.0.95 NA NA NA NaN 7.985386 NA
## var NA NA NA NA 10.333333 NA
## std.dev NA NA NA NA 3.214550 NA
## coef.var NA NA NA NA 1.377664 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 0.9821320 0.491066
## max NA 63.8880591 82.435096
## range NA 62.9059271 81.944030
## sum NA 100.0000000 115.852323
## median NA 35.1298089 32.926162
## mean NA 33.3333333 38.617441
## SE.mean NA 18.1815787 23.825749
## CI.mean.0.95 NA 78.2290191 102.513925
## var NA 991.7094095 1702.998991
## std.dev NA 31.4914180 41.267408
## coef.var NA 0.9447425 1.068621
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Meta %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 349 Colombia Meta Puerto López 22.53724 5/23/10
head(df_Meta)
## id date time America Country country_code State population
## 349 1892 5/23/10 SA Colombia CO Meta 16678
## City Distance location_description latitude longitude
## 349 Puerto López 22.53724 3.8844 -72.9175
## geolocation hazard_type landslide_type
## 349 (3.8843999999999999, -72.917500000000004) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 349 Medium Downpour NA 0
## source_link
## 349 http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN
ggplot(data=df_Meta, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Meta, aes(x = "Meta", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Meta <- df_Meta %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Meta$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Meta, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Meta$Distance
names(Distance) <- df_Meta$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Puerto López 22.53724 22.53724 100.00000 100.00000
stem(df_Meta$"Distance")
head(df_Meta)
## id date time America Country country_code State population
## 1 1892 5/23/10 SA Colombia CO Meta 16678
## City Distance location_description latitude longitude
## 1 Puerto López 22.53724 3.8844 -72.9175
## geolocation hazard_type landslide_type
## 1 (3.8843999999999999, -72.917500000000004) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 0
## source_link
## 1 http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN
## prop ypos
## 1 100 50
knitr::kable(head(df_Meta))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1892 | 5/23/10 | SA | Colombia | CO | Meta | 16678 | Puerto López | 22.53724 | 3.8844 | -72.9175 | (3.8843999999999999, -72.917500000000004) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://www.etaiwannews.com/etn/news_content.php?id=1265492&lang=eng_news&cate_img=49.jpg&cate_rss=news_Society_TAIWAN | 100 | 50 |
stem(df_Meta$"Distance")
stem(df_Meta$"Distance", scale = 2)
library(forecast)
data_serie<- ts(df_Meta$Distance, frequency=12, start=2007)
head(data_serie)
## Jan
## 2007 22.53724
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
22.53724 | 1 | 100 | 100 | 100 | 100 |
Total | 1 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 2 obs. of 5 variables:
## $ n : num 1 1
## $ % : num 100 100
## $ val% : num 100 100
## $ %cum : num 100 100
## $ val%cum: num 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
22.53724 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 22.53724
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
22.53724 | 1 | 1 | 1 |
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ Distance: Factor w/ 1 level "22.53724": 1
## $ Freq : int 1
## $ Rel_Freq: num 1
## $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
22.53724 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Meta$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22.54 22.54 22.54 22.54 22.54 22.54
library(pastecs)
stat.desc(df_Meta)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State population City
## nbr.val 1 NA NA NA NA NA NA 1 NA
## nbr.null 0 NA NA NA NA NA NA 0 NA
## nbr.na 0 NA NA NA NA NA NA 0 NA
## min 1892 NA NA NA NA NA NA 16678 NA
## max 1892 NA NA NA NA NA NA 16678 NA
## range 0 NA NA NA NA NA NA 0 NA
## sum 1892 NA NA NA NA NA NA 16678 NA
## median 1892 NA NA NA NA NA NA 16678 NA
## mean 1892 NA NA NA NA NA NA 16678 NA
## SE.mean NA NA NA NA NA NA NA NA NA
## CI.mean.0.95 NaN NA NA NA NA NA NA NaN NA
## var NA NA NA NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA NA NA NA
## Distance location_description latitude longitude geolocation
## nbr.val 1.00000 NA 1.0000 1.0000 NA
## nbr.null 0.00000 NA 0.0000 0.0000 NA
## nbr.na 0.00000 NA 0.0000 0.0000 NA
## min 22.53724 NA 3.8844 -72.9175 NA
## max 22.53724 NA 3.8844 -72.9175 NA
## range 0.00000 NA 0.0000 0.0000 NA
## sum 22.53724 NA 3.8844 -72.9175 NA
## median 22.53724 NA 3.8844 -72.9175 NA
## mean 22.53724 NA 3.8844 -72.9175 NA
## SE.mean NA NA NA NA NA
## CI.mean.0.95 NaN NA NaN NaN NA
## var NA NA NA NA NA
## std.dev NA NA NA NA NA
## coef.var NA NA NA NA NA
## hazard_type landslide_type landslide_size trigger storm_name
## nbr.val NA NA NA NA NA
## nbr.null NA NA NA NA NA
## nbr.na NA NA NA NA NA
## min NA NA NA NA NA
## max NA NA NA NA NA
## range NA NA NA NA NA
## sum NA NA NA NA NA
## median NA NA NA NA NA
## mean NA NA NA NA NA
## SE.mean NA NA NA NA NA
## CI.mean.0.95 NA NA NA NA NA
## var NA NA NA NA NA
## std.dev NA NA NA NA NA
## coef.var NA NA NA NA NA
## injuries fatalities source_name source_link prop ypos
## nbr.val 0 1 NA NA 1 1
## nbr.null 0 1 NA NA 0 0
## nbr.na 1 0 NA NA 0 0
## min Inf 0 NA NA 100 50
## max -Inf 0 NA NA 100 50
## range -Inf 0 NA NA 0 0
## sum 0 0 NA NA 100 50
## median NA 0 NA NA 100 50
## mean NaN 0 NA NA 100 50
## SE.mean NA NA NA NA NA NA
## CI.mean.0.95 NaN NaN NA NA NaN NaN
## var NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Put %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 1456 Colombia Putumayo San Francisco 1.4794 3/21/15
head(df_Put)
## id date time America Country country_code State population
## 1456 6914 3/21/15 SA Colombia CO Putumayo 4350
## City Distance location_description latitude longitude
## 1456 San Francisco 1.4794 Natural slope 1.1656 -76.8755
## geolocation hazard_type landslide_type landslide_size
## 1456 (1.1656, -76.875500000000002) Landslide Landslide Medium
## trigger storm_name injuries fatalities source_name
## 1456 Unknown 0 0 AOL Travel
## source_link
## 1456 http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/
ggplot(data=df_Put, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Put, aes(x = "Putumayo", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Put <- df_Put %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Put$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Put, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Put$Distance
names(Distance) <- df_Put$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Francisco 1.4794 1.4794 100.0000 100.0000
stem(df_Put$"Distance")
head(df_Put)
## id date time America Country country_code State population
## 1 6914 3/21/15 SA Colombia CO Putumayo 4350
## City Distance location_description latitude longitude
## 1 San Francisco 1.4794 Natural slope 1.1656 -76.8755
## geolocation hazard_type landslide_type landslide_size
## 1 (1.1656, -76.875500000000002) Landslide Landslide Medium
## trigger storm_name injuries fatalities source_name
## 1 Unknown 0 0 AOL Travel
## source_link
## 1 http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/
## prop ypos
## 1 100 50
knitr::kable(head(df_Put))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6914 | 3/21/15 | SA | Colombia | CO | Putumayo | 4350 | San Francisco | 1.4794 | Natural slope | 1.1656 | -76.8755 | (1.1656, -76.875500000000002) | Landslide | Landslide | Medium | Unknown | 0 | 0 | AOL Travel | http://travel.aol.co.uk/2015/03/24/face-of-jesus-appears-cliff-south-america-landslide/ | 100 | 50 |
stem(df_Put$"Distance")
stem(df_Put$"Distance", scale = 2)
library(forecast)
data_serie<- ts(df_Put$Distance, frequency=12, start=2007)
head(data_serie)
## Jan
## 2007 1.4794
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
1.4794 | 1 | 100 | 100 | 100 | 100 |
Total | 1 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 2 obs. of 5 variables:
## $ n : num 1 1
## $ % : num 100 100
## $ val% : num 100 100
## $ %cum : num 100 100
## $ val%cum: num 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
1.4794 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
summary(df_Put$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.479 1.479 1.479 1.479 1.479 1.479
library(pastecs)
stat.desc(df_Put)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State population City
## nbr.val 1 NA NA NA NA NA NA 1 NA
## nbr.null 0 NA NA NA NA NA NA 0 NA
## nbr.na 0 NA NA NA NA NA NA 0 NA
## min 6914 NA NA NA NA NA NA 4350 NA
## max 6914 NA NA NA NA NA NA 4350 NA
## range 0 NA NA NA NA NA NA 0 NA
## sum 6914 NA NA NA NA NA NA 4350 NA
## median 6914 NA NA NA NA NA NA 4350 NA
## mean 6914 NA NA NA NA NA NA 4350 NA
## SE.mean NA NA NA NA NA NA NA NA NA
## CI.mean.0.95 NaN NA NA NA NA NA NA NaN NA
## var NA NA NA NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA NA NA NA
## Distance location_description latitude longitude geolocation
## nbr.val 1.0000 NA 1.0000 1.0000 NA
## nbr.null 0.0000 NA 0.0000 0.0000 NA
## nbr.na 0.0000 NA 0.0000 0.0000 NA
## min 1.4794 NA 1.1656 -76.8755 NA
## max 1.4794 NA 1.1656 -76.8755 NA
## range 0.0000 NA 0.0000 0.0000 NA
## sum 1.4794 NA 1.1656 -76.8755 NA
## median 1.4794 NA 1.1656 -76.8755 NA
## mean 1.4794 NA 1.1656 -76.8755 NA
## SE.mean NA NA NA NA NA
## CI.mean.0.95 NaN NA NaN NaN NA
## var NA NA NA NA NA
## std.dev NA NA NA NA NA
## coef.var NA NA NA NA NA
## hazard_type landslide_type landslide_size trigger storm_name
## nbr.val NA NA NA NA NA
## nbr.null NA NA NA NA NA
## nbr.na NA NA NA NA NA
## min NA NA NA NA NA
## max NA NA NA NA NA
## range NA NA NA NA NA
## sum NA NA NA NA NA
## median NA NA NA NA NA
## mean NA NA NA NA NA
## SE.mean NA NA NA NA NA
## CI.mean.0.95 NA NA NA NA NA
## var NA NA NA NA NA
## std.dev NA NA NA NA NA
## coef.var NA NA NA NA NA
## injuries fatalities source_name source_link prop ypos
## nbr.val 1 1 NA NA 1 1
## nbr.null 1 1 NA NA 0 0
## nbr.na 0 0 NA NA 0 0
## min 0 0 NA NA 100 50
## max 0 0 NA NA 100 50
## range 0 0 NA NA 0 0
## sum 0 0 NA NA 100 50
## median 0 0 NA NA 100 50
## mean 0 0 NA NA 100 50
## SE.mean NA NA NA NA NA NA
## CI.mean.0.95 NaN NaN NA NA NaN NaN
## var NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_San %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 425 Colombia Santander Málaga 2.99929 8/25/10
## 434 Colombia Santander Málaga 2.99929 8/28/10
## 527 Colombia Santander San Andrés 3.06383 11/20/10
## 594 Colombia Santander Matanza 6.16385 3/3/11
## 643 Colombia Santander Florián 0.44753 4/15/11
## 715 Colombia Santander Lebrija 9.21217 5/16/11
## 716 Colombia Santander Piedecuesta 10.55986 5/17/11
## 718 Colombia Santander San Vicente de Chucurà 0.80432 5/18/11
## 848 Colombia Santander Los Santos 12.61362 12/7/11
## 1222 Colombia Santander Bucaramanga 1.33829 3/16/14
## 1305 Colombia Santander San Vicente de Chucurà 1.08964 10/21/14
head(df_San)
## id date time America Country country_code State population
## 425 2331 8/25/10 SA Colombia CO Santander 18739
## 434 2356 8/28/10 SA Colombia CO Santander 18739
## 527 2740 11/20/10 SA Colombia CO Santander 3032
## 594 3170 3/3/11 Afternoon SA Colombia CO Santander 1669
## 643 3365 4/15/11 SA Colombia CO Santander 1227
## 715 3500 5/16/11 5:45:00 SA Colombia CO Santander 8949
## City Distance location_description latitude longitude
## 425 Málaga 2.99929 6.7254 -72.7260
## 434 Málaga 2.99929 6.7254 -72.7260
## 527 San Andrés 3.06383 6.8202 -72.8756
## 594 Matanza 6.16385 7.3500 -72.9667
## 643 Florián 0.44753 5.8044 -73.9743
## 715 Lebrija 9.21217 7.0960 -73.2994
## geolocation hazard_type landslide_type
## 425 (6.7253999999999996, -72.725999999999999) Landslide Landslide
## 434 (6.7253999999999996, -72.725999999999999) Landslide Landslide
## 527 (6.8201999999999998, -72.875600000000006) Landslide Landslide
## 594 (7.35, -72.966700000000003) Landslide Mudslide
## 643 (5.8044000000000002, -73.974299999999999) Landslide Mudslide
## 715 (7.0960000000000001, -73.299400000000006) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 425 Medium Downpour NA 0
## 434 Medium Downpour NA 0
## 527 Medium Downpour NA 0
## 594 Medium Downpour NA 3
## 643 Large Downpour NA 11
## 715 Medium Downpour NA 2
## source_link
## 425
## 434
## 527 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 594 http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html
## 643 http://www.laht.com/article.asp?ArticleId=391822&CategoryId=12393
## 715 http://colombiareports.com/colombia-news/news/16289-2-die-and-1-missing-in-santander-landslide.html
ggplot(data=df_San, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_San, aes(x = "Santander", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_San <- df_San %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_San$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_San, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_San$Distance
names(Distance) <- df_San$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Los Santos 12.6136200 12.6136200 24.5919368 24.5919368
## Piedecuesta 10.5598600 23.1734800 20.5878574 45.1797942
## Lebrija 9.2121700 32.3856500 17.9603558 63.1401500
## Matanza 6.1638500 38.5495000 12.0172488 75.1573988
## San Andrés 3.0638300 41.6133300 5.9733458 81.1307446
## Málaga 2.9992900 44.6126200 5.8475164 86.9782610
## Málaga 2.9992900 47.6119100 5.8475164 92.8257774
## Bucaramanga 1.3382900 48.9502000 2.6091751 95.4349525
## San Vicente de Chucurà 1.0896400 50.0398400 2.1243987 97.5593512
## San Vicente de Chucurà 0.8043200 50.8441600 1.5681293 99.1274805
## Florián 0.4475300 51.2916900 0.8725195 100.0000000
stem(df_San$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0111333
## 0 | 69
## 1 | 13
head(df_San)
## id date time America Country country_code State population
## 1 3503 5/18/11 3:30:00 SA Colombia CO Santander 11265
## 2 6309 10/21/14 3:00 SA Colombia CO Santander 11265
## 3 2740 11/20/10 SA Colombia CO Santander 3032
## 4 3501 5/17/11 Evening SA Colombia CO Santander 86387
## 5 3170 3/3/11 Afternoon SA Colombia CO Santander 1669
## 6 2331 8/25/10 SA Colombia CO Santander 18739
## City Distance location_description latitude longitude
## 1 San Vicente de Chucurà 0.80432 6.8835 -73.4166
## 2 San Vicente de Chucurà 1.08964 Above road 6.8907 -73.4081
## 3 San Andrés 3.06383 6.8202 -72.8756
## 4 Piedecuesta 10.55986 6.9050 -73.0021
## 5 Matanza 6.16385 7.3500 -72.9667
## 6 Málaga 2.99929 6.7254 -72.7260
## geolocation hazard_type landslide_type
## 1 (6.8834999999999997, -73.416600000000003) Landslide Complex
## 2 (6.8906999999999998, -73.408100000000005) Landslide Rockfall
## 3 (6.8201999999999998, -72.875600000000006) Landslide Landslide
## 4 (6.9050000000000002, -73.002099999999999) Landslide Landslide
## 5 (7.35, -72.966700000000003) Landslide Mudslide
## 6 (6.7253999999999996, -72.725999999999999) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 1
## 2 Small Rain 0 0 Vanguardia
## 3 Medium Downpour NA 0
## 4 Medium Downpour NA 0
## 5 Medium Downpour NA 3
## 6 Medium Downpour NA 0
## source_link
## 1 http://colombiareports.com/colombia-news/news/16350-north-colombia-landslide-kills-one-injures-10.html
## 2 http://www.vanguardia.com/economia/local/283826-derrumbe-cerro-por-10-horas-via-a-san-vicente-de-chucuri
## 3 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 4 http://colombiareports.com/colombia-news/news/16343-bogota-cut-off-from-northeast-colombia.html
## 5 http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html
## 6
## prop ypos
## 1 1.568129 0.7840646
## 2 2.124399 2.6303286
## 3 5.973346 6.6792009
## 4 20.587857 19.9598025
## 5 12.017249 36.2623556
## 6 5.847516 45.1947382
knitr::kable(head(df_San))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3503 | 5/18/11 | 3:30:00 | SA | Colombia | CO | Santander | 11265 | San Vicente de Chucurà | 0.80432 | 6.8835 | -73.4166 | (6.8834999999999997, -73.416600000000003) | Landslide | Complex | Medium | Downpour | NA | 1 | http://colombiareports.com/colombia-news/news/16350-north-colombia-landslide-kills-one-injures-10.html | 1.568129 | 0.7840646 | |||
6309 | 10/21/14 | 3:00 | SA | Colombia | CO | Santander | 11265 | San Vicente de Chucurà | 1.08964 | Above road | 6.8907 | -73.4081 | (6.8906999999999998, -73.408100000000005) | Landslide | Rockfall | Small | Rain | 0 | 0 | Vanguardia | http://www.vanguardia.com/economia/local/283826-derrumbe-cerro-por-10-horas-via-a-san-vicente-de-chucuri | 2.124399 | 2.6303286 | |
2740 | 11/20/10 | SA | Colombia | CO | Santander | 3032 | San Andrés | 3.06383 | 6.8202 | -72.8756 | (6.8201999999999998, -72.875600000000006) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html | 5.973346 | 6.6792009 | ||||
3501 | 5/17/11 | Evening | SA | Colombia | CO | Santander | 86387 | Piedecuesta | 10.55986 | 6.9050 | -73.0021 | (6.9050000000000002, -73.002099999999999) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://colombiareports.com/colombia-news/news/16343-bogota-cut-off-from-northeast-colombia.html | 20.587857 | 19.9598025 | |||
3170 | 3/3/11 | Afternoon | SA | Colombia | CO | Santander | 1669 | Matanza | 6.16385 | 7.3500 | -72.9667 | (7.35, -72.966700000000003) | Landslide | Mudslide | Medium | Downpour | NA | 3 | http://colombiareports.com/colombia-news/news/14728-deadly-mudslide-triggers-mass-evacuation-order.html | 12.017249 | 36.2623556 | |||
2331 | 8/25/10 | SA | Colombia | CO | Santander | 18739 | Málaga | 2.99929 | 6.7254 | -72.7260 | (6.7253999999999996, -72.725999999999999) | Landslide | Landslide | Medium | Downpour | NA | 0 | 5.847516 | 45.1947382 |
stem(df_San$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0111333
## 0 | 69
## 1 | 13
stem(df_San$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4813
## 2 | 001
## 4 |
## 6 | 2
## 8 | 2
## 10 | 6
## 12 | 6
library(forecast)
data_serie<- ts(df_San$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 0.80432 1.08964 3.06383 10.55986 6.16385 2.99929
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
2.99929 | 2 | 18.2 | 18.2 | 18.2 | 18.2 |
0.44753 | 1 | 9.1 | 9.1 | 27.3 | 27.3 |
0.80432 | 1 | 9.1 | 9.1 | 36.4 | 36.4 |
1.08964 | 1 | 9.1 | 9.1 | 45.5 | 45.5 |
1.33829 | 1 | 9.1 | 9.1 | 54.5 | 54.5 |
3.06383 | 1 | 9.1 | 9.1 | 63.6 | 63.6 |
6.16385 | 1 | 9.1 | 9.1 | 72.7 | 72.7 |
9.21217 | 1 | 9.1 | 9.1 | 81.8 | 81.8 |
10.55986 | 1 | 9.1 | 9.1 | 90.9 | 90.9 |
12.61362 | 1 | 9.1 | 9.1 | 100.0 | 100.0 |
Total | 11 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 11 obs. of 5 variables:
## $ n : num 2 1 1 1 1 1 1 1 1 1 ...
## $ % : num 18.2 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
## $ val% : num 18.2 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
## $ %cum : num 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 100 ...
## $ val%cum: num 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 100 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
2.99929 | 2 |
0.44753 | 1 |
0.80432 | 1 |
1.08964 | 1 |
1.33829 | 1 |
3.06383 | 1 |
6.16385 | 1 |
9.21217 | 1 |
10.55986 | 1 |
12.61362 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.44753 3.44753 6.44753 9.44753 12.44753 15.44753
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.44753 | 1 | 0.0909091 | 1 |
0.80432 | 1 | 0.0909091 | 2 |
1.08964 | 1 | 0.0909091 | 3 |
1.33829 | 1 | 0.0909091 | 4 |
2.99929 | 2 | 0.1818182 | 6 |
3.06383 | 1 | 0.0909091 | 7 |
6.16385 | 1 | 0.0909091 | 8 |
9.21217 | 1 | 0.0909091 | 9 |
10.55986 | 1 | 0.0909091 | 10 |
12.61362 | 1 | 0.0909091 | 11 |
str(Freq_table)
## 'data.frame': 10 obs. of 4 variables:
## $ Distance: Factor w/ 10 levels "0.44753","0.80432",..: 1 2 3 4 5 6 7 8 9 10
## $ Freq : int 1 1 1 1 2 1 1 1 1 1
## $ Rel_Freq: num 0.0909 0.0909 0.0909 0.0909 0.1818 ...
## $ Cum_Freq: int 1 2 3 4 6 7 8 9 10 11
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.44753 | 1 |
0.80432 | 1 |
1.08964 | 1 |
1.33829 | 1 |
2.99929 | 2 |
3.06383 | 1 |
6.16385 | 1 |
9.21217 | 1 |
10.55986 | 1 |
12.61362 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_San$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4475 1.2140 2.9993 4.6629 7.6880 12.6136
library(pastecs)
stat.desc(df_San)
## id date time America Country country_code State
## nbr.val 1.100000e+01 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.331000e+03 NA NA NA NA NA NA
## max 6.309000e+03 NA NA NA NA NA NA
## range 3.978000e+03 NA NA NA NA NA NA
## sum 4.082300e+04 NA NA NA NA NA NA
## median 3.500000e+03 NA NA NA NA NA NA
## mean 3.711182e+03 NA NA NA NA NA NA
## SE.mean 3.951672e+02 NA NA NA NA NA NA
## CI.mean.0.95 8.804874e+02 NA NA NA NA NA NA
## var 1.717728e+06 NA NA NA NA NA NA
## std.dev 1.310621e+03 NA NA NA NA NA NA
## coef.var 3.531547e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 1.100000e+01 NA 11.0000000 NA 11.00000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 1.227000e+03 NA 0.4475300 NA 5.80440000
## max 5.718200e+05 NA 12.6136200 NA 7.35000000
## range 5.705930e+05 NA 12.1660900 NA 1.54560000
## sum 7.344020e+05 NA 51.2916900 NA 75.51610000
## median 1.126500e+04 NA 2.9992900 NA 6.89070000
## mean 6.676382e+04 NA 4.6628809 NA 6.86510000
## SE.mean 5.103125e+04 NA 1.2986294 NA 0.12181958
## CI.mean.0.95 1.137047e+05 NA 2.8935266 NA 0.27143094
## var 2.864607e+10 NA 18.5508218 NA 0.16324011
## std.dev 1.692515e+05 NA 4.3070665 NA 0.40402984
## coef.var 2.535078e+00 NA 0.9236922 NA 0.05885272
## longitude geolocation hazard_type landslide_type
## nbr.val 1.100000e+01 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.397430e+01 NA NA NA
## max -7.272600e+01 NA NA NA
## range 1.248300e+00 NA NA NA
## sum -8.044988e+02 NA NA NA
## median -7.300210e+01 NA NA NA
## mean -7.313625e+01 NA NA NA
## SE.mean 1.110412e-01 NA NA NA
## CI.mean.0.95 2.474153e-01 NA NA NA
## var 1.356317e-01 NA NA NA
## std.dev 3.682821e-01 NA NA NA
## coef.var -5.035561e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 11.0000000 NA
## nbr.null NA NA NA 2 5.0000000 NA
## nbr.na NA NA NA 9 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 11.0000000 NA
## range NA NA NA 0 11.0000000 NA
## sum NA NA NA 0 21.0000000 NA
## median NA NA NA 0 1.0000000 NA
## mean NA NA NA 0 1.9090909 NA
## SE.mean NA NA NA 0 0.9672317 NA
## CI.mean.0.95 NA NA NA 0 2.1551266 NA
## var NA NA NA 0 10.2909091 NA
## std.dev NA NA NA 0 3.2079447 NA
## coef.var NA NA NA NaN 1.6803520 NA
## source_link prop ypos
## nbr.val NA 11.0000000 11.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.8725195 0.7840646
## max NA 24.5919368 98.6954125
## range NA 23.7194173 97.9113478
## sum NA 100.0000000 512.0028313
## median NA 5.8475164 45.1947382
## mean NA 9.0909091 46.5457119
## SE.mean NA 2.5318515 11.1910610
## CI.mean.0.95 NA 5.6413166 24.9352379
## var NA 70.5129909 1377.6383146
## std.dev NA 8.3972014 37.1165504
## coef.var NA 0.9236922 0.7974215
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Nor %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 110 Colombia Norte de Santander Hacarà 0.38844 6/24/08
## 403 Colombia Norte de Santander San Calixto 7.85369 8/3/10
## 526 Colombia Norte de Santander Arboledas 14.62503 11/20/10
## 621 Colombia Norte de Santander Cúcuta 3.60520 3/26/11
## 921 Colombia Norte de Santander San Calixto 11.19714 11/28/12
head(df_Nor)
## id date time America Country country_code State
## 110 605 6/24/08 SA Colombia CO Norte de Santander
## 403 2194 8/3/10 SA Colombia CO Norte de Santander
## 526 2739 11/20/10 SA Colombia CO Norte de Santander
## 621 3300 3/26/11 SA Colombia CO Norte de Santander
## 921 4637 11/28/12 SA Colombia CO Norte de Santander
## population City Distance location_description latitude longitude
## 110 1502 Hacarà 0.38844 8.3200 -73.1500
## 403 2080 San Calixto 7.85369 8.4418 -73.2665
## 526 2702 Arboledas 14.62503 7.6213 -72.9303
## 621 721398 Cúcuta 3.60520 Unknown 7.9467 -72.4908
## 921 2080 San Calixto 11.19714 8.4683 -73.2843
## geolocation hazard_type landslide_type
## 110 (8.32, -73.150000000000006) Landslide Landslide
## 403 (8.4418000000000006, -73.266499999999994) Landslide Complex
## 526 (7.6212999999999997, -72.930300000000003) Landslide Landslide
## 621 (7.9466999999999999, -72.490799999999993) Landslide Mudslide
## 921 (8.4682999999999993, -73.284300000000002) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 110 Medium Downpour NA 10
## 403 Medium Downpour NA 4
## 526 Medium Downpour NA 0
## 621 Medium Downpour NA 0 Caracol Radio
## 921 Large Rain NA NA
## source_link
## 110 http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm
## 403
## 526 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 621 http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html
## 921 http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/
ggplot(data=df_Nor, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Nor, aes(x = "Norte de Santander", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Nor,aes(x="Norte de Santander",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=6)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Nor$Distance
names(Distance) <- df_Nor$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Arboledas 14.625030 14.625030 38.824593 38.824593
## San Calixto 11.197140 25.822170 29.724684 68.549277
## San Calixto 7.853690 33.675860 20.848936 89.398213
## Cúcuta 3.605200 37.281060 9.570608 98.968821
## Hacarà 0.388440 37.669500 1.031179 100.000000
stem(df_Nor$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 | 8
## 1 | 1
## 1 | 5
head(df_Nor)
## id date time America Country country_code State
## 110 605 6/24/08 SA Colombia CO Norte de Santander
## 403 2194 8/3/10 SA Colombia CO Norte de Santander
## 526 2739 11/20/10 SA Colombia CO Norte de Santander
## 621 3300 3/26/11 SA Colombia CO Norte de Santander
## 921 4637 11/28/12 SA Colombia CO Norte de Santander
## population City Distance location_description latitude longitude
## 110 1502 Hacarà 0.38844 8.3200 -73.1500
## 403 2080 San Calixto 7.85369 8.4418 -73.2665
## 526 2702 Arboledas 14.62503 7.6213 -72.9303
## 621 721398 Cúcuta 3.60520 Unknown 7.9467 -72.4908
## 921 2080 San Calixto 11.19714 8.4683 -73.2843
## geolocation hazard_type landslide_type
## 110 (8.32, -73.150000000000006) Landslide Landslide
## 403 (8.4418000000000006, -73.266499999999994) Landslide Complex
## 526 (7.6212999999999997, -72.930300000000003) Landslide Landslide
## 621 (7.9466999999999999, -72.490799999999993) Landslide Mudslide
## 921 (8.4682999999999993, -73.284300000000002) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 110 Medium Downpour NA 10
## 403 Medium Downpour NA 4
## 526 Medium Downpour NA 0
## 621 Medium Downpour NA 0 Caracol Radio
## 921 Large Rain NA NA
## source_link
## 110 http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm
## 403
## 526 http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html
## 621 http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html
## 921 http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/
knitr::kable(head(df_Nor))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
110 | 605 | 6/24/08 | SA | Colombia | CO | Norte de Santander | 1502 | Hacarà | 0.38844 | 8.3200 | -73.1500 | (8.32, -73.150000000000006) | Landslide | Landslide | Medium | Downpour | NA | 10 | http://news.xinhuanet.com/english/2008-06/25/content_8434589.htm | ||||
403 | 2194 | 8/3/10 | SA | Colombia | CO | Norte de Santander | 2080 | San Calixto | 7.85369 | 8.4418 | -73.2665 | (8.4418000000000006, -73.266499999999994) | Landslide | Complex | Medium | Downpour | NA | 4 | |||||
526 | 2739 | 11/20/10 | SA | Colombia | CO | Norte de Santander | 2702 | Arboledas | 14.62503 | 7.6213 | -72.9303 | (7.6212999999999997, -72.930300000000003) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://colombiareports.com/colombia-news/news/13038-weekend-floods-kills.html | ||||
621 | 3300 | 3/26/11 | SA | Colombia | CO | Norte de Santander | 721398 | Cúcuta | 3.60520 | Unknown | 7.9467 | -72.4908 | (7.9466999999999999, -72.490799999999993) | Landslide | Mudslide | Medium | Downpour | NA | 0 | Caracol Radio | http://colombiareports.com/colombia-news/news/15175-landslide-destroys-houses-in-north-east-colombia.html | ||
921 | 4637 | 11/28/12 | SA | Colombia | CO | Norte de Santander | 2080 | San Calixto | 11.19714 | 8.4683 | -73.2843 | (8.4682999999999993, -73.284300000000002) | Landslide | Landslide | Large | Rain | NA | NA | http://latino.foxnews.com/latino/news/2012/11/28/10-missing-after-landslide-in-colombia/ |
stem(df_Nor$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 | 8
## 1 | 1
## 1 | 5
stem(df_Nor$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4
## 2 | 6
## 4 |
## 6 | 9
## 8 |
## 10 | 2
## 12 |
## 14 | 6
library(forecast)
data_serie<- ts(df_Nor$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May
## 2007 0.38844 7.85369 14.62503 3.60520 11.19714
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.38844 | 1 | 20 | 20 | 20 | 20 |
3.6052 | 1 | 20 | 20 | 40 | 40 |
7.85369 | 1 | 20 | 20 | 60 | 60 |
11.19714 | 1 | 20 | 20 | 80 | 80 |
14.62503 | 1 | 20 | 20 | 100 | 100 |
Total | 5 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.38844 | 1 |
3.6052 | 1 |
7.85369 | 1 |
11.19714 | 1 |
14.62503 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.38844 5.38844 10.38844 15.38844
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.38844 | 1 | 0.2 | 1 |
3.6052 | 1 | 0.2 | 2 |
7.85369 | 1 | 0.2 | 3 |
11.19714 | 1 | 0.2 | 4 |
14.62503 | 1 | 0.2 | 5 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ Distance: Factor w/ 5 levels "0.38844","3.6052",..: 1 2 3 4 5
## $ Freq : int 1 1 1 1 1
## $ Rel_Freq: num 0.2 0.2 0.2 0.2 0.2
## $ Cum_Freq: int 1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.38844 | 1 |
3.6052 | 1 |
7.85369 | 1 |
11.19714 | 1 |
14.62503 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Nor$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3884 3.6052 7.8537 7.5339 11.1971 14.6250
library(pastecs)
stat.desc(df_Nor)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 5.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 6.050000e+02 NA NA NA NA NA NA
## max 4.637000e+03 NA NA NA NA NA NA
## range 4.032000e+03 NA NA NA NA NA NA
## sum 1.347500e+04 NA NA NA NA NA NA
## median 2.739000e+03 NA NA NA NA NA NA
## mean 2.695000e+03 NA NA NA NA NA NA
## SE.mean 6.617562e+02 NA NA NA NA NA NA
## CI.mean.0.95 1.837330e+03 NA NA NA NA NA NA
## var 2.189607e+06 NA NA NA NA NA NA
## std.dev 1.479732e+03 NA NA NA NA NA NA
## coef.var 5.490656e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 5.000000e+00 NA 5.0000000 NA 5.0000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.0000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.0000000
## min 1.502000e+03 NA 0.3884400 NA 7.6213000
## max 7.213980e+05 NA 14.6250300 NA 8.4683000
## range 7.198960e+05 NA 14.2365900 NA 0.8470000
## sum 7.297620e+05 NA 37.6695000 NA 40.7981000
## median 2.080000e+03 NA 7.8536900 NA 8.3200000
## mean 1.459524e+05 NA 7.5339000 NA 8.1596200
## SE.mean 1.438615e+05 NA 2.5524302 NA 0.1636695
## CI.mean.0.95 3.994236e+05 NA 7.0866824 NA 0.4544193
## var 1.034807e+11 NA 32.5745000 NA 0.1339385
## std.dev 3.216841e+05 NA 5.7074075 NA 0.3659761
## coef.var 2.204035e+00 NA 0.7575635 NA 0.0448521
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.328430e+01 NA NA NA
## max -7.249080e+01 NA NA NA
## range 7.935000e-01 NA NA NA
## sum -3.651219e+02 NA NA NA
## median -7.315000e+01 NA NA NA
## mean -7.302438e+01 NA NA NA
## SE.mean 1.475669e-01 NA NA NA
## CI.mean.0.95 4.097113e-01 NA NA NA
## var 1.088799e-01 NA NA NA
## std.dev 3.299695e-01 NA NA NA
## coef.var -4.518621e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.000000 NA
## nbr.null NA NA NA 0 2.000000 NA
## nbr.na NA NA NA 5 1.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 10.000000 NA
## range NA NA NA -Inf 10.000000 NA
## sum NA NA NA 0 14.000000 NA
## median NA NA NA NA 2.000000 NA
## mean NA NA NA NaN 3.500000 NA
## SE.mean NA NA NA NA 2.362908 NA
## CI.mean.0.95 NA NA NA NaN 7.519827 NA
## var NA NA NA NA 22.333333 NA
## std.dev NA NA NA NA 4.725816 NA
## coef.var NA NA NA NA 1.350233 NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_CostaRica %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 38 Costa Rica Heredia Heredia 0.26208 9/9/07
## 44 Costa Rica San José San Ignacio 4.57763 10/9/07
## 45 Costa Rica Alajuela Atenas 3.08459 10/11/07
## 46 Costa Rica San José 9.56251 10/11/07
## 51 Costa Rica Puntarenas Miramar 3.82425 10/24/07
## 102 Costa Rica Guanacaste Bagaces 17.65521 5/29/08
## 147 Costa Rica San José Daniel Flores 1.85787 9/6/08
## 153 Costa Rica San José San Isidro 16.24937 10/12/08
## 154 Costa Rica San José Santiago 12.85801 10/12/08
## 156 Costa Rica Puntarenas Golfito 11.74074 10/15/08
## 157 Costa Rica Puntarenas Miramar 8.92048 10/16/08
## 229 Costa Rica Puntarenas San Vito 18.00524 11/13/09
## 302 Costa Rica Alajuela Desamparados 6.88715 4/14/10
## 311 Costa Rica Heredia Ã\201ngeles 19.51432 4/27/10
## 347 Costa Rica Alajuela Desamparados 6.92174 5/22/10
## 395 Costa Rica Alajuela Desamparados 4.24199 7/30/10
## 459 Costa Rica Alajuela San Rafael 1.47396 9/29/10
## 469 Costa Rica San José Salitral 0.25254 10/1/10
## 470 Costa Rica San José Salitral 0.25254 10/1/10
## 480 Costa Rica Heredia Ã\201ngeles 14.81614 10/15/10
## 501 Costa Rica San José Escazú 3.67691 11/4/10
## 502 Costa Rica San José San Marcos 0.55804 11/4/10
## 503 Costa Rica Alajuela San Rafael 9.61692 11/4/10
## 504 Costa Rica Guanacaste Tilarán 10.21631 11/4/10
## 505 Costa Rica Cartago Orosà 19.28722 11/4/10
## 506 Costa Rica Puntarenas Golfito 7.87044 11/4/10
## 507 Costa Rica San José Tejar 6.49523 11/4/10
## 508 Costa Rica San José San Isidro 15.64997 11/4/10
## 509 Costa Rica Puntarenas Corredor 4.93053 11/4/10
## 510 Costa Rica Puntarenas Parrita 13.48919 11/4/10
## 511 Costa Rica Puntarenas Ciudad Cortés 20.06633 11/4/10
## 512 Costa Rica San José San Isidro 11.31047 11/4/10
## 513 Costa Rica San José Mercedes 8.21372 11/4/10
## 514 Costa Rica Alajuela Santiago 5.43516 11/5/10
## 529 Costa Rica Heredia Ã\201ngeles 19.54581 11/21/10
## 579 Costa Rica Limón Guápiles 17.23264 1/11/11
## 702 Costa Rica Heredia Ã\201ngeles 15.05161 5/8/11
## 780 Costa Rica Alajuela Upala 0.70048 7/12/11
## 819 Costa Rica San José San Isidro 21.67452 9/25/11
## 828 Costa Rica Cartago Cot 9.63616 10/31/11
## 884 Costa Rica Heredia Santo Domingo 21.95470 5/13/12
## 888 Costa Rica Guanacaste Tilarán 12.33807 5/31/12
## 889 Costa Rica Limón Siquirres 5.36500 6/14/12
## 913 Costa Rica San José Daniel Flores 4.89954 10/23/12
## 1098 Costa Rica Alajuela Sabanilla 4.87432 8/27/13
## 1156 Costa Rica Alajuela Sabanilla 10.32968 9/16/13
## 1157 Costa Rica Heredia Santo Domingo 9.85736 9/16/13
## 1169 Costa Rica Guanacaste Tilarán 12.21952 10/3/13
## 1173 Costa Rica Guanacaste Tilarán 12.18115 10/8/13
## 1289 Costa Rica Alajuela La Fortuna 9.84213 10/4/14
## 1301 Costa Rica Alajuela 5.57523 9/19/14
## 1308 Costa Rica Alajuela Desamparados 5.95519 11/1/14
## 1342 Costa Rica Alajuela Rio Segundo 11.96524 8/21/14
## 1364 Costa Rica Alajuela Desamparados 5.12667 8/10/14
## 1383 Costa Rica Cartago Cartago 3.07297 9/13/14
## 1384 Costa Rica Heredia Dulce Nombre de Jesus 10.01310 12/13/14
## 1385 Costa Rica San José Dulce Nombre de Jesus 2.92605 11/3/14
## 1386 Costa Rica San José San Isidro 10.73752 9/19/14
## 1404 Costa Rica San José San Isidro 22.32368 1/28/15
## 1406 Costa Rica San José Dulce Nombre de Jesus 8.39161 2/6/15
## 1461 Costa Rica Alajuela La Fortuna 5.96634 6/17/15
## 1475 Costa Rica Alajuela Atenas 6.80061 6/3/15
## 1528 Costa Rica San José Ã\201ngeles 9.53611 7/6/15
## 1529 Costa Rica San José Dulce Nombre de Jesus 3.71407 7/6/15
## 1600 Costa Rica San José San Juan 0.72957 10/29/15
## 1642 Costa Rica Alajuela Santo Domingo 3.21979 10/27/15
## 1643 Costa Rica Alajuela Alajuela 3.08916 11/18/15
## 1644 Costa Rica Alajuela Naranjo 2.08469 10/29/15
## 1646 Costa Rica Cartago 5.15142 10/15/15
## 1647 Costa Rica Cartago Cot 9.53493 3/20/15
## 1648 Costa Rica Cartago Cartago 2.94804 3/18/15
## 1649 Costa Rica Puntarenas Buenos Aires 0.35225 11/23/15
## 1650 Costa Rica San José San José 1.16705 9/25/15
## 1651 Costa Rica San José Mercedes 10.01198 11/5/15
## 1652 Costa Rica San José Santiago 8.27042 11/11/15
ggplot(data=df_CostaRica, aes(fill=State, y=Distance, x="Costa Rica")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_CostaRica, aes(fill=State, y=Distance, x="Costa Rica")) +
geom_bar(position="stack", stat="identity")
head(df_Alajuela %>%
select(Country, State, City, Distance, date))
## Country State City Distance date
## 45 Costa Rica Alajuela Atenas 3.08459 10/11/07
## 302 Costa Rica Alajuela Desamparados 6.88715 4/14/10
## 347 Costa Rica Alajuela Desamparados 6.92174 5/22/10
## 395 Costa Rica Alajuela Desamparados 4.24199 7/30/10
## 459 Costa Rica Alajuela San Rafael 1.47396 9/29/10
## 503 Costa Rica Alajuela San Rafael 9.61692 11/4/10
head(df_Alajuela)
## id date time America Country country_code State population
## 45 301 10/11/07 <NA> Costa Rica CR Alajuela 7014
## 302 1749 4/14/10 <NA> Costa Rica CR Alajuela 14448
## 347 1886 5/22/10 18:00:00 <NA> Costa Rica CR Alajuela 14448
## 395 2174 7/30/10 9:30:00 <NA> Costa Rica CR Alajuela 14448
## 459 2516 9/29/10 <NA> Costa Rica CR Alajuela 3624
## 503 2682 11/4/10 <NA> Costa Rica CR Alajuela 3624
## City Distance location_description latitude longitude
## 45 Atenas 3.08459 9.9869 -84.4070
## 302 Desamparados 6.88715 Above road 9.9323 -84.4453
## 347 Desamparados 6.92174 Above road 9.9290 -84.4428
## 395 Desamparados 4.24199 Above road 9.9271 -84.4568
## 459 San Rafael 1.47396 10.0757 -84.4793
## 503 San Rafael 9.61692 10.0421 -84.5577
## geolocation hazard_type landslide_type
## 45 (9.9869000000000003, -84.406999999999996) Landslide Mudslide
## 302 (9.9322999999999997, -84.445300000000003) Landslide Landslide
## 347 (9.9290000000000003, -84.442800000000005) Landslide Landslide
## 395 (9.9270999999999994, -84.456800000000001) Landslide Landslide
## 459 (10.075699999999999, -84.479299999999995) Landslide Mudslide
## 503 (10.0421, -84.557699999999997) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 45 Large Rain NA 14
## 302 Medium Downpour NA 0
## 347 Medium Downpour 3 0
## 395 Medium Rain NA 0
## 459 Medium Downpour NA 0
## 503 Medium Tropical cyclone Tropical Storm Tomas NA 0
## source_name
## 45 Agence France-Presse, afp.google.com
## 302
## 347 Costa Rica News
## 395 La Fortuna
## 459
## 503
## source_link
## 45 http://afp.google.com/article/ALeqM5hu6a8oyAM1ycq9nU_6Zyj_l7F0AA
## 302 http://www.insidecostarica.com/dailynews/2010/april/16/costarica10041602.htm
## 347 http://thecostaricanews.com/rains-cause-landslides-and-road-accidents-on-caldera/3255
## 395 https://lafortunatimes.wordpress.com/2010/07/30/landslide-caused-closure-of-san-jose-caldera-for-most-of-the-day-friday/
## 459 http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010
## 503 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
ggplot(data=df_Alajuela, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Alajuela, aes(x = "Alajuela", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Alajuela <- df_Alajuela %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Alajuela$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Alajuela, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors
library(qcc)
Distance <- df_Alajuela$Distance
names(Distance) <- df_Alajuela$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Rio Segundo 11.9652400 11.9652400 10.5708367 10.5708367
## Sabanilla 10.3296800 22.2949200 9.1258813 19.6967180
## La Fortuna 9.8421300 32.1370500 8.6951494 28.3918674
## San Rafael 9.6169200 41.7539700 8.4961849 36.8880523
## Desamparados 6.9217400 48.6757100 6.1150953 43.0031476
## Desamparados 6.8871500 55.5628600 6.0845364 49.0876840
## Atenas 6.8006100 62.3634700 6.0080816 55.0957655
## La Fortuna 5.9663400 68.3298100 5.2710356 60.3668011
## Desamparados 5.9551900 74.2850000 5.2611850 65.6279861
## 5.5752300 79.8602300 4.9255047 70.5534908
## Santiago 5.4351600 85.2953900 4.8017582 75.3552490
## Desamparados 5.1266700 90.4220600 4.5292189 79.8844679
## Sabanilla 4.8743200 95.2963800 4.3062772 84.1907451
## Desamparados 4.2419900 99.5383700 3.7476376 87.9383828
## Santo Domingo 3.2197900 102.7581600 2.8445626 90.7829454
## Alajuela 3.0891600 105.8473200 2.7291559 93.5121013
## Atenas 3.0845900 108.9319100 2.7251185 96.2372198
## Naranjo 2.0846900 111.0166000 1.8417447 98.0789646
## San Rafael 1.4739600 112.4905600 1.3021879 99.3811524
## Upala 0.7004800 113.1910400 0.6188476 100.0000000
stem(df_Alajuela$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1123334
## 0 | 555666777
## 1 | 0002
head(df_Alajuela)
## id date time America Country country_code State population
## 1 3762 7/12/11 <NA> Costa Rica CR Alajuela 4185
## 2 7486 10/27/15 <NA> Costa Rica CR Alajuela 5745
## 3 2703 11/5/10 <NA> Costa Rica CR Alajuela 2107
## 4 2516 9/29/10 <NA> Costa Rica CR Alajuela 3624
## 5 2682 11/4/10 <NA> Costa Rica CR Alajuela 3624
## 6 5408 8/27/13 <NA> Costa Rica CR Alajuela 1015
## City Distance location_description latitude longitude
## 1 Upala 0.70048 10.8916 -85.0141
## 2 Santo Domingo 3.21979 Above road 10.0600 -84.1841
## 3 Santiago 5.43516 9.9990 -84.4876
## 4 San Rafael 1.47396 10.0757 -84.4793
## 5 San Rafael 9.61692 10.0421 -84.5577
## 6 Sabanilla 4.87432 10.1181 -84.2146
## geolocation hazard_type landslide_type
## 1 (10.8916, -85.014099999999999) Landslide Landslide
## 2 (10.06, -84.184100000000001) Landslide Mudslide
## 3 (9.9990000000000006, -84.4876) Landslide Landslide
## 4 (10.075699999999999, -84.479299999999995) Landslide Mudslide
## 5 (10.0421, -84.557699999999997) Landslide Landslide
## 6 (10.1181, -84.214600000000004) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 1 Medium Downpour NA 0
## 2 Small Rain 0 0
## 3 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 4 Medium Downpour NA 0
## 5 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 6 Medium Downpour NA NA
## source_name
## 1
## 2 La Prensa Libre
## 3
## 4
## 5
## 6 insidecostarica.com
## source_link
## 1 http://www.ticotimes.net/Current-Edition/News-Briefs/Upala-on-yellow-alert-after-earthquake-heavy-rains_Wednesday-July-13-2011
## 2 http://www.laprensalibre.cr/Noticias/detalle/45060/430/deslizamiento-deja-bus-atrapado-en-alajuela
## 3 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 4 http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010
## 5 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 6 http://insidecostarica.com/2013/08/28/flooding-and-landslides-after-heavy-downpours-across-the-country/
## prop ypos
## 1 0.6188476 0.3094238
## 2 2.8445626 2.0411289
## 3 4.8017582 5.8642893
## 4 1.3021879 8.9162623
## 5 8.4961849 13.8154486
## 6 4.3062772 20.2166797
knitr::kable(head(df_Alajuela))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3762 | 7/12/11 | NA | Costa Rica | CR | Alajuela | 4185 | Upala | 0.70048 | 10.8916 | -85.0141 | (10.8916, -85.014099999999999) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://www.ticotimes.net/Current-Edition/News-Briefs/Upala-on-yellow-alert-after-earthquake-heavy-rains_Wednesday-July-13-2011 | 0.6188476 | 0.3094238 | ||||
7486 | 10/27/15 | NA | Costa Rica | CR | Alajuela | 5745 | Santo Domingo | 3.21979 | Above road | 10.0600 | -84.1841 | (10.06, -84.184100000000001) | Landslide | Mudslide | Small | Rain | 0 | 0 | La Prensa Libre | http://www.laprensalibre.cr/Noticias/detalle/45060/430/deslizamiento-deja-bus-atrapado-en-alajuela | 2.8445626 | 2.0411289 | ||
2703 | 11/5/10 | NA | Costa Rica | CR | Alajuela | 2107 | Santiago | 5.43516 | 9.9990 | -84.4876 | (9.9990000000000006, -84.4876) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Tomas | NA | 0 | http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ | 4.8017582 | 5.8642893 | |||
2516 | 9/29/10 | NA | Costa Rica | CR | Alajuela | 3624 | San Rafael | 1.47396 | 10.0757 | -84.4793 | (10.075699999999999, -84.479299999999995) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://www.ticotimes.net/News/Daily-News/Inter-American-Highway-Reopens-Caldera-Highway-Under-Repair_Monday-October-04-2010 | 1.3021879 | 8.9162623 | ||||
2682 | 11/4/10 | NA | Costa Rica | CR | Alajuela | 3624 | San Rafael | 9.61692 | 10.0421 | -84.5577 | (10.0421, -84.557699999999997) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Tomas | NA | 0 | http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ | 8.4961849 | 13.8154486 | |||
5408 | 8/27/13 | NA | Costa Rica | CR | Alajuela | 1015 | Sabanilla | 4.87432 | 10.1181 | -84.2146 | (10.1181, -84.214600000000004) | Landslide | Landslide | Medium | Downpour | NA | NA | insidecostarica.com | http://insidecostarica.com/2013/08/28/flooding-and-landslides-after-heavy-downpours-across-the-country/ | 4.3062772 | 20.2166797 |
stem(df_Alajuela$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1123334
## 0 | 555666777
## 1 | 0002
stem(df_Alajuela$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 75
## 2 | 1112
## 4 | 29146
## 6 | 00899
## 8 | 68
## 10 | 3
## 12 | 0
library(forecast)
data_serie<- ts(df_Alajuela$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 0.70048 3.21979 5.43516 1.47396 9.61692 4.87432
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.70048 | 1 | 5 | 5 | 5 | 5 |
1.47396 | 1 | 5 | 5 | 10 | 10 |
2.08469 | 1 | 5 | 5 | 15 | 15 |
3.08459 | 1 | 5 | 5 | 20 | 20 |
3.08916 | 1 | 5 | 5 | 25 | 25 |
3.21979 | 1 | 5 | 5 | 30 | 30 |
4.24199 | 1 | 5 | 5 | 35 | 35 |
4.87432 | 1 | 5 | 5 | 40 | 40 |
5.12667 | 1 | 5 | 5 | 45 | 45 |
5.43516 | 1 | 5 | 5 | 50 | 50 |
5.57523 | 1 | 5 | 5 | 55 | 55 |
5.95519 | 1 | 5 | 5 | 60 | 60 |
5.96634 | 1 | 5 | 5 | 65 | 65 |
6.80061 | 1 | 5 | 5 | 70 | 70 |
6.88715 | 1 | 5 | 5 | 75 | 75 |
6.92174 | 1 | 5 | 5 | 80 | 80 |
9.61692 | 1 | 5 | 5 | 85 | 85 |
9.84213 | 1 | 5 | 5 | 90 | 90 |
10.32968 | 1 | 5 | 5 | 95 | 95 |
11.96524 | 1 | 5 | 5 | 100 | 100 |
Total | 20 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 21 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 5 5 5 5 5 5 5 5 5 5 ...
## $ val% : num 5 5 5 5 5 5 5 5 5 5 ...
## $ %cum : num 5 10 15 20 25 30 35 40 45 50 ...
## $ val%cum: num 5 10 15 20 25 30 35 40 45 50 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.70048 | 1 |
1.47396 | 1 |
2.08469 | 1 |
3.08459 | 1 |
3.08916 | 1 |
3.21979 | 1 |
4.24199 | 1 |
4.87432 | 1 |
5.12667 | 1 |
5.43516 | 1 |
5.57523 | 1 |
5.95519 | 1 |
5.96634 | 1 |
6.80061 | 1 |
6.88715 | 1 |
6.92174 | 1 |
9.61692 | 1 |
9.84213 | 1 |
10.32968 | 1 |
11.96524 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.70048 3.70048 6.70048 9.70048 12.70048
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.70048 | 1 | 0.05 | 1 |
1.47396 | 1 | 0.05 | 2 |
2.08469 | 1 | 0.05 | 3 |
3.08459 | 1 | 0.05 | 4 |
3.08916 | 1 | 0.05 | 5 |
3.21979 | 1 | 0.05 | 6 |
4.24199 | 1 | 0.05 | 7 |
4.87432 | 1 | 0.05 | 8 |
5.12667 | 1 | 0.05 | 9 |
5.43516 | 1 | 0.05 | 10 |
5.57523 | 1 | 0.05 | 11 |
5.95519 | 1 | 0.05 | 12 |
5.96634 | 1 | 0.05 | 13 |
6.80061 | 1 | 0.05 | 14 |
6.88715 | 1 | 0.05 | 15 |
6.92174 | 1 | 0.05 | 16 |
9.61692 | 1 | 0.05 | 17 |
9.84213 | 1 | 0.05 | 18 |
10.32968 | 1 | 0.05 | 19 |
11.96524 | 1 | 0.05 | 20 |
str(Freq_table)
## 'data.frame': 20 obs. of 4 variables:
## $ Distance: Factor w/ 20 levels "0.70048","1.47396",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Freq : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Rel_Freq: num 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 ...
## $ Cum_Freq: int 1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.70048 | 1 |
1.47396 | 1 |
2.08469 | 1 |
3.08459 | 1 |
3.08916 | 1 |
3.21979 | 1 |
4.24199 | 1 |
4.87432 | 1 |
5.12667 | 1 |
5.43516 | 1 |
5.57523 | 1 |
5.95519 | 1 |
5.96634 | 1 |
6.80061 | 1 |
6.88715 | 1 |
6.92174 | 1 |
9.61692 | 1 |
9.84213 | 1 |
10.32968 | 1 |
11.96524 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Alajuela$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7005 3.1871 5.5052 5.6596 6.8958 11.9652
library(pastecs)
stat.desc(df_Alajuela)
## id date time America Country country_code State
## nbr.val 2.000000e+01 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 3.010000e+02 NA NA NA NA NA NA
## max 7.488000e+03 NA NA NA NA NA NA
## range 7.187000e+03 NA NA NA NA NA NA
## sum 9.718800e+04 NA NA NA NA NA NA
## median 5.878000e+03 NA NA NA NA NA NA
## mean 4.859400e+03 NA NA NA NA NA NA
## SE.mean 5.261514e+02 NA NA NA NA NA NA
## CI.mean.0.95 1.101248e+03 NA NA NA NA NA NA
## var 5.536707e+06 NA NA NA NA NA NA
## std.dev 2.353021e+03 NA NA NA NA NA NA
## coef.var 4.842204e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 2.000000e+01 NA 20.0000000 NA 20.00000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 1.015000e+03 NA 0.7004800 NA 9.91890000
## max 4.749400e+04 NA 11.9652400 NA 10.89160000
## range 4.647900e+04 NA 11.2647600 NA 0.97270000
## sum 1.924900e+05 NA 113.1910400 NA 202.24760000
## median 7.014000e+03 NA 5.5051950 NA 10.04315000
## mean 9.624500e+03 NA 5.6595520 NA 10.11238000
## SE.mean 2.281502e+03 NA 0.6812501 NA 0.05493583
## CI.mean.0.95 4.775238e+03 NA 1.4258729 NA 0.11498201
## var 1.041050e+08 NA 9.2820347 NA 0.06035891
## std.dev 1.020319e+04 NA 3.0466432 NA 0.24568050
## coef.var 1.060126e+00 NA 0.5383188 NA 0.02429502
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+01 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.501410e+01 NA NA NA
## max -8.418070e+01 NA NA NA
## range 8.334000e-01 NA NA NA
## sum -1.688552e+03 NA NA NA
## median -8.444405e+01 NA NA NA
## mean -8.442758e+01 NA NA NA
## SE.mean 4.594981e-02 NA NA NA
## CI.mean.0.95 9.617405e-02 NA NA NA
## var 4.222770e-02 NA NA NA
## std.dev 2.054938e-01 NA NA NA
## coef.var -2.433965e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 11.0000000 18.0000000
## nbr.null NA NA NA 10.0000000 15.0000000
## nbr.na NA NA NA 9.0000000 2.0000000
## min NA NA NA 0.0000000 0.0000000
## max NA NA NA 3.0000000 14.0000000
## range NA NA NA 3.0000000 14.0000000
## sum NA NA NA 3.0000000 16.0000000
## median NA NA NA 0.0000000 0.0000000
## mean NA NA NA 0.2727273 0.8888889
## SE.mean NA NA NA 0.2727273 0.7749716
## CI.mean.0.95 NA NA NA 0.6076742 1.6350471
## var NA NA NA 0.8181818 10.8104575
## std.dev NA NA NA 0.9045340 3.2879260
## coef.var NA NA NA 3.3166248 3.6989168
## source_name source_link prop ypos
## nbr.val NA NA 20.0000000 20.0000000
## nbr.null NA NA 0.0000000 0.0000000
## nbr.na NA NA 0.0000000 0.0000000
## min NA NA 0.6188476 0.3094238
## max NA NA 10.5708367 97.5372476
## range NA NA 9.9519891 97.2278239
## sum NA NA 100.0000000 984.6035428
## median NA NA 4.8636314 51.7474020
## mean NA NA 5.0000000 49.2301771
## SE.mean NA NA 0.6018587 7.4144621
## CI.mean.0.95 NA NA 1.2597047 15.5186476
## var NA NA 7.2446780 1099.4849742
## std.dev NA NA 2.6915940 33.1584827
## coef.var NA NA 0.5383188 0.6735398
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Cartago %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 505 Costa Rica Cartago Orosà 19.28722 11/4/10
## 828 Costa Rica Cartago Cot 9.63616 10/31/11
## 1383 Costa Rica Cartago Cartago 3.07297 9/13/14
## 1646 Costa Rica Cartago 5.15142 10/15/15
## 1647 Costa Rica Cartago Cot 9.53493 3/20/15
## 1648 Costa Rica Cartago Cartago 2.94804 3/18/15
head(df_Cartago)
## id date time America Country country_code State population
## 505 2684 11/4/10 <NA> Costa Rica CR Cartago 4350
## 828 4031 10/31/11 <NA> Costa Rica CR Cartago 6784
## 1383 6695 9/13/14 <NA> Costa Rica CR Cartago 26594
## 1646 7490 10/15/15 <NA> Costa Rica CR Cartago 4060
## 1647 7491 3/20/15 8:00 <NA> Costa Rica CR Cartago 6784
## 1648 7492 3/18/15 <NA> Costa Rica CR Cartago 26594
## City Distance location_description latitude longitude
## 505 Orosà 19.28722 9.6227 -83.8359
## 828 Cot 9.63616 Natural slope 9.9792 -83.8525
## 1383 Cartago 3.07297 Below road 9.8895 -83.9316
## 1646 5.15142 Above road 9.7917 -83.9815
## 1647 Cot 9.53493 Natural slope 9.9786 -83.8542
## 1648 Cartago 2.94804 Urban area 9.8815 -83.9401
## geolocation hazard_type landslide_type
## 505 (9.6227, -83.835899999999995) Landslide Landslide
## 828 (9.9792000000000005, -83.852500000000006) Landslide Landslide
## 1383 (9.8895, -83.931600000000003) Landslide Landslide
## 1646 (9.7917000000000005, -83.981499999999997) Landslide Landslide
## 1647 (9.9786000000000001, -83.854200000000006) Landslide Other
## 1648 (9.8815000000000008, -83.940100000000001) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 505 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 828 Medium Downpour NA 0
## 1383 Small Rain 0 0
## 1646 Medium Downpour 0 0
## 1647 Medium Volcano 0 0
## 1648 Medium Downpour 0 0
## source_name
## 505
## 828 Inside Costa Rica
## 1383 Ahora
## 1646 crhoy
## 1647 Costa Rica Star
## 1648 CIUDADREGION
## source_link
## 505 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 828 http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm
## 1383 http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html
## 1646 http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/
## 1647 http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/
## 1648 http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745
ggplot(data=df_Cartago, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Cartago, aes(x = "Cartago", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Cartago <- df_Cartago %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Cartago$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Cartago, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Cartago$Distance
names(Distance) <- df_Cartago$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Orosà 19.287220 19.287220 38.861440 38.861440
## Cot 9.636160 28.923380 19.415709 58.277148
## Cot 9.534930 38.458310 19.211743 77.488891
## 5.151420 43.609730 10.379495 87.868386
## Cartago 3.072970 46.682700 6.191667 94.060052
## Cartago 2.948040 49.630740 5.939948 100.000000
stem(df_Cartago$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 33
## 0 | 5
## 1 | 00
## 1 | 9
head(df_Cartago)
## id date time America Country country_code State population City
## 1 2684 11/4/10 <NA> Costa Rica CR Cartago 4350 OrosÃ
## 2 4031 10/31/11 <NA> Costa Rica CR Cartago 6784 Cot
## 3 7491 3/20/15 8:00 <NA> Costa Rica CR Cartago 6784 Cot
## 4 6695 9/13/14 <NA> Costa Rica CR Cartago 26594 Cartago
## 5 7492 3/18/15 <NA> Costa Rica CR Cartago 26594 Cartago
## 6 7490 10/15/15 <NA> Costa Rica CR Cartago 4060
## Distance location_description latitude longitude
## 1 19.28722 9.6227 -83.8359
## 2 9.63616 Natural slope 9.9792 -83.8525
## 3 9.53493 Natural slope 9.9786 -83.8542
## 4 3.07297 Below road 9.8895 -83.9316
## 5 2.94804 Urban area 9.8815 -83.9401
## 6 5.15142 Above road 9.7917 -83.9815
## geolocation hazard_type landslide_type
## 1 (9.6227, -83.835899999999995) Landslide Landslide
## 2 (9.9792000000000005, -83.852500000000006) Landslide Landslide
## 3 (9.9786000000000001, -83.854200000000006) Landslide Other
## 4 (9.8895, -83.931600000000003) Landslide Landslide
## 5 (9.8815000000000008, -83.940100000000001) Landslide Landslide
## 6 (9.7917000000000005, -83.981499999999997) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 1 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 2 Medium Downpour NA 0
## 3 Medium Volcano 0 0
## 4 Small Rain 0 0
## 5 Medium Downpour 0 0
## 6 Medium Downpour 0 0
## source_name
## 1
## 2 Inside Costa Rica
## 3 Costa Rica Star
## 4 Ahora
## 5 CIUDADREGION
## 6 crhoy
## source_link
## 1 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 2 http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm
## 3 http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/
## 4 http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html
## 5 http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745
## 6 http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/
## prop ypos
## 1 38.861440 19.43072
## 2 19.415709 48.56929
## 3 19.211743 67.88302
## 4 6.191667 80.58472
## 5 5.939948 86.65053
## 6 10.379495 94.81025
knitr::kable(head(df_Cartago))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2684 | 11/4/10 | NA | Costa Rica | CR | Cartago | 4350 | Orosà | 19.28722 | 9.6227 | -83.8359 | (9.6227, -83.835899999999995) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Tomas | NA | 0 | http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ | 38.861440 | 19.43072 | |||
4031 | 10/31/11 | NA | Costa Rica | CR | Cartago | 6784 | Cot | 9.63616 | Natural slope | 9.9792 | -83.8525 | (9.9792000000000005, -83.852500000000006) | Landslide | Landslide | Medium | Downpour | NA | 0 | Inside Costa Rica | http://www.insidecostarica.com/dailynews/2011/october/31/costarica11103102.htm | 19.415709 | 48.56929 | ||
7491 | 3/20/15 | 8:00 | NA | Costa Rica | CR | Cartago | 6784 | Cot | 9.53493 | Natural slope | 9.9786 | -83.8542 | (9.9786000000000001, -83.854200000000006) | Landslide | Other | Medium | Volcano | 0 | 0 | Costa Rica Star | http://news.co.cr/landslides-irazu-volcano-restrict-travel-tourism/37698/ | 19.211743 | 67.88302 | |
6695 | 9/13/14 | NA | Costa Rica | CR | Cartago | 26594 | Cartago | 3.07297 | Below road | 9.8895 | -83.9316 | (9.8895, -83.931600000000003) | Landslide | Landslide | Small | Rain | 0 | 0 | Ahora | http://www.ahora.cr/nacionales/Derrumbe-pone-riesgo-linea-Cartago_0_1439256064.html | 6.191667 | 80.58472 | ||
7492 | 3/18/15 | NA | Costa Rica | CR | Cartago | 26594 | Cartago | 2.94804 | Urban area | 9.8815 | -83.9401 | (9.8815000000000008, -83.940100000000001) | Landslide | Landslide | Medium | Downpour | 0 | 0 | CIUDADREGION | http://www.ciudadregion.com/cartago/fuerte-aguacero-manana-del-miercoles-causo-inundaciones-cartago_1426729745 | 5.939948 | 86.65053 | ||
7490 | 10/15/15 | NA | Costa Rica | CR | Cartago | 4060 | 5.15142 | Above road | 9.7917 | -83.9815 | (9.7917000000000005, -83.981499999999997) | Landslide | Landslide | Medium | Downpour | 0 | 0 | crhoy | http://www.crhoy.com/carril-cerrado-sobre-interamericana-sur-por-deslizamiento/ | 10.379495 | 94.81025 |
stem(df_Cartago$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 33
## 0 | 5
## 1 | 00
## 1 | 9
stem(df_Cartago$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 91
## 4 | 2
## 6 |
## 8 | 56
## 10 |
## 12 |
## 14 |
## 16 |
## 18 | 3
library(forecast)
data_serie<- ts(df_Cartago$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 19.28722 9.63616 9.53493 3.07297 2.94804 5.15142
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
2.94804 | 1 | 16.7 | 16.7 | 16.7 | 16.7 |
3.07297 | 1 | 16.7 | 16.7 | 33.3 | 33.3 |
5.15142 | 1 | 16.7 | 16.7 | 50.0 | 50.0 |
9.53493 | 1 | 16.7 | 16.7 | 66.7 | 66.7 |
9.63616 | 1 | 16.7 | 16.7 | 83.3 | 83.3 |
19.28722 | 1 | 16.7 | 16.7 | 100.0 | 100.0 |
Total | 6 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 7 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 6
## $ % : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ val% : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ %cum : num 16.7 33.3 50 66.7 83.3 100 100
## $ val%cum: num 16.7 33.3 50 66.7 83.3 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
2.94804 | 1 |
3.07297 | 1 |
5.15142 | 1 |
9.53493 | 1 |
9.63616 | 1 |
19.28722 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 2.94804 8.94804 14.94804 20.94804
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
2.94804 | 1 | 0.1666667 | 1 |
3.07297 | 1 | 0.1666667 | 2 |
5.15142 | 1 | 0.1666667 | 3 |
9.53493 | 1 | 0.1666667 | 4 |
9.63616 | 1 | 0.1666667 | 5 |
19.28722 | 1 | 0.1666667 | 6 |
str(Freq_table)
## 'data.frame': 6 obs. of 4 variables:
## $ Distance: Factor w/ 6 levels "2.94804","3.07297",..: 1 2 3 4 5 6
## $ Freq : int 1 1 1 1 1 1
## $ Rel_Freq: num 0.167 0.167 0.167 0.167 0.167 ...
## $ Cum_Freq: int 1 2 3 4 5 6
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
2.94804 | 1 |
3.07297 | 1 |
5.15142 | 1 |
9.53493 | 1 |
9.63616 | 1 |
19.28722 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Cartago$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.948 3.593 7.343 8.272 9.611 19.287
library(pastecs)
stat.desc(df_Cartago)
## id date time America Country country_code State
## nbr.val 6.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.684000e+03 NA NA NA NA NA NA
## max 7.492000e+03 NA NA NA NA NA NA
## range 4.808000e+03 NA NA NA NA NA NA
## sum 3.588300e+04 NA NA NA NA NA NA
## median 7.092500e+03 NA NA NA NA NA NA
## mean 5.980500e+03 NA NA NA NA NA NA
## SE.mean 8.567926e+02 NA NA NA NA NA NA
## CI.mean.0.95 2.202455e+03 NA NA NA NA NA NA
## var 4.404561e+06 NA NA NA NA NA NA
## std.dev 2.098705e+03 NA NA NA NA NA NA
## coef.var 3.509246e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 6.000000e+00 NA 6.0000000 NA 6.00000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 4.060000e+03 NA 2.9480400 NA 9.62270000
## max 2.659400e+04 NA 19.2872200 NA 9.97920000
## range 2.253400e+04 NA 16.3391800 NA 0.35650000
## sum 7.516600e+04 NA 49.6307400 NA 59.14320000
## median 6.784000e+03 NA 7.3431750 NA 9.88550000
## mean 1.252767e+04 NA 8.2717900 NA 9.85720000
## SE.mean 4.473174e+03 NA 2.5159722 NA 0.05493519
## CI.mean.0.95 1.149866e+04 NA 6.4675124 NA 0.14121539
## var 1.200557e+08 NA 37.9806957 NA 0.01810725
## std.dev 1.095699e+04 NA 6.1628480 NA 0.13456317
## coef.var 8.746236e-01 NA 0.7450441 NA 0.01365126
## longitude geolocation hazard_type landslide_type
## nbr.val 6.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.398150e+01 NA NA NA
## max -8.383590e+01 NA NA NA
## range 1.456000e-01 NA NA NA
## sum -5.033958e+02 NA NA NA
## median -8.389290e+01 NA NA NA
## mean -8.389930e+01 NA NA NA
## SE.mean 2.429580e-02 NA NA NA
## CI.mean.0.95 6.245435e-02 NA NA NA
## var 3.541716e-03 NA NA NA
## std.dev 5.951232e-02 NA NA NA
## coef.var -7.093303e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 6 NA
## nbr.null NA NA NA 4 6 NA
## nbr.na NA NA NA 2 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 6.0000000 6.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 5.9399477 19.4307198
## max NA 38.8614395 94.8102527
## range NA 32.9214918 75.3795329
## sum NA 100.0000000 397.9285419
## median NA 14.7956186 74.2338720
## mean NA 16.6666667 66.3214236
## SE.mean NA 5.0693827 11.4668254
## CI.mean.0.95 NA 13.0312632 29.4764131
## var NA 154.1918484 788.9285109
## std.dev NA 12.4174010 28.0878712
## coef.var NA 0.7450441 0.4235113
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Guanacaste %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 102 Costa Rica Guanacaste Bagaces 17.65521 5/29/08
## 504 Costa Rica Guanacaste Tilarán 10.21631 11/4/10
## 888 Costa Rica Guanacaste Tilarán 12.33807 5/31/12
## 1169 Costa Rica Guanacaste Tilarán 12.21952 10/3/13
## 1173 Costa Rica Guanacaste Tilarán 12.18115 10/8/13
head(df_Guanacaste)
## id date time America Country country_code State population
## 102 556 5/29/08 <NA> Costa Rica CR Guanacaste 4108
## 504 2683 11/4/10 <NA> Costa Rica CR Guanacaste 7301
## 888 4375 5/31/12 <NA> Costa Rica CR Guanacaste 7301
## 1169 5571 10/3/13 <NA> Costa Rica CR Guanacaste 7301
## 1173 5591 10/8/13 Morning <NA> Costa Rica CR Guanacaste 7301
## City Distance location_description latitude longitude
## 102 Bagaces 17.65521 10.4024 -85.3555
## 504 Tilarán 10.21631 10.4548 -84.8751
## 888 Tilarán 12.33807 10.5562 -84.8952
## 1169 Tilarán 12.21952 10.5543 -84.8946
## 1173 Tilarán 12.18115 10.5546 -84.8955
## geolocation hazard_type landslide_type
## 102 (10.4024, -85.355500000000006) Landslide Landslide
## 504 (10.454800000000001, -84.875100000000003) Landslide Landslide
## 888 (10.5562, -84.895200000000003) Landslide Landslide
## 1169 (10.5543, -84.894599999999997) Landslide Landslide
## 1173 (10.554600000000001, -84.895499999999998) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 102 Medium Tropical cyclone Tropical Storm Alma NA NA
## 504 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 888 Large Downpour NA NA
## 1169 Medium Mining digging NA NA
## 1173 Medium Rain NA 2
## source_name
## 102
## 504
## 888
## 1169 www.ticotimes.net
## 1173 insidecostarica.com
## source_link
## 102 http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
## 504 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 888 http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129
## 1169 http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013
## 1173 http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/
ggplot(data=df_Guanacaste, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Guanacaste, aes(x = "Guanacaste", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Guanacaste <- df_Guanacaste %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Guanacaste$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Guanacaste, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Guanacaste$Distance
names(Distance) <- df_Guanacaste$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Bagaces 17.65521 17.65521 27.32571 27.32571
## Tilarán 12.33807 29.99328 19.09615 46.42185
## Tilarán 12.21952 42.21280 18.91266 65.33451
## Tilarán 12.18115 54.39395 18.85328 84.18779
## Tilarán 10.21631 64.61026 15.81221 100.00000
stem(df_Guanacaste$"Distance")
##
## The decimal point is at the |
##
## 10 | 2
## 12 | 223
## 14 |
## 16 | 7
head(df_Guanacaste)
## id date time America Country country_code State population
## 1 2683 11/4/10 <NA> Costa Rica CR Guanacaste 7301
## 2 4375 5/31/12 <NA> Costa Rica CR Guanacaste 7301
## 3 5571 10/3/13 <NA> Costa Rica CR Guanacaste 7301
## 4 5591 10/8/13 Morning <NA> Costa Rica CR Guanacaste 7301
## 5 556 5/29/08 <NA> Costa Rica CR Guanacaste 4108
## City Distance location_description latitude longitude
## 1 Tilarán 10.21631 10.4548 -84.8751
## 2 Tilarán 12.33807 10.5562 -84.8952
## 3 Tilarán 12.21952 10.5543 -84.8946
## 4 Tilarán 12.18115 10.5546 -84.8955
## 5 Bagaces 17.65521 10.4024 -85.3555
## geolocation hazard_type landslide_type
## 1 (10.454800000000001, -84.875100000000003) Landslide Landslide
## 2 (10.5562, -84.895200000000003) Landslide Landslide
## 3 (10.5543, -84.894599999999997) Landslide Landslide
## 4 (10.554600000000001, -84.895499999999998) Landslide Landslide
## 5 (10.4024, -85.355500000000006) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 1 Medium Tropical cyclone Tropical Storm Tomas NA 0
## 2 Large Downpour NA NA
## 3 Medium Mining digging NA NA
## 4 Medium Rain NA 2
## 5 Medium Tropical cyclone Tropical Storm Alma NA NA
## source_name
## 1
## 2
## 3 www.ticotimes.net
## 4 insidecostarica.com
## 5
## source_link
## 1 http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/
## 2 http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129
## 3 http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013
## 4 http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/
## 5 http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument
## prop ypos
## 1 15.81221 7.906105
## 2 19.09615 25.360283
## 3 18.91266 44.364688
## 4 18.85328 63.247656
## 5 27.32571 86.337147
knitr::kable(head(df_Guanacaste))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2683 | 11/4/10 | NA | Costa Rica | CR | Guanacaste | 7301 | Tilarán | 10.21631 | 10.4548 | -84.8751 | (10.454800000000001, -84.875100000000003) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Tomas | NA | 0 | http://fortunatimes.com/2010/11/06/no-passage-to-the-south-and-central-pacific/ | 15.81221 | 7.906105 | |||
4375 | 5/31/12 | NA | Costa Rica | CR | Guanacaste | 7301 | Tilarán | 12.33807 | 10.5562 | -84.8952 | (10.5562, -84.895200000000003) | Landslide | Landslide | Large | Downpour | NA | NA | http://thecostaricanews.com/landslides-and-wash-outs-continue-to-cause-problems-in-northern-costa-rica/12129 | 19.09615 | 25.360283 | ||||
5571 | 10/3/13 | NA | Costa Rica | CR | Guanacaste | 7301 | Tilarán | 12.21952 | 10.5543 | -84.8946 | (10.5543, -84.894599999999997) | Landslide | Landslide | Medium | Mining digging | NA | NA | www.ticotimes.net | http://www.ticotimes.net/More-news/News-Briefs/TRAVEL-ALERT-UPDATE-Rains-landslides-close-eight-routes-across-Costa-Rica_Friday-October-04-2013 | 18.91266 | 44.364688 | |||
5591 | 10/8/13 | Morning | NA | Costa Rica | CR | Guanacaste | 7301 | Tilarán | 12.18115 | 10.5546 | -84.8955 | (10.554600000000001, -84.895499999999998) | Landslide | Landslide | Medium | Rain | NA | 2 | insidecostarica.com | http://insidecostarica.com/2013/10/14/bodies-man-son-buried-landslide-nuevo-arenal-located/ | 18.85328 | 63.247656 | ||
556 | 5/29/08 | NA | Costa Rica | CR | Guanacaste | 4108 | Bagaces | 17.65521 | 10.4024 | -85.3555 | (10.4024, -85.355500000000006) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Alma | NA | NA | http://www.reliefweb.int/rw/RWB.NSF/db900SID/ASAZ-7FHCHL?OpenDocument | 27.32571 | 86.337147 |
stem(df_Guanacaste$"Distance")
##
## The decimal point is at the |
##
## 10 | 2
## 12 | 223
## 14 |
## 16 | 7
stem(df_Guanacaste$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 10 | 2
## 11 |
## 12 | 223
## 13 |
## 14 |
## 15 |
## 16 |
## 17 | 7
library(forecast)
data_serie<- ts(df_Guanacaste$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May
## 2007 10.21631 12.33807 12.21952 12.18115 17.65521
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
10.21631 | 1 | 20 | 20 | 20 | 20 |
12.18115 | 1 | 20 | 20 | 40 | 40 |
12.21952 | 1 | 20 | 20 | 60 | 60 |
12.33807 | 1 | 20 | 20 | 80 | 80 |
17.65521 | 1 | 20 | 20 | 100 | 100 |
Total | 5 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
10.21631 | 1 |
12.18115 | 1 |
12.21952 | 1 |
12.33807 | 1 |
17.65521 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 10.21631 13.21631 16.21631 19.21631
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
10.21631 | 1 | 0.2 | 1 |
12.18115 | 1 | 0.2 | 2 |
12.21952 | 1 | 0.2 | 3 |
12.33807 | 1 | 0.2 | 4 |
17.65521 | 1 | 0.2 | 5 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ Distance: Factor w/ 5 levels "10.21631","12.18115",..: 1 2 3 4 5
## $ Freq : int 1 1 1 1 1
## $ Rel_Freq: num 0.2 0.2 0.2 0.2 0.2
## $ Cum_Freq: int 1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
10.21631 | 1 |
12.18115 | 1 |
12.21952 | 1 |
12.33807 | 1 |
17.65521 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Guanacaste$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.22 12.18 12.22 12.92 12.34 17.66
library(pastecs)
stat.desc(df_Guanacaste)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 5.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 5.560000e+02 NA NA NA NA NA NA
## max 5.591000e+03 NA NA NA NA NA NA
## range 5.035000e+03 NA NA NA NA NA NA
## sum 1.877600e+04 NA NA NA NA NA NA
## median 4.375000e+03 NA NA NA NA NA NA
## mean 3.755200e+03 NA NA NA NA NA NA
## SE.mean 9.601025e+02 NA NA NA NA NA NA
## CI.mean.0.95 2.665672e+03 NA NA NA NA NA NA
## var 4.608984e+06 NA NA NA NA NA NA
## std.dev 2.146854e+03 NA NA NA NA NA NA
## coef.var 5.717018e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 5.000000e+00 NA 5.0000000 NA 5.000000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.000000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.000000000
## min 4.108000e+03 NA 10.2163100 NA 10.402400000
## max 7.301000e+03 NA 17.6552100 NA 10.556200000
## range 3.193000e+03 NA 7.4389000 NA 0.153800000
## sum 3.331200e+04 NA 64.6102600 NA 52.522300000
## median 7.301000e+03 NA 12.2195200 NA 10.554300000
## mean 6.662400e+03 NA 12.9220520 NA 10.504460000
## SE.mean 6.386000e+02 NA 1.2471437 NA 0.032060437
## CI.mean.0.95 1.773038e+03 NA 3.4626259 NA 0.089014042
## var 2.039050e+06 NA 7.7768366 NA 0.005139358
## std.dev 1.427953e+03 NA 2.7886980 NA 0.071689316
## coef.var 2.143301e-01 NA 0.2158092 NA 0.006824655
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.535550e+01 NA NA NA
## max -8.487510e+01 NA NA NA
## range 4.804000e-01 NA NA NA
## sum -4.249159e+02 NA NA NA
## median -8.489520e+01 NA NA NA
## mean -8.498318e+01 NA NA NA
## SE.mean 9.316065e-02 NA NA NA
## CI.mean.0.95 2.586554e-01 NA NA NA
## var 4.339454e-02 NA NA NA
## std.dev 2.083136e-01 NA NA NA
## coef.var -2.451233e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 5 3.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 2.000000 NA
## range NA NA NA -Inf 2.000000 NA
## sum NA NA NA 0 2.000000 NA
## median NA NA NA NA 1.000000 NA
## mean NA NA NA NaN 1.000000 NA
## SE.mean NA NA NA NA 1.000000 NA
## CI.mean.0.95 NA NA NA NaN 12.706205 NA
## var NA NA NA NA 2.000000 NA
## std.dev NA NA NA NA 1.414214 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 15.8122100 7.906105
## max NA 27.3257065 86.337147
## range NA 11.5134965 78.431042
## sum NA 100.0000000 227.215879
## median NA 18.9126619 44.364688
## mean NA 20.0000000 45.443176
## SE.mean NA 1.9302564 13.789187
## CI.mean.0.95 NA 5.3592509 38.284922
## var NA 18.6294484 950.708442
## std.dev NA 4.3161845 30.833560
## coef.var NA 0.2158092 0.678508
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Heredia %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 38 Costa Rica Heredia Heredia 0.26208 9/9/07
## 311 Costa Rica Heredia Ã\201ngeles 19.51432 4/27/10
## 480 Costa Rica Heredia Ã\201ngeles 14.81614 10/15/10
## 529 Costa Rica Heredia Ã\201ngeles 19.54581 11/21/10
## 702 Costa Rica Heredia Ã\201ngeles 15.05161 5/8/11
## 884 Costa Rica Heredia Santo Domingo 21.95470 5/13/12
## 1157 Costa Rica Heredia Santo Domingo 9.85736 9/16/13
## 1384 Costa Rica Heredia Dulce Nombre de Jesus 10.01310 12/13/14
head(df_Heredia)
## id date time America Country country_code State
## 38 249 9/9/07 <NA> Costa Rica CR Heredia
## 311 1786 4/27/10 Early morning <NA> Costa Rica CR Heredia
## 480 2598 10/15/10 <NA> Costa Rica CR Heredia
## 529 2742 11/21/10 <NA> Costa Rica CR Heredia
## 702 3472 5/8/11 Night <NA> Costa Rica CR Heredia
## 884 4358 5/13/12 <NA> Costa Rica CR Heredia
## population City Distance location_description latitude longitude
## 38 21947 Heredia 0.26208 10.0000 -84.1167
## 311 1355 Ã\201ngeles 19.51432 10.1452 -83.9564
## 480 1355 Ã\201ngeles 14.81614 10.1067 -83.9753
## 529 1355 Ã\201ngeles 19.54581 10.1433 -83.9529
## 702 1355 Ã\201ngeles 15.05161 10.1118 -83.9793
## 884 5745 Santo Domingo 21.95470 10.1981 -84.0074
## geolocation hazard_type landslide_type
## 38 (10, -84.116699999999994) Landslide Landslide
## 311 (10.145200000000001, -83.956400000000002) Landslide Landslide
## 480 (10.1067, -83.975300000000004) Landslide Rockfall
## 529 (10.1433, -83.9529) Landslide Landslide
## 702 (10.111800000000001, -83.979299999999995) Landslide Landslide
## 884 (10.1981, -84.007400000000004) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 38 Medium Rain NA NA ticotimes.net
## 311 Medium Downpour NA 0
## 480 Medium Downpour NA 2
## 529 Medium Downpour NA 0
## 702 Medium Rain NA 0
## 884 Medium Downpour NA NA
## source_link
## 38 http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
## 311 http://en.trend.az/news/incident/1678592.html
## 480 http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010
## 529 http://insidecostarica.com/dailynews/2010/november/22/costarica10112204.htm
## 702 http://insidecostarica.com/dailynews/2011/may/10/costarica11051010.htm
## 884 http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm
ggplot(data=df_Heredia, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Heredia, aes(x = "Heredia", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Heredia <- df_Heredia %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Heredia$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Heredia, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Heredia$Distance
names(Distance) <- df_Heredia$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santo Domingo 21.954700 21.954700 19.776315 19.776315
## Ã\201ngeles 19.545810 41.500510 17.606440 37.382755
## Ã\201ngeles 19.514320 61.014830 17.578074 54.960829
## Ã\201ngeles 15.051610 76.066440 13.558162 68.518991
## Ã\201ngeles 14.816140 90.882580 13.346056 81.865047
## Dulce Nombre de Jesus 10.013100 100.895680 9.019582 90.884629
## Santo Domingo 9.857360 110.753040 8.879295 99.763924
## Heredia 0.262080 111.015120 0.236076 100.000000
stem(df_Heredia$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
head(df_Heredia)
## id date time America Country country_code State
## 1 4358 5/13/12 <NA> Costa Rica CR Heredia
## 2 5541 9/16/13 <NA> Costa Rica CR Heredia
## 3 249 9/9/07 <NA> Costa Rica CR Heredia
## 4 6696 12/13/14 Night <NA> Costa Rica CR Heredia
## 5 1786 4/27/10 Early morning <NA> Costa Rica CR Heredia
## 6 2598 10/15/10 <NA> Costa Rica CR Heredia
## population City Distance location_description latitude
## 1 5745 Santo Domingo 21.95470 10.1981
## 2 5745 Santo Domingo 9.85736 10.1528
## 3 21947 Heredia 0.26208 10.0000
## 4 0 Dulce Nombre de Jesus 10.01310 Unknown 10.2054
## 5 1355 Ã\201ngeles 19.51432 10.1452
## 6 1355 Ã\201ngeles 14.81614 10.1067
## longitude geolocation hazard_type
## 1 -84.0074 (10.1981, -84.007400000000004) Landslide
## 2 -84.1489 (10.152799999999999, -84.148899999999998) Landslide
## 3 -84.1167 (10, -84.116699999999994) Landslide
## 4 -83.9041 (10.205399999999999, -83.9041) Landslide
## 5 -83.9564 (10.145200000000001, -83.956400000000002) Landslide
## 6 -83.9753 (10.1067, -83.975300000000004) Landslide
## landslide_type landslide_size trigger storm_name injuries fatalities
## 1 Landslide Medium Downpour NA NA
## 2 Landslide Medium Tropical cyclone Manuel NA 0
## 3 Landslide Medium Rain NA NA
## 4 Landslide Medium Unknown 0 0
## 5 Landslide Medium Downpour NA 0
## 6 Rockfall Medium Downpour NA 2
## source_name
## 1
## 2 insidecostarica.com
## 3 ticotimes.net
## 4 Columbia
## 5
## 6
## source_link
## 1 http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm
## 2 http://insidecostarica.com/2013/09/17/torrential-rains-flooding-washed-out-bridges-and-landslides-wreak-havoc/
## 3 http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm
## 4 http://www.columbia.co.cr/index.php/nacionales/transporte/6953-la-ruta-32-se-mantiene-cerrada-desde-anoche-por-un-deslizamiento-en-el-sector-de-rio-sucio
## 5 http://en.trend.az/news/incident/1678592.html
## 6 http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010
## prop ypos
## 1 19.776315 9.888158
## 2 8.879295 24.215963
## 3 0.236076 28.773648
## 4 9.019582 33.401477
## 5 17.578074 46.700305
## 6 13.346056 62.162370
knitr::kable(head(df_Heredia))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4358 | 5/13/12 | NA | Costa Rica | CR | Heredia | 5745 | Santo Domingo | 21.95470 | 10.1981 | -84.0074 | (10.1981, -84.007400000000004) | Landslide | Landslide | Medium | Downpour | NA | NA | http://www.insidecostarica.com/dailynews/2012/may/17/costarica12051708.htm | 19.776315 | 9.888158 | ||||
5541 | 9/16/13 | NA | Costa Rica | CR | Heredia | 5745 | Santo Domingo | 9.85736 | 10.1528 | -84.1489 | (10.152799999999999, -84.148899999999998) | Landslide | Landslide | Medium | Tropical cyclone | Manuel | NA | 0 | insidecostarica.com | http://insidecostarica.com/2013/09/17/torrential-rains-flooding-washed-out-bridges-and-landslides-wreak-havoc/ | 8.879295 | 24.215963 | ||
249 | 9/9/07 | NA | Costa Rica | CR | Heredia | 21947 | Heredia | 0.26208 | 10.0000 | -84.1167 | (10, -84.116699999999994) | Landslide | Landslide | Medium | Rain | NA | NA | ticotimes.net | http://www.ticotimes.net/dailyarchive/2007_09/0911072.htm | 0.236076 | 28.773648 | |||
6696 | 12/13/14 | Night | NA | Costa Rica | CR | Heredia | 0 | Dulce Nombre de Jesus | 10.01310 | Unknown | 10.2054 | -83.9041 | (10.205399999999999, -83.9041) | Landslide | Landslide | Medium | Unknown | 0 | 0 | Columbia | http://www.columbia.co.cr/index.php/nacionales/transporte/6953-la-ruta-32-se-mantiene-cerrada-desde-anoche-por-un-deslizamiento-en-el-sector-de-rio-sucio | 9.019582 | 33.401477 | |
1786 | 4/27/10 | Early morning | NA | Costa Rica | CR | Heredia | 1355 | Ãngeles | 19.51432 | 10.1452 | -83.9564 | (10.145200000000001, -83.956400000000002) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://en.trend.az/news/incident/1678592.html | 17.578074 | 46.700305 | |||
2598 | 10/15/10 | NA | Costa Rica | CR | Heredia | 1355 | Ãngeles | 14.81614 | 10.1067 | -83.9753 | (10.1067, -83.975300000000004) | Landslide | Rockfall | Medium | Downpour | NA | 2 | http://www.ticotimes.net/News/Daily-News/Two-People-Die-in-Landslide-on-Limon-Highway_Saturday-October-16-2010 | 13.346056 | 62.162370 |
stem(df_Heredia$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
stem(df_Heredia$"Distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
library(forecast)
data_serie<- ts(df_Heredia$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 21.95470 9.85736 0.26208 10.01310 19.51432 14.81614
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.26208 | 1 | 12.5 | 12.5 | 12.5 | 12.5 |
9.85736 | 1 | 12.5 | 12.5 | 25.0 | 25.0 |
10.0131 | 1 | 12.5 | 12.5 | 37.5 | 37.5 |
14.81614 | 1 | 12.5 | 12.5 | 50.0 | 50.0 |
15.05161 | 1 | 12.5 | 12.5 | 62.5 | 62.5 |
19.51432 | 1 | 12.5 | 12.5 | 75.0 | 75.0 |
19.54581 | 1 | 12.5 | 12.5 | 87.5 | 87.5 |
21.9547 | 1 | 12.5 | 12.5 | 100.0 | 100.0 |
Total | 8 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 9 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 8
## $ % : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ val% : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ %cum : num 12.5 25 37.5 50 62.5 75 87.5 100 100
## $ val%cum: num 12.5 25 37.5 50 62.5 75 87.5 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.26208 | 1 |
9.85736 | 1 |
10.0131 | 1 |
14.81614 | 1 |
15.05161 | 1 |
19.51432 | 1 |
19.54581 | 1 |
21.9547 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.26208 6.26208 12.26208 18.26208 24.26208
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.26208 | 1 | 0.125 | 1 |
9.85736 | 1 | 0.125 | 2 |
10.0131 | 1 | 0.125 | 3 |
14.81614 | 1 | 0.125 | 4 |
15.05161 | 1 | 0.125 | 5 |
19.51432 | 1 | 0.125 | 6 |
19.54581 | 1 | 0.125 | 7 |
21.9547 | 1 | 0.125 | 8 |
str(Freq_table)
## 'data.frame': 8 obs. of 4 variables:
## $ Distance: Factor w/ 8 levels "0.26208","9.85736",..: 1 2 3 4 5 6 7 8
## $ Freq : int 1 1 1 1 1 1 1 1
## $ Rel_Freq: num 0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125
## $ Cum_Freq: int 1 2 3 4 5 6 7 8
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.26208 | 1 |
9.85736 | 1 |
10.0131 | 1 |
14.81614 | 1 |
15.05161 | 1 |
19.51432 | 1 |
19.54581 | 1 |
21.9547 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Heredia$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2621 9.9742 14.9339 13.8769 19.5222 21.9547
library(pastecs)
stat.desc(df_Heredia)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 8.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.490000e+02 NA NA NA NA NA NA
## max 6.696000e+03 NA NA NA NA NA NA
## range 6.447000e+03 NA NA NA NA NA NA
## sum 2.744200e+04 NA NA NA NA NA NA
## median 3.107000e+03 NA NA NA NA NA NA
## mean 3.430250e+03 NA NA NA NA NA NA
## SE.mean 7.315967e+02 NA NA NA NA NA NA
## CI.mean.0.95 1.729951e+03 NA NA NA NA NA NA
## var 4.281870e+06 NA NA NA NA NA NA
## std.dev 2.069268e+03 NA NA NA NA NA NA
## coef.var 6.032412e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 8.000000e+00 NA 8.0000000 NA 8.000000000
## nbr.null 1.000000e+00 NA 0.0000000 NA 0.000000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.000000000
## min 0.000000e+00 NA 0.2620800 NA 10.000000000
## max 2.194700e+04 NA 21.9547000 NA 10.205400000
## range 2.194700e+04 NA 21.6926200 NA 0.205400000
## sum 3.885700e+04 NA 111.0151200 NA 81.063300000
## median 1.355000e+03 NA 14.9338750 NA 10.144250000
## mean 4.857125e+03 NA 13.8768900 NA 10.132912500
## SE.mean 2.557523e+03 NA 2.4924134 NA 0.022739522
## CI.mean.0.95 6.047580e+03 NA 5.8936213 NA 0.053770426
## var 5.232738e+07 NA 49.6969984 NA 0.004136687
## std.dev 7.233767e+03 NA 7.0496098 NA 0.064317081
## coef.var 1.489310e+00 NA 0.5080108 NA 0.006347344
## longitude geolocation hazard_type landslide_type
## nbr.val 8.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.414890e+01 NA NA NA
## max -8.390410e+01 NA NA NA
## range 2.448000e-01 NA NA NA
## sum -6.720410e+02 NA NA NA
## median -8.397730e+01 NA NA NA
## mean -8.400512e+01 NA NA NA
## SE.mean 2.987758e-02 NA NA NA
## CI.mean.0.95 7.064924e-02 NA NA NA
## var 7.141356e-03 NA NA NA
## std.dev 8.450655e-02 NA NA NA
## coef.var -1.005969e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 6.0000000 NA
## nbr.null NA NA NA 1 5.0000000 NA
## nbr.na NA NA NA 7 2.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 2.0000000 NA
## range NA NA NA 0 2.0000000 NA
## sum NA NA NA 0 2.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.3333333 NA
## SE.mean NA NA NA NA 0.3333333 NA
## CI.mean.0.95 NA NA NA NaN 0.8568606 NA
## var NA NA NA NA 0.6666667 NA
## std.dev NA NA NA NA 0.8164966 NA
## coef.var NA NA NA NA 2.4494897 NA
## source_link prop ypos
## nbr.val NA 8.0000000 8.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.2360760 9.8881576
## max NA 19.7763152 93.2209189
## range NA 19.5402392 83.3327613
## sum NA 100.0000000 376.0014582
## median NA 13.4521090 40.0508913
## mean NA 12.5000000 47.0001823
## SE.mean NA 2.2451117 10.1053438
## CI.mean.0.95 NA 5.3088456 23.8953410
## var NA 40.3242124 816.9437832
## std.dev NA 6.3501348 28.5822285
## coef.var NA 0.5080108 0.6081302
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_Mir %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 530 Venezuela Miranda Santa Teresa 8.45736 11/25/10
## 542 Venezuela Miranda Baruta 2.69644 11/27/10
## 543 Venezuela Miranda El Hatillo 1.04263 11/27/10
## 546 Venezuela Miranda Guatire 15.84114 11/29/10
head(df_Mir)
## id date time America Country country_code State population
## 530 2749 11/25/10 SA Venezuela VE Miranda 278890
## 542 2765 11/27/10 Night SA Venezuela VE Miranda 244216
## 543 2766 11/27/10 Night SA Venezuela VE Miranda 57591
## 546 2771 11/29/10 SA Venezuela VE Miranda 191903
## City Distance location_description latitude longitude
## 530 Santa Teresa 8.45736 10.2452 -66.5867
## 542 Baruta 2.69644 10.4447 -66.8545
## 543 El Hatillo 1.04263 10.4393 -66.8150
## 546 Guatire 15.84114 10.6144 -66.5806
## geolocation hazard_type landslide_type
## 530 (10.245200000000001, -66.586699999999993) Landslide Mudslide
## 542 (10.444699999999999, -66.854500000000002) Landslide Landslide
## 543 (10.439299999999999, -66.814999999999998) Landslide Landslide
## 546 (10.6144, -66.580600000000004) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 530 Medium Downpour NA 1
## 542 Medium Downpour NA 0
## 543 Medium Downpour NA 0
## 546 Medium Downpour NA 0
## source_link
## 530 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 542 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 543 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 546 http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a
ggplot(data=df_Mir, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Mir, aes(x = "Miranda", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Mir,aes(x="Miranda",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=3)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Mir$Distance
names(Distance) <- df_Mir$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Guatire 15.841140 15.841140 56.499690 56.499690
## Santa Teresa 8.457360 24.298500 30.164383 86.664073
## Baruta 2.696440 26.994940 9.617239 96.281311
## El Hatillo 1.042630 28.037570 3.718689 100.000000
stem(df_Mir$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 13
## 0 | 8
## 1 |
## 1 | 6
head(df_Mir)
## id date time America Country country_code State population
## 530 2749 11/25/10 SA Venezuela VE Miranda 278890
## 542 2765 11/27/10 Night SA Venezuela VE Miranda 244216
## 543 2766 11/27/10 Night SA Venezuela VE Miranda 57591
## 546 2771 11/29/10 SA Venezuela VE Miranda 191903
## City Distance location_description latitude longitude
## 530 Santa Teresa 8.45736 10.2452 -66.5867
## 542 Baruta 2.69644 10.4447 -66.8545
## 543 El Hatillo 1.04263 10.4393 -66.8150
## 546 Guatire 15.84114 10.6144 -66.5806
## geolocation hazard_type landslide_type
## 530 (10.245200000000001, -66.586699999999993) Landslide Mudslide
## 542 (10.444699999999999, -66.854500000000002) Landslide Landslide
## 543 (10.439299999999999, -66.814999999999998) Landslide Landslide
## 546 (10.6144, -66.580600000000004) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 530 Medium Downpour NA 1
## 542 Medium Downpour NA 0
## 543 Medium Downpour NA 0
## 546 Medium Downpour NA 0
## source_link
## 530 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 542 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 543 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 546 http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a
knitr::kable(head(df_Mir))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
530 | 2749 | 11/25/10 | SA | Venezuela | VE | Miranda | 278890 | Santa Teresa | 8.45736 | 10.2452 | -66.5867 | (10.245200000000001, -66.586699999999993) | Landslide | Mudslide | Medium | Downpour | NA | 1 | http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717 | ||||
542 | 2765 | 11/27/10 | Night | SA | Venezuela | VE | Miranda | 244216 | Baruta | 2.69644 | 10.4447 | -66.8545 | (10.444699999999999, -66.854500000000002) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml | |||
543 | 2766 | 11/27/10 | Night | SA | Venezuela | VE | Miranda | 57591 | El Hatillo | 1.04263 | 10.4393 | -66.8150 | (10.439299999999999, -66.814999999999998) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml | |||
546 | 2771 | 11/29/10 | SA | Venezuela | VE | Miranda | 191903 | Guatire | 15.84114 | 10.6144 | -66.5806 | (10.6144, -66.580600000000004) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://www.google.com/hostednews/ap/article/ALeqM5gEWcL7PPm0K3Ut10_J9xJK41TZog?docId=51e8b1855b1344c781f245d32d0e1f4a |
stem(df_Mir$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 13
## 0 | 8
## 1 |
## 1 | 6
stem(df_Mir$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0
## 2 | 7
## 4 |
## 6 |
## 8 | 5
## 10 |
## 12 |
## 14 | 8
library(forecast)
data_serie<- ts(df_Mir$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr
## 2007 8.45736 2.69644 1.04263 15.84114
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
1.04263 | 1 | 25 | 25 | 25 | 25 |
2.69644 | 1 | 25 | 25 | 50 | 50 |
8.45736 | 1 | 25 | 25 | 75 | 75 |
15.84114 | 1 | 25 | 25 | 100 | 100 |
Total | 4 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
1.04263 | 1 |
2.69644 | 1 |
8.45736 | 1 |
15.84114 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 1.04263 6.04263 11.04263 16.04263
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
1.04263 | 1 | 0.25 | 1 |
2.69644 | 1 | 0.25 | 2 |
8.45736 | 1 | 0.25 | 3 |
15.84114 | 1 | 0.25 | 4 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ Distance: Factor w/ 4 levels "1.04263","2.69644",..: 1 2 3 4
## $ Freq : int 1 1 1 1
## $ Rel_Freq: num 0.25 0.25 0.25 0.25
## $ Cum_Freq: int 1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
1.04263 | 1 |
2.69644 | 1 |
8.45736 | 1 |
15.84114 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Mir$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.043 2.283 5.577 7.009 10.303 15.841
library(pastecs)
stat.desc(df_Mir)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 4.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.749000e+03 NA NA NA NA NA NA
## max 2.771000e+03 NA NA NA NA NA NA
## range 2.200000e+01 NA NA NA NA NA NA
## sum 1.105100e+04 NA NA NA NA NA NA
## median 2.765500e+03 NA NA NA NA NA NA
## mean 2.762750e+03 NA NA NA NA NA NA
## SE.mean 4.767512e+00 NA NA NA NA NA NA
## CI.mean.0.95 1.517235e+01 NA NA NA NA NA NA
## var 9.091667e+01 NA NA NA NA NA NA
## std.dev 9.535023e+00 NA NA NA NA NA NA
## coef.var 3.451280e-03 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 4.000000e+00 NA 4.000000 NA 4.00000000
## nbr.null 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.000000 NA 0.00000000
## min 5.759100e+04 NA 1.042630 NA 10.24520000
## max 2.788900e+05 NA 15.841140 NA 10.61440000
## range 2.212990e+05 NA 14.798510 NA 0.36920000
## sum 7.726000e+05 NA 28.037570 NA 41.74360000
## median 2.180595e+05 NA 5.576900 NA 10.44200000
## mean 1.931500e+05 NA 7.009392 NA 10.43590000
## SE.mean 4.859431e+04 NA 3.345397 NA 0.07545293
## CI.mean.0.95 1.546488e+05 NA 10.646547 NA 0.24012491
## var 9.445627e+09 NA 44.766729 NA 0.02277258
## std.dev 9.718862e+04 NA 6.690794 NA 0.15090586
## coef.var 5.031769e-01 NA 0.954547 NA 0.01446026
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -6.685450e+01 NA NA NA
## max -6.658060e+01 NA NA NA
## range 2.739000e-01 NA NA NA
## sum -2.668368e+02 NA NA NA
## median -6.670085e+01 NA NA NA
## mean -6.670920e+01 NA NA NA
## SE.mean 7.294401e-02 NA NA NA
## CI.mean.0.95 2.321404e-01 NA NA NA
## var 2.128331e-02 NA NA NA
## std.dev 1.458880e-01 NA NA NA
## coef.var -2.186925e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.0000000 NA
## nbr.null NA NA NA 0 3.0000000 NA
## nbr.na NA NA NA 4 0.0000000 NA
## min NA NA NA Inf 0.0000000 NA
## max NA NA NA -Inf 1.0000000 NA
## range NA NA NA -Inf 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA NA 0.0000000 NA
## mean NA NA NA NaN 0.2500000 NA
## SE.mean NA NA NA NA 0.2500000 NA
## CI.mean.0.95 NA NA NA NaN 0.7956116 NA
## var NA NA NA NA 0.2500000 NA
## std.dev NA NA NA NA 0.5000000 NA
## coef.var NA NA NA NA 2.0000000 NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
ggplot(data=df_Venezuela, aes(fill=State, y=Distance, x="Venezuela")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_Venezuela, aes(fill=State, y=Distance, x="Venezuela")) +
geom_bar(position="stack", stat="identity")
df_DF %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 165 Venezuela Distrito Federal Caracas 2.92493 11/20/08
## 531 Venezuela Distrito Federal Caracas 2.95706 11/26/10
## 538 Venezuela Distrito Federal Caricuao 7.90754 11/27/10
## 539 Venezuela Distrito Federal Caricuao 5.64050 11/27/10
## 540 Venezuela Distrito Federal Caricuao 5.74106 11/27/10
## 541 Venezuela Distrito Federal Caracas 4.58994 11/27/10
## 544 Venezuela Distrito Federal Caracas 3.14060 11/29/10
## 545 Venezuela Distrito Federal Caracas 6.04235 11/29/10
## 547 Venezuela Distrito Federal Caracas 3.87793 11/29/10
## 548 Venezuela Distrito Federal Caracas 3.65044 11/29/10
## 549 Venezuela Distrito Federal Caracas 8.11953 11/30/10
## 847 Venezuela Distrito Federal Caracas 2.55507 12/6/11
head(df_DF)
## id date time America Country country_code State
## 165 896 11/20/08 SA Venezuela VE Distrito Federal
## 531 2753 11/26/10 Before dawn SA Venezuela VE Distrito Federal
## 538 2761 11/27/10 SA Venezuela VE Distrito Federal
## 539 2762 11/27/10 Night SA Venezuela VE Distrito Federal
## 540 2763 11/27/10 Night SA Venezuela VE Distrito Federal
## 541 2764 11/27/10 Night SA Venezuela VE Distrito Federal
## population City Distance location_description latitude longitude
## 165 3000000 Caracas 2.92493 10.4660 -66.8940
## 531 3000000 Caracas 2.95706 10.4913 -66.9060
## 538 0 Caricuao 7.90754 10.5030 -66.9995
## 539 0 Caricuao 5.64050 10.4267 -67.0342
## 540 0 Caricuao 5.74106 10.4669 -66.9431
## 541 3000000 Caracas 4.58994 10.4867 -66.9211
## geolocation hazard_type landslide_type
## 165 (10.465999999999999, -66.894000000000005) Landslide Mudslide
## 531 (10.491300000000001, -66.906000000000006) Landslide Mudslide
## 538 (10.503, -66.999499999999998) Landslide Mudslide
## 539 (10.4267, -67.034199999999998) Landslide Landslide
## 540 (10.466900000000001, -66.943100000000001) Landslide Landslide
## 541 (10.486700000000001, -66.921099999999996) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 165 Medium Downpour NA 8
## 531 Medium Downpour NA 3
## 538 Medium Downpour NA 1
## 539 Medium Downpour NA 0
## 540 Medium Downpour NA 2
## 541 Medium Downpour NA 0
## source_link
## 165 http://www.foxnews.com/story/0,2933,456304,00.html
## 531 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 538 http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717
## 539 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 540 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 541 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
ggplot(data=df_DF, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_DF, aes(x = "Distrito Federal ", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_DF <- df_DF %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_DF$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)
df_DF <- df_DF %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_DF$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DF, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_DF$Distance
names(Distance) <- df_DF$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Caracas 8.119530 8.119530 14.208160 14.208160
## Caricuao 7.907540 16.027070 13.837204 28.045364
## Caracas 6.042350 22.069420 10.573355 38.618719
## Caricuao 5.741060 27.810480 10.046135 48.664854
## Caricuao 5.640500 33.450980 9.870168 58.535022
## Caracas 4.589940 38.040920 8.031820 66.566842
## Caracas 3.877930 41.918850 6.785891 73.352734
## Caracas 3.650440 45.569290 6.387812 79.740546
## Caracas 3.140600 48.709890 5.495656 85.236202
## Caracas 2.957060 51.666950 5.174484 90.410687
## Caracas 2.924930 54.591880 5.118261 95.528948
## Caracas 2.555070 57.146950 4.471052 100.000000
stem(df_DF$"Distance")
##
## The decimal point is at the |
##
## 2 | 690179
## 4 | 667
## 6 | 09
## 8 | 1
head(df_DF)
## id date time America Country country_code State
## 1 2761 11/27/10 SA Venezuela VE Distrito Federal
## 2 2762 11/27/10 Night SA Venezuela VE Distrito Federal
## 3 2763 11/27/10 Night SA Venezuela VE Distrito Federal
## 4 896 11/20/08 SA Venezuela VE Distrito Federal
## 5 2753 11/26/10 Before dawn SA Venezuela VE Distrito Federal
## 6 2764 11/27/10 Night SA Venezuela VE Distrito Federal
## population City Distance location_description latitude longitude
## 1 0 Caricuao 7.90754 10.5030 -66.9995
## 2 0 Caricuao 5.64050 10.4267 -67.0342
## 3 0 Caricuao 5.74106 10.4669 -66.9431
## 4 3000000 Caracas 2.92493 10.4660 -66.8940
## 5 3000000 Caracas 2.95706 10.4913 -66.9060
## 6 3000000 Caracas 4.58994 10.4867 -66.9211
## geolocation hazard_type landslide_type
## 1 (10.503, -66.999499999999998) Landslide Mudslide
## 2 (10.4267, -67.034199999999998) Landslide Landslide
## 3 (10.466900000000001, -66.943100000000001) Landslide Landslide
## 4 (10.465999999999999, -66.894000000000005) Landslide Mudslide
## 5 (10.491300000000001, -66.906000000000006) Landslide Mudslide
## 6 (10.486700000000001, -66.921099999999996) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 1
## 2 Medium Downpour NA 0
## 3 Medium Downpour NA 2
## 4 Medium Downpour NA 8
## 5 Medium Downpour NA 3
## 6 Medium Downpour NA 0
## source_link
## 1 http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717
## 2 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 3 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## 4 http://www.foxnews.com/story/0,2933,456304,00.html
## 5 http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717
## 6 http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml
## prop ypos
## 1 13.837204 6.918602
## 2 9.870168 18.772288
## 3 10.046135 28.730440
## 4 5.118261 36.312638
## 5 5.174484 41.459010
## 6 8.031820 48.062163
knitr::kable(head(df_DF))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2761 | 11/27/10 | SA | Venezuela | VE | Distrito Federal | 0e+00 | Caricuao | 7.90754 | 10.5030 | -66.9995 | (10.503, -66.999499999999998) | Landslide | Mudslide | Medium | Downpour | NA | 1 | http://www.laht.com/article.asp?ArticleId=380021&CategoryId=10717 | 13.837204 | 6.918602 | ||||
2762 | 11/27/10 | Night | SA | Venezuela | VE | Distrito Federal | 0e+00 | Caricuao | 5.64050 | 10.4267 | -67.0342 | (10.4267, -67.034199999999998) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml | 9.870168 | 18.772288 | |||
2763 | 11/27/10 | Night | SA | Venezuela | VE | Distrito Federal | 0e+00 | Caricuao | 5.74106 | 10.4669 | -66.9431 | (10.466900000000001, -66.943100000000001) | Landslide | Landslide | Medium | Downpour | NA | 2 | http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml | 10.046135 | 28.730440 | |||
896 | 11/20/08 | SA | Venezuela | VE | Distrito Federal | 3e+06 | Caracas | 2.92493 | 10.4660 | -66.8940 | (10.465999999999999, -66.894000000000005) | Landslide | Mudslide | Medium | Downpour | NA | 8 | http://www.foxnews.com/story/0,2933,456304,00.html | 5.118261 | 36.312638 | ||||
2753 | 11/26/10 | Before dawn | SA | Venezuela | VE | Distrito Federal | 3e+06 | Caracas | 2.95706 | 10.4913 | -66.9060 | (10.491300000000001, -66.906000000000006) | Landslide | Mudslide | Medium | Downpour | NA | 3 | http://www.laht.com/article.asp?ArticleId=379809&CategoryId=10717 | 5.174484 | 41.459010 | |||
2764 | 11/27/10 | Night | SA | Venezuela | VE | Distrito Federal | 3e+06 | Caracas | 4.58994 | 10.4867 | -66.9211 | (10.486700000000001, -66.921099999999996) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://english.eluniversal.com/2010/11/30/en_pol_esp_landslides-hit-sever_30A4792571.shtml | 8.031820 | 48.062162 |
stem(df_DF$"Distance")
##
## The decimal point is at the |
##
## 2 | 690179
## 4 | 667
## 6 | 09
## 8 | 1
stem(df_DF$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 69
## 3 | 0179
## 4 | 6
## 5 | 67
## 6 | 0
## 7 | 9
## 8 | 1
library(forecast)
data_serie<- ts(df_DF$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 7.90754 5.64050 5.74106 2.92493 2.95706 4.58994
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
2.55507 | 1 | 8.3 | 8.3 | 8.3 | 8.3 |
2.92493 | 1 | 8.3 | 8.3 | 16.7 | 16.7 |
2.95706 | 1 | 8.3 | 8.3 | 25.0 | 25.0 |
3.1406 | 1 | 8.3 | 8.3 | 33.3 | 33.3 |
3.65044 | 1 | 8.3 | 8.3 | 41.7 | 41.7 |
3.87793 | 1 | 8.3 | 8.3 | 50.0 | 50.0 |
4.58994 | 1 | 8.3 | 8.3 | 58.3 | 58.3 |
5.6405 | 1 | 8.3 | 8.3 | 66.7 | 66.7 |
5.74106 | 1 | 8.3 | 8.3 | 75.0 | 75.0 |
6.04235 | 1 | 8.3 | 8.3 | 83.3 | 83.3 |
7.90754 | 1 | 8.3 | 8.3 | 91.7 | 91.7 |
8.11953 | 1 | 8.3 | 8.3 | 100.0 | 100.0 |
Total | 12 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 13 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ val% : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ %cum : num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
## $ val%cum: num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
2.55507 | 1 |
2.92493 | 1 |
2.95706 | 1 |
3.1406 | 1 |
3.65044 | 1 |
3.87793 | 1 |
4.58994 | 1 |
5.6405 | 1 |
5.74106 | 1 |
6.04235 | 1 |
7.90754 | 1 |
8.11953 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 2.55507 4.55507 6.55507 8.55507
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
2.55507 | 1 | 0.0833333 | 1 |
2.92493 | 1 | 0.0833333 | 2 |
2.95706 | 1 | 0.0833333 | 3 |
3.1406 | 1 | 0.0833333 | 4 |
3.65044 | 1 | 0.0833333 | 5 |
3.87793 | 1 | 0.0833333 | 6 |
4.58994 | 1 | 0.0833333 | 7 |
5.6405 | 1 | 0.0833333 | 8 |
5.74106 | 1 | 0.0833333 | 9 |
6.04235 | 1 | 0.0833333 | 10 |
7.90754 | 1 | 0.0833333 | 11 |
8.11953 | 1 | 0.0833333 | 12 |
str(Freq_table)
## 'data.frame': 12 obs. of 4 variables:
## $ Distance: Factor w/ 12 levels "2.55507","2.92493",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Freq : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Rel_Freq: num 0.0833 0.0833 0.0833 0.0833 0.0833 ...
## $ Cum_Freq: int 1 2 3 4 5 6 7 8 9 10 ...
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
2.55507 | 1 |
2.92493 | 1 |
2.95706 | 1 |
3.1406 | 1 |
3.65044 | 1 |
3.87793 | 1 |
4.58994 | 1 |
5.6405 | 1 |
5.74106 | 1 |
6.04235 | 1 |
7.90754 | 1 |
8.11953 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_DF$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.555 3.095 4.234 4.762 5.816 8.120
library(pastecs)
stat.desc(df_DF)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 1.200000e+01 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 8.960000e+02 NA NA NA NA NA NA
## max 4.087000e+03 NA NA NA NA NA NA
## range 3.191000e+03 NA NA NA NA NA NA
## sum 3.264400e+04 NA NA NA NA NA NA
## median 2.766500e+03 NA NA NA NA NA NA
## mean 2.720333e+03 NA NA NA NA NA NA
## SE.mean 1.988092e+02 NA NA NA NA NA NA
## CI.mean.0.95 4.375761e+02 NA NA NA NA NA NA
## var 4.743012e+05 NA NA NA NA NA NA
## std.dev 6.886953e+02 NA NA NA NA NA NA
## coef.var 2.531658e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 1.200000e+01 NA 12.0000000 NA 1.200000e+01
## nbr.null 3.000000e+00 NA 0.0000000 NA 0.000000e+00
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.000000e+00
## min 0.000000e+00 NA 2.5550700 NA 1.042670e+01
## max 3.000000e+06 NA 8.1195300 NA 1.051390e+01
## range 3.000000e+06 NA 5.5644600 NA 8.720000e-02
## sum 2.700000e+07 NA 57.1469500 NA 1.258214e+02
## median 3.000000e+06 NA 4.2339350 NA 1.049020e+01
## mean 2.250000e+06 NA 4.7622458 NA 1.048512e+01
## SE.mean 3.916747e+05 NA 0.5553862 NA 7.110799e-03
## CI.mean.0.95 8.620703e+05 NA 1.2223968 NA 1.565076e-02
## var 1.840909e+12 NA 3.7014462 NA 6.067615e-04
## std.dev 1.356801e+06 NA 1.9239143 NA 2.463253e-02
## coef.var 6.030227e-01 NA 0.4039931 NA 2.349285e-03
## longitude geolocation hazard_type landslide_type
## nbr.val 1.200000e+01 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -6.703420e+01 NA NA NA
## max -6.689300e+01 NA NA NA
## range 1.412000e-01 NA NA NA
## sum -8.031958e+02 NA NA NA
## median -6.691785e+01 NA NA NA
## mean -6.693298e+01 NA NA NA
## SE.mean 1.259930e-02 NA NA NA
## CI.mean.0.95 2.773088e-02 NA NA NA
## var 1.904909e-03 NA NA NA
## std.dev 4.364526e-02 NA NA NA
## coef.var -6.520740e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 12.0000000 NA
## nbr.null NA NA NA 0 4.0000000 NA
## nbr.na NA NA NA 12 0.0000000 NA
## min NA NA NA Inf 0.0000000 NA
## max NA NA NA -Inf 8.0000000 NA
## range NA NA NA -Inf 8.0000000 NA
## sum NA NA NA 0 20.0000000 NA
## median NA NA NA NA 1.0000000 NA
## mean NA NA NA NaN 1.6666667 NA
## SE.mean NA NA NA NA 0.6435382 NA
## CI.mean.0.95 NA NA NA NaN 1.4164180 NA
## var NA NA NA NA 4.9696970 NA
## std.dev NA NA NA NA 2.2292817 NA
## coef.var NA NA NA NA 1.3375690 NA
## source_link prop ypos
## nbr.val NA 12.0000000 12.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 4.4710523 6.9186020
## max NA 14.2081598 97.7644739
## range NA 9.7371076 90.8458719
## sum NA 100.0000000 633.7977005
## median NA 7.4088556 51.4440316
## mean NA 8.3333333 52.8164750
## SE.mean NA 0.9718563 8.0970053
## CI.mean.0.95 NA 2.1390412 17.8213886
## var NA 11.3340554 786.7379476
## std.dev NA 3.3666089 28.0488493
## coef.var NA 0.4039931 0.5310625
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_var %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 448 Venezuela Vargas MaiquetÃa 6.65506 9/20/10
## 599 Venezuela Vargas MaiquetÃa 7.89319 3/7/11
head(df_var)
## id date time America Country country_code State population
## 448 2473 9/20/10 SA Venezuela VE Vargas 52564
## 599 3180 3/7/11 SA Venezuela VE Vargas 52564
## City Distance location_description latitude longitude
## 448 MaiquetÃa 6.65506 10.5363 -66.9492
## 599 MaiquetÃa 7.89319 10.5259 -66.9425
## geolocation hazard_type landslide_type
## 448 (10.536300000000001, -66.949200000000005) Landslide Landslide
## 599 (10.5259, -66.942499999999995) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 448 Medium Downpour NA 7
## 599 Medium Downpour NA 0
## source_link
## 448 http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091
## 599 http://www.vheadline.com/readnews.asp?id=101578
ggplot(data=df_var, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_var, aes(x = "Vargas", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_var <- df_var %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_var$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_var, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_var$Distance
names(Distance) <- df_var$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## MaiquetÃa 7.89319 7.89319 54.25525 54.25525
## MaiquetÃa 6.65506 14.54825 45.74475 100.00000
stem(df_var$"Distance")
##
## The decimal point is at the |
##
## 6 | 7
## 7 |
## 7 | 9
head(df_var)
## id date time America Country country_code State population City
## 1 2473 9/20/10 SA Venezuela VE Vargas 52564 MaiquetÃa
## 2 3180 3/7/11 SA Venezuela VE Vargas 52564 MaiquetÃa
## Distance location_description latitude longitude
## 1 6.65506 10.5363 -66.9492
## 2 7.89319 10.5259 -66.9425
## geolocation hazard_type landslide_type
## 1 (10.536300000000001, -66.949200000000005) Landslide Landslide
## 2 (10.5259, -66.942499999999995) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 7
## 2 Medium Downpour NA 0
## source_link prop
## 1 http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091 45.74475
## 2 http://www.vheadline.com/readnews.asp?id=101578 54.25525
## ypos
## 1 22.87237
## 2 72.87237
knitr::kable(head(df_var))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2473 | 9/20/10 | SA | Venezuela | VE | Vargas | 52564 | MaiquetÃa | 6.65506 | 10.5363 | -66.9492 | (10.536300000000001, -66.949200000000005) | Landslide | Landslide | Medium | Downpour | NA | 7 | http://www.laht.com/article.asp?ArticleId=367610&CategoryId=14091 | 45.74475 | 22.87237 | ||||
3180 | 3/7/11 | SA | Venezuela | VE | Vargas | 52564 | MaiquetÃa | 7.89319 | 10.5259 | -66.9425 | (10.5259, -66.942499999999995) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://www.vheadline.com/readnews.asp?id=101578 | 54.25525 | 72.87237 |
stem(df_var$"Distance")
##
## The decimal point is at the |
##
## 6 | 7
## 7 |
## 7 | 9
stem(df_var$"Distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 66 | 6
## 68 |
## 70 |
## 72 |
## 74 |
## 76 |
## 78 | 9
library(forecast)
data_serie<- ts(df_var$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb
## 2007 6.65506 7.89319
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
6.65506 | 1 | 50 | 50 | 50 | 50 |
7.89319 | 1 | 50 | 50 | 100 | 100 |
Total | 2 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
6.65506 | 1 |
7.89319 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 6.65506 7.65506 8.65506
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
6.65506 | 1 | 0.5 | 1 |
7.89319 | 1 | 0.5 | 2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ Distance: Factor w/ 2 levels "6.65506","7.89319": 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
6.65506 | 1 |
7.89319 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_var$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.655 6.965 7.274 7.274 7.584 7.893
library(pastecs)
stat.desc(df_var)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 2.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 2.473000e+03 NA NA NA NA NA NA
## max 3.180000e+03 NA NA NA NA NA NA
## range 7.070000e+02 NA NA NA NA NA NA
## sum 5.653000e+03 NA NA NA NA NA NA
## median 2.826500e+03 NA NA NA NA NA NA
## mean 2.826500e+03 NA NA NA NA NA NA
## SE.mean 3.535000e+02 NA NA NA NA NA NA
## CI.mean.0.95 4.491643e+03 NA NA NA NA NA NA
## var 2.499245e+05 NA NA NA NA NA NA
## std.dev 4.999245e+02 NA NA NA NA NA NA
## coef.var 1.768705e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 2 NA 2.0000000 NA 2.000000e+00
## nbr.null 0 NA 0.0000000 NA 0.000000e+00
## nbr.na 0 NA 0.0000000 NA 0.000000e+00
## min 52564 NA 6.6550600 NA 1.052590e+01
## max 52564 NA 7.8931900 NA 1.053630e+01
## range 0 NA 1.2381300 NA 1.040000e-02
## sum 105128 NA 14.5482500 NA 2.106220e+01
## median 52564 NA 7.2741250 NA 1.053110e+01
## mean 52564 NA 7.2741250 NA 1.053110e+01
## SE.mean 0 NA 0.6190650 NA 5.200000e-03
## CI.mean.0.95 0 NA 7.8659666 NA 6.607226e-02
## var 0 NA 0.7664829 NA 5.408000e-05
## std.dev 0 NA 0.8754901 NA 7.353911e-03
## coef.var 0 NA 0.1203568 NA 6.983041e-04
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -6.694920e+01 NA NA NA
## max -6.694250e+01 NA NA NA
## range 6.700000e-03 NA NA NA
## sum -1.338917e+02 NA NA NA
## median -6.694585e+01 NA NA NA
## mean -6.694585e+01 NA NA NA
## SE.mean 3.350000e-03 NA NA NA
## CI.mean.0.95 4.256579e-02 NA NA NA
## var 2.244500e-05 NA NA NA
## std.dev 4.737615e-03 NA NA NA
## coef.var -7.076787e-05 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 2 0.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 7.000000 NA
## range NA NA NA -Inf 7.000000 NA
## sum NA NA NA 0 7.000000 NA
## median NA NA NA NA 3.500000 NA
## mean NA NA NA NaN 3.500000 NA
## SE.mean NA NA NA NA 3.500000 NA
## CI.mean.0.95 NA NA NA NaN 44.471717 NA
## var NA NA NA NA 24.500000 NA
## std.dev NA NA NA NA 4.949747 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.0000000 2.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 45.7447459 22.8723730
## max NA 54.2552541 72.8723730
## range NA 8.5105081 50.0000000
## sum NA 100.0000000 95.7447459
## median NA 50.0000000 47.8723730
## mean NA 50.0000000 47.8723730
## SE.mean NA 4.2552541 25.0000000
## CI.mean.0.95 NA 54.0681294 317.6551184
## var NA 36.2143744 1250.0000000
## std.dev NA 6.0178380 35.3553391
## coef.var NA 0.1203568 0.7385332
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Peru <- subset (df, Country == "Peru")
knitr::kable(head(df_Peru,n=4))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
80 | 472 | 3/5/08 | SA | Peru | PE | Huanuco | 53177 | Tingo MarÃa | 17.34318 | -9.2114 | -76.1311 | (-9.2113999999999994, -76.131100000000004) | Landslide | Landslide | Medium | Rain | NA | 7 | http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm | ||||
90 | 497 | 4/11/08 | SA | Peru | PE | Ancash | 5044 | Carhuaz | 2.43089 | -9.2965 | -77.6613 | (-9.2965, -77.661299999999997) | Landslide | Landslide | Medium | Rain | NA | NA | http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm | ||||
184 | 1004 | 4/14/09 | SA | Peru | PE | La Libertad | 0 | Parcoy | 9.64894 | -7.9589 | -77.5239 | (-7.9588999999999999, -77.523899999999998) | Landslide | Mudslide | Medium | Downpour | NA | 12 | http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html | ||||
185 | 1005 | 4/14/09 | SA | Peru | PE | La Libertad | 0 | Aricapampa | 0.00442 | -7.8058 | -77.7172 | (-7.8057999999999996, -77.717200000000005) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html |
df_Peru %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 80 Peru Huanuco Tingo MarÃa 17.34318 3/5/08
## 90 Peru Ancash Carhuaz 2.43089 4/11/08
## 184 Peru La Libertad Parcoy 9.64894 4/14/09
## 185 Peru La Libertad Aricapampa 0.00442 4/14/09
## 252 Peru Ancash Anta 0.13147 1/28/10
## 294 Peru Huanuco Acomayo 3.49890 4/1/10
## 295 Peru Huanuco Ambo 0.55865 4/2/10
## 516 Peru Huanuco Huánuco 1.01932 11/7/10
## 578 Peru Ancash Yanac 3.70678 1/9/11
## 864 Peru Ancash Tauca 6.77672 2/15/12
## 912 Peru San MartÃn Yuracyacu 8.89799 10/17/12
## 1378 Peru Ancash Huachis 3.26788 11/19/14
## 1424 Peru San MartÃn Naranjos 12.70296 3/3/15
## 1425 Peru San MartÃn Tarapoto 2.89809 3/4/15
ggplot(data=df_Peru, aes(x = "Peru", y = Distance, fill=State)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(data=df_Peru, aes(fill=State, y=Distance, x="Peru")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_Peru, aes(fill=State, y=Distance, x="Peru")) +
geom_bar(position="stack", stat="identity")
df_Ansc %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 90 Peru Ancash Carhuaz 2.43089 4/11/08
## 252 Peru Ancash Anta 0.13147 1/28/10
## 578 Peru Ancash Yanac 3.70678 1/9/11
## 864 Peru Ancash Tauca 6.77672 2/15/12
## 1378 Peru Ancash Huachis 3.26788 11/19/14
head(df_Ansc)
## id date time America Country country_code State population
## 90 497 4/11/08 SA Peru PE Ancash 5044
## 252 1459 1/28/10 15:00:00 SA Peru PE Ancash 0
## 578 2948 1/9/11 SA Peru PE Ancash 0
## 864 4200 2/15/12 Early morning SA Peru PE Ancash 0
## 1378 6690 11/19/14 SA Peru PE Ancash 0
## City Distance location_description latitude longitude
## 90 Carhuaz 2.43089 -9.2965 -77.6613
## 252 Anta 0.13147 -9.3584 -77.5984
## 578 Yanac 3.70678 -8.6252 -77.8341
## 864 Tauca 6.77672 -8.5348 -78.0834
## 1378 Huachis 3.26788 Above road -9.3884 -77.1255
## geolocation hazard_type landslide_type
## 90 (-9.2965, -77.661299999999997) Landslide Landslide
## 252 (-9.3583999999999996, -77.598399999999998) Landslide Complex
## 578 (-8.6251999999999995, -77.834100000000007) Landslide Mudslide
## 864 (-8.5348000000000006, -78.083399999999997) Landslide Mudslide
## 1378 (-9.3884000000000007, -77.125500000000002) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 90 Medium Rain NA NA
## 252 Medium Downpour NA 0
## 578 Medium Downpour NA 0
## 864 Medium Downpour NA 3
## 1378 Medium Downpour 0 0 RPP
## source_link
## 90 http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm
## 252 http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html
## 578 http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095
## 864 http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/
## 1378 http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html
ggplot(data=df_Ansc, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Ansc, aes(x = "Ancash", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_Ansc <- df_Ansc %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_Ansc$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_Ansc, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_Ansc$Distance
names(Distance) <- df_Ansc$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tauca 6.7767200 6.7767200 41.5399534 41.5399534
## Yanac 3.7067800 10.4835000 22.7218283 64.2617818
## Huachis 3.2678800 13.7513800 20.0314581 84.2932399
## Carhuaz 2.4308900 16.1822700 14.9008750 99.1941149
## Anta 0.1314700 16.3137400 0.8058851 100.0000000
stem(df_Ansc$"Distance")
##
## The decimal point is at the |
##
## 0 | 1
## 2 | 437
## 4 |
## 6 | 8
head(df_Ansc)
## id date time America Country country_code State population
## 1 2948 1/9/11 SA Peru PE Ancash 0
## 2 4200 2/15/12 Early morning SA Peru PE Ancash 0
## 3 6690 11/19/14 SA Peru PE Ancash 0
## 4 497 4/11/08 SA Peru PE Ancash 5044
## 5 1459 1/28/10 15:00:00 SA Peru PE Ancash 0
## City Distance location_description latitude longitude
## 1 Yanac 3.70678 -8.6252 -77.8341
## 2 Tauca 6.77672 -8.5348 -78.0834
## 3 Huachis 3.26788 Above road -9.3884 -77.1255
## 4 Carhuaz 2.43089 -9.2965 -77.6613
## 5 Anta 0.13147 -9.3584 -77.5984
## geolocation hazard_type landslide_type
## 1 (-8.6251999999999995, -77.834100000000007) Landslide Mudslide
## 2 (-8.5348000000000006, -78.083399999999997) Landslide Mudslide
## 3 (-9.3884000000000007, -77.125500000000002) Landslide Landslide
## 4 (-9.2965, -77.661299999999997) Landslide Landslide
## 5 (-9.3583999999999996, -77.598399999999998) Landslide Complex
## landslide_size trigger storm_name injuries fatalities source_name
## 1 Medium Downpour NA 0
## 2 Medium Downpour NA 3
## 3 Medium Downpour 0 0 RPP
## 4 Medium Rain NA NA
## 5 Medium Downpour NA 0
## source_link
## 1 http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095
## 2 http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/
## 3 http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html
## 4 http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm
## 5 http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html
## prop ypos
## 1 22.7218283 11.36091
## 2 41.5399534 43.49181
## 3 20.0314581 74.27751
## 4 14.9008750 91.74368
## 5 0.8058851 99.59706
knitr::kable(head(df_Ansc))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2948 | 1/9/11 | SA | Peru | PE | Ancash | 0 | Yanac | 3.70678 | -8.6252 | -77.8341 | (-8.6251999999999995, -77.834100000000007) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://www.laht.com/article.asp?ArticleId=383476&CategoryId=14095 | 22.7218283 | 11.36091 | ||||
4200 | 2/15/12 | Early morning | SA | Peru | PE | Ancash | 0 | Tauca | 6.77672 | -8.5348 | -78.0834 | (-8.5348000000000006, -78.083399999999997) | Landslide | Mudslide | Medium | Downpour | NA | 3 | http://www.peruviantimes.com/16/three-missing-after-landslide-hits-mine-camp/15046/ | 41.5399534 | 43.49181 | |||
6690 | 11/19/14 | SA | Peru | PE | Ancash | 0 | Huachis | 3.26788 | Above road | -9.3884 | -77.1255 | (-9.3884000000000007, -77.125500000000002) | Landslide | Landslide | Medium | Downpour | 0 | 0 | RPP | http://www.rpp.com.pe/2014-11-19-deslizamiento-de-piedras-y-tierra-bloquea-carretera-en-huari-noticia_743504.html | 20.0314581 | 74.27751 | ||
497 | 4/11/08 | SA | Peru | PE | Ancash | 5044 | Carhuaz | 2.43089 | -9.2965 | -77.6613 | (-9.2965, -77.661299999999997) | Landslide | Landslide | Medium | Rain | NA | NA | http://news.xinhuanet.com/english/2008-04/14/content_7972049.htm | 14.9008750 | 91.74368 | ||||
1459 | 1/28/10 | 15:00:00 | SA | Peru | PE | Ancash | 0 | Anta | 0.13147 | -9.3584 | -77.5984 | (-9.3583999999999996, -77.598399999999998) | Landslide | Complex | Medium | Downpour | NA | 0 | http://daveslandslideblog.blogspot.com/2010/01/new-images-of-level-of-destruction-in.html | 0.8058851 | 99.59706 |
stem(df_Ansc$"Distance")
##
## The decimal point is at the |
##
## 0 | 1
## 2 | 437
## 4 |
## 6 | 8
stem(df_Ansc$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 1
## 1 |
## 2 | 4
## 3 | 37
## 4 |
## 5 |
## 6 | 8
library(forecast)
data_serie<- ts(df_Ansc$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May
## 2007 3.70678 6.77672 3.26788 2.43089 0.13147
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.13147 | 1 | 20 | 20 | 20 | 20 |
2.43089 | 1 | 20 | 20 | 40 | 40 |
3.26788 | 1 | 20 | 20 | 60 | 60 |
3.70678 | 1 | 20 | 20 | 80 | 80 |
6.77672 | 1 | 20 | 20 | 100 | 100 |
Total | 5 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.13147 | 1 |
2.43089 | 1 |
3.26788 | 1 |
3.70678 | 1 |
6.77672 | 1 |
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.13147 3.13147 6.13147 9.13147
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.13147 | 1 | 0.2 | 1 |
2.43089 | 1 | 0.2 | 2 |
3.26788 | 1 | 0.2 | 3 |
3.70678 | 1 | 0.2 | 4 |
6.77672 | 1 | 0.2 | 5 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ Distance: Factor w/ 5 levels "0.13147","2.43089",..: 1 2 3 4 5
## $ Freq : int 1 1 1 1 1
## $ Rel_Freq: num 0.2 0.2 0.2 0.2 0.2
## $ Cum_Freq: int 1 2 3 4 5
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.13147 | 1 |
2.43089 | 1 |
3.26788 | 1 |
3.70678 | 1 |
6.77672 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Ansc$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1315 2.4309 3.2679 3.2627 3.7068 6.7767
library(pastecs)
stat.desc(df_Ansc)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 5.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 4.970000e+02 NA NA NA NA NA NA
## max 6.690000e+03 NA NA NA NA NA NA
## range 6.193000e+03 NA NA NA NA NA NA
## sum 1.579400e+04 NA NA NA NA NA NA
## median 2.948000e+03 NA NA NA NA NA NA
## mean 3.158800e+03 NA NA NA NA NA NA
## SE.mean 1.085643e+03 NA NA NA NA NA NA
## CI.mean.0.95 3.014228e+03 NA NA NA NA NA NA
## var 5.893102e+06 NA NA NA NA NA NA
## std.dev 2.427571e+03 NA NA NA NA NA NA
## coef.var 7.685106e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 5.000000e+00 NA 5.0000000 NA 5.00000000
## nbr.null 4.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 0.000000e+00 NA 0.1314700 NA -9.38840000
## max 5.044000e+03 NA 6.7767200 NA -8.53480000
## range 5.044000e+03 NA 6.6452500 NA 0.85360000
## sum 5.044000e+03 NA 16.3137400 NA -45.20330000
## median 0.000000e+00 NA 3.2678800 NA -9.29650000
## mean 1.008800e+03 NA 3.2627480 NA -9.04066000
## SE.mean 1.008800e+03 NA 1.0733611 NA 0.18918738
## CI.mean.0.95 2.800878e+03 NA 2.9801281 NA 0.52526837
## var 5.088387e+06 NA 5.7605199 NA 0.17895932
## std.dev 2.255745e+03 NA 2.4001083 NA 0.42303584
## coef.var 2.236068e+00 NA 0.7356095 NA -0.04679258
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.808340e+01 NA NA NA
## max -7.712550e+01 NA NA NA
## range 9.579000e-01 NA NA NA
## sum -3.883027e+02 NA NA NA
## median -7.766130e+01 NA NA NA
## mean -7.766054e+01 NA NA NA
## SE.mean 1.579657e-01 NA NA NA
## CI.mean.0.95 4.385832e-01 NA NA NA
## var 1.247659e-01 NA NA NA
## std.dev 3.532221e-01 NA NA NA
## coef.var -4.548283e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 4.000000 NA
## nbr.null NA NA NA 1 3.000000 NA
## nbr.na NA NA NA 4 1.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 3.000000 NA
## range NA NA NA 0 3.000000 NA
## sum NA NA NA 0 3.000000 NA
## median NA NA NA 0 0.000000 NA
## mean NA NA NA 0 0.750000 NA
## SE.mean NA NA NA NA 0.750000 NA
## CI.mean.0.95 NA NA NA NaN 2.386835 NA
## var NA NA NA NA 2.250000 NA
## std.dev NA NA NA NA 1.500000 NA
## coef.var NA NA NA NA 2.000000 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.8058851 11.3609142
## max NA 41.5399534 99.5970574
## range NA 40.7340683 88.2361433
## sum NA 100.0000000 320.4709650
## median NA 20.0314581 74.2775109
## mean NA 20.0000000 64.0941930
## SE.mean NA 6.5794911 16.3308059
## CI.mean.0.95 NA 18.2675958 45.3415861
## var NA 216.4485140 1333.4761049
## std.dev NA 14.7121893 36.5167921
## coef.var NA 0.7356095 0.5697364
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_La %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 105 El Salvador La Libertad Santa Tecla 4.96416 6/2/08
## 184 Peru La Libertad Parcoy 9.64894 4/14/09
## 185 Peru La Libertad Aricapampa 0.00442 4/14/09
## 225 El Salvador La Libertad Antiguo Cuscatlán 4.86219 11/8/09
## 1374 El Salvador La Libertad Santa Tecla 4.60655 10/12/14
## 1596 El Salvador La Libertad Santa Tecla 4.67722 11/3/15
## 1597 El Salvador La Libertad Santa Tecla 9.87553 11/4/15
head(df_La )
## id date time America Country country_code State population
## 105 564 6/2/08 <NA> El Salvador SV La Libertad 124694
## 184 1004 4/14/09 SA Peru PE La Libertad 0
## 185 1005 4/14/09 SA Peru PE La Libertad 0
## 225 1286 11/8/09 <NA> El Salvador SV La Libertad 33767
## 1374 6686 10/12/14 <NA> El Salvador SV La Libertad 124694
## 1596 7440 11/3/15 1:00 <NA> El Salvador SV La Libertad 124694
## City Distance location_description latitude longitude
## 105 Santa Tecla 4.96416 13.7205 -89.2687
## 184 Parcoy 9.64894 -7.9589 -77.5239
## 185 Aricapampa 0.00442 -7.8058 -77.7172
## 225 Antiguo Cuscatlán 4.86219 13.7156 -89.2521
## 1374 Santa Tecla 4.60655 Above road 13.6905 -89.3200
## 1596 Santa Tecla 4.67722 Unknown 13.7178 -89.2685
## geolocation hazard_type landslide_type
## 105 (13.720499999999999, -89.268699999999995) Landslide Landslide
## 184 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide
## 185 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide
## 225 (13.7156, -89.252099999999999) Landslide Mudslide
## 1374 (13.6905, -89.32) Landslide Landslide
## 1596 (13.7178, -89.268500000000003) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities
## 105 Medium Tropical cyclone Tropical Storm Arthur NA NA
## 184 Medium Downpour NA 12
## 185 Medium Downpour NA 0
## 225 Medium Tropical cyclone Tropical Cyclone Ida NA 4
## 1374 Medium Rain 0 0
## 1596 Medium Rain 0 0
## source_name
## 105
## 184
## 185
## 225
## 1374 Mexicano
## 1596 El Salvador Noticias
## source_link
## 105 http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
## 184 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 185 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 225 http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
## 1374 http://www.oem.com.mx/elmexicano/notas/n3569793.htm
## 1596 http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/
ggplot(data=df_La , aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_La , aes(x = "La Libertad", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_La ,aes(x="La Libertad",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=3)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_La $Distance
names(Distance) <- df_La $City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santa Tecla 9.87553000 9.87553000 25.55844469 25.55844469
## Parcoy 9.64894000 19.52447000 24.97201662 50.53046131
## Santa Tecla 4.96416000 24.48863000 12.84753414 63.37799545
## Antiguo Cuscatlán 4.86219000 29.35082000 12.58362986 75.96162531
## Santa Tecla 4.67722000 34.02804000 12.10491677 88.06654208
## Santa Tecla 4.60655000 38.63459000 11.92201871 99.98856078
## Aricapampa 0.00442000 38.63901000 0.01143922 100.00000000
stem(df_La $"Distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 |
## 4 | 6790
## 6 |
## 8 | 69
head(df_La )
## id date time America Country country_code State population
## 105 564 6/2/08 <NA> El Salvador SV La Libertad 124694
## 184 1004 4/14/09 SA Peru PE La Libertad 0
## 185 1005 4/14/09 SA Peru PE La Libertad 0
## 225 1286 11/8/09 <NA> El Salvador SV La Libertad 33767
## 1374 6686 10/12/14 <NA> El Salvador SV La Libertad 124694
## 1596 7440 11/3/15 1:00 <NA> El Salvador SV La Libertad 124694
## City Distance location_description latitude longitude
## 105 Santa Tecla 4.96416 13.7205 -89.2687
## 184 Parcoy 9.64894 -7.9589 -77.5239
## 185 Aricapampa 0.00442 -7.8058 -77.7172
## 225 Antiguo Cuscatlán 4.86219 13.7156 -89.2521
## 1374 Santa Tecla 4.60655 Above road 13.6905 -89.3200
## 1596 Santa Tecla 4.67722 Unknown 13.7178 -89.2685
## geolocation hazard_type landslide_type
## 105 (13.720499999999999, -89.268699999999995) Landslide Landslide
## 184 (-7.9588999999999999, -77.523899999999998) Landslide Mudslide
## 185 (-7.8057999999999996, -77.717200000000005) Landslide Mudslide
## 225 (13.7156, -89.252099999999999) Landslide Mudslide
## 1374 (13.6905, -89.32) Landslide Landslide
## 1596 (13.7178, -89.268500000000003) Landslide Mudslide
## landslide_size trigger storm_name injuries fatalities
## 105 Medium Tropical cyclone Tropical Storm Arthur NA NA
## 184 Medium Downpour NA 12
## 185 Medium Downpour NA 0
## 225 Medium Tropical cyclone Tropical Cyclone Ida NA 4
## 1374 Medium Rain 0 0
## 1596 Medium Rain 0 0
## source_name
## 105
## 184
## 185
## 225
## 1374 Mexicano
## 1596 El Salvador Noticias
## source_link
## 105 http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm
## 184 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 185 http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html
## 225 http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0
## 1374 http://www.oem.com.mx/elmexicano/notas/n3569793.htm
## 1596 http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/
knitr::kable(head(df_La ))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
105 | 564 | 6/2/08 | NA | El Salvador | SV | La Libertad | 124694 | Santa Tecla | 4.96416 | 13.7205 | -89.2687 | (13.720499999999999, -89.268699999999995) | Landslide | Landslide | Medium | Tropical cyclone | Tropical Storm Arthur | NA | NA | http://news.xinhuanet.com/english/2008-06/04/content_8310737.htm | |||
184 | 1004 | 4/14/09 | SA | Peru | PE | La Libertad | 0 | Parcoy | 9.64894 | -7.9589 | -77.5239 | (-7.9588999999999999, -77.523899999999998) | Landslide | Mudslide | Medium | Downpour | NA | 12 | http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html | ||||
185 | 1005 | 4/14/09 | SA | Peru | PE | La Libertad | 0 | Aricapampa | 0.00442 | -7.8058 | -77.7172 | (-7.8057999999999996, -77.717200000000005) | Landslide | Mudslide | Medium | Downpour | NA | 0 | http://rawstory.com/news/afp/Nine_dead_in_Peru_mudslides_04142009.html | ||||
225 | 1286 | 11/8/09 | NA | El Salvador | SV | La Libertad | 33767 | Antiguo Cuscatlán | 4.86219 | 13.7156 | -89.2521 | (13.7156, -89.252099999999999) | Landslide | Mudslide | Medium | Tropical cyclone | Tropical Cyclone Ida | NA | 4 | http://www.google.com/hostednews/ap/article/ALeqM5j0XCCb1n12DyhoBoDzGj_hTyEtrAD9BRKPRG0 | |||
1374 | 6686 | 10/12/14 | NA | El Salvador | SV | La Libertad | 124694 | Santa Tecla | 4.60655 | Above road | 13.6905 | -89.3200 | (13.6905, -89.32) | Landslide | Landslide | Medium | Rain | 0 | 0 | Mexicano | http://www.oem.com.mx/elmexicano/notas/n3569793.htm | ||
1596 | 7440 | 11/3/15 | 1:00 | NA | El Salvador | SV | La Libertad | 124694 | Santa Tecla | 4.67722 | Unknown | 13.7178 | -89.2685 | (13.7178, -89.268500000000003) | Landslide | Mudslide | Medium | Rain | 0 | 0 | El Salvador Noticias | http://www.elsalvadornoticias.net/2015/11/03/deslave-en-colonia-escalon-por-desborde-quebrada-las-lajas/ |
stem(df_La $"Distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 |
## 4 | 6790
## 6 |
## 8 | 69
stem(df_La $"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0
## 1 |
## 2 |
## 3 |
## 4 | 679
## 5 | 0
## 6 |
## 7 |
## 8 |
## 9 | 69
library(forecast)
data_serie<- ts(df_La $Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr May Jun
## 2007 4.96416 9.64894 0.00442 4.86219 4.60655 4.67722
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.00442 | 1 | 14.3 | 14.3 | 14.3 | 14.3 |
4.60655 | 1 | 14.3 | 14.3 | 28.6 | 28.6 |
4.67722 | 1 | 14.3 | 14.3 | 42.9 | 42.9 |
4.86219 | 1 | 14.3 | 14.3 | 57.1 | 57.1 |
4.96416 | 1 | 14.3 | 14.3 | 71.4 | 71.4 |
9.64894 | 1 | 14.3 | 14.3 | 85.7 | 85.7 |
9.87553 | 1 | 14.3 | 14.3 | 100.0 | 100.0 |
Total | 7 | 100.0 | 100.0 | 100.0 | 100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 8 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 7
## $ % : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ val% : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ %cum : num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
## $ val%cum: num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.00442 | 1 |
4.60655 | 1 |
4.67722 | 1 |
4.86219 | 1 |
4.96416 | 1 |
9.64894 | 1 |
9.87553 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.00442 4.00442 8.00442 12.00442
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.00442 | 1 | 0.1428571 | 1 |
4.60655 | 1 | 0.1428571 | 2 |
4.67722 | 1 | 0.1428571 | 3 |
4.86219 | 1 | 0.1428571 | 4 |
4.96416 | 1 | 0.1428571 | 5 |
9.64894 | 1 | 0.1428571 | 6 |
9.87553 | 1 | 0.1428571 | 7 |
str(Freq_table)
## 'data.frame': 7 obs. of 4 variables:
## $ Distance: Factor w/ 7 levels "0.00442","4.60655",..: 1 2 3 4 5 6 7
## $ Freq : int 1 1 1 1 1 1 1
## $ Rel_Freq: num 0.143 0.143 0.143 0.143 0.143 ...
## $ Cum_Freq: int 1 2 3 4 5 6 7
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.00442 | 1 |
4.60655 | 1 |
4.67722 | 1 |
4.86219 | 1 |
4.96416 | 1 |
9.64894 | 1 |
9.87553 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_La $Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00442 4.64189 4.86219 5.51986 7.30655 9.87553
library(pastecs)
stat.desc(df_La )
## id date time America Country country_code State
## nbr.val 7.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 5.640000e+02 NA NA NA NA NA NA
## max 7.441000e+03 NA NA NA NA NA NA
## range 6.877000e+03 NA NA NA NA NA NA
## sum 2.542600e+04 NA NA NA NA NA NA
## median 1.286000e+03 NA NA NA NA NA NA
## mean 3.632286e+03 NA NA NA NA NA NA
## SE.mean 1.263594e+03 NA NA NA NA NA NA
## CI.mean.0.95 3.091903e+03 NA NA NA NA NA NA
## var 1.117669e+07 NA NA NA NA NA NA
## std.dev 3.343155e+03 NA NA NA NA NA NA
## coef.var 9.203998e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 7.000000e+00 NA 7.0000000 NA 7.000000
## nbr.null 2.000000e+00 NA 0.0000000 NA 0.000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.000000
## min 0.000000e+00 NA 0.0044200 NA -7.958900
## max 1.246940e+05 NA 9.8755300 NA 13.720500
## range 1.246940e+05 NA 9.8711100 NA 21.679400
## sum 5.325430e+05 NA 38.6390100 NA 52.794400
## median 1.246940e+05 NA 4.8621900 NA 13.714700
## mean 7.607757e+04 NA 5.5198586 NA 7.542057
## SE.mean 2.330952e+04 NA 1.2791994 NA 3.982602
## CI.mean.0.95 5.703633e+04 NA 3.1300883 NA 9.745075
## var 3.803334e+09 NA 11.4544585 NA 111.027808
## std.dev 6.167118e+04 NA 3.3844436 NA 10.536973
## coef.var 8.106355e-01 NA 0.6131395 NA 1.397095
## longitude geolocation hazard_type landslide_type
## nbr.val 7.00000000 NA NA NA
## nbr.null 0.00000000 NA NA NA
## nbr.na 0.00000000 NA NA NA
## min -89.36250000 NA NA NA
## max -77.52390000 NA NA NA
## range 11.83860000 NA NA NA
## sum -601.71290000 NA NA NA
## median -89.26850000 NA NA NA
## mean -85.95898571 NA NA NA
## SE.mean 2.15312466 NA NA NA
## CI.mean.0.95 5.26850626 NA NA NA
## var 32.45162074 NA NA NA
## std.dev 5.69663240 NA NA NA
## coef.var -0.06627152 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3.0000000 6.000000 NA
## nbr.null NA NA NA 2.0000000 4.000000 NA
## nbr.na NA NA NA 4.0000000 1.000000 NA
## min NA NA NA 0.0000000 0.000000 NA
## max NA NA NA 1.0000000 12.000000 NA
## range NA NA NA 1.0000000 12.000000 NA
## sum NA NA NA 1.0000000 16.000000 NA
## median NA NA NA 0.0000000 0.000000 NA
## mean NA NA NA 0.3333333 2.666667 NA
## SE.mean NA NA NA 0.3333333 1.977653 NA
## CI.mean.0.95 NA NA NA 1.4342176 5.083719 NA
## var NA NA NA 0.3333333 23.466667 NA
## std.dev NA NA NA 0.5773503 4.844241 NA
## coef.var NA NA NA 1.7320508 1.816590 NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Hua <- subset (df, State == "Huanuco")
df_Hua %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 80 Peru Huanuco Tingo MarÃa 17.34318 3/5/08
## 294 Peru Huanuco Acomayo 3.49890 4/1/10
## 295 Peru Huanuco Ambo 0.55865 4/2/10
## 516 Peru Huanuco Huánuco 1.01932 11/7/10
head(df_Hua)
## id date time America Country country_code State population
## 80 472 3/5/08 SA Peru PE Huanuco 53177
## 294 1612 4/1/10 Early morning SA Peru PE Huanuco 0
## 295 1614 4/2/10 SA Peru PE Huanuco 6865
## 516 2707 11/7/10 SA Peru PE Huanuco 147959
## City Distance location_description latitude longitude
## 80 Tingo MarÃa 17.34318 -9.2114 -76.1311
## 294 Acomayo 3.49890 -9.7996 -76.1038
## 295 Ambo 0.55865 -10.1258 -76.2043
## 516 Huánuco 1.01932 -9.9241 -76.2488
## geolocation hazard_type landslide_type
## 80 (-9.2113999999999994, -76.131100000000004) Landslide Landslide
## 294 (-9.7995999999999999, -76.103800000000007) Landslide Lahar
## 295 (-10.1258, -76.204300000000003) Landslide Landslide
## 516 (-9.9240999999999993, -76.248800000000003) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 80 Medium Rain NA 7
## 294 Large Downpour NA 12
## 295 Large Downpour NA 28
## 516 Medium Downpour NA 2
## source_link
## 80 http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
## 294 http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html
## 295 http://www.thedailytimes.com/article/20100405/NEWS/304059983
## 516 http://english.peopledaily.com.cn/90001/90777/90852/7191360.html
ggplot(data=df_Hua, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Hua, aes(x = "Huanuco", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Hua,aes(x="Huanuco",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=3)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Hua$Distance
names(Distance) <- df_Hua$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tingo MarÃa 17.343180 17.343180 77.355670 77.355670
## Acomayo 3.498900 20.842080 15.606120 92.961791
## Huánuco 1.019320 21.861400 4.546466 97.508257
## Ambo 0.558650 22.420050 2.491743 100.000000
stem(df_Hua$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 113
## 0 |
## 1 |
## 1 | 7
head(df_Hua)
## id date time America Country country_code State population
## 80 472 3/5/08 SA Peru PE Huanuco 53177
## 294 1612 4/1/10 Early morning SA Peru PE Huanuco 0
## 295 1614 4/2/10 SA Peru PE Huanuco 6865
## 516 2707 11/7/10 SA Peru PE Huanuco 147959
## City Distance location_description latitude longitude
## 80 Tingo MarÃa 17.34318 -9.2114 -76.1311
## 294 Acomayo 3.49890 -9.7996 -76.1038
## 295 Ambo 0.55865 -10.1258 -76.2043
## 516 Huánuco 1.01932 -9.9241 -76.2488
## geolocation hazard_type landslide_type
## 80 (-9.2113999999999994, -76.131100000000004) Landslide Landslide
## 294 (-9.7995999999999999, -76.103800000000007) Landslide Lahar
## 295 (-10.1258, -76.204300000000003) Landslide Landslide
## 516 (-9.9240999999999993, -76.248800000000003) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities source_name
## 80 Medium Rain NA 7
## 294 Large Downpour NA 12
## 295 Large Downpour NA 28
## 516 Medium Downpour NA 2
## source_link
## 80 http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm
## 294 http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html
## 295 http://www.thedailytimes.com/article/20100405/NEWS/304059983
## 516 http://english.peopledaily.com.cn/90001/90777/90852/7191360.html
knitr::kable(head(df_Hua))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
80 | 472 | 3/5/08 | SA | Peru | PE | Huanuco | 53177 | Tingo MarÃa | 17.34318 | -9.2114 | -76.1311 | (-9.2113999999999994, -76.131100000000004) | Landslide | Landslide | Medium | Rain | NA | 7 | http://news.xinhuanet.com/english/2008-03/06/content_7727748.htm | ||||
294 | 1612 | 4/1/10 | Early morning | SA | Peru | PE | Huanuco | 0 | Acomayo | 3.49890 | -9.7996 | -76.1038 | (-9.7995999999999999, -76.103800000000007) | Landslide | Lahar | Large | Downpour | NA | 12 | http://www.thejakartapost.com/news/2010/03/18/mudslide-isolates-1500-residents-solok.html | |||
295 | 1614 | 4/2/10 | SA | Peru | PE | Huanuco | 6865 | Ambo | 0.55865 | -10.1258 | -76.2043 | (-10.1258, -76.204300000000003) | Landslide | Landslide | Large | Downpour | NA | 28 | http://www.thedailytimes.com/article/20100405/NEWS/304059983 | ||||
516 | 2707 | 11/7/10 | SA | Peru | PE | Huanuco | 147959 | Huánuco | 1.01932 | -9.9241 | -76.2488 | (-9.9240999999999993, -76.248800000000003) | Landslide | Landslide | Medium | Downpour | NA | 2 | http://english.peopledaily.com.cn/90001/90777/90852/7191360.html |
stem(df_Hua$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 113
## 0 |
## 1 |
## 1 | 7
stem(df_Hua$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 60
## 2 | 5
## 4 |
## 6 |
## 8 |
## 10 |
## 12 |
## 14 |
## 16 | 3
library(forecast)
data_serie<- ts(df_Hua$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr
## 2007 17.34318 3.49890 0.55865 1.01932
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.55865 | 1 | 25 | 25 | 25 | 25 |
1.01932 | 1 | 25 | 25 | 50 | 50 |
3.4989 | 1 | 25 | 25 | 75 | 75 |
17.34318 | 1 | 25 | 25 | 100 | 100 |
Total | 4 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.55865 | 1 |
1.01932 | 1 |
3.4989 | 1 |
17.34318 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.55865 6.55865 12.55865 18.55865
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.55865 | 1 | 0.25 | 1 |
1.01932 | 1 | 0.25 | 2 |
3.4989 | 1 | 0.25 | 3 |
17.34318 | 1 | 0.25 | 4 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ Distance: Factor w/ 4 levels "0.55865","1.01932",..: 1 2 3 4
## $ Freq : int 1 1 1 1
## $ Rel_Freq: num 0.25 0.25 0.25 0.25
## $ Cum_Freq: int 1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.55865 | 1 |
1.01932 | 1 |
3.4989 | 1 |
17.34318 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Hua$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5586 0.9042 2.2591 5.6050 6.9600 17.3432
library(pastecs)
stat.desc(df_Hua)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 4.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 4.720000e+02 NA NA NA NA NA NA
## max 2.707000e+03 NA NA NA NA NA NA
## range 2.235000e+03 NA NA NA NA NA NA
## sum 6.405000e+03 NA NA NA NA NA NA
## median 1.613000e+03 NA NA NA NA NA NA
## mean 1.601250e+03 NA NA NA NA NA NA
## SE.mean 4.562681e+02 NA NA NA NA NA NA
## CI.mean.0.95 1.452049e+03 NA NA NA NA NA NA
## var 8.327223e+05 NA NA NA NA NA NA
## std.dev 9.125362e+02 NA NA NA NA NA NA
## coef.var 5.698899e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 4.000000e+00 NA 4.000000 NA 4.0000000
## nbr.null 1.000000e+00 NA 0.000000 NA 0.0000000
## nbr.na 0.000000e+00 NA 0.000000 NA 0.0000000
## min 0.000000e+00 NA 0.558650 NA -10.1258000
## max 1.479590e+05 NA 17.343180 NA -9.2114000
## range 1.479590e+05 NA 16.784530 NA 0.9144000
## sum 2.080010e+05 NA 22.420050 NA -39.0609000
## median 3.002100e+04 NA 2.259110 NA -9.8618500
## mean 5.200025e+04 NA 5.605012 NA -9.7652250
## SE.mean 3.409629e+04 NA 3.965630 NA 0.1964602
## CI.mean.0.95 1.085096e+05 NA 12.620404 NA 0.6252241
## var 4.650228e+09 NA 62.904879 NA 0.1543865
## std.dev 6.819258e+04 NA 7.931260 NA 0.3929204
## coef.var 1.311389e+00 NA 1.415030 NA -0.0402367
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.624880e+01 NA NA NA
## max -7.610380e+01 NA NA NA
## range 1.450000e-01 NA NA NA
## sum -3.046880e+02 NA NA NA
## median -7.616770e+01 NA NA NA
## mean -7.617200e+01 NA NA NA
## SE.mean 3.324853e-02 NA NA NA
## CI.mean.0.95 1.058117e-01 NA NA NA
## var 4.421860e-03 NA NA NA
## std.dev 6.649707e-02 NA NA NA
## coef.var -8.729857e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.0000000 NA
## nbr.null NA NA NA 0 0.0000000 NA
## nbr.na NA NA NA 4 0.0000000 NA
## min NA NA NA Inf 2.0000000 NA
## max NA NA NA -Inf 28.0000000 NA
## range NA NA NA -Inf 26.0000000 NA
## sum NA NA NA 0 49.0000000 NA
## median NA NA NA NA 9.5000000 NA
## mean NA NA NA NaN 12.2500000 NA
## SE.mean NA NA NA NA 5.6328649 NA
## CI.mean.0.95 NA NA NA NaN 17.9262900 NA
## var NA NA NA NA 126.9166667 NA
## std.dev NA NA NA NA 11.2657297 NA
## coef.var NA NA NA NA 0.9196514 NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Ecuador <- subset (df, Country == "Ecuador")
knitr::kable(head(df_Ecuador,n=4))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9 | 105 | 6/27/07 | SA | Ecuador | EC | Zamora-Chinchipe | 15276 | Zamora | 0.47714 | -4.0650 | -78.9510 | (-4.0650000000000004, -78.950999999999993) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
10 | 106 | 6/27/07 | SA | Ecuador | EC | Loja | 117796 | Loja | 0.35649 | -3.9900 | -79.2050 | (-3.99, -79.204999999999998) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
11 | 107 | 6/27/07 | SA | Ecuador | EC | Pichincha | 5114 | Sangolquà | 33.94603 | -0.3560 | -78.1480 | (-0.35599999999999998, -78.147999999999996) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
78 | 468 | 2/28/08 | SA | Ecuador | EC | Napo | 7309 | Archidona | 50.21741 | -0.4635 | -77.8928 | (-0.46350000000000002, -77.892799999999994) | Landslide | Landslide | Medium | Rain | NA | 7 | http://english.aljazeera.net/NR/exeres/868843D7-B211-4DE5-AFBE-31C29CF79C5A.htm |
df_Ecuador %>%
select(Country, State, City, Distance, date)
## Country State City
## 9 Ecuador Zamora-Chinchipe Zamora
## 10 Ecuador Loja Loja
## 11 Ecuador Pichincha SangolquÃ
## 78 Ecuador Napo Archidona
## 87 Ecuador Pichincha Quito
## 218 Ecuador Santo Domingo de los Tsáchilas Santo Domingo de los Colorados
## 238 Ecuador Pichincha Machachi
## 253 Ecuador Tungurahua Baños
## 339 Ecuador Esmeraldas Esmeraldas
## 586 Ecuador Pichincha Machachi
## 587 Ecuador Cotopaxi SaquisilÃ
## 660 Ecuador Morona-Santiago Macas
## 697 Ecuador Pichincha Quito
## 751 Ecuador Carchi El Ã\201ngel
## 868 Ecuador Manabi Chone
## 872 Ecuador Manabi Tosagua
## 942 Ecuador Guayas Balao
## 956 Ecuador Esmeraldas Esmeraldas
## 984 Ecuador Pichincha Cayambe
## 1304 Ecuador Carchi San Gabriel
## 1332 Ecuador Pichincha Quito
## 1333 Ecuador Pichincha Quito
## 1335 Ecuador Sucumbios Gonzalo Pizarro
## 1365 Ecuador Azuay Cuenca
## 1368 Ecuador Zamora-Chinchipe Zamora
## 1451 Ecuador Loja Macará
## 1452 Ecuador Loja Loja
## 1453 Ecuador Loja Catamayo
## 1454 Ecuador Azuay Cuenca
## 1566 Ecuador Pichincha Quito
## Distance date
## 9 0.47714 6/27/07
## 10 0.35649 6/27/07
## 11 33.94603 6/27/07
## 78 50.21741 2/28/08
## 87 1.56942 4/1/08
## 218 1.16036 12/28/09
## 238 26.18676 1/10/10
## 253 11.91442 2/3/10
## 339 2.81891 5/3/10
## 586 25.82923 2/14/11
## 587 30.81169 2/14/11
## 660 46.77007 4/24/11
## 697 4.39517 5/2/11
## 751 28.29459 6/5/11
## 868 19.85816 3/13/12
## 872 7.67919 3/24/12
## 942 16.34404 1/24/13
## 956 21.26652 4/23/13
## 984 45.69792 5/31/13
## 1304 10.47204 10/20/14
## 1332 26.72137 8/12/14
## 1333 23.97854 8/12/14
## 1335 11.55916 12/13/14
## 1365 13.21139 9/29/14
## 1368 1.23724 4/30/14
## 1451 18.88784 3/18/15
## 1452 1.82885 3/18/15
## 1453 17.57187 3/18/15
## 1454 10.16196 3/18/15
## 1566 4.25486 4/29/11
ggplot(data=df_Ecuador, aes(x = "Ecuador", y = Distance, fill=State)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(data=df_Ecuador, aes(fill=State, y=Distance, x="Ecuador")) +
geom_bar(position="dodge", stat="identity")
ggplot(data=df_Ecuador, aes(fill=State, y=Distance, x="Ecuador")) +
geom_bar(position="stack", stat="identity")
library(readr)
library(knitr)
df_Car <- subset (df, State == "Carchi")
df_Car %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 751 Ecuador Carchi El Ã\201ngel 28.29459 6/5/11
## 1304 Ecuador Carchi San Gabriel 10.47204 10/20/14
head(df_Car)
## id date time America Country country_code State population
## 751 3572 6/5/11 SA Ecuador EC Carchi 3983
## 1304 6308 10/20/14 19:33 SA Ecuador EC Carchi 15112
## City Distance location_description latitude longitude
## 751 El Ã\201ngel 28.29459 0.8479 -78.0609
## 1304 San Gabriel 10.47204 Above road 0.6194 -77.7404
## geolocation hazard_type landslide_type
## 751 (0.84789999999999999, -78.060900000000004) Landslide Landslide
## 1304 (0.61939999999999995, -77.740399999999994) Landslide Rockfall
## landslide_size trigger storm_name injuries fatalities source_name
## 751 Medium Downpour NA 0
## 1304 Small Earthquake 0 0 Earthquake Report
## source_link
## 751 http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271
## 1304 http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/
ggplot(data=df_Car, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Car, aes(x = "Carchi", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Car,aes(x="Carchi",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=3)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Car$Distance
names(Distance) <- df_Car$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## El Ã\201ngel 28.29459 28.29459 72.98697 72.98697
## San Gabriel 10.47204 38.76663 27.01303 100.00000
stem(df_Car$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 0
## 1 |
## 2 |
## 2 | 8
head(df_Car)
## id date time America Country country_code State population
## 751 3572 6/5/11 SA Ecuador EC Carchi 3983
## 1304 6308 10/20/14 19:33 SA Ecuador EC Carchi 15112
## City Distance location_description latitude longitude
## 751 El Ã\201ngel 28.29459 0.8479 -78.0609
## 1304 San Gabriel 10.47204 Above road 0.6194 -77.7404
## geolocation hazard_type landslide_type
## 751 (0.84789999999999999, -78.060900000000004) Landslide Landslide
## 1304 (0.61939999999999995, -77.740399999999994) Landslide Rockfall
## landslide_size trigger storm_name injuries fatalities source_name
## 751 Medium Downpour NA 0
## 1304 Small Earthquake 0 0 Earthquake Report
## source_link
## 751 http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271
## 1304 http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/
knitr::kable(head(df_Car))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
751 | 3572 | 6/5/11 | SA | Ecuador | EC | Carchi | 3983 | El Ãngel | 28.29459 | 0.8479 | -78.0609 | (0.84789999999999999, -78.060900000000004) | Landslide | Landslide | Medium | Downpour | NA | 0 | http://www.cre.com.ec/Desktop.aspx?Id=143&e=153271 | ||||
1304 | 6308 | 10/20/14 | 19:33 | SA | Ecuador | EC | Carchi | 15112 | San Gabriel | 10.47204 | Above road | 0.6194 | -77.7404 | (0.61939999999999995, -77.740399999999994) | Landslide | Rockfall | Small | Earthquake | 0 | 0 | Earthquake Report | http://earthquake-report.com/2014/10/20/strong-earthquake-colombia-ecuador-border-region-on-october-20-2014/ |
stem(df_Car$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 0
## 1 |
## 2 |
## 2 | 8
stem(df_Car$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 10 | 5
## 12 |
## 14 |
## 16 |
## 18 |
## 20 |
## 22 |
## 24 |
## 26 |
## 28 | 3
library(forecast)
data_serie<- ts(df_Car$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb
## 2007 28.29459 10.47204
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
10.47204 | 1 | 50 | 50 | 50 | 50 |
28.29459 | 1 | 50 | 50 | 100 | 100 |
Total | 2 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
10.47204 | 1 |
28.29459 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 10.47204 19.47204 28.47204
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
10.47204 | 1 | 0.5 | 1 |
28.29459 | 1 | 0.5 | 2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ Distance: Factor w/ 2 levels "10.47204","28.29459": 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
10.47204 | 1 |
28.29459 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Car$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.47 14.93 19.38 19.38 23.84 28.29
library(pastecs)
stat.desc(df_Car)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 2.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 3.572000e+03 NA NA NA NA NA NA
## max 6.308000e+03 NA NA NA NA NA NA
## range 2.736000e+03 NA NA NA NA NA NA
## sum 9.880000e+03 NA NA NA NA NA NA
## median 4.940000e+03 NA NA NA NA NA NA
## mean 4.940000e+03 NA NA NA NA NA NA
## SE.mean 1.368000e+03 NA NA NA NA NA NA
## CI.mean.0.95 1.738209e+04 NA NA NA NA NA NA
## var 3.742848e+06 NA NA NA NA NA NA
## std.dev 1.934644e+03 NA NA NA NA NA NA
## coef.var 3.916284e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 2.000000e+00 NA 2.0000000 NA 2.00000000
## nbr.null 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.0000000 NA 0.00000000
## min 3.983000e+03 NA 10.4720400 NA 0.61940000
## max 1.511200e+04 NA 28.2945900 NA 0.84790000
## range 1.112900e+04 NA 17.8225500 NA 0.22850000
## sum 1.909500e+04 NA 38.7666300 NA 1.46730000
## median 9.547500e+03 NA 19.3833150 NA 0.73365000
## mean 9.547500e+03 NA 19.3833150 NA 0.73365000
## SE.mean 5.564500e+03 NA 8.9112750 NA 0.11425000
## CI.mean.0.95 7.070368e+04 NA 113.2284846 NA 1.45168389
## var 6.192732e+07 NA 158.8216443 NA 0.02610613
## std.dev 7.869391e+03 NA 12.6024460 NA 0.16157390
## coef.var 8.242358e-01 NA 0.6501698 NA 0.22023294
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.806090e+01 NA NA NA
## max -7.774040e+01 NA NA NA
## range 3.205000e-01 NA NA NA
## sum -1.558013e+02 NA NA NA
## median -7.790065e+01 NA NA NA
## mean -7.790065e+01 NA NA NA
## SE.mean 1.602500e-01 NA NA NA
## CI.mean.0.95 2.036169e+00 NA NA NA
## var 5.136013e-02 NA NA NA
## std.dev 2.266277e-01 NA NA NA
## coef.var -2.909189e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2 NA
## nbr.null NA NA NA 1 2 NA
## nbr.na NA NA NA 1 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
library(readr)
library(knitr)
df <- read.csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
library(dplyr)
colnames(df)[4] <- "America"
colnames(df)[10] <- "Distance"
colnames(df)[5] <- "Country"
colnames(df)[7] <- "State"
colnames(df)[9] <- "City"
colnames(df)[2] <- "date"
library(readr)
library(knitr)
df_Loj <- subset (df, State == "Loja")
df_Loj %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 10 Ecuador Loja Loja 0.35649 6/27/07
## 1451 Ecuador Loja Macará 18.88784 3/18/15
## 1452 Ecuador Loja Loja 1.82885 3/18/15
## 1453 Ecuador Loja Catamayo 17.57187 3/18/15
head(df_Loj)
## id date time America Country country_code State population City
## 10 106 6/27/07 SA Ecuador EC Loja 117796 Loja
## 1451 6893 3/18/15 Night SA Ecuador EC Loja 13035 Macará
## 1452 6900 3/18/15 SA Ecuador EC Loja 117796 Loja
## 1453 6901 3/18/15 SA Ecuador EC Loja 18565 Catamayo
## Distance location_description latitude longitude
## 10 0.35649 -3.9900 -79.2050
## 1451 18.88784 Above road -4.3313 -79.7811
## 1452 1.82885 Unknown -4.0094 -79.2073
## 1453 17.57187 Above road -4.1380 -79.4069
## geolocation hazard_type landslide_type
## 10 (-3.99, -79.204999999999998) Landslide Landslide
## 1451 (-4.3312999999999997, -79.781099999999995) Landslide Landslide
## 1452 (-4.0094000000000003, -79.207300000000004) Landslide Landslide
## 1453 (-4.1379999999999999, -79.406899999999993) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 10 Medium Downpour NA NA
## 1451 Small Continuous rain 0 0
## 1452 Medium Continuous rain 0 0
## 1453 Medium Continuous rain 0 0
## source_name
## 10 Red Cross - Field reports
## 1451 El Comercio
## 1452 El Comercio
## 1453 El Comercio
## source_link
## 10 https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1451 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1452 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1453 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
ggplot(data=df_Loj, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_Loj, aes(x = "Loja", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
ggplot(df_Loj,aes(x="Loja",y=Distance, fill=City))+
geom_bar(stat = "identity",
color="white")+
geom_text(aes(label=(Distance*10)),
position=position_stack(vjust=0.5),color="white",size=3)+
coord_polar(theta = "y")+
labs(title="Gráfico de Deslizamiento")
library(qcc)
Distance <- df_Loj$Distance
names(Distance) <- df_Loj$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Macará 18.8878400 18.8878400 48.8751858 48.8751858
## Catamayo 17.5718700 36.4597100 45.4699114 94.3450972
## Loja 1.8288500 38.2885600 4.7324302 99.0775274
## Loja 0.3564900 38.6450500 0.9224726 100.0000000
stem(df_Loj$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 02
## 0 |
## 1 |
## 1 | 89
head(df_Loj)
## id date time America Country country_code State population City
## 10 106 6/27/07 SA Ecuador EC Loja 117796 Loja
## 1451 6893 3/18/15 Night SA Ecuador EC Loja 13035 Macará
## 1452 6900 3/18/15 SA Ecuador EC Loja 117796 Loja
## 1453 6901 3/18/15 SA Ecuador EC Loja 18565 Catamayo
## Distance location_description latitude longitude
## 10 0.35649 -3.9900 -79.2050
## 1451 18.88784 Above road -4.3313 -79.7811
## 1452 1.82885 Unknown -4.0094 -79.2073
## 1453 17.57187 Above road -4.1380 -79.4069
## geolocation hazard_type landslide_type
## 10 (-3.99, -79.204999999999998) Landslide Landslide
## 1451 (-4.3312999999999997, -79.781099999999995) Landslide Landslide
## 1452 (-4.0094000000000003, -79.207300000000004) Landslide Landslide
## 1453 (-4.1379999999999999, -79.406899999999993) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 10 Medium Downpour NA NA
## 1451 Small Continuous rain 0 0
## 1452 Medium Continuous rain 0 0
## 1453 Medium Continuous rain 0 0
## source_name
## 10 Red Cross - Field reports
## 1451 El Comercio
## 1452 El Comercio
## 1453 El Comercio
## source_link
## 10 https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1451 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1452 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
## 1453 http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html
knitr::kable(head(df_Loj))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 106 | 6/27/07 | SA | Ecuador | EC | Loja | 117796 | Loja | 0.35649 | -3.9900 | -79.2050 | (-3.99, -79.204999999999998) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | |||
1451 | 6893 | 3/18/15 | Night | SA | Ecuador | EC | Loja | 13035 | Macará | 18.88784 | Above road | -4.3313 | -79.7811 | (-4.3312999999999997, -79.781099999999995) | Landslide | Landslide | Small | Continuous rain | 0 | 0 | El Comercio | http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html | |
1452 | 6900 | 3/18/15 | SA | Ecuador | EC | Loja | 117796 | Loja | 1.82885 | Unknown | -4.0094 | -79.2073 | (-4.0094000000000003, -79.207300000000004) | Landslide | Landslide | Medium | Continuous rain | 0 | 0 | El Comercio | http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html | ||
1453 | 6901 | 3/18/15 | SA | Ecuador | EC | Loja | 18565 | Catamayo | 17.57187 | Above road | -4.1380 | -79.4069 | (-4.1379999999999999, -79.406899999999993) | Landslide | Landslide | Medium | Continuous rain | 0 | 0 | El Comercio | http://www.elcomercio.com/actualidad/deslizamientos-cuenca-loja-lluvia-clima.html |
stem(df_Loj$"Distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 02
## 0 |
## 1 |
## 1 | 89
stem(df_Loj$"Distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 48
## 2 |
## 4 |
## 6 |
## 8 |
## 10 |
## 12 |
## 14 |
## 16 | 6
## 18 | 9
library(forecast)
data_serie<- ts(df_Loj$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb Mar Apr
## 2007 0.35649 18.88784 1.82885 17.57187
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.35649 | 1 | 25 | 25 | 25 | 25 |
1.82885 | 1 | 25 | 25 | 50 | 50 |
17.57187 | 1 | 25 | 25 | 75 | 75 |
18.88784 | 1 | 25 | 25 | 100 | 100 |
Total | 4 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.35649 | 1 |
1.82885 | 1 |
17.57187 | 1 |
18.88784 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.35649 7.35649 14.35649 21.35649
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.35649 | 1 | 0.25 | 1 |
1.82885 | 1 | 0.25 | 2 |
17.57187 | 1 | 0.25 | 3 |
18.88784 | 1 | 0.25 | 4 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ Distance: Factor w/ 4 levels "0.35649","1.82885",..: 1 2 3 4
## $ Freq : int 1 1 1 1
## $ Rel_Freq: num 0.25 0.25 0.25 0.25
## $ Cum_Freq: int 1 2 3 4
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.35649 | 1 |
1.82885 | 1 |
17.57187 | 1 |
18.88784 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_Loj$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3565 1.4608 9.7004 9.6613 17.9009 18.8878
library(pastecs)
stat.desc(df_Loj)
## id date time America Country country_code State
## nbr.val 4.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 1.060000e+02 NA NA NA NA NA NA
## max 6.901000e+03 NA NA NA NA NA NA
## range 6.795000e+03 NA NA NA NA NA NA
## sum 2.080000e+04 NA NA NA NA NA NA
## median 6.896500e+03 NA NA NA NA NA NA
## mean 5.200000e+03 NA NA NA NA NA NA
## SE.mean 1.698001e+03 NA NA NA NA NA NA
## CI.mean.0.95 5.403797e+03 NA NA NA NA NA NA
## var 1.153283e+07 NA NA NA NA NA NA
## std.dev 3.396002e+03 NA NA NA NA NA NA
## coef.var 6.530773e-01 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 4.000000e+00 NA 4.000000 NA 4.00000000
## nbr.null 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na 0.000000e+00 NA 0.000000 NA 0.00000000
## min 1.303500e+04 NA 0.356490 NA -4.33130000
## max 1.177960e+05 NA 18.887840 NA -3.99000000
## range 1.047610e+05 NA 18.531350 NA 0.34130000
## sum 2.671920e+05 NA 38.645050 NA -16.46870000
## median 6.818050e+04 NA 9.700360 NA -4.07370000
## mean 6.679800e+04 NA 9.661262 NA -4.11717500
## SE.mean 2.946534e+04 NA 4.963474 NA 0.07856639
## CI.mean.0.95 9.377186e+04 NA 15.795990 NA 0.25003331
## var 3.472825e+09 NA 98.544307 NA 0.02469071
## std.dev 5.893068e+04 NA 9.926949 NA 0.15713278
## coef.var 8.822222e-01 NA 1.027500 NA -0.03816519
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.978110e+01 NA NA NA
## max -7.920500e+01 NA NA NA
## range 5.761000e-01 NA NA NA
## sum -3.176003e+02 NA NA NA
## median -7.930710e+01 NA NA NA
## mean -7.940008e+01 NA NA NA
## SE.mean 1.355369e-01 NA NA NA
## CI.mean.0.95 4.313390e-01 NA NA NA
## var 7.348103e-02 NA NA NA
## std.dev 2.710738e-01 NA NA NA
## coef.var -3.414025e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3 3 NA
## nbr.null NA NA NA 3 3 NA
## nbr.na NA NA NA 1 1 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link
## nbr.val NA
## nbr.null NA
## nbr.na NA
## min NA
## max NA
## range NA
## sum NA
## median NA
## mean NA
## SE.mean NA
## CI.mean.0.95 NA
## var NA
## std.dev NA
## coef.var NA
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_za %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 9 Ecuador Zamora-Chinchipe Zamora 0.47714 6/27/07
## 1368 Ecuador Zamora-Chinchipe Zamora 1.23724 4/30/14
head(df_za)
## id date time America Country country_code State population
## 9 105 6/27/07 SA Ecuador EC Zamora-Chinchipe 15276
## 1368 6680 4/30/14 4:30 SA Ecuador EC Zamora-Chinchipe 15276
## City Distance location_description latitude longitude
## 9 Zamora 0.47714 -4.0650 -78.9510
## 1368 Zamora 1.23724 Urban area -4.0602 -78.9638
## geolocation hazard_type landslide_type
## 9 (-4.0650000000000004, -78.950999999999993) Landslide Landslide
## 1368 (-4.0602, -78.963800000000006) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 9 Medium Downpour NA NA
## 1368 Small Downpour 0 3
## source_name
## 9 Red Cross - Field reports
## 1368 notimerica
## source_link
## 9 https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 1368 http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html
ggplot(data=df_za, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_za, aes(x = "Zamora-Chinchipe", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_za <- df_za %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_za$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
library(ggplot2)
library(dplyr)
df_za <- df_za %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_za$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_za, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=6) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8
library(qcc)
Distance <- df_za$Distance
names(Distance) <- df_za$City
pareto.chart(Distance,
ylab="Distance",
col = heat.colors(length(Distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "DONDE SE CONCENTRAN LAS CIUDADES CON MAYORES DESLIZAMIENTOS"
)
##
## Pareto chart analysis for Distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Zamora 1.23724 1.23724 72.16836 72.16836
## Zamora 0.47714 1.71438 27.83164 100.00000
stem(df_za$"Distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 4 | 8
## 6 |
## 8 |
## 10 |
## 12 | 4
head(df_za)
## id date time America Country country_code State population
## 1 105 6/27/07 SA Ecuador EC Zamora-Chinchipe 15276
## 2 6680 4/30/14 4:30 SA Ecuador EC Zamora-Chinchipe 15276
## City Distance location_description latitude longitude
## 1 Zamora 0.47714 -4.0650 -78.9510
## 2 Zamora 1.23724 Urban area -4.0602 -78.9638
## geolocation hazard_type landslide_type
## 1 (-4.0650000000000004, -78.950999999999993) Landslide Landslide
## 2 (-4.0602, -78.963800000000006) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 1 Medium Downpour NA NA
## 2 Small Downpour 0 3
## source_name
## 1 Red Cross - Field reports
## 2 notimerica
## source_link
## 1 https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 2 http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html
## prop ypos
## 1 27.83164 13.91582
## 2 72.16836 63.91582
knitr::kable(head(df_za))
id | date | time | America | Country | country_code | State | population | City | Distance | location_description | latitude | longitude | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link | prop | ypos |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
105 | 6/27/07 | SA | Ecuador | EC | Zamora-Chinchipe | 15276 | Zamora | 0.47714 | -4.0650 | -78.9510 | (-4.0650000000000004, -78.950999999999993) | Landslide | Landslide | Medium | Downpour | NA | NA | Red Cross - Field reports | https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908 | 27.83164 | 13.91582 | |||
6680 | 4/30/14 | 4:30 | SA | Ecuador | EC | Zamora-Chinchipe | 15276 | Zamora | 1.23724 | Urban area | -4.0602 | -78.9638 | (-4.0602, -78.963800000000006) | Landslide | Landslide | Small | Downpour | 0 | 3 | notimerica | http://www.notimerica.com/sociedad/noticia-ecuador-mueren-tres-ninas-deslizamiento-tierras-sureste-ecuador-20140507151426.html | 72.16836 | 63.91582 |
stem(df_za$"Distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 4 | 8
## 6 |
## 8 |
## 10 |
## 12 | 4
stem(df_za$"Distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 4 | 8
## 5 |
## 6 |
## 7 |
## 8 |
## 9 |
## 10 |
## 11 |
## 12 | 4
library(forecast)
data_serie<- ts(df_za$Distance, frequency=12, start=2007)
head(data_serie)
## Jan Feb
## 2007 0.47714 1.23724
autoplot(data_serie)+
labs(title = "Serie de Deslizamiento", x="Tiempo", y = "Distancia", colour = "#00a0dc") +theme_bw()
library(questionr)
table <- questionr::freq(Distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
n | % | val% | %cum | val%cum | |
---|---|---|---|---|---|
0.47714 | 1 | 50 | 50 | 50 | 50 |
1.23724 | 1 | 50 | 50 | 100 | 100 |
Total | 2 | 100 | 100 | 100 | 100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
x | y |
---|---|
0.47714 | 1 |
1.23724 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="white", fill="blue") +
xlab("Número de asistencias") +
ylab("Frecuencia")
n_sturges = 1 + log(length(Distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(Distance) - min(Distance)
w = ceiling(R/n_clases)
bins <- seq(min(Distance), max(Distance) + w, by = w)
bins
## [1] 0.47714 1.47714
Edades <- cut(Distance, bins)
Freq_table <- transform(table(Distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
Distance | Freq | Rel_Freq | Cum_Freq |
---|---|---|---|
0.47714 | 1 | 0.5 | 1 |
1.23724 | 1 | 0.5 | 2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ Distance: Factor w/ 2 levels "0.47714","1.23724": 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$Distance, y = Freq_table$Freq)
knitr::kable(df)
x | y |
---|---|
0.47714 | 1 |
1.23724 | 1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="blue", fill="green") +
xlab("Rango de Distance") +
ylab("Frecuencia")
summary(df_za$Distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4771 0.6672 0.8572 0.8572 1.0472 1.2372
library(pastecs)
stat.desc(df_za)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time America Country country_code State
## nbr.val 2.000000e+00 NA NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA NA
## min 1.050000e+02 NA NA NA NA NA NA
## max 6.680000e+03 NA NA NA NA NA NA
## range 6.575000e+03 NA NA NA NA NA NA
## sum 6.785000e+03 NA NA NA NA NA NA
## median 3.392500e+03 NA NA NA NA NA NA
## mean 3.392500e+03 NA NA NA NA NA NA
## SE.mean 3.287500e+03 NA NA NA NA NA NA
## CI.mean.0.95 4.177165e+04 NA NA NA NA NA NA
## var 2.161531e+07 NA NA NA NA NA NA
## std.dev 4.649227e+03 NA NA NA NA NA NA
## coef.var 1.370443e+00 NA NA NA NA NA NA
## population City Distance location_description latitude
## nbr.val 2 NA 2.0000000 NA 2.0000000000
## nbr.null 0 NA 0.0000000 NA 0.0000000000
## nbr.na 0 NA 0.0000000 NA 0.0000000000
## min 15276 NA 0.4771400 NA -4.0650000000
## max 15276 NA 1.2372400 NA -4.0602000000
## range 0 NA 0.7601000 NA 0.0048000000
## sum 30552 NA 1.7143800 NA -8.1252000000
## median 15276 NA 0.8571900 NA -4.0626000000
## mean 15276 NA 0.8571900 NA -4.0626000000
## SE.mean 0 NA 0.3800500 NA 0.0024000000
## CI.mean.0.95 0 NA 4.8289931 NA 0.0304948914
## var 0 NA 0.2888760 NA 0.0000115200
## std.dev 0 NA 0.5374719 NA 0.0033941125
## coef.var 0 NA 0.6270160 NA -0.0008354533
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.896380e+01 NA NA NA
## max -7.895100e+01 NA NA NA
## range 1.280000e-02 NA NA NA
## sum -1.579148e+02 NA NA NA
## median -7.895740e+01 NA NA NA
## mean -7.895740e+01 NA NA NA
## SE.mean 6.400000e-03 NA NA NA
## CI.mean.0.95 8.131971e-02 NA NA NA
## var 8.192000e-05 NA NA NA
## std.dev 9.050967e-03 NA NA NA
## coef.var -1.146310e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 1 NA
## nbr.null NA NA NA 1 0 NA
## nbr.na NA NA NA 1 1 NA
## min NA NA NA 0 3 NA
## max NA NA NA 0 3 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 3 NA
## median NA NA NA 0 3 NA
## mean NA NA NA 0 3 NA
## SE.mean NA NA NA NA NA NA
## CI.mean.0.95 NA NA NA NaN NaN NA
## var NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA
## source_link prop ypos
## nbr.val NA 2.000000 2.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 27.831636 13.9158180
## max NA 72.168364 63.9158180
## range NA 44.336728 50.0000000
## sum NA 100.000000 77.8316359
## median NA 50.000000 38.9158180
## mean NA 50.000000 38.9158180
## SE.mean NA 22.168364 25.0000000
## CI.mean.0.95 NA 281.675773 317.6551184
## var NA 982.872731 1250.0000000
## std.dev NA 31.350801 35.3553391
## coef.var NA 0.627016 0.9085082
boxplot(Distance, horizontal=TRUE, col='steelblue')
library(tidyverse)
library(hrbrthemes)
library(viridis)
df <- data.frame(Distance)
df %>% ggplot(aes(x = "", y = Distance)) +
geom_boxplot(color="red", fill="orange", alpha=0.5) +
theme_ipsum() +
theme(legend.position="none", plot.title = element_text(size=11)) +
ggtitle("Deslizamientos ") +
coord_flip() +
xlab("") +
ylab("")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
df_pi %>%
select(Country, State, City, Distance, date)
## Country State City Distance date
## 11 Ecuador Pichincha Sangolquà 33.94603 6/27/07
## 87 Ecuador Pichincha Quito 1.56942 4/1/08
## 238 Ecuador Pichincha Machachi 26.18676 1/10/10
## 586 Ecuador Pichincha Machachi 25.82923 2/14/11
## 697 Ecuador Pichincha Quito 4.39517 5/2/11
## 984 Ecuador Pichincha Cayambe 45.69792 5/31/13
## 1332 Ecuador Pichincha Quito 26.72137 8/12/14
## 1333 Ecuador Pichincha Quito 23.97854 8/12/14
## 1566 Ecuador Pichincha Quito 4.25486 4/29/11
head(df_pi)
## id date time America Country country_code State population
## 11 107 6/27/07 SA Ecuador EC Pichincha 5114
## 87 489 4/1/08 SA Ecuador EC Pichincha 1399814
## 238 1393 1/10/10 Night SA Ecuador EC Pichincha 25742
## 586 3119 2/14/11 SA Ecuador EC Pichincha 25742
## 697 3460 5/2/11 12:15 SA Ecuador EC Pichincha 1399814
## 984 4890 5/31/13 SA Ecuador EC Pichincha 26582
## City Distance location_description latitude longitude
## 11 Sangolquà 33.94603 -0.3560 -78.1480
## 87 Quito 1.56942 -0.2196 -78.5347
## 238 Machachi 26.18676 -0.4167 -78.7833
## 586 Machachi 25.82923 -0.6273 -78.3664
## 697 Quito 4.39517 Urban area -0.2648 -78.5074
## 984 Cayambe 45.69792 -0.1193 -77.7668
## geolocation hazard_type landslide_type
## 11 (-0.35599999999999998, -78.147999999999996) Landslide Landslide
## 87 (-0.21959999999999999, -78.534700000000001) Landslide Landslide
## 238 (-0.41670000000000001, -78.783299999999997) Landslide Mudslide
## 586 (-0.62729999999999997, -78.366399999999999) Landslide Mudslide
## 697 (-0.26479999999999998, -78.507400000000004) Landslide Mudslide
## 984 (-0.1193, -77.766800000000003) Landslide Landslide
## landslide_size trigger storm_name injuries fatalities
## 11 Medium Downpour NA NA
## 87 Medium Rain NA NA
## 238 Medium Rain NA 3
## 586 Medium Downpour NA 0
## 697 Medium Unknown 7 5
## 984 Medium Downpour NA 0
## source_name
## 11 Red Cross - Field reports
## 87
## 238
## 586
## 697 Latin American Herald Tribune
## 984 ens-newswire.com
## source_link
## 11 https://www-secure.ifrc.org/dmis/prepare/view_report.asp?ReportID=2908
## 87 http://www.plenglish.com/Article.asp?ID=%7B39BA85A0-5900-4A4A-A329-818B3FC61EA1%7D&language=EN
## 238 http://www.laht.com/article.asp?ArticleId=349500&CategoryId=14089
## 586 http://www.theweathernetwork.com/news/storm_watch_stories3&stormfile=mudslide_in_ecuador_destroys_150211
## 697 http://www.laht.com/article.asp?ArticleId=393028&CategoryId=14089
## 984 http://ens-newswire.com/2013/06/11/oil-spilled-into-ecuadors-rivers-reaches-peru/
ggplot(data=df_pi, aes(x=City, y=Distance)) + geom_bar(stat="identity", color="blue", fill="white")
ggplot(data=df_pi, aes(x = "Pichincha", y = Distance, fill=City)) +
geom_bar(stat = "identity", width = 1, color = "black") +
coord_polar("y", start = 0)
library(ggplot2)
library(dplyr)
df_pi <- df_pi %>%
arrange(desc(City)) %>%
mutate(prop = Distance / sum(df_pi$Distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_pi, aes(x=State, y = prop, fill=City)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(Distance/100)), color = "white", size=3) +
scale_fill_brewer(palette="Set8")
## Warning in pal_name(palette, type): Unknown palette Set8