Gráfico para Panamá
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA <- subset(df, country_name == "Panama")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
ggplot(df_PA, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.84596 |
| 2.81479 |
| 1.41526 |
| 4.18074 |
| 2.24069 |
| 0.48725 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
distance <- df_PA$distance
names(distance) <- df_PA$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Los Santos 13.40535000 13.40535000 11.36719394 11.36719394
## Chiriquí 9.83700000 23.24235000 8.34137764 19.70857158
## Bocas del Toro 8.41491000 31.65726000 7.13550291 26.84407449
## Chiriquí 6.74338000 38.40064000 5.71811316 32.56218765
## Bocas del Toro 6.65185000 45.05249000 5.64049943 38.20268708
## Bocas del Toro 5.97977000 51.03226000 5.07060280 43.27328989
## Bocas del Toro 5.13913000 56.17139000 4.35777413 47.63106401
## Colón 4.74914000 60.92053000 4.02707840 51.65814241
## Bocas del Toro 4.45630000 65.37683000 3.77876194 55.43690435
## Panamá 4.18074000 69.55757000 3.54509822 58.98200257
## Bocas del Toro 3.92621000 73.48378000 3.32926708 62.31126965
## Chiriquí 3.66775000 77.15153000 3.11010347 65.42137312
## Panamá 3.54386000 80.69539000 3.00504977 68.42642289
## Chiriquí 3.33873000 84.03412000 2.83110784 71.25753073
## Panamá 3.30848000 87.34260000 2.80545706 74.06298779
## Panamá 2.81479000 90.15739000 2.38682793 76.44981572
## Colón 2.67409000 92.83148000 2.26752003 78.71733575
## Panamá 2.59449000 95.42597000 2.20002245 80.91735821
## Panamá 2.57852000 98.00449000 2.18648054 83.10383875
## Chiriquí 2.44254000 100.44703000 2.07117501 85.17501375
## Colón 2.28589000 102.73292000 1.93834215 87.11335590
## Panamá 2.24069000 104.97361000 1.90001438 89.01337029
## Panamá 1.84596000 106.81957000 1.56529933 90.57866961
## Panamá 1.80341000 108.62298000 1.52921865 92.10788826
## Colón 1.80330000 110.42628000 1.52912537 93.63701364
## Panamá 1.41526000 111.84154000 1.20008317 94.83709680
## Chiriquí 0.90169000 112.74323000 0.76459661 95.60169341
## Colón 0.76740000 113.51063000 0.65072412 96.25241753
## Colón 0.74760000 114.25823000 0.63393453 96.88635206
## Chiriquí 0.64491000 114.90314000 0.54685756 97.43320962
## Colón 0.63948000 115.54262000 0.54225314 97.97546277
## Panamá 0.48725000 116.02987000 0.41316827 98.38863103
## Chiriquí 0.37999000 116.40986000 0.32221613 98.71084717
## Chiriquí 0.35187000 116.76173000 0.29837151 99.00921868
## Bocas del Toro 0.29869000 117.06042000 0.25327702 99.26249570
## Coclé 0.21605000 117.27647000 0.18320165 99.44569735
## Colón 0.20365000 117.48012000 0.17268695 99.61838430
## Colón 0.18619000 117.66631000 0.15788158 99.77626588
## Colón 0.16894000 117.83525000 0.14325428 99.91952016
## Colón 0.09491000 117.93016000 0.08047984 100.00000000
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 122223445667894888
## 2 | 234667833579
## 4 | 2571
## 6 | 077
## 8 | 48
## 10 |
## 12 | 4
head(df_PA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 750 8/26/08 <NA> <NA> Panama PA Panamá 1287
## 2 5562 9/29/13 <NA> <NA> Panama PA Panamá 69102
## 3 6701 9/14/14 Morning <NA> Panama PA Panamá 19782
## 4 6703 6/17/14 <NA> <NA> Panama PA Panamá 321501
## 5 6705 6/24/14 <NA> <NA> Panama PA Panamá 9169
## 6 7453 6/5/15 Afternoon <NA> Panama PA Panamá 0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 122223445667894888
## 2 | 234667833579
## 4 | 2571
## 6 | 077
## 8 | 48
## 10 |
## 12 | 4
stem(df_PA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 12222344566789
## 1 | 4888
## 2 | 2346678
## 3 | 33579
## 4 | 257
## 5 | 1
## 6 | 077
## 7 |
## 8 | 4
## 9 | 8
## 10 |
## 11 |
## 12 |
## 13 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.09491 |
1 |
2.5 |
2.5 |
2.5 |
2.5 |
| 0.16894 |
1 |
2.5 |
2.5 |
5.0 |
5.0 |
| 0.18619 |
1 |
2.5 |
2.5 |
7.5 |
7.5 |
| 0.20365 |
1 |
2.5 |
2.5 |
10.0 |
10.0 |
| 0.21605 |
1 |
2.5 |
2.5 |
12.5 |
12.5 |
| 0.29869 |
1 |
2.5 |
2.5 |
15.0 |
15.0 |
| 0.35187 |
1 |
2.5 |
2.5 |
17.5 |
17.5 |
| 0.37999 |
1 |
2.5 |
2.5 |
20.0 |
20.0 |
| 0.48725 |
1 |
2.5 |
2.5 |
22.5 |
22.5 |
| 0.63948 |
1 |
2.5 |
2.5 |
25.0 |
25.0 |
| 0.64491 |
1 |
2.5 |
2.5 |
27.5 |
27.5 |
| 0.7476 |
1 |
2.5 |
2.5 |
30.0 |
30.0 |
| 0.7674 |
1 |
2.5 |
2.5 |
32.5 |
32.5 |
| 0.90169 |
1 |
2.5 |
2.5 |
35.0 |
35.0 |
| 1.41526 |
1 |
2.5 |
2.5 |
37.5 |
37.5 |
| 1.8033 |
1 |
2.5 |
2.5 |
40.0 |
40.0 |
| 1.80341 |
1 |
2.5 |
2.5 |
42.5 |
42.5 |
| 1.84596 |
1 |
2.5 |
2.5 |
45.0 |
45.0 |
| 2.24069 |
1 |
2.5 |
2.5 |
47.5 |
47.5 |
| 2.28589 |
1 |
2.5 |
2.5 |
50.0 |
50.0 |
| 2.44254 |
1 |
2.5 |
2.5 |
52.5 |
52.5 |
| 2.57852 |
1 |
2.5 |
2.5 |
55.0 |
55.0 |
| 2.59449 |
1 |
2.5 |
2.5 |
57.5 |
57.5 |
| 2.67409 |
1 |
2.5 |
2.5 |
60.0 |
60.0 |
| 2.81479 |
1 |
2.5 |
2.5 |
62.5 |
62.5 |
| 3.30848 |
1 |
2.5 |
2.5 |
65.0 |
65.0 |
| 3.33873 |
1 |
2.5 |
2.5 |
67.5 |
67.5 |
| 3.54386 |
1 |
2.5 |
2.5 |
70.0 |
70.0 |
| 3.66775 |
1 |
2.5 |
2.5 |
72.5 |
72.5 |
| 3.92621 |
1 |
2.5 |
2.5 |
75.0 |
75.0 |
| 4.18074 |
1 |
2.5 |
2.5 |
77.5 |
77.5 |
| 4.4563 |
1 |
2.5 |
2.5 |
80.0 |
80.0 |
| 4.74914 |
1 |
2.5 |
2.5 |
82.5 |
82.5 |
| 5.13913 |
1 |
2.5 |
2.5 |
85.0 |
85.0 |
| 5.97977 |
1 |
2.5 |
2.5 |
87.5 |
87.5 |
| 6.65185 |
1 |
2.5 |
2.5 |
90.0 |
90.0 |
| 6.74338 |
1 |
2.5 |
2.5 |
92.5 |
92.5 |
| 8.41491 |
1 |
2.5 |
2.5 |
95.0 |
95.0 |
| 9.837 |
1 |
2.5 |
2.5 |
97.5 |
97.5 |
| 13.40535 |
1 |
2.5 |
2.5 |
100.0 |
100.0 |
| Total |
40 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 41 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
## $ val% : num 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
## $ %cum : num 2.5 5 7.5 10 12.5 15 17.5 20 22.5 25 ...
## $ val%cum: num 2.5 5 7.5 10 12.5 15 17.5 20 22.5 25 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.09491 |
1 |
| 0.16894 |
1 |
| 0.18619 |
1 |
| 0.20365 |
1 |
| 0.21605 |
1 |
| 0.29869 |
1 |
| 0.35187 |
1 |
| 0.37999 |
1 |
| 0.48725 |
1 |
| 0.63948 |
1 |
| 0.64491 |
1 |
| 0.7476 |
1 |
| 0.7674 |
1 |
| 0.90169 |
1 |
| 1.41526 |
1 |
| 1.8033 |
1 |
| 1.80341 |
1 |
| 1.84596 |
1 |
| 2.24069 |
1 |
| 2.28589 |
1 |
| 2.44254 |
1 |
| 2.57852 |
1 |
| 2.59449 |
1 |
| 2.67409 |
1 |
| 2.81479 |
1 |
| 3.30848 |
1 |
| 3.33873 |
1 |
| 3.54386 |
1 |
| 3.66775 |
1 |
| 3.92621 |
1 |
| 4.18074 |
1 |
| 4.4563 |
1 |
| 4.74914 |
1 |
| 5.13913 |
1 |
| 5.97977 |
1 |
| 6.65185 |
1 |
| 6.74338 |
1 |
| 8.41491 |
1 |
| 9.837 |
1 |
| 13.40535 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.09491 2.09491 4.09491 6.09491 8.09491 10.09491 12.09491 14.09491
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0949,2.09] |
17 |
0.4358974 |
17 |
| (2.09,4.09] |
12 |
0.3076923 |
29 |
| (4.09,6.09] |
5 |
0.1282051 |
34 |
| (6.09,8.09] |
2 |
0.0512821 |
36 |
| (8.09,10.1] |
2 |
0.0512821 |
38 |
| (10.1,12.1] |
0 |
0.0000000 |
38 |
| (12.1,14.1] |
1 |
0.0256410 |
39 |
str(Freq_table)
## 'data.frame': 7 obs. of 4 variables:
## $ distance: Factor w/ 7 levels "(0.0949,2.09]",..: 1 2 3 4 5 6 7
## $ Freq : int 17 12 5 2 2 0 1
## $ Rel_Freq: num 0.4359 0.3077 0.1282 0.0513 0.0513 ...
## $ Cum_Freq: int 17 29 34 36 38 38 39
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0949,2.09] |
17 |
| (2.09,4.09] |
12 |
| (4.09,6.09] |
5 |
| (6.09,8.09] |
2 |
| (8.09,10.1] |
2 |
| (10.1,12.1] |
0 |
| (12.1,14.1] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.1
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
stat.desc(df_PA)
## id date time continent_code country_name country_code
## nbr.val 4.000000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 7.500000e+02 NA NA NA NA NA
## max 7.512000e+03 NA NA NA NA NA
## range 6.762000e+03 NA NA NA NA NA
## sum 2.479400e+05 NA NA NA NA NA
## median 7.450500e+03 NA NA NA NA NA
## mean 6.198500e+03 NA NA NA NA NA
## SE.mean 3.051354e+02 NA NA NA NA NA
## CI.mean.0.95 6.171945e+02 NA NA NA NA NA
## var 3.724304e+06 NA NA NA NA NA
## std.dev 1.929846e+03 NA NA NA NA NA
## coef.var 3.113407e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+01 NA 40.0000000 NA
## nbr.null NA 8.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.0949100 NA
## max NA 3.215010e+05 NA 13.4053500 NA
## range NA 3.215010e+05 NA 13.3104400 NA
## sum NA 2.041483e+06 NA 117.9301600 NA
## median NA 2.365500e+03 NA 2.3642150 NA
## mean NA 5.103707e+04 NA 2.9482540 NA
## SE.mean NA 1.674111e+04 NA 0.4634140 NA
## CI.mean.0.95 NA 3.386208e+04 NA 0.9373432 NA
## var NA 1.121058e+10 NA 8.5900997 NA
## std.dev NA 1.058800e+05 NA 2.9308872 NA
## coef.var NA 2.074571e+00 NA 0.9941095 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 40.00000000 4.000000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 7.59550000 -8.286620e+01 NA NA NA
## max 9.56760000 -7.908960e+01 NA NA NA
## range 1.97210000 3.776600e+00 NA NA NA
## sum 361.65930000 -3.232397e+03 NA NA NA
## median 9.07300000 -7.989490e+01 NA NA NA
## mean 9.04148250 -8.080992e+01 NA NA NA
## SE.mean 0.05632097 2.230672e-01 NA NA NA
## CI.mean.0.95 0.11391991 4.511960e-01 NA NA NA
## var 0.12688206 1.990359e+00 NA NA NA
## std.dev 0.35620508 1.410801e+00 NA NA NA
## coef.var 0.03939676 -1.745826e-02 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 28.000000 38.0000000 NA
## nbr.null NA NA NA 27.000000 33.0000000 NA
## nbr.na NA NA NA 12.000000 2.0000000 NA
## min NA NA NA 0.000000 0.0000000 NA
## max NA NA NA 45.000000 8.0000000 NA
## range NA NA NA 45.000000 8.0000000 NA
## sum NA NA NA 45.000000 23.0000000 NA
## median NA NA NA 0.000000 0.0000000 NA
## mean NA NA NA 1.607143 0.6052632 NA
## SE.mean NA NA NA 1.607143 0.3099568 NA
## CI.mean.0.95 NA NA NA 3.297585 0.6280321 NA
## var NA NA NA 72.321429 3.6507824 NA
## std.dev NA NA NA 8.504201 1.9107021 NA
## coef.var NA NA NA 5.291503 3.1568121 NA
## source_link prop ypos
## nbr.val NA 40.00000000 40.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 0.08047984 0.7826497
## max NA 11.36719394 98.1106190
## range NA 11.28671410 97.3279694
## sum NA 100.00000000 1766.6812798
## median NA 2.00475858 42.6414180
## mean NA 2.50000000 44.1670320
## SE.mean NA 0.39295627 4.2497154
## CI.mean.0.95 NA 0.79482907 8.5958607
## var NA 6.17658511 722.4032223
## std.dev NA 2.48527365 26.8775598
## coef.var NA 0.99410946 0.6085435
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Bocas del Toro (Panama)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA<- subset(df, state == "Bocas del Toro")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_PA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 6.65185 |
| 5.13913 |
| 0.29869 |
| 3.92621 |
| 5.97977 |
| 4.45630 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_PA$distance
names(distance) <- df_PA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Almirante 8.4149100 8.4149100 24.1344073 24.1344073
## Valle del Risco 6.6518500 15.0667600 19.0778579 43.2122652
## Changuinola 5.9797700 21.0465300 17.1502969 60.3625620
## Valle del Risco 5.1391300 26.1856600 14.7392969 75.1018589
## Cauchero 4.4563000 30.6419600 12.7809043 87.8827632
## Pueblo Nuevo 3.9262100 34.5681700 11.2605781 99.1433413
## Punta Peña 0.2986900 34.8668600 0.8566587 100.0000000
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 2 | 9
## 4 | 51
## 6 | 07
## 8 | 4
head(df_PA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7498 6/29/15 <NA> <NA> Panama PA Boca~ 918
## 2 7500 5/24/15 23:30 <NA> Panama PA Boca~ 918
## 3 7489 6/21/15 Morning <NA> Panama PA Boca~ 993
## 4 7499 12/21/15 <NA> <NA> Panama PA Boca~ 0
## 5 7497 6/29/15 <NA> <NA> Panama PA Boca~ 22900
## 6 7501 10/8/15 <NA> <NA> Panama PA Boca~ 0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 2 | 9
## 4 | 51
## 6 | 07
## 8 | 4
stem(df_PA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 3
## 1 |
## 2 |
## 3 | 9
## 4 | 5
## 5 | 1
## 6 | 07
## 7 |
## 8 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.29869 |
1 |
14.3 |
14.3 |
14.3 |
14.3 |
| 3.92621 |
1 |
14.3 |
14.3 |
28.6 |
28.6 |
| 4.4563 |
1 |
14.3 |
14.3 |
42.9 |
42.9 |
| 5.13913 |
1 |
14.3 |
14.3 |
57.1 |
57.1 |
| 5.97977 |
1 |
14.3 |
14.3 |
71.4 |
71.4 |
| 6.65185 |
1 |
14.3 |
14.3 |
85.7 |
85.7 |
| 8.41491 |
1 |
14.3 |
14.3 |
100.0 |
100.0 |
| Total |
7 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 8 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 7
## $ % : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ val% : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ %cum : num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
## $ val%cum: num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.29869 |
1 |
| 3.92621 |
1 |
| 4.4563 |
1 |
| 5.13913 |
1 |
| 5.97977 |
1 |
| 6.65185 |
1 |
| 8.41491 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.29869 3.29869 6.29869 9.29869
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.299,3.3] |
0 |
0.0000000 |
0 |
| (3.3,6.3] |
4 |
0.6666667 |
4 |
| (6.3,9.3] |
2 |
0.3333333 |
6 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.299,3.3]",..: 1 2 3
## $ Freq : int 0 4 2
## $ Rel_Freq: num 0 0.667 0.333
## $ Cum_Freq: int 0 4 6
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.299,3.3] |
0 |
| (3.3,6.3] |
4 |
| (6.3,9.3] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_PA)
## id date time continent_code country_name country_code
## nbr.val 7.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.168000e+03 NA NA NA NA NA
## max 7.501000e+03 NA NA NA NA NA
## range 4.333000e+03 NA NA NA NA NA
## sum 4.815200e+04 NA NA NA NA NA
## median 7.498000e+03 NA NA NA NA NA
## mean 6.878857e+03 NA NA NA NA NA
## SE.mean 6.184780e+02 NA NA NA NA NA
## CI.mean.0.95 1.513361e+03 NA NA NA NA NA
## var 2.677605e+06 NA NA NA NA NA
## std.dev 1.636339e+03 NA NA NA NA NA
## coef.var 2.378795e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 7.000000e+00 NA 7.0000000 NA
## nbr.null NA 2.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.2986900 NA
## max NA 2.290000e+04 NA 8.4149100 NA
## range NA 2.290000e+04 NA 8.1162200 NA
## sum NA 3.384300e+04 NA 34.8668600 NA
## median NA 9.180000e+02 NA 5.1391300 NA
## mean NA 4.834714e+03 NA 4.9809800 NA
## SE.mean NA 3.197087e+03 NA 0.9626098 NA
## CI.mean.0.95 NA 7.822989e+03 NA 2.3554214 NA
## var NA 7.154954e+07 NA 6.4863239 NA
## std.dev NA 8.458696e+03 NA 2.5468262 NA
## coef.var NA 1.749575e+00 NA 0.5113103 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 7.00000000 7.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 8.85430000 -8.248580e+01 NA NA NA
## max 9.38800000 -8.218250e+01 NA NA NA
## range 0.53370000 3.033000e-01 NA NA NA
## sum 64.03590000 -5.763791e+02 NA NA NA
## median 9.21020000 -8.237050e+01 NA NA NA
## mean 9.14798571 -8.233987e+01 NA NA NA
## SE.mean 0.07607773 4.668383e-02 NA NA NA
## CI.mean.0.95 0.18615551 1.142312e-01 NA NA NA
## var 0.04051475 1.525566e-02 NA NA NA
## std.dev 0.20128276 1.235138e-01 NA NA NA
## coef.var 0.02200296 -1.500049e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 6 7 NA
## nbr.null NA NA NA 6 7 NA
## nbr.na NA NA NA 1 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 7.0000000 7.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.8566587 9.5389289
## max NA 24.1344073 87.9327964
## range NA 23.2777486 78.3938674
## sum NA 100.0000000 322.4534988
## median NA 14.7392969 40.3041025
## mean NA 14.2857143 46.0647855
## SE.mean NA 2.7608160 10.0760751
## CI.mean.0.95 NA 6.7554733 24.6552675
## var NA 53.3547333 710.6910217
## std.dev NA 7.3044324 26.6587888
## coef.var NA 0.5113103 0.5787238
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Chiriquí (Panama)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA<- subset(df, state == "Chiriquí")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_PA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.35187 |
| 0.64491 |
| 3.66775 |
| 0.90169 |
| 6.74338 |
| 3.33873 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_PA$distance
names(distance) <- df_PA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Boquete 9.837000 9.837000 34.750066 34.750066
## Monte Lirio 6.743380 16.580380 23.821582 58.571648
## Río Sereno 3.667750 20.248130 12.956649 71.528296
## Cerro Punta 3.338730 23.586860 11.794357 83.322653
## Boca de Balsa 2.442540 26.029400 8.628487 91.951140
## Palmira Centro 0.901690 26.931090 3.185299 95.136439
## Río Sereno 0.644910 27.576000 2.278201 97.414640
## Breñón 0.379990 27.955990 1.342348 98.756988
## Volcán 0.351870 28.307860 1.243012 100.000000
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 4469
## 2 | 437
## 4 |
## 6 | 7
## 8 | 8
head(df_PA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6699 9/12/14 <NA> <NA> Panama PA Chir~ 11245
## 2 6700 9/25/14 <NA> <NA> Panama PA Chir~ 1908
## 3 7503 11/23/15 <NA> <NA> Panama PA Chir~ 1908
## 4 7504 11/22/15 <NA> <NA> Panama PA Chir~ 0
## 5 7507 11/11/15 <NA> <NA> Panama PA Chir~ 2823
## 6 6704 8/17/14 Afternoon <NA> Panama PA Chir~ 2957
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 4469
## 2 | 437
## 4 |
## 6 | 7
## 8 | 8
stem(df_PA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4469
## 1 |
## 2 | 4
## 3 | 37
## 4 |
## 5 |
## 6 | 7
## 7 |
## 8 |
## 9 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.35187 |
1 |
11.1 |
11.1 |
11.1 |
11.1 |
| 0.37999 |
1 |
11.1 |
11.1 |
22.2 |
22.2 |
| 0.64491 |
1 |
11.1 |
11.1 |
33.3 |
33.3 |
| 0.90169 |
1 |
11.1 |
11.1 |
44.4 |
44.4 |
| 2.44254 |
1 |
11.1 |
11.1 |
55.6 |
55.6 |
| 3.33873 |
1 |
11.1 |
11.1 |
66.7 |
66.7 |
| 3.66775 |
1 |
11.1 |
11.1 |
77.8 |
77.8 |
| 6.74338 |
1 |
11.1 |
11.1 |
88.9 |
88.9 |
| 9.837 |
1 |
11.1 |
11.1 |
100.0 |
100.0 |
| Total |
9 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 10 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 9
## $ % : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ val% : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ %cum : num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
## $ val%cum: num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.35187 |
1 |
| 0.37999 |
1 |
| 0.64491 |
1 |
| 0.90169 |
1 |
| 2.44254 |
1 |
| 3.33873 |
1 |
| 3.66775 |
1 |
| 6.74338 |
1 |
| 9.837 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.35187 2.35187 4.35187 6.35187 8.35187 10.35187
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.352,2.35] |
3 |
0.375 |
3 |
| (2.35,4.35] |
3 |
0.375 |
6 |
| (4.35,6.35] |
0 |
0.000 |
6 |
| (6.35,8.35] |
1 |
0.125 |
7 |
| (8.35,10.4] |
1 |
0.125 |
8 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.352,2.35]",..: 1 2 3 4 5
## $ Freq : int 3 3 0 1 1
## $ Rel_Freq: num 0.375 0.375 0 0.125 0.125
## $ Cum_Freq: int 3 6 6 7 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.352,2.35] |
3 |
| (2.35,4.35] |
3 |
| (4.35,6.35] |
0 |
| (6.35,8.35] |
1 |
| (8.35,10.4] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_PA)
## id date time continent_code country_name country_code
## nbr.val 9.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 9.050000e+02 NA NA NA NA NA
## max 7.507000e+03 NA NA NA NA NA
## range 6.602000e+03 NA NA NA NA NA
## sum 5.852900e+04 NA NA NA NA NA
## median 7.502000e+03 NA NA NA NA NA
## mean 6.503222e+03 NA NA NA NA NA
## SE.mean 7.116808e+02 NA NA NA NA NA
## CI.mean.0.95 1.641139e+03 NA NA NA NA NA
## var 4.558406e+06 NA NA NA NA NA
## std.dev 2.135042e+03 NA NA NA NA NA
## coef.var 3.283053e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 9.000000e+00 NA 9.000000 NA 9.00000000
## nbr.null NA 3.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 0.000000e+00 NA 0.351870 NA 8.51210000
## max NA 1.124500e+04 NA 9.837000 NA 8.85280000
## range NA 1.124500e+04 NA 9.485130 NA 0.34070000
## sum NA 2.696400e+04 NA 28.307860 NA 78.68170000
## median NA 1.908000e+03 NA 2.442540 NA 8.78680000
## mean NA 2.996000e+03 NA 3.145318 NA 8.74241111
## SE.mean NA 1.219922e+03 NA 1.088227 NA 0.03937552
## CI.mean.0.95 NA 2.813145e+03 NA 2.509457 NA 0.09080011
## var NA 1.339389e+07 NA 10.658149 NA 0.01395388
## std.dev NA 3.659766e+03 NA 3.264682 NA 0.11812656
## coef.var NA 1.221551e+00 NA 1.037950 NA 0.01351190
## longitude geolocation hazard_type landslide_type
## nbr.val 9.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.286620e+01 NA NA NA
## max -8.205130e+01 NA NA NA
## range 8.149000e-01 NA NA NA
## sum -7.434157e+02 NA NA NA
## median -8.263420e+01 NA NA NA
## mean -8.260174e+01 NA NA NA
## SE.mean 9.188235e-02 NA NA NA
## CI.mean.0.95 2.118811e-01 NA NA NA
## var 7.598130e-02 NA NA NA
## std.dev 2.756470e-01 NA NA NA
## coef.var -3.337061e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 8 9.0000000 NA
## nbr.null NA NA NA 8 8.0000000 NA
## nbr.na NA NA NA 1 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 8.0000000 NA
## range NA NA NA 0 8.0000000 NA
## sum NA NA NA 0 8.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.8888889 NA
## SE.mean NA NA NA 0 0.8888889 NA
## CI.mean.0.95 NA NA NA 0 2.0497815 NA
## var NA NA NA 0 7.1111111 NA
## std.dev NA NA NA 0 2.6666667 NA
## coef.var NA NA NA NaN 3.0000000 NA
## source_link prop ypos
## nbr.val NA 9.000000 9.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 1.243012 0.6215058
## max NA 34.750066 95.6857565
## range NA 33.507054 95.0642507
## sum NA 100.000000 337.6620486
## median NA 8.628487 31.5739515
## mean NA 11.111111 37.5180054
## SE.mean NA 3.844259 11.1563092
## CI.mean.0.95 NA 8.864877 25.7264952
## var NA 133.004927 1120.1691177
## std.dev NA 11.532776 33.4689276
## coef.var NA 1.037950 0.8920764
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Colón (Panama)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA<- subset(df, state == "Colón")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_PA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.09491 |
| 2.67409 |
| 4.74914 |
| 0.18619 |
| 2.28589 |
| 0.74760 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_PA$distance
names(distance) <- df_PA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Cusuna 36.3762900 36.3762900 71.7525220 71.7525220
## Nuevo San Juan 4.7491400 41.1254300 9.3677165 81.1202386
## Portobelo 2.6740900 43.7995200 5.2746638 86.3949024
## María Chiquita 2.2858900 46.0854100 4.5089363 90.9038387
## El Giral 1.8033000 47.8887100 3.5570236 94.4608623
## Margarita 0.7674000 48.6561100 1.5137026 95.9745649
## Margarita 0.7476000 49.4037100 1.4746470 97.4492119
## Cativá 0.6394800 50.0431900 1.2613794 98.7105913
## Colón 0.2036500 50.2468400 0.4017012 99.1122925
## Nueva Providencia 0.1861900 50.4330300 0.3672613 99.4795538
## Colón 0.1689400 50.6019700 0.3332355 99.8127893
## Portobelo 0.0949100 50.6968800 0.1872107 100.0000000
stem(df_PA$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 00001112235
## 1 |
## 2 |
## 3 | 6
head(df_PA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2795 12/9/10 Morning <NA> Panama PA Colón 1274
## 2 4879 5/28/13 <NA> <NA> Panama PA Colón 1274
## 3 6702 5/9/14 <NA> <NA> Panama PA Colón 1232
## 4 7450 9/7/15 <NA> <NA> Panama PA Colón 0
## 5 7451 7/2/15 <NA> <NA> Panama PA Colón 1146
## 6 4880 5/28/13 <NA> <NA> Panama PA Colón 3302
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 00001112235
## 1 |
## 2 |
## 3 | 6
stem(df_PA$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0000111223
## 0 | 5
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 | 6
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.09491 |
1 |
8.3 |
8.3 |
8.3 |
8.3 |
| 0.16894 |
1 |
8.3 |
8.3 |
16.7 |
16.7 |
| 0.18619 |
1 |
8.3 |
8.3 |
25.0 |
25.0 |
| 0.20365 |
1 |
8.3 |
8.3 |
33.3 |
33.3 |
| 0.63948 |
1 |
8.3 |
8.3 |
41.7 |
41.7 |
| 0.7476 |
1 |
8.3 |
8.3 |
50.0 |
50.0 |
| 0.7674 |
1 |
8.3 |
8.3 |
58.3 |
58.3 |
| 1.8033 |
1 |
8.3 |
8.3 |
66.7 |
66.7 |
| 2.28589 |
1 |
8.3 |
8.3 |
75.0 |
75.0 |
| 2.67409 |
1 |
8.3 |
8.3 |
83.3 |
83.3 |
| 4.74914 |
1 |
8.3 |
8.3 |
91.7 |
91.7 |
| 36.37629 |
1 |
8.3 |
8.3 |
100.0 |
100.0 |
| Total |
12 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 13 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ val% : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ %cum : num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
## $ val%cum: num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.09491 |
1 |
| 0.16894 |
1 |
| 0.18619 |
1 |
| 0.20365 |
1 |
| 0.63948 |
1 |
| 0.7476 |
1 |
| 0.7674 |
1 |
| 1.8033 |
1 |
| 2.28589 |
1 |
| 2.67409 |
1 |
| 4.74914 |
1 |
| 36.37629 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.09491 8.09491 16.09491 24.09491 32.09491 40.09491
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0949,8.09] |
10 |
0.9090909 |
10 |
| (8.09,16.1] |
0 |
0.0000000 |
10 |
| (16.1,24.1] |
0 |
0.0000000 |
10 |
| (24.1,32.1] |
0 |
0.0000000 |
10 |
| (32.1,40.1] |
1 |
0.0909091 |
11 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.0949,8.09]",..: 1 2 3 4 5
## $ Freq : int 10 0 0 0 1
## $ Rel_Freq: num 0.9091 0 0 0 0.0909
## $ Cum_Freq: int 10 10 10 10 11
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0949,8.09] |
10 |
| (8.09,16.1] |
0 |
| (16.1,24.1] |
0 |
| (24.1,32.1] |
0 |
| (32.1,40.1] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_PA)
## id date time continent_code country_name country_code
## nbr.val 1.200000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.652000e+03 NA NA NA NA NA
## max 7.452000e+03 NA NA NA NA NA
## range 4.800000e+03 NA NA NA NA NA
## sum 6.181400e+04 NA NA NA NA NA
## median 4.880500e+03 NA NA NA NA NA
## mean 5.151167e+03 NA NA NA NA NA
## SE.mean 5.231425e+02 NA NA NA NA NA
## CI.mean.0.95 1.151429e+03 NA NA NA NA NA
## var 3.284136e+06 NA NA NA NA NA
## std.dev 1.812219e+03 NA NA NA NA NA
## coef.var 3.518074e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.200000e+01 NA 12.000000 NA
## nbr.null NA 1.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 0.000000e+00 NA 0.094910 NA
## max NA 7.664300e+04 NA 36.376290 NA
## range NA 7.664300e+04 NA 36.281380 NA
## sum NA 1.971740e+05 NA 50.696880 NA
## median NA 1.375500e+03 NA 0.757500 NA
## mean NA 1.643117e+04 NA 4.224740 NA
## SE.mean NA 8.446243e+03 NA 2.950226 NA
## CI.mean.0.95 NA 1.859006e+04 NA 6.493404 NA
## var NA 8.560683e+08 NA 104.446004 NA
## std.dev NA 2.925864e+04 NA 10.219883 NA
## coef.var NA 1.780680e+00 NA 2.419056 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 12.0000000 12.00000000 NA NA NA
## nbr.null 0.0000000 0.00000000 NA NA NA
## nbr.na 0.0000000 0.00000000 NA NA NA
## min 9.2332000 -85.26500000 NA NA NA
## max 15.5227000 -79.65050000 NA NA NA
## range 6.2895000 5.61450000 NA NA NA
## sum 118.6112000 -963.01940000 NA NA NA
## median 9.3590500 -79.81925000 NA NA NA
## mean 9.8842667 -80.25161667 NA NA NA
## SE.mean 0.5134407 0.45651767 NA NA NA
## CI.mean.0.95 1.1300754 1.00478862 NA NA NA
## var 3.1634562 2.50090061 NA NA NA
## std.dev 1.7786107 1.58142360 NA NA NA
## coef.var 0.1799436 -0.01970582 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 12.0000000 NA
## nbr.null NA NA NA 4 8.0000000 NA
## nbr.na NA NA NA 8 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 8.0000000 NA
## range NA NA NA 0 8.0000000 NA
## sum NA NA NA 0 15.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 1.2500000 NA
## SE.mean NA NA NA 0 0.7084447 NA
## CI.mean.0.95 NA NA NA 0 1.5592763 NA
## var NA NA NA 0 6.0227273 NA
## std.dev NA NA NA 0 2.4541245 NA
## coef.var NA NA NA NaN 1.9632996 NA
## source_link prop ypos
## nbr.val NA 12.0000000 1.200000e+01
## nbr.null NA 0.0000000 0.000000e+00
## nbr.na NA 0.0000000 0.000000e+00
## min NA 0.1872107 9.360537e-02
## max NA 71.7525220 9.936931e+01
## range NA 71.5653113 9.927570e+01
## sum NA 100.0000000 4.705863e+02
## median NA 1.4941748 2.119020e+01
## mean NA 8.3333333 3.921552e+01
## SE.mean NA 5.8193444 1.127004e+01
## CI.mean.0.95 NA 12.8082906 2.480520e+01
## var NA 406.3772270 1.524167e+03
## std.dev NA 20.1588002 3.904058e+01
## coef.var NA 2.4190560 9.955389e-01
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Los Santos (Panama)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA<- subset(df, state == "Los Santos")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_PA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Diagrama de pareto
library(qcc)
distance <- df_PA$distance
names(distance) <- df_PA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Palma 13.40535 13.40535 100.00000 100.00000
stem(df_PA$"distance")
head(df_PA)
## # A tibble: 1 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 3167 3/2/11 Night <NA> Panama PA Los Santos 1283
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
stem(df_PA$"distance", scale = 2)
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 13.40535 |
1 |
100 |
100 |
100 |
100 |
| Total |
1 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 2 obs. of 5 variables:
## $ n : num 1 1
## $ % : num 100 100
## $ val% : num 100 100
## $ %cum : num 100 100
## $ val%cum: num 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 13.40535
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (13.392,13.394] |
0 |
0 |
0 |
| (13.394,13.396] |
0 |
0 |
0 |
| (13.396,13.398] |
0 |
0 |
0 |
| (13.398,13.4] |
0 |
0 |
0 |
| (13.4,13.402] |
0 |
0 |
0 |
| (13.402,13.404] |
0 |
0 |
0 |
| (13.404,13.406] |
1 |
1 |
1 |
| (13.406,13.408] |
0 |
0 |
1 |
| (13.408,13.411] |
0 |
0 |
1 |
| (13.411,13.413] |
0 |
0 |
1 |
| (13.413,13.415] |
0 |
0 |
1 |
| (13.415,13.417] |
0 |
0 |
1 |
| (13.417,13.419] |
0 |
0 |
1 |
str(Freq_table)
## 'data.frame': 13 obs. of 4 variables:
## $ distance: Factor w/ 13 levels "(13.392,13.394]",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Freq : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Rel_Freq: num 0 0 0 0 0 0 1 0 0 0 ...
## $ Cum_Freq: int 0 0 0 0 0 0 1 1 1 1 ...
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (13.392,13.394] |
0 |
| (13.394,13.396] |
0 |
| (13.396,13.398] |
0 |
| (13.398,13.4] |
0 |
| (13.4,13.402] |
0 |
| (13.402,13.404] |
0 |
| (13.404,13.406] |
1 |
| (13.406,13.408] |
0 |
| (13.408,13.411] |
0 |
| (13.411,13.413] |
0 |
| (13.413,13.415] |
0 |
| (13.415,13.417] |
0 |
| (13.417,13.419] |
0 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_PA)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code state
## nbr.val 1 NA NA NA NA NA NA
## nbr.null 0 NA NA NA NA NA NA
## nbr.na 0 NA NA NA NA NA NA
## min 3167 NA NA NA NA NA NA
## max 3167 NA NA NA NA NA NA
## range 0 NA NA NA NA NA NA
## sum 3167 NA NA NA NA NA NA
## median 3167 NA NA NA NA NA NA
## mean 3167 NA NA NA NA NA NA
## SE.mean NA NA NA NA NA NA NA
## CI.mean.0.95 NaN NA NA NA NA NA NA
## var NA NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA NA
## population city distance location_description latitude longitude
## nbr.val 1 NA 1.00000 NA 1.0000 1.00
## nbr.null 0 NA 0.00000 NA 0.0000 0.00
## nbr.na 0 NA 0.00000 NA 0.0000 0.00
## min 1283 NA 13.40535 NA 7.5955 -80.38
## max 1283 NA 13.40535 NA 7.5955 -80.38
## range 0 NA 0.00000 NA 0.0000 0.00
## sum 1283 NA 13.40535 NA 7.5955 -80.38
## median 1283 NA 13.40535 NA 7.5955 -80.38
## mean 1283 NA 13.40535 NA 7.5955 -80.38
## SE.mean NA NA NA NA NA NA
## CI.mean.0.95 NaN NA NaN NA NaN NaN
## var NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA
## geolocation hazard_type landslide_type landslide_size trigger
## nbr.val NA NA NA NA NA
## nbr.null NA NA NA NA NA
## nbr.na NA NA NA NA NA
## min NA NA NA NA NA
## max NA NA NA NA NA
## range NA NA NA NA NA
## sum NA NA NA NA NA
## median NA NA NA NA NA
## mean NA NA NA NA NA
## SE.mean NA NA NA NA NA
## CI.mean.0.95 NA NA NA NA NA
## var NA NA NA NA NA
## std.dev NA NA NA NA NA
## coef.var NA NA NA NA NA
## storm_name injuries fatalities source_name source_link prop ypos
## nbr.val NA 0 1 NA NA 1 1
## nbr.null NA 0 1 NA NA 0 0
## nbr.na NA 1 0 NA NA 0 0
## min NA Inf 0 NA NA 100 50
## max NA -Inf 0 NA NA 100 50
## range NA -Inf 0 NA NA 0 0
## sum NA 0 0 NA NA 100 50
## median NA NA 0 NA NA 100 50
## mean NA NaN 0 NA NA 100 50
## SE.mean NA NA NA NA NA NA NA
## CI.mean.0.95 NA NaN NaN NA NA NaN NaN
## var NA NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA NA
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Panamá (Panama)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_PA<- subset(df, state == "Panamá")
knitr::kable(head(df_PA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_PA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_PA <- df_PA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_PA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_PA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_PA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.18074 |
| 2.57852 |
| 2.59449 |
| 3.30848 |
| 3.54386 |
| 0.48725 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_PA$distance
names(distance) <- df_PA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Miguelito 4.180740 4.180740 15.591951 15.591951
## San Miguelito 3.543860 7.724600 13.216725 28.808676
## San Miguelito 3.308480 11.033080 12.338882 41.147558
## Las Cumbres 2.814790 13.847870 10.497679 51.645238
## San Miguelito 2.594490 16.442360 9.676077 61.321314
## San Miguelito 2.578520 19.020880 9.616517 70.937832
## Ancón 2.240690 21.261570 8.356590 79.294421
## Cerro Azul 1.845960 23.107530 6.884455 86.178877
## Arraiján 1.803410 24.910940 6.725766 92.904643
## Alcaldedíaz 1.415260 26.326200 5.278172 98.182815
## Las Margaritas 0.487250 26.813450 1.817185 100.000000
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 | 488
## 2 | 2668
## 3 | 35
## 4 | 2
head(df_PA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6703 6/17/14 <NA> <NA> Panama PA Panamá 321501
## 2 7506 9/10/15 <NA> <NA> Panama PA Panamá 321501
## 3 7509 9/10/15 <NA> <NA> Panama PA Panamá 321501
## 4 7510 8/31/15 <NA> <NA> Panama PA Panamá 321501
## 5 7511 8/31/15 <NA> <NA> Panama PA Panamá 321501
## 6 7453 6/5/15 Afternoon <NA> Panama PA Panamá 0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_PA))
stem(df_PA$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 | 488
## 2 | 2668
## 3 | 35
## 4 | 2
stem(df_PA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 |
## 0 | 5
## 1 | 4
## 1 | 88
## 2 | 2
## 2 | 668
## 3 | 3
## 3 | 5
## 4 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.48725 |
1 |
9.1 |
9.1 |
9.1 |
9.1 |
| 1.41526 |
1 |
9.1 |
9.1 |
18.2 |
18.2 |
| 1.80341 |
1 |
9.1 |
9.1 |
27.3 |
27.3 |
| 1.84596 |
1 |
9.1 |
9.1 |
36.4 |
36.4 |
| 2.24069 |
1 |
9.1 |
9.1 |
45.5 |
45.5 |
| 2.57852 |
1 |
9.1 |
9.1 |
54.5 |
54.5 |
| 2.59449 |
1 |
9.1 |
9.1 |
63.6 |
63.6 |
| 2.81479 |
1 |
9.1 |
9.1 |
72.7 |
72.7 |
| 3.30848 |
1 |
9.1 |
9.1 |
81.8 |
81.8 |
| 3.54386 |
1 |
9.1 |
9.1 |
90.9 |
90.9 |
| 4.18074 |
1 |
9.1 |
9.1 |
100.0 |
100.0 |
| Total |
11 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 12 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
## $ val% : num 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 9.1 ...
## $ %cum : num 9.1 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 ...
## $ val%cum: num 9.1 18.2 27.3 36.4 45.5 54.5 63.6 72.7 81.8 90.9 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.48725 |
1 |
| 1.41526 |
1 |
| 1.80341 |
1 |
| 1.84596 |
1 |
| 2.24069 |
1 |
| 2.57852 |
1 |
| 2.59449 |
1 |
| 2.81479 |
1 |
| 3.30848 |
1 |
| 3.54386 |
1 |
| 4.18074 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.48725 1.48725 2.48725 3.48725 4.48725
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.487,1.49] |
1 |
0.1 |
1 |
| (1.49,2.49] |
3 |
0.3 |
4 |
| (2.49,3.49] |
4 |
0.4 |
8 |
| (3.49,4.49] |
2 |
0.2 |
10 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.487,1.49]",..: 1 2 3 4
## $ Freq : int 1 3 4 2
## $ Rel_Freq: num 0.1 0.3 0.4 0.2
## $ Cum_Freq: int 1 4 8 10
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.487,1.49] |
1 |
| (1.49,2.49] |
3 |
| (2.49,3.49] |
4 |
| (3.49,4.49] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_PA)
## id date time continent_code country_name country_code
## nbr.val 1.100000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 7.500000e+02 NA NA NA NA NA
## max 7.512000e+03 NA NA NA NA NA
## range 6.762000e+03 NA NA NA NA NA
## sum 7.142200e+04 NA NA NA NA NA
## median 7.453000e+03 NA NA NA NA NA
## mean 6.492909e+03 NA NA NA NA NA
## SE.mean 6.035510e+02 NA NA NA NA NA
## CI.mean.0.95 1.344795e+03 NA NA NA NA NA
## var 4.007012e+06 NA NA NA NA NA
## std.dev 2.001752e+03 NA NA NA NA NA
## coef.var 3.082982e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.100000e+01 NA 11.0000000 NA
## nbr.null NA 1.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.4872500 NA
## max NA 3.215010e+05 NA 4.1807400 NA
## range NA 3.215010e+05 NA 3.6934900 NA
## sum NA 1.783660e+06 NA 26.8134500 NA
## median NA 7.681500e+04 NA 2.5785200 NA
## mean NA 1.621509e+05 NA 2.4375864 NA
## SE.mean NA 4.658766e+04 NA 0.3140292 NA
## CI.mean.0.95 NA 1.038038e+05 NA 0.6997007 NA
## var NA 2.387451e+10 NA 1.0847578 NA
## std.dev NA 1.545138e+05 NA 1.0415171 NA
## coef.var NA 9.529011e-01 NA 0.4272739 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 11.000000000 1.100000e+01 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 8.949600000 -7.963360e+01 NA NA NA
## max 9.180500000 -7.908960e+01 NA NA NA
## range 0.230900000 5.440000e-01 NA NA NA
## sum 99.716200000 -8.742698e+02 NA NA NA
## median 9.065500000 -7.949450e+01 NA NA NA
## mean 9.065109091 -7.947907e+01 NA NA NA
## SE.mean 0.020679157 4.251380e-02 NA NA NA
## CI.mean.0.95 0.046076033 9.472666e-02 NA NA NA
## var 0.004703903 1.988166e-02 NA NA NA
## std.dev 0.068585005 1.410023e-01 NA NA NA
## coef.var 0.007565822 -1.774081e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 9.00000 9 NA
## nbr.null NA NA NA 8.00000 9 NA
## nbr.na NA NA NA 2.00000 2 NA
## min NA NA NA 0.00000 0 NA
## max NA NA NA 45.00000 0 NA
## range NA NA NA 45.00000 0 NA
## sum NA NA NA 45.00000 0 NA
## median NA NA NA 0.00000 0 NA
## mean NA NA NA 5.00000 0 NA
## SE.mean NA NA NA 5.00000 0 NA
## CI.mean.0.95 NA NA NA 11.53002 0 NA
## var NA NA NA 225.00000 0 NA
## std.dev NA NA NA 15.00000 0 NA
## coef.var NA NA NA 3.00000 NaN NA
## source_link prop ypos
## nbr.val NA 11.0000000 11.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 1.8171850 7.7959755
## max NA 15.5919511 97.3609140
## range NA 13.7747660 89.5649385
## sum NA 100.0000000 629.0874356
## median NA 9.6165171 61.3487448
## mean NA 9.0909091 57.1897669
## SE.mean NA 1.1711630 8.8826226
## CI.mean.0.95 NA 2.6095138 19.7917165
## var NA 15.0878511 867.9108284
## std.dev NA 3.8843083 29.4603263
## coef.var NA 0.4272739 0.5151328
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Mexico
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=country_name, y = prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 9.51003 |
| 8.28739 |
| 1.27837 |
| 1.52983 |
| 2.85382 |
| 3.73160 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Guerrero 32.12708000 32.12708000 7.76637116 7.76637116
## Oaxaca 24.67589000 56.80297000 5.96512725 13.73149841
## Nayarit 21.80060000 78.60357000 5.27005726 19.00155566
## Nayarit 19.41353000 98.01710000 4.69300912 23.69456478
## Oaxaca 16.16369000 114.18079000 3.90739575 27.60196053
## Oaxaca 15.74984000 129.93063000 3.80735203 31.40931256
## Tabasco 15.22260000 145.15323000 3.67989751 35.08921007
## Guerrero 14.04274000 159.19597000 3.39467922 38.48388929
## Baja California 12.53758000 171.73355000 3.03082321 41.51471249
## Baja California 12.36500000 184.09855000 2.98910388 44.50381637
## Guerrero 12.33417000 196.43272000 2.98165106 47.48546743
## Oaxaca 11.83490000 208.26762000 2.86095798 50.34642541
## Sinaloa 10.88351000 219.15113000 2.63096983 52.97739524
## Hidalgo 9.78251000 228.93364000 2.36481509 55.34221033
## Oaxaca 9.56829000 238.50193000 2.31302974 57.65524007
## Veracruz-Llave 9.51003000 248.01196000 2.29894602 59.95418609
## Tabasco 8.93271000 256.94467000 2.15938521 62.11357130
## Chiapas 8.46579000 265.41046000 2.04651239 64.16008369
## Veracruz-Llave 8.28739000 273.69785000 2.00338614 66.16346982
## Chiapas 7.93996000 281.63781000 1.91939872 68.08286855
## Puebla 7.93258000 289.57039000 1.91761469 70.00048324
## Guerrero 7.07138000 296.64177000 1.70942898 71.70991222
## Guerrero 6.80950000 303.45127000 1.64612235 73.35603457
## Baja California 6.46156000 309.91283000 1.56201165 74.91804622
## Puebla 5.24855000 315.16138000 1.26877971 76.18682593
## Tabasco 4.81680000 319.97818000 1.16440886 77.35123479
## Chiapas 4.68443000 324.66261000 1.13240986 78.48364465
## Veracruz-Llave 4.51820000 329.18081000 1.09222557 79.57587021
## México 4.40801000 333.58882000 1.06558834 80.64145855
## Tabasco 4.32007000 337.90889000 1.04432980 81.68578835
## Tabasco 4.19108000 342.09997000 1.01314787 82.69893622
## Michoacán 4.18059000 346.28056000 1.01061203 83.70954826
## Guerrero 4.10830000 350.38886000 0.99313671 84.70268497
## Chiapas 3.74149000 354.13035000 0.90446440 85.60714937
## Veracruz-Llave 3.73160000 357.86195000 0.90207360 86.50922296
## Oaxaca 3.64682000 361.50877000 0.88157896 87.39080192
## The Federal District 3.49173000 365.00050000 0.84408764 88.23488956
## Michoacán 3.42740000 368.42790000 0.82853656 89.06342613
## Michoacán 3.36905000 371.79695000 0.81443109 89.87785721
## Nuevo León 3.30074000 375.09769000 0.79791789 90.67577511
## Chihuahua 3.05542000 378.15311000 0.73861446 91.41438956
## Veracruz-Llave 2.85382000 381.00693000 0.68987986 92.10426942
## México 2.59637000 383.60330000 0.62764413 92.73191355
## Baja California 2.49770000 386.10100000 0.60379173 93.33570528
## Veracruz 2.47800000 388.57900000 0.59902947 93.93473475
## Tabasco 2.15703000 390.73603000 0.52143847 94.45617322
## Chiapas 2.06743000 392.80346000 0.49977865 94.95595187
## Veracruz 1.93516000 394.73862000 0.46780382 95.42375570
## Chiapas 1.73469000 396.47331000 0.41934239 95.84309808
## Michoacán 1.69508000 398.16839000 0.40976710 96.25286518
## Puebla 1.68294000 399.85133000 0.40683239 96.65969757
## Veracruz-Llave 1.52983000 401.38116000 0.36981972 97.02951729
## Chiapas 1.41805000 402.79921000 0.34279812 97.37231541
## Veracruz-Llave 1.27837000 404.07758000 0.30903200 97.68134741
## México 1.06048000 405.13806000 0.25635947 97.93770688
## Chiapas 0.94118000 406.07924000 0.22752000 98.16522688
## Guerrero 0.90692000 406.98616000 0.21923802 98.38446490
## Guerrero 0.88149000 407.86765000 0.21309059 98.59755549
## Oaxaca 0.78340000 408.65105000 0.18937840 98.78693390
## Chiapas 0.76257000 409.41362000 0.18434298 98.97127688
## México 0.66626000 410.07988000 0.16106109 99.13233796
## Hidalgo 0.64483000 410.72471000 0.15588062 99.28821858
## Oaxaca 0.63550000 411.36021000 0.15362519 99.44184377
## Veracruz-Llave 0.50188000 411.86209000 0.12132402 99.56316779
## Colima 0.36051000 412.22260000 0.08714936 99.65031715
## Chiapas 0.31118000 412.53378000 0.07522437 99.72554152
## Puebla 0.30326000 412.83704000 0.07330980 99.79885132
## Oaxaca 0.28905000 413.12609000 0.06987468 99.86872600
## The Federal District 0.15208000 413.27817000 0.03676368 99.90548968
## The Federal District 0.15208000 413.43025000 0.03676368 99.94225337
## Veracruz-Llave 0.09971000 413.52996000 0.02410380 99.96635717
## Jalisco 0.08269000 413.61265000 0.01998941 99.98634658
## Baja California Sur 0.05648000 413.66913000 0.01365342 100.00000000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000000000111111111111222222222333333344444444
## 0 | 555567788889
## 1 | 000122234
## 1 | 5669
## 2 | 2
## 2 | 5
## 3 | 2
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 115 7/4/07 <NA> <NA> Mexico MX Vera~ 1947
## 2 2438 9/17/10 <NA> <NA> Mexico MX Vera~ 1324
## 3 3684 7/1/11 <NA> <NA> Mexico MX Vera~ 425148
## 4 5403 8/26/13 20:20:00 <NA> Mexico MX Vera~ 30607
## 5 5405 8/26/13 <NA> <NA> Mexico MX Vera~ 15800
## 6 5406 8/26/13 <NA> <NA> Mexico MX Vera~ 3198
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000000000111111111111222222222333333344444444
## 0 | 555567788889
## 1 | 000122234
## 1 | 5669
## 2 | 2
## 2 | 5
## 3 | 2
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 11122333456678899913457779
## 2 | 12556913445677
## 4 | 122345782
## 6 | 58199
## 8 | 359568
## 10 | 98
## 12 | 345
## 14 | 027
## 16 | 2
## 18 | 4
## 20 | 8
## 22 |
## 24 | 7
## 26 |
## 28 |
## 30 |
## 32 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.15208 |
2 |
2.7 |
2.7 |
2.7 |
2.7 |
| 0.05648 |
1 |
1.4 |
1.4 |
4.1 |
4.1 |
| 0.08269 |
1 |
1.4 |
1.4 |
5.5 |
5.5 |
| 0.09971 |
1 |
1.4 |
1.4 |
6.8 |
6.8 |
| 0.28905 |
1 |
1.4 |
1.4 |
8.2 |
8.2 |
| 0.30326 |
1 |
1.4 |
1.4 |
9.6 |
9.6 |
| 0.31118 |
1 |
1.4 |
1.4 |
11.0 |
11.0 |
| 0.36051 |
1 |
1.4 |
1.4 |
12.3 |
12.3 |
| 0.50188 |
1 |
1.4 |
1.4 |
13.7 |
13.7 |
| 0.6355 |
1 |
1.4 |
1.4 |
15.1 |
15.1 |
| 0.64483 |
1 |
1.4 |
1.4 |
16.4 |
16.4 |
| 0.66626 |
1 |
1.4 |
1.4 |
17.8 |
17.8 |
| 0.76257 |
1 |
1.4 |
1.4 |
19.2 |
19.2 |
| 0.7834 |
1 |
1.4 |
1.4 |
20.5 |
20.5 |
| 0.88149 |
1 |
1.4 |
1.4 |
21.9 |
21.9 |
| 0.90692 |
1 |
1.4 |
1.4 |
23.3 |
23.3 |
| 0.94118 |
1 |
1.4 |
1.4 |
24.7 |
24.7 |
| 1.06048 |
1 |
1.4 |
1.4 |
26.0 |
26.0 |
| 1.27837 |
1 |
1.4 |
1.4 |
27.4 |
27.4 |
| 1.41805 |
1 |
1.4 |
1.4 |
28.8 |
28.8 |
| 1.52983 |
1 |
1.4 |
1.4 |
30.1 |
30.1 |
| 1.68294 |
1 |
1.4 |
1.4 |
31.5 |
31.5 |
| 1.69508 |
1 |
1.4 |
1.4 |
32.9 |
32.9 |
| 1.73469 |
1 |
1.4 |
1.4 |
34.2 |
34.2 |
| 1.93516 |
1 |
1.4 |
1.4 |
35.6 |
35.6 |
| 2.06743 |
1 |
1.4 |
1.4 |
37.0 |
37.0 |
| 2.15703 |
1 |
1.4 |
1.4 |
38.4 |
38.4 |
| 2.478 |
1 |
1.4 |
1.4 |
39.7 |
39.7 |
| 2.4977 |
1 |
1.4 |
1.4 |
41.1 |
41.1 |
| 2.59637 |
1 |
1.4 |
1.4 |
42.5 |
42.5 |
| 2.85382 |
1 |
1.4 |
1.4 |
43.8 |
43.8 |
| 3.05542 |
1 |
1.4 |
1.4 |
45.2 |
45.2 |
| 3.30074 |
1 |
1.4 |
1.4 |
46.6 |
46.6 |
| 3.36905 |
1 |
1.4 |
1.4 |
47.9 |
47.9 |
| 3.4274 |
1 |
1.4 |
1.4 |
49.3 |
49.3 |
| 3.49173 |
1 |
1.4 |
1.4 |
50.7 |
50.7 |
| 3.64682 |
1 |
1.4 |
1.4 |
52.1 |
52.1 |
| 3.7316 |
1 |
1.4 |
1.4 |
53.4 |
53.4 |
| 3.74149 |
1 |
1.4 |
1.4 |
54.8 |
54.8 |
| 4.1083 |
1 |
1.4 |
1.4 |
56.2 |
56.2 |
| 4.18059 |
1 |
1.4 |
1.4 |
57.5 |
57.5 |
| 4.19108 |
1 |
1.4 |
1.4 |
58.9 |
58.9 |
| 4.32007 |
1 |
1.4 |
1.4 |
60.3 |
60.3 |
| 4.40801 |
1 |
1.4 |
1.4 |
61.6 |
61.6 |
| 4.5182 |
1 |
1.4 |
1.4 |
63.0 |
63.0 |
| 4.68443 |
1 |
1.4 |
1.4 |
64.4 |
64.4 |
| 4.8168 |
1 |
1.4 |
1.4 |
65.8 |
65.8 |
| 5.24855 |
1 |
1.4 |
1.4 |
67.1 |
67.1 |
| 6.46156 |
1 |
1.4 |
1.4 |
68.5 |
68.5 |
| 6.8095 |
1 |
1.4 |
1.4 |
69.9 |
69.9 |
| 7.07138 |
1 |
1.4 |
1.4 |
71.2 |
71.2 |
| 7.93258 |
1 |
1.4 |
1.4 |
72.6 |
72.6 |
| 7.93996 |
1 |
1.4 |
1.4 |
74.0 |
74.0 |
| 8.28739 |
1 |
1.4 |
1.4 |
75.3 |
75.3 |
| 8.46579 |
1 |
1.4 |
1.4 |
76.7 |
76.7 |
| 8.93271 |
1 |
1.4 |
1.4 |
78.1 |
78.1 |
| 9.51003 |
1 |
1.4 |
1.4 |
79.5 |
79.5 |
| 9.56829 |
1 |
1.4 |
1.4 |
80.8 |
80.8 |
| 9.78251 |
1 |
1.4 |
1.4 |
82.2 |
82.2 |
| 10.88351 |
1 |
1.4 |
1.4 |
83.6 |
83.6 |
| 11.8349 |
1 |
1.4 |
1.4 |
84.9 |
84.9 |
| 12.33417 |
1 |
1.4 |
1.4 |
86.3 |
86.3 |
| 12.365 |
1 |
1.4 |
1.4 |
87.7 |
87.7 |
| 12.53758 |
1 |
1.4 |
1.4 |
89.0 |
89.0 |
| 14.04274 |
1 |
1.4 |
1.4 |
90.4 |
90.4 |
| 15.2226 |
1 |
1.4 |
1.4 |
91.8 |
91.8 |
| 15.74984 |
1 |
1.4 |
1.4 |
93.2 |
93.2 |
| 16.16369 |
1 |
1.4 |
1.4 |
94.5 |
94.5 |
| 19.41353 |
1 |
1.4 |
1.4 |
95.9 |
95.9 |
| 21.8006 |
1 |
1.4 |
1.4 |
97.3 |
97.3 |
| 24.67589 |
1 |
1.4 |
1.4 |
98.6 |
98.6 |
| 32.12708 |
1 |
1.4 |
1.4 |
100.0 |
100.0 |
| Total |
73 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 73 obs. of 5 variables:
## $ n : num 2 1 1 1 1 1 1 1 1 1 ...
## $ % : num 2.7 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
## $ val% : num 2.7 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
## $ %cum : num 2.7 4.1 5.5 6.8 8.2 9.6 11 12.3 13.7 15.1 ...
## $ val%cum: num 2.7 4.1 5.5 6.8 8.2 9.6 11 12.3 13.7 15.1 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.15208 |
2 |
| 0.05648 |
1 |
| 0.08269 |
1 |
| 0.09971 |
1 |
| 0.28905 |
1 |
| 0.30326 |
1 |
| 0.31118 |
1 |
| 0.36051 |
1 |
| 0.50188 |
1 |
| 0.6355 |
1 |
| 0.64483 |
1 |
| 0.66626 |
1 |
| 0.76257 |
1 |
| 0.7834 |
1 |
| 0.88149 |
1 |
| 0.90692 |
1 |
| 0.94118 |
1 |
| 1.06048 |
1 |
| 1.27837 |
1 |
| 1.41805 |
1 |
| 1.52983 |
1 |
| 1.68294 |
1 |
| 1.69508 |
1 |
| 1.73469 |
1 |
| 1.93516 |
1 |
| 2.06743 |
1 |
| 2.15703 |
1 |
| 2.478 |
1 |
| 2.4977 |
1 |
| 2.59637 |
1 |
| 2.85382 |
1 |
| 3.05542 |
1 |
| 3.30074 |
1 |
| 3.36905 |
1 |
| 3.4274 |
1 |
| 3.49173 |
1 |
| 3.64682 |
1 |
| 3.7316 |
1 |
| 3.74149 |
1 |
| 4.1083 |
1 |
| 4.18059 |
1 |
| 4.19108 |
1 |
| 4.32007 |
1 |
| 4.40801 |
1 |
| 4.5182 |
1 |
| 4.68443 |
1 |
| 4.8168 |
1 |
| 5.24855 |
1 |
| 6.46156 |
1 |
| 6.8095 |
1 |
| 7.07138 |
1 |
| 7.93258 |
1 |
| 7.93996 |
1 |
| 8.28739 |
1 |
| 8.46579 |
1 |
| 8.93271 |
1 |
| 9.51003 |
1 |
| 9.56829 |
1 |
| 9.78251 |
1 |
| 10.88351 |
1 |
| 11.8349 |
1 |
| 12.33417 |
1 |
| 12.365 |
1 |
| 12.53758 |
1 |
| 14.04274 |
1 |
| 15.2226 |
1 |
| 15.74984 |
1 |
| 16.16369 |
1 |
| 19.41353 |
1 |
| 21.8006 |
1 |
| 24.67589 |
1 |
| 32.12708 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")+ theme(axis.text.x = element_text(angle = 90))

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 7.300000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.150000e+02 NA NA NA NA NA
## max 7.518000e+03 NA NA NA NA NA
## range 7.403000e+03 NA NA NA NA NA
## sum 2.913040e+05 NA NA NA NA NA
## median 3.834000e+03 NA NA NA NA NA
## mean 3.990466e+03 NA NA NA NA NA
## SE.mean 2.764651e+02 NA NA NA NA NA
## CI.mean.0.95 5.511232e+02 NA NA NA NA NA
## var 5.579607e+06 NA NA NA NA NA
## std.dev 2.362119e+03 NA NA NA NA NA
## coef.var 5.919407e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 7.300000e+01 NA 73.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.005000e+03 NA 0.0564800 NA
## max NA 1.229419e+07 NA 32.1270800 NA
## range NA 1.229319e+07 NA 32.0706000 NA
## sum NA 3.298622e+07 NA 413.6691300 NA
## median NA 6.089000e+03 NA 3.4917300 NA
## mean NA 4.518661e+05 NA 5.6667004 NA
## SE.mean NA 2.369285e+05 NA 0.7442396 NA
## CI.mean.0.95 NA 4.723084e+05 NA 1.4836145 NA
## var NA 4.097865e+12 NA 40.4341560 NA
## std.dev NA 2.024318e+06 NA 6.3587857 NA
## coef.var NA 4.479908e+00 NA 1.1221320 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 73.0000000 7.300000e+01 NA NA NA
## nbr.null 0.0000000 0.000000e+00 NA NA NA
## nbr.na 0.0000000 0.000000e+00 NA NA NA
## min 15.0337000 -1.170898e+02 NA NA NA
## max 32.5755000 -9.129880e+01 NA NA NA
## range 17.5418000 2.579100e+01 NA NA NA
## sum 1427.3326000 -7.208526e+03 NA NA NA
## median 19.0294000 -9.749970e+01 NA NA NA
## mean 19.5525014 -9.874694e+01 NA NA NA
## SE.mean 0.4625032 6.876754e-01 NA NA NA
## CI.mean.0.95 0.9219832 1.370856e+00 NA NA NA
## var 15.6153711 3.452151e+01 NA NA NA
## std.dev 3.9516289 5.875501e+00 NA NA NA
## coef.var 0.2021035 -5.950059e-02 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 19.0000000 70.000000
## nbr.null NA NA NA 15.0000000 27.000000
## nbr.na NA NA NA 54.0000000 3.000000
## min NA NA NA 0.0000000 0.000000
## max NA NA NA 8.0000000 71.000000
## range NA NA NA 8.0000000 71.000000
## sum NA NA NA 15.0000000 284.000000
## median NA NA NA 0.0000000 2.000000
## mean NA NA NA 0.7894737 4.057143
## SE.mean NA NA NA 0.4625062 1.098632
## CI.mean.0.95 NA NA NA 0.9716894 2.191711
## var NA NA NA 4.0643275 84.489441
## std.dev NA NA NA 2.0160177 9.191814
## coef.var NA NA NA 2.5536225 2.265588
## source_name source_link prop ypos
## nbr.val NA NA 73.00000000 73.0000000
## nbr.null NA NA 0.00000000 0.0000000
## nbr.na NA NA 0.00000000 0.0000000
## min NA NA 0.01365342 1.1494730
## max NA NA 7.76637116 99.2189942
## range NA NA 7.75271773 98.0695212
## sum NA NA 100.00000000 3545.1804284
## median NA NA 0.84408764 54.2381528
## mean NA NA 1.36986301 48.5641155
## SE.mean NA NA 0.17991180 3.7293810
## CI.mean.0.95 NA NA 0.35864762 7.4343851
## var NA NA 2.36288267 1015.3046257
## std.dev NA NA 1.53716709 31.8638451
## coef.var NA NA 1.12213198 0.6561191
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Baja California (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Baja California")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 12.53758 |
| 2.49770 |
| 6.46156 |
| 12.36500 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tijuana 12.53758 12.53758 37.02569 37.02569
## El Sauzal 12.36500 24.90258 36.51603 73.54172
## La Esperanza [Granjas Familiares] 6.46156 31.36414 19.08213 92.62385
## Tijuana 2.49770 33.86184 7.37615 100.00000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 | 6
## 1 | 23
head(df_MX)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 417 1/7/08 <NA> <NA> Mexico MX Baja~ 1376457
## 2 1838 5/5/10 <NA> <NA> Mexico MX Baja~ 1376457
## 3 7056 5/17/15 <NA> <NA> Mexico MX Baja~ 1173
## 4 5731 12/28/13 2:00:00 <NA> Mexico MX Baja~ 9085
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 | 6
## 1 | 23
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 5
## 4 |
## 6 | 5
## 8 |
## 10 |
## 12 | 45
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.4977 |
1 |
25 |
25 |
25 |
25 |
| 6.46156 |
1 |
25 |
25 |
50 |
50 |
| 12.365 |
1 |
25 |
25 |
75 |
75 |
| 12.53758 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 2.4977 |
1 |
| 6.46156 |
1 |
| 12.365 |
1 |
| 12.53758 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.4977 6.4977 10.4977 14.4977
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (2.5,6.5] |
1 |
0.3333333 |
1 |
| (6.5,10.5] |
0 |
0.0000000 |
1 |
| (10.5,14.5] |
2 |
0.6666667 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(2.5,6.5]","(6.5,10.5]",..: 1 2 3
## $ Freq : int 1 0 2
## $ Rel_Freq: num 0.333 0 0.667
## $ Cum_Freq: int 1 1 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (2.5,6.5] |
1 |
| (6.5,10.5] |
0 |
| (10.5,14.5] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 4.170000e+02 NA NA NA NA NA
## max 7.056000e+03 NA NA NA NA NA
## range 6.639000e+03 NA NA NA NA NA
## sum 1.504200e+04 NA NA NA NA NA
## median 3.784500e+03 NA NA NA NA NA
## mean 3.760500e+03 NA NA NA NA NA
## SE.mean 1.571045e+03 NA NA NA NA NA
## CI.mean.0.95 4.999766e+03 NA NA NA NA NA
## var 9.872730e+06 NA NA NA NA NA
## std.dev 3.142090e+03 NA NA NA NA NA
## coef.var 8.355511e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.173000e+03 NA 2.4977000 NA
## max NA 1.376457e+06 NA 12.5375800 NA
## range NA 1.375284e+06 NA 10.0398800 NA
## sum NA 2.763172e+06 NA 33.8618400 NA
## median NA 6.927710e+05 NA 9.4132800 NA
## mean NA 6.907930e+05 NA 8.4654600 NA
## SE.mean NA 3.958716e+05 NA 2.4395756 NA
## CI.mean.0.95 NA 1.259840e+06 NA 7.7638183 NA
## var NA 6.268573e+11 NA 23.8061160 NA
## std.dev NA 7.917432e+05 NA 4.8791512 NA
## coef.var NA 1.146137e+00 NA 0.5763598 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 3.197800e+01 -1.170898e+02 NA NA NA
## max 3.257550e+01 -1.167776e+02 NA NA NA
## range 5.975000e-01 3.122000e-01 NA NA NA
## sum 1.295981e+02 -4.677689e+02 NA NA NA
## median 3.252230e+01 -1.169507e+02 NA NA NA
## mean 3.239952e+01 -1.169422e+02 NA NA NA
## SE.mean 1.410711e-01 6.699289e-02 NA NA NA
## CI.mean.0.95 4.489511e-01 2.132013e-01 NA NA NA
## var 7.960417e-02 1.795219e-02 NA NA NA
## std.dev 2.821421e-01 1.339858e-01 NA NA NA
## coef.var 8.708217e-03 -1.145743e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 4.0000000 NA
## nbr.null NA NA NA 1 3.0000000 NA
## nbr.na NA NA NA 3 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 1.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.2500000 NA
## SE.mean NA NA NA NA 0.2500000 NA
## CI.mean.0.95 NA NA NA NaN 0.7956116 NA
## var NA NA NA NA 0.2500000 NA
## std.dev NA NA NA NA 0.5000000 NA
## coef.var NA NA NA NA 2.0000000 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 7.3761497 18.5128451
## max NA 37.0256903 81.7419845
## range NA 29.6495406 63.2291393
## sum NA 100.0000000 194.9114992
## median NA 27.7990800 47.3283348
## mean NA 25.0000000 48.7278748
## SE.mean NA 7.2044980 13.2107972
## CI.mean.0.95 NA 22.9279280 42.0426527
## var NA 207.6191648 698.1006481
## std.dev NA 14.4089960 26.4215944
## coef.var NA 0.5763598 0.5422275
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Chiapas (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Chiapas")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.73469 |
| 0.76257 |
| 0.94118 |
| 3.74149 |
| 7.93996 |
| 2.06743 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Chihuahua 8.4657900 8.4657900 26.4005074 26.4005074
## Nueva Libertad 7.9399600 16.4057500 24.7607102 51.1612177
## Emiliano Zapata 4.6844300 21.0901800 14.6083625 65.7695802
## Ostuacán 3.7414900 24.8316700 11.6678106 77.4373908
## Motozintla de Mendoza 2.0674300 26.8991000 6.4472661 83.8846569
## Tila 1.7346900 28.6337900 5.4096187 89.2942757
## Escuintla 1.4180500 30.0518400 4.4221791 93.7164548
## San Cristóbal de las Casas 0.9411800 30.9930200 2.9350633 96.6515181
## Santo Domingo 0.7625700 31.7555900 2.3780693 99.0295873
## Amatán 0.3111800 32.0667700 0.9704127 100.0000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 38947
## 2 | 17
## 4 | 7
## 6 | 9
## 8 | 5
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7517 9/12/15 <NA> <NA> Mexico MX Chiapas 6089
## 2 7514 5/23/15 <NA> <NA> Mexico MX Chiapas 3796
## 3 2115 7/22/10 <NA> <NA> Mexico MX Chiapas 128996
## 4 346 11/4/07 <NA> <NA> Mexico MX Chiapas 3183
## 5 3832 7/22/11 <NA> <NA> Mexico MX Chiapas 1005
## 6 7515 10/21/15 <NA> <NA> Mexico MX Chiapas 19092
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 38947
## 2 | 17
## 4 | 7
## 6 | 9
## 8 | 5
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 389
## 1 | 47
## 2 | 1
## 3 | 7
## 4 | 7
## 5 |
## 6 |
## 7 | 9
## 8 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.31118 |
1 |
10 |
10 |
10 |
10 |
| 0.76257 |
1 |
10 |
10 |
20 |
20 |
| 0.94118 |
1 |
10 |
10 |
30 |
30 |
| 1.41805 |
1 |
10 |
10 |
40 |
40 |
| 1.73469 |
1 |
10 |
10 |
50 |
50 |
| 2.06743 |
1 |
10 |
10 |
60 |
60 |
| 3.74149 |
1 |
10 |
10 |
70 |
70 |
| 4.68443 |
1 |
10 |
10 |
80 |
80 |
| 7.93996 |
1 |
10 |
10 |
90 |
90 |
| 8.46579 |
1 |
10 |
10 |
100 |
100 |
| Total |
10 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 11 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 10 10 10 10 10 10 10 10 10 10 ...
## $ val% : num 10 10 10 10 10 10 10 10 10 10 ...
## $ %cum : num 10 20 30 40 50 60 70 80 90 100 ...
## $ val%cum: num 10 20 30 40 50 60 70 80 90 100 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.31118 |
1 |
| 0.76257 |
1 |
| 0.94118 |
1 |
| 1.41805 |
1 |
| 1.73469 |
1 |
| 2.06743 |
1 |
| 3.74149 |
1 |
| 4.68443 |
1 |
| 7.93996 |
1 |
| 8.46579 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.31118 2.31118 4.31118 6.31118 8.31118 10.31118
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.311,2.31] |
5 |
0.5555556 |
5 |
| (2.31,4.31] |
1 |
0.1111111 |
6 |
| (4.31,6.31] |
1 |
0.1111111 |
7 |
| (6.31,8.31] |
1 |
0.1111111 |
8 |
| (8.31,10.3] |
1 |
0.1111111 |
9 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.311,2.31]",..: 1 2 3 4 5
## $ Freq : int 5 1 1 1 1
## $ Rel_Freq: num 0.556 0.111 0.111 0.111 0.111
## $ Cum_Freq: int 5 6 7 8 9
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.311,2.31] |
5 |
| (2.31,4.31] |
1 |
| (4.31,6.31] |
1 |
| (6.31,8.31] |
1 |
| (8.31,10.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 1.000000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.460000e+02 NA NA NA NA NA
## max 7.517000e+03 NA NA NA NA NA
## range 7.171000e+03 NA NA NA NA NA
## sum 4.351000e+04 NA NA NA NA NA
## median 3.176500e+03 NA NA NA NA NA
## mean 4.351000e+03 NA NA NA NA NA
## SE.mean 9.010547e+02 NA NA NA NA NA
## CI.mean.0.95 2.038327e+03 NA NA NA NA NA
## var 8.118995e+06 NA NA NA NA NA
## std.dev 2.849385e+03 NA NA NA NA NA
## coef.var 6.548805e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.000000e+01 NA 10.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.005000e+03 NA 0.3111800 NA
## max NA 1.289960e+05 NA 8.4657900 NA
## range NA 1.279910e+05 NA 8.1546100 NA
## sum NA 1.953740e+05 NA 32.0667700 NA
## median NA 5.018000e+03 NA 1.9010600 NA
## mean NA 1.953740e+04 NA 3.2066770 NA
## SE.mean NA 1.233991e+04 NA 0.9350334 NA
## CI.mean.0.95 NA 2.791482e+04 NA 2.1151925 NA
## var NA 1.522734e+09 NA 8.7428745 NA
## std.dev NA 3.902222e+04 NA 2.9568352 NA
## coef.var NA 1.997309e+00 NA 0.9220870 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 10.00000000 1.000000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 15.03370000 -9.330600e+01 NA NA NA
## max 17.70550000 -9.174160e+01 NA NA NA
## range 2.67180000 1.564400e+00 NA NA NA
## sum 164.38930000 -9.246466e+02 NA NA NA
## median 16.44040000 -9.253910e+01 NA NA NA
## mean 16.43893000 -9.246466e+01 NA NA NA
## SE.mean 0.31427677 1.497619e-01 NA NA NA
## CI.mean.0.95 0.71094344 3.387849e-01 NA NA NA
## var 0.98769885 2.242861e-01 NA NA NA
## std.dev 0.99383039 4.735886e-01 NA NA NA
## coef.var 0.06045591 -5.121833e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 10.000000 NA
## nbr.null NA NA NA 4 6.000000 NA
## nbr.na NA NA NA 6 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 16.000000 NA
## range NA NA NA 0 16.000000 NA
## sum NA NA NA 0 27.000000 NA
## median NA NA NA 0 0.000000 NA
## mean NA NA NA 0 2.700000 NA
## SE.mean NA NA NA 0 1.584999 NA
## CI.mean.0.95 NA NA NA 0 3.585517 NA
## var NA NA NA 0 25.122222 NA
## std.dev NA NA NA 0 5.012207 NA
## coef.var NA NA NA NaN 1.856373 NA
## source_link prop ypos
## nbr.val NA 10.0000000 10.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.9704127 2.7048094
## max NA 26.4005074 99.5147937
## range NA 25.4300948 96.8099843
## sum NA 100.0000000 426.7398151
## median NA 5.9284424 42.5729111
## mean NA 10.0000000 42.6739815
## SE.mean NA 2.9158952 10.8517928
## CI.mean.0.95 NA 6.5962131 24.5484607
## var NA 85.0244464 1177.6140634
## std.dev NA 9.2208702 34.3163819
## coef.var NA 0.9220870 0.8041523
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Guerrero (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Guerrero")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 14.04274 |
| 12.33417 |
| 4.10830 |
| 6.80950 |
| 0.88149 |
| 32.12708 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Atoyac de Álvarez 32.127080 32.127080 41.040408 41.040408
## San Pablo Atzompa 14.042740 46.169820 17.938754 58.979162
## Pochutla 12.334170 58.503990 15.756159 74.735321
## Acapulco de Juárez 7.071380 65.575370 9.033262 83.768583
## Chilpancingo de los Bravos 6.809500 72.384870 8.698726 92.467308
## Colonia Alborada 4.108300 76.493170 5.248106 97.715414
## Acapulco 0.906920 77.400090 1.158536 98.873950
## Chilpancingo de los Bravos 0.881490 78.281580 1.126050 100.000000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 11477
## 1 | 24
## 2 |
## 3 | 2
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 5538 9/16/13 <NA> <NA> Mexico MX Guerrero 1252
## 2 7479 10/3/15 Night <NA> Mexico MX Guerrero 1201
## 3 6269 10/18/14 <NA> <NA> Mexico MX Guerrero 1018
## 4 3834 7/22/11 <NA> <NA> Mexico MX Guerrero 165250
## 5 5543 9/16/13 <NA> <NA> Mexico MX Guerrero 165250
## 6 5534 9/16/13 15:30 <NA> Mexico MX Guerrero 21407
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 11477
## 1 | 24
## 2 |
## 3 | 2
stem(df_MX$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 114
## 0 | 77
## 1 | 24
## 1 |
## 2 |
## 2 |
## 3 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.88149 |
1 |
12.5 |
12.5 |
12.5 |
12.5 |
| 0.90692 |
1 |
12.5 |
12.5 |
25.0 |
25.0 |
| 4.1083 |
1 |
12.5 |
12.5 |
37.5 |
37.5 |
| 6.8095 |
1 |
12.5 |
12.5 |
50.0 |
50.0 |
| 7.07138 |
1 |
12.5 |
12.5 |
62.5 |
62.5 |
| 12.33417 |
1 |
12.5 |
12.5 |
75.0 |
75.0 |
| 14.04274 |
1 |
12.5 |
12.5 |
87.5 |
87.5 |
| 32.12708 |
1 |
12.5 |
12.5 |
100.0 |
100.0 |
| Total |
8 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 9 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 8
## $ % : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ val% : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ %cum : num 12.5 25 37.5 50 62.5 75 87.5 100 100
## $ val%cum: num 12.5 25 37.5 50 62.5 75 87.5 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.88149 |
1 |
| 0.90692 |
1 |
| 4.1083 |
1 |
| 6.8095 |
1 |
| 7.07138 |
1 |
| 12.33417 |
1 |
| 14.04274 |
1 |
| 32.12708 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.88149 8.88149 16.88149 24.88149 32.88149
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.881,8.88] |
4 |
0.5714286 |
4 |
| (8.88,16.9] |
2 |
0.2857143 |
6 |
| (16.9,24.9] |
0 |
0.0000000 |
6 |
| (24.9,32.9] |
1 |
0.1428571 |
7 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.881,8.88]",..: 1 2 3 4
## $ Freq : int 4 2 0 1
## $ Rel_Freq: num 0.571 0.286 0 0.143
## $ Cum_Freq: int 4 6 6 7
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.881,8.88] |
4 |
| (8.88,16.9] |
2 |
| (16.9,24.9] |
0 |
| (24.9,32.9] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 8.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.834000e+03 NA NA NA NA NA
## max 7.479000e+03 NA NA NA NA NA
## range 3.645000e+03 NA NA NA NA NA
## sum 4.719200e+04 NA NA NA NA NA
## median 5.540500e+03 NA NA NA NA NA
## mean 5.899000e+03 NA NA NA NA NA
## SE.mean 4.199559e+02 NA NA NA NA NA
## CI.mean.0.95 9.930378e+02 NA NA NA NA NA
## var 1.410903e+06 NA NA NA NA NA
## std.dev 1.187815e+03 NA NA NA NA NA
## coef.var 2.013586e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 8.000000e+00 NA 8.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 1.018000e+03 NA 0.881490 NA
## max NA 6.521360e+05 NA 32.127080 NA
## range NA 6.511180e+05 NA 31.245590 NA
## sum NA 1.659650e+06 NA 78.281580 NA
## median NA 9.332850e+04 NA 6.940440 NA
## mean NA 2.074563e+05 NA 9.785198 NA
## SE.mean NA 1.001192e+05 NA 3.611317 NA
## CI.mean.0.95 NA 2.367442e+05 NA 8.539408 NA
## var NA 8.019077e+10 NA 104.332887 NA
## std.dev NA 2.831797e+05 NA 10.214347 NA
## coef.var NA 1.365009e+00 NA 1.043857 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 8.00000000 8.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 16.86380000 -1.001681e+02 NA NA NA
## max 17.55860000 -9.850000e+01 NA NA NA
## range 0.69480000 1.668100e+00 NA NA NA
## sum 137.82300000 -7.963511e+02 NA NA NA
## median 17.28555000 -9.969710e+01 NA NA NA
## mean 17.22787500 -9.954389e+01 NA NA NA
## SE.mean 0.10831128 1.985507e-01 NA NA NA
## CI.mean.0.95 0.25611548 4.694978e-01 NA NA NA
## var 0.09385067 3.153790e-01 NA NA NA
## std.dev 0.30635057 5.615862e-01 NA NA NA
## coef.var 0.01778226 -5.641594e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4.000000 7.000000 NA
## nbr.null NA NA NA 3.000000 5.000000 NA
## nbr.na NA NA NA 4.000000 1.000000 NA
## min NA NA NA 0.000000 0.000000 NA
## max NA NA NA 4.000000 71.000000 NA
## range NA NA NA 4.000000 71.000000 NA
## sum NA NA NA 4.000000 75.000000 NA
## median NA NA NA 0.000000 0.000000 NA
## mean NA NA NA 1.000000 10.714286 NA
## SE.mean NA NA NA 1.000000 10.063404 NA
## CI.mean.0.95 NA NA NA 3.182446 24.624264 NA
## var NA NA NA 4.000000 708.904762 NA
## std.dev NA NA NA 2.000000 26.625265 NA
## coef.var NA NA NA 2.000000 2.485025 NA
## source_link prop ypos
## nbr.val NA 8.000000 8.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 1.126050 8.9693770
## max NA 41.040408 99.4207322
## range NA 39.914358 90.4513552
## sum NA 100.000000 425.6358903
## median NA 8.865994 45.7485752
## mean NA 12.500000 53.2044863
## SE.mean NA 4.613240 11.3394853
## CI.mean.0.95 NA 10.908579 26.8136220
## var NA 170.255856 1028.6714218
## std.dev NA 13.048213 32.0729079
## coef.var NA 1.043857 0.6028234
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Hidalgo (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Hidalgo")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Fontezuelas 9.782510 9.782510 93.815968 93.815968
## Tepeji de Ocampo 0.644830 10.427340 6.184032 100.000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 6
## 2 |
## 4 |
## 6 |
## 8 | 8
head(df_MX)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4874 5/26/13 <NA> <NA> Mexico MX Hidalgo 33196
## 2 5527 9/15/13 <NA> <NA> Mexico MX Hidalgo 1236
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 6
## 2 |
## 4 |
## 6 |
## 8 | 8
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 |
## 9 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.64483 |
1 |
50 |
50 |
50 |
50 |
| 9.78251 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.64483 5.64483 10.64483
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.645,5.64] |
0 |
0 |
0 |
| (5.64,10.6] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.645,5.64]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.645,5.64] |
0 |
| (5.64,10.6] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 4.874000e+03 NA NA NA NA NA
## max 5.527000e+03 NA NA NA NA NA
## range 6.530000e+02 NA NA NA NA NA
## sum 1.040100e+04 NA NA NA NA NA
## median 5.200500e+03 NA NA NA NA NA
## mean 5.200500e+03 NA NA NA NA NA
## SE.mean 3.265000e+02 NA NA NA NA NA
## CI.mean.0.95 4.148576e+03 NA NA NA NA NA
## var 2.132045e+05 NA NA NA NA NA
## std.dev 4.617407e+02 NA NA NA NA NA
## coef.var 8.878776e-02 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 2.000000e+00 NA 2.000000 NA 2.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 1.236000e+03 NA 0.644830 NA 19.90060000
## max NA 3.319600e+04 NA 9.782510 NA 20.54020000
## range NA 3.196000e+04 NA 9.137680 NA 0.63960000
## sum NA 3.443200e+04 NA 10.427340 NA 40.44080000
## median NA 1.721600e+04 NA 5.213670 NA 20.22040000
## mean NA 1.721600e+04 NA 5.213670 NA 20.22040000
## SE.mean NA 1.598000e+04 NA 4.568840 NA 0.31980000
## CI.mean.0.95 NA 2.030452e+05 NA 58.052616 NA 4.06344427
## var NA 5.107208e+08 NA 41.748598 NA 0.20454408
## std.dev NA 2.259913e+04 NA 6.461315 NA 0.45226550
## coef.var NA 1.312682e+00 NA 1.239303 NA 0.02236679
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.934170e+01 NA NA NA
## max -9.894920e+01 NA NA NA
## range 3.925000e-01 NA NA NA
## sum -1.982909e+02 NA NA NA
## median -9.914545e+01 NA NA NA
## mean -9.914545e+01 NA NA NA
## SE.mean 1.962500e-01 NA NA NA
## CI.mean.0.95 2.493593e+00 NA NA NA
## var 7.702812e-02 NA NA NA
## std.dev 2.775394e-01 NA NA NA
## coef.var -2.799316e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.0000000 NA
## nbr.null NA NA NA 0 0.0000000 NA
## nbr.na NA NA NA 2 0.0000000 NA
## min NA NA NA Inf 3.0000000 NA
## max NA NA NA -Inf 7.0000000 NA
## range NA NA NA -Inf 4.0000000 NA
## sum NA NA NA 0 10.0000000 NA
## median NA NA NA NA 5.0000000 NA
## mean NA NA NA NaN 5.0000000 NA
## SE.mean NA NA NA NA 2.0000000 NA
## CI.mean.0.95 NA NA NA NaN 25.4124095 NA
## var NA NA NA NA 8.0000000 NA
## std.dev NA NA NA NA 2.8284271 NA
## coef.var NA NA NA NA 0.5656854 NA
## source_link prop ypos
## nbr.val NA 2.000000 2.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 6.184032 3.092016
## max NA 93.815968 53.092016
## range NA 87.631937 50.000000
## sum NA 100.000000 56.184032
## median NA 50.000000 28.092016
## mean NA 50.000000 28.092016
## SE.mean NA 43.815968 25.000000
## CI.mean.0.95 NA 556.734665 317.655118
## var NA 3839.678175 1250.000000
## std.dev NA 61.965137 35.355339
## coef.var NA 1.239303 1.258555
boxplot(data, horizontal=TRUE, col='green')

Gráfico para México (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "México")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.40801 |
| 1.06048 |
| 0.66626 |
| 2.59637 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Zacazonapan 4.408010 4.408010 50.486192 50.486192
## San Francisco Chimalpa 2.596370 7.004380 29.736964 80.223156
## Villa Guerrero 1.060480 8.064860 12.145979 92.369135
## Toluca 0.666260 8.731120 7.630865 100.000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 7
## 1 | 1
## 2 | 6
## 3 |
## 4 | 4
head(df_MX)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1477 2/4/10 22:00 <NA> Mexico MX México 2968
## 2 2474 9/20/10 Late night <NA> Mexico MX México 9267
## 3 1469 2/5/10 <NA> <NA> Mexico MX México 505881
## 4 744 8/25/08 <NA> <NA> Mexico MX México 7182
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 7
## 1 | 1
## 2 | 6
## 3 |
## 4 | 4
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 7
## 1 | 1
## 1 |
## 2 |
## 2 | 6
## 3 |
## 3 |
## 4 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.66626 |
1 |
25 |
25 |
25 |
25 |
| 1.06048 |
1 |
25 |
25 |
50 |
50 |
| 2.59637 |
1 |
25 |
25 |
75 |
75 |
| 4.40801 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.66626 |
1 |
| 1.06048 |
1 |
| 2.59637 |
1 |
| 4.40801 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.66626 2.66626 4.66626
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.666,2.67] |
2 |
0.6666667 |
2 |
| (2.67,4.67] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.666,2.67]",..: 1 2
## $ Freq : int 2 1
## $ Rel_Freq: num 0.667 0.333
## $ Cum_Freq: int 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.666,2.67] |
2 |
| (2.67,4.67] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 7.440000e+02 NA NA NA NA NA
## max 2.474000e+03 NA NA NA NA NA
## range 1.730000e+03 NA NA NA NA NA
## sum 6.164000e+03 NA NA NA NA NA
## median 1.473000e+03 NA NA NA NA NA
## mean 1.541000e+03 NA NA NA NA NA
## SE.mean 3.553142e+02 NA NA NA NA NA
## CI.mean.0.95 1.130768e+03 NA NA NA NA NA
## var 5.049927e+05 NA NA NA NA NA
## std.dev 7.106284e+02 NA NA NA NA NA
## coef.var 4.611475e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 4.000000e+00 NA 4.0000000 NA 4.00000000
## nbr.null NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## min NA 2.968000e+03 NA 0.6662600 NA 18.96420000
## max NA 5.058810e+05 NA 4.4080100 NA 19.43600000
## range NA 5.029130e+05 NA 3.7417500 NA 0.47180000
## sum NA 5.252980e+05 NA 8.7311200 NA 76.88090000
## median NA 8.224500e+03 NA 1.8284250 NA 19.24035000
## mean NA 1.313245e+05 NA 2.1827800 NA 19.22022500
## SE.mean NA 1.248590e+05 NA 0.8505927 NA 0.09893547
## CI.mean.0.95 NA 3.973572e+05 NA 2.7069656 NA 0.31485682
## var NA 6.235912e+10 NA 2.8940318 NA 0.03915291
## std.dev NA 2.497181e+05 NA 1.7011854 NA 0.19787094
## coef.var NA 1.901535e+00 NA 0.7793664 NA 0.01029493
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.986540e+01 NA NA NA
## max -9.936500e+01 NA NA NA
## range 5.004000e-01 NA NA NA
## sum -3.985284e+02 NA NA NA
## median -9.964900e+01 NA NA NA
## mean -9.963210e+01 NA NA NA
## SE.mean 1.026123e-01 NA NA NA
## CI.mean.0.95 3.265580e-01 NA NA NA
## var 4.211711e-02 NA NA NA
## std.dev 2.052245e-01 NA NA NA
## coef.var -2.059824e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.0000000 NA
## nbr.null NA NA NA 0 0.0000000 NA
## nbr.na NA NA NA 4 0.0000000 NA
## min NA NA NA Inf 1.0000000 NA
## max NA NA NA -Inf 11.0000000 NA
## range NA NA NA -Inf 10.0000000 NA
## sum NA NA NA 0 31.0000000 NA
## median NA NA NA NA 9.5000000 NA
## mean NA NA NA NaN 7.7500000 NA
## SE.mean NA NA NA NA 2.3584953 NA
## CI.mean.0.95 NA NA NA NaN 7.5057846 NA
## var NA NA NA NA 22.2500000 NA
## std.dev NA NA NA NA 4.7169906 NA
## coef.var NA NA NA NA 0.6086439 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 7.6308652 25.2430960
## max NA 50.4861919 85.1315181
## range NA 42.8553267 59.8884221
## sum NA 100.0000000 233.3813989
## median NA 20.9414714 61.5033925
## mean NA 25.0000000 58.3453497
## SE.mean NA 9.7420801 12.5236274
## CI.mean.0.95 NA 31.0036469 39.8557719
## var NA 379.6325015 627.3649774
## std.dev NA 19.4841603 25.0472549
## coef.var NA 0.7793664 0.4292931
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Michoacán (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Michoacán")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.69508 |
| 3.42740 |
| 3.36905 |
| 4.18059 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Heroica Zitácuaro 4.18059 4.18059 32.99045 32.99045
## Jarácuaro 3.42740 7.60799 27.04678 60.03723
## Jarácuaro 3.36905 10.97704 26.58632 86.62355
## Mineral de Angangueo 1.69508 12.67212 13.37645 100.00000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 1 | 7
## 2 |
## 2 |
## 3 | 44
## 3 |
## 4 | 2
head(df_MX)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1472 2/5/10 <NA> <NA> Mexico MX Michoacán 4943
## 2 1471 2/5/10 <NA> <NA> Mexico MX Michoacán 2817
## 3 3835 7/22/11 <NA> <NA> Mexico MX Michoacán 2817
## 4 1470 2/5/10 <NA> <NA> Mexico MX Michoacán 78950
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 1 | 7
## 2 |
## 2 |
## 3 | 44
## 3 |
## 4 | 2
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 1 | 7
## 2 |
## 2 |
## 3 | 44
## 3 |
## 4 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.69508 |
1 |
25 |
25 |
25 |
25 |
| 3.36905 |
1 |
25 |
25 |
50 |
50 |
| 3.4274 |
1 |
25 |
25 |
75 |
75 |
| 4.18059 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 1.69508 |
1 |
| 3.36905 |
1 |
| 3.4274 |
1 |
| 4.18059 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.69508 2.69508 3.69508 4.69508
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.7,2.7] |
0 |
0.0000000 |
0 |
| (2.7,3.7] |
2 |
0.6666667 |
2 |
| (3.7,4.7] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(1.7,2.7]","(2.7,3.7]",..: 1 2 3
## $ Freq : int 0 2 1
## $ Rel_Freq: num 0 0.667 0.333
## $ Cum_Freq: int 0 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.7,2.7] |
0 |
| (2.7,3.7] |
2 |
| (3.7,4.7] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.470000e+03 NA NA NA NA NA
## max 3.835000e+03 NA NA NA NA NA
## range 2.365000e+03 NA NA NA NA NA
## sum 8.248000e+03 NA NA NA NA NA
## median 1.471500e+03 NA NA NA NA NA
## mean 2.062000e+03 NA NA NA NA NA
## SE.mean 5.910001e+02 NA NA NA NA NA
## CI.mean.0.95 1.880826e+03 NA NA NA NA NA
## var 1.397125e+06 NA NA NA NA NA
## std.dev 1.182000e+03 NA NA NA NA NA
## coef.var 5.732300e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.817000e+03 NA 1.6950800 NA
## max NA 7.895000e+04 NA 4.1805900 NA
## range NA 7.613300e+04 NA 2.4855100 NA
## sum NA 8.952700e+04 NA 12.6721200 NA
## median NA 3.880000e+03 NA 3.3982250 NA
## mean NA 2.238175e+04 NA 3.1680300 NA
## SE.mean NA 1.886274e+04 NA 0.5246063 NA
## CI.mean.0.95 NA 6.002966e+04 NA 1.6695314 NA
## var NA 1.423212e+09 NA 1.1008471 NA
## std.dev NA 3.772548e+04 NA 1.0492126 NA
## coef.var NA 1.685547e+00 NA 0.3311877 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000000 4.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 19.400000000 -1.017098e+02 NA NA NA
## max 19.616700000 -1.003000e+02 NA NA NA
## range 0.216700000 1.409800e+00 NA NA NA
## sum 78.164200000 -4.040788e+02 NA NA NA
## median 19.573750000 -1.010345e+02 NA NA NA
## mean 19.541050000 -1.010197e+02 NA NA NA
## SE.mean 0.048173065 3.965011e-01 NA NA NA
## CI.mean.0.95 0.153308192 1.261844e+00 NA NA NA
## var 0.009282577 6.288526e-01 NA NA NA
## std.dev 0.096346129 7.930023e-01 NA NA NA
## coef.var 0.004930448 -7.849977e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.0000000 NA
## nbr.null NA NA NA 0 1.0000000 NA
## nbr.na NA NA NA 4 0.0000000 NA
## min NA NA NA Inf 0.0000000 NA
## max NA NA NA -Inf 3.0000000 NA
## range NA NA NA -Inf 3.0000000 NA
## sum NA NA NA 0 6.0000000 NA
## median NA NA NA NA 1.5000000 NA
## mean NA NA NA NaN 1.5000000 NA
## SE.mean NA NA NA NA 0.6454972 NA
## CI.mean.0.95 NA NA NA NaN 2.0542603 NA
## var NA NA NA NA 1.6666667 NA
## std.dev NA NA NA NA 1.2909944 NA
## coef.var NA NA NA NA 0.8606630 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 13.3764516 6.6882258
## max NA 32.9904546 83.5047727
## range NA 19.6140030 76.8165469
## sum NA 100.0000000 170.8092253
## median NA 26.8165469 40.3081134
## mean NA 25.0000000 42.7023063
## SE.mean NA 4.1398464 16.6655412
## CI.mean.0.95 NA 13.1748389 53.0371901
## var NA 68.5533130 1110.9610564
## std.dev NA 8.2796928 33.3310824
## coef.var NA 0.3311877 0.7805453
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Nayarit (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Nayarit")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Jarretadera 21.80060 21.80060 52.89594 52.89594
## Puga 19.41353 41.21413 47.10406 100.00000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 19 | 4
## 19 |
## 20 |
## 20 |
## 21 |
## 21 | 8
head(df_MX)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 3836 7/22/11 <NA> <NA> Mexico MX Nayarit 6361
## 2 3639 6/22/11 <NA> <NA> Mexico MX Nayarit 4463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 19 | 4
## 19 |
## 20 |
## 20 |
## 21 |
## 21 | 8
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 19 | 4
## 19 |
## 20 |
## 20 |
## 21 |
## 21 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 19.41353 |
1 |
50 |
50 |
50 |
50 |
| 21.8006 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 19.41353 21.41353 23.41353
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (19.4,21.4] |
0 |
0 |
0 |
| (21.4,23.4] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(19.4,21.4]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (19.4,21.4] |
0 |
| (21.4,23.4] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.639000e+03 NA NA NA NA NA
## max 3.836000e+03 NA NA NA NA NA
## range 1.970000e+02 NA NA NA NA NA
## sum 7.475000e+03 NA NA NA NA NA
## median 3.737500e+03 NA NA NA NA NA
## mean 3.737500e+03 NA NA NA NA NA
## SE.mean 9.850000e+01 NA NA NA NA NA
## CI.mean.0.95 1.251561e+03 NA NA NA NA NA
## var 1.940450e+04 NA NA NA NA NA
## std.dev 1.393000e+02 NA NA NA NA NA
## coef.var 3.727091e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.00000000 NA
## nbr.null NA 0.000000e+00 NA 0.00000000 NA
## nbr.na NA 0.000000e+00 NA 0.00000000 NA
## min NA 4.463000e+03 NA 19.41353000 NA
## max NA 6.361000e+03 NA 21.80060000 NA
## range NA 1.898000e+03 NA 2.38707000 NA
## sum NA 1.082400e+04 NA 41.21413000 NA
## median NA 5.412000e+03 NA 20.60706500 NA
## mean NA 5.412000e+03 NA 20.60706500 NA
## SE.mean NA 9.490000e+02 NA 1.19353500 NA
## CI.mean.0.95 NA 1.205819e+04 NA 15.16530007 NA
## var NA 1.801202e+06 NA 2.84905159 NA
## std.dev NA 1.342089e+03 NA 1.68791338 NA
## coef.var NA 2.479839e-01 NA 0.08190945 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 21.759500000 -1.048381e+02 NA NA NA
## max 21.981400000 -1.048334e+02 NA NA NA
## range 0.221900000 4.700000e-03 NA NA NA
## sum 43.740900000 -2.096715e+02 NA NA NA
## median 21.870450000 -1.048357e+02 NA NA NA
## mean 21.870450000 -1.048357e+02 NA NA NA
## SE.mean 0.110950000 2.350000e-03 NA NA NA
## CI.mean.0.95 1.409753415 2.985958e-02 NA NA NA
## var 0.024619805 1.104500e-05 NA NA NA
## std.dev 0.156906995 3.323402e-03 NA NA NA
## coef.var 0.007174383 -3.170104e-05 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2 NA
## nbr.null NA NA NA 0 2 NA
## nbr.na NA NA NA 2 0 NA
## min NA NA NA Inf 0 NA
## max NA NA NA -Inf 0 NA
## range NA NA NA -Inf 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA NA 0 NA
## mean NA NA NA NaN 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link prop ypos
## nbr.val NA 2.00000000 2.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 47.10406358 23.5520318
## max NA 52.89593642 73.5520318
## range NA 5.79187284 50.0000000
## sum NA 100.00000000 97.1040636
## median NA 50.00000000 48.5520318
## mean NA 50.00000000 48.5520318
## SE.mean NA 2.89593642 25.0000000
## CI.mean.0.95 NA 36.79636103 317.6551184
## var NA 16.77289547 1250.0000000
## std.dev NA 4.09547256 35.3553391
## coef.var NA 0.08190945 0.7281948
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Oaxaca (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Oaxaca")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 24.67589 |
| 15.74984 |
| 16.16369 |
| 0.63550 |
| 0.28905 |
| 11.83490 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tanetze de Zaragoza 24.6758900 24.6758900 29.6060776 29.6060776
## Santiago Choapam 16.1636900 40.8395800 19.3931591 48.9992367
## Santo Domingo Tepuxtepec 15.7498400 56.5894200 18.8966228 67.8958595
## San Andrés Huayapam 11.8349000 68.4243200 14.1994865 82.0953460
## Paso Real de Sarabia 9.5682900 77.9926100 11.4800129 93.5753589
## Oaxaca 3.6468200 81.6394300 4.3754465 97.9508054
## Salina Cruz 0.7834000 82.4228300 0.9399216 98.8907270
## Santa María la Asunción 0.6355000 83.0583300 0.7624715 99.6531985
## San José Chiltepec 0.2890500 83.3473800 0.3468015 100.0000000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0114
## 0 |
## 1 | 02
## 1 | 66
## 2 |
## 2 | 5
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2524 9/30/10 Morning <NA> Mexico MX Oaxaca 1096
## 2 2514 9/28/10 4:00:00 <NA> Mexico MX Oaxaca 1813
## 3 2321 8/24/10 Morning <NA> Mexico MX Oaxaca 1099
## 4 2322 8/24/10 Morning <NA> Mexico MX Oaxaca 1701
## 5 3796 7/17/11 <NA> <NA> Mexico MX Oaxaca 3294
## 6 5526 9/14/13 <NA> <NA> Mexico MX Oaxaca 3630
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0114
## 0 |
## 1 | 02
## 1 | 66
## 2 |
## 2 | 5
stem(df_MX$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0114
## 0 |
## 1 | 02
## 1 | 66
## 2 |
## 2 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.28905 |
1 |
11.1 |
11.1 |
11.1 |
11.1 |
| 0.6355 |
1 |
11.1 |
11.1 |
22.2 |
22.2 |
| 0.7834 |
1 |
11.1 |
11.1 |
33.3 |
33.3 |
| 3.64682 |
1 |
11.1 |
11.1 |
44.4 |
44.4 |
| 9.56829 |
1 |
11.1 |
11.1 |
55.6 |
55.6 |
| 11.8349 |
1 |
11.1 |
11.1 |
66.7 |
66.7 |
| 15.74984 |
1 |
11.1 |
11.1 |
77.8 |
77.8 |
| 16.16369 |
1 |
11.1 |
11.1 |
88.9 |
88.9 |
| 24.67589 |
1 |
11.1 |
11.1 |
100.0 |
100.0 |
| Total |
9 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 10 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 9
## $ % : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ val% : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ %cum : num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
## $ val%cum: num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.28905 |
1 |
| 0.6355 |
1 |
| 0.7834 |
1 |
| 3.64682 |
1 |
| 9.56829 |
1 |
| 11.8349 |
1 |
| 15.74984 |
1 |
| 16.16369 |
1 |
| 24.67589 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.28905 5.28905 10.28905 15.28905 20.28905 25.28905
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.289,5.29] |
3 |
0.375 |
3 |
| (5.29,10.3] |
1 |
0.125 |
4 |
| (10.3,15.3] |
1 |
0.125 |
5 |
| (15.3,20.3] |
2 |
0.250 |
7 |
| (20.3,25.3] |
1 |
0.125 |
8 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.289,5.29]",..: 1 2 3 4 5
## $ Freq : int 3 1 1 2 1
## $ Rel_Freq: num 0.375 0.125 0.125 0.25 0.125
## $ Cum_Freq: int 3 4 5 7 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.289,5.29] |
3 |
| (5.29,10.3] |
1 |
| (10.3,15.3] |
1 |
| (15.3,20.3] |
2 |
| (20.3,25.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 9.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.321000e+03 NA NA NA NA NA
## max 5.526000e+03 NA NA NA NA NA
## range 3.205000e+03 NA NA NA NA NA
## sum 3.100700e+04 NA NA NA NA NA
## median 3.795000e+03 NA NA NA NA NA
## mean 3.445222e+03 NA NA NA NA NA
## SE.mean 3.703370e+02 NA NA NA NA NA
## CI.mean.0.95 8.539986e+02 NA NA NA NA NA
## var 1.234345e+06 NA NA NA NA NA
## std.dev 1.111011e+03 NA NA NA NA NA
## coef.var 3.224787e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 9.000000e+00 NA 9.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.019000e+03 NA 0.2890500 NA
## max NA 2.625660e+05 NA 24.6758900 NA
## range NA 2.615470e+05 NA 24.3868400 NA
## sum NA 3.498660e+05 NA 83.3473800 NA
## median NA 1.813000e+03 NA 9.5682900 NA
## mean NA 3.887400e+04 NA 9.2608200 NA
## SE.mean NA 2.905882e+04 NA 2.8683480 NA
## CI.mean.0.95 NA 6.700977e+04 NA 6.6144223 NA
## var NA 7.599737e+09 NA 74.0467814 NA
## std.dev NA 8.717647e+04 NA 8.6050440 NA
## coef.var NA 2.242539e+00 NA 0.9291881 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 9.00000000 9.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 16.18630000 -9.681330e+01 NA NA NA
## max 18.10600000 -9.500000e+01 NA NA NA
## range 1.91970000 1.813300e+00 NA NA NA
## sum 155.05720000 -8.648023e+02 NA NA NA
## median 17.18330000 -9.616740e+01 NA NA NA
## mean 17.22857778 -9.608914e+01 NA NA NA
## SE.mean 0.18515476 2.120154e-01 NA NA NA
## CI.mean.0.95 0.42696764 4.889083e-01 NA NA NA
## var 0.30854056 4.045547e-01 NA NA NA
## std.dev 0.55546428 6.360461e-01 NA NA NA
## coef.var 0.03224087 -6.619334e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 9.000000 NA
## nbr.null NA NA NA 0 2.000000 NA
## nbr.na NA NA NA 9 0.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 11.000000 NA
## range NA NA NA -Inf 11.000000 NA
## sum NA NA NA 0 23.000000 NA
## median NA NA NA NA 2.000000 NA
## mean NA NA NA NaN 2.555556 NA
## SE.mean NA NA NA NA 1.106937 NA
## CI.mean.0.95 NA NA NA NaN 2.552600 NA
## var NA NA NA NA 11.027778 NA
## std.dev NA NA NA NA 3.320810 NA
## coef.var NA NA NA NA 1.299447 NA
## source_link prop ypos
## nbr.val NA 9.0000000 9.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.3468015 14.8030388
## max NA 29.6060776 97.8122768
## range NA 29.2592761 83.0092380
## sum NA 100.0000000 596.6418141
## median NA 11.4800129 68.8317317
## mean NA 11.1111111 66.2935349
## SE.mean NA 3.4414375 8.6742485
## CI.mean.0.95 NA 7.9359691 20.0028529
## var NA 106.5914281 677.1832815
## std.dev NA 10.3243125 26.0227455
## coef.var NA 0.9291881 0.3925382
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puebla (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Puebla")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.68294 |
| 7.93258 |
| 0.30326 |
| 5.24855 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Xaltepuxtla 7.932580 7.932580 52.300438 52.300438
## Atzala 5.248550 13.181130 34.604311 86.904749
## Xochitlaxco (San Baltazar) 1.682940 14.864070 11.095822 98.000571
## Tlatlauquitepec 0.303260 15.167330 1.999429 100.000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 37
## 2 |
## 4 | 2
## 6 | 9
head(df_MX)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 201 8/23/07 <NA> <NA> Mexico MX Puebla 1414
## 2 284 9/28/07 <NA> <NA> Mexico MX Puebla 3761
## 3 5528 9/15/13 <NA> <NA> Mexico MX Puebla 9720
## 4 5539 9/16/13 <NA> <NA> Mexico MX Puebla 1123
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 37
## 2 |
## 4 | 2
## 6 | 9
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 3
## 1 | 7
## 2 |
## 3 |
## 4 |
## 5 | 2
## 6 |
## 7 | 9
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.30326 |
1 |
25 |
25 |
25 |
25 |
| 1.68294 |
1 |
25 |
25 |
50 |
50 |
| 5.24855 |
1 |
25 |
25 |
75 |
75 |
| 7.93258 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.30326 |
1 |
| 1.68294 |
1 |
| 5.24855 |
1 |
| 7.93258 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.30326 3.30326 6.30326 9.30326
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.303,3.3] |
1 |
0.3333333 |
1 |
| (3.3,6.3] |
1 |
0.3333333 |
2 |
| (6.3,9.3] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.303,3.3]",..: 1 2 3
## $ Freq : int 1 1 1
## $ Rel_Freq: num 0.333 0.333 0.333
## $ Cum_Freq: int 1 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.303,3.3] |
1 |
| (3.3,6.3] |
1 |
| (6.3,9.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.010000e+02 NA NA NA NA NA
## max 5.539000e+03 NA NA NA NA NA
## range 5.338000e+03 NA NA NA NA NA
## sum 1.155200e+04 NA NA NA NA NA
## median 2.906000e+03 NA NA NA NA NA
## mean 2.888000e+03 NA NA NA NA NA
## SE.mean 1.527476e+03 NA NA NA NA NA
## CI.mean.0.95 4.861110e+03 NA NA NA NA NA
## var 9.332729e+06 NA NA NA NA NA
## std.dev 3.054951e+03 NA NA NA NA NA
## coef.var 1.057809e+00 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.123000e+03 NA 0.3032600 NA
## max NA 9.720000e+03 NA 7.9325800 NA
## range NA 8.597000e+03 NA 7.6293200 NA
## sum NA 1.601800e+04 NA 15.1673300 NA
## median NA 2.587500e+03 NA 3.4657450 NA
## mean NA 4.004500e+03 NA 3.7918325 NA
## SE.mean NA 1.994575e+03 NA 1.7292913 NA
## CI.mean.0.95 NA 6.347629e+03 NA 5.5033768 NA
## var NA 1.591332e+07 NA 11.9617938 NA
## std.dev NA 3.989150e+03 NA 3.4585826 NA
## coef.var NA 9.961669e-01 NA 0.9121138 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.00000000 4.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 18.59380000 -9.855390e+01 NA NA NA
## max 20.20000000 -9.749970e+01 NA NA NA
## range 1.60620000 1.054200e+00 NA NA NA
## sum 78.57610000 -3.918036e+02 NA NA NA
## median 19.89115000 -9.787500e+01 NA NA NA
## mean 19.64402500 -9.795090e+01 NA NA NA
## SE.mean 0.35797760 2.198412e-01 NA NA NA
## CI.mean.0.95 1.13924448 6.996329e-01 NA NA NA
## var 0.51259184 1.933207e-01 NA NA NA
## std.dev 0.71595520 4.396825e-01 NA NA NA
## coef.var 0.03644646 -4.488805e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 4.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 3 0.000000 NA
## min NA NA NA 1 0.000000 NA
## max NA NA NA 1 6.000000 NA
## range NA NA NA 0 6.000000 NA
## sum NA NA NA 1 10.000000 NA
## median NA NA NA 1 2.000000 NA
## mean NA NA NA 1 2.500000 NA
## SE.mean NA NA NA NA 1.322876 NA
## CI.mean.0.95 NA NA NA NaN 4.209981 NA
## var NA NA NA NA 7.000000 NA
## std.dev NA NA NA NA 2.645751 NA
## coef.var NA NA NA NA 1.058301 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 1.9994290 5.5479112
## max NA 52.3004378 82.6978446
## range NA 50.3010088 77.1499334
## sum NA 100.0000000 189.8877719
## median NA 22.8500666 50.8210080
## mean NA 25.0000000 47.4719430
## SE.mean NA 11.4014221 16.8064484
## CI.mean.0.95 NA 36.2844136 53.4856197
## var NA 519.9697034 1129.8268351
## std.dev NA 22.8028442 33.6128969
## coef.var NA 0.9121138 0.7080582
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Tabasco (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Tabasco")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 15.22260 |
| 4.32007 |
| 8.93271 |
| 4.81680 |
| 2.15703 |
| 4.19108 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tenosique de Pino Suárez 15.222600 15.222600 38.401838 38.401838
## Olcuatitán 8.932710 24.155310 22.534421 60.936260
## Libertad 4.816800 28.972110 12.151273 73.087533
## Olcuatitán 4.320070 33.292180 10.898180 83.985713
## Buenavista 4.191080 37.483260 10.572778 94.558491
## Huapinol 2.157030 39.640290 5.441509 100.000000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 244
## 0 | 59
## 1 |
## 1 | 5
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7518 10/15/15 <NA> <NA> Mexico MX Tabasco 32415
## 2 5693 12/16/13 <NA> <NA> Mexico MX Tabasco 1732
## 3 7469 12/16/15 <NA> <NA> Mexico MX Tabasco 1732
## 4 7430 10/7/15 <NA> <NA> Mexico MX Tabasco 1042
## 5 7431 10/7/15 <NA> <NA> Mexico MX Tabasco 2500
## 6 342 10/31/07 <NA> <NA> Mexico MX Tabasco 4468
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 244
## 0 | 59
## 1 |
## 1 | 5
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 2
## 4 | 238
## 6 |
## 8 | 9
## 10 |
## 12 |
## 14 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.15703 |
1 |
16.7 |
16.7 |
16.7 |
16.7 |
| 4.19108 |
1 |
16.7 |
16.7 |
33.3 |
33.3 |
| 4.32007 |
1 |
16.7 |
16.7 |
50.0 |
50.0 |
| 4.8168 |
1 |
16.7 |
16.7 |
66.7 |
66.7 |
| 8.93271 |
1 |
16.7 |
16.7 |
83.3 |
83.3 |
| 15.2226 |
1 |
16.7 |
16.7 |
100.0 |
100.0 |
| Total |
6 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 7 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 6
## $ % : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ val% : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ %cum : num 16.7 33.3 50 66.7 83.3 100 100
## $ val%cum: num 16.7 33.3 50 66.7 83.3 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 2.15703 |
1 |
| 4.19108 |
1 |
| 4.32007 |
1 |
| 4.8168 |
1 |
| 8.93271 |
1 |
| 15.2226 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.15703 7.15703 12.15703 17.15703
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (2.16,7.16] |
3 |
0.6 |
3 |
| (7.16,12.2] |
1 |
0.2 |
4 |
| (12.2,17.2] |
1 |
0.2 |
5 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(2.16,7.16]",..: 1 2 3
## $ Freq : int 3 1 1
## $ Rel_Freq: num 0.6 0.2 0.2
## $ Cum_Freq: int 3 4 5
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (2.16,7.16] |
3 |
| (7.16,12.2] |
1 |
| (12.2,17.2] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 6.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.420000e+02 NA NA NA NA NA
## max 7.518000e+03 NA NA NA NA NA
## range 7.176000e+03 NA NA NA NA NA
## sum 3.588300e+04 NA NA NA NA NA
## median 7.430500e+03 NA NA NA NA NA
## mean 5.980500e+03 NA NA NA NA NA
## SE.mean 1.164186e+03 NA NA NA NA NA
## CI.mean.0.95 2.992636e+03 NA NA NA NA NA
## var 8.131976e+06 NA NA NA NA NA
## std.dev 2.851662e+03 NA NA NA NA NA
## coef.var 4.768267e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 6.000000e+00 NA 6.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.042000e+03 NA 2.1570300 NA
## max NA 3.241500e+04 NA 15.2226000 NA
## range NA 3.137300e+04 NA 13.0655700 NA
## sum NA 4.388900e+04 NA 39.6402900 NA
## median NA 2.116000e+03 NA 4.5684350 NA
## mean NA 7.314833e+03 NA 6.6067150 NA
## SE.mean NA 5.043114e+03 NA 1.9468886 NA
## CI.mean.0.95 NA 1.296374e+04 NA 5.0046365 NA
## var NA 1.525980e+08 NA 22.7422514 NA
## std.dev NA 1.235306e+04 NA 4.7688837 NA
## coef.var NA 1.688768e+00 NA 0.7218237 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 6.00000000 6.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 17.37680000 -9.297900e+01 NA NA NA
## max 18.23410000 -9.129880e+01 NA NA NA
## range 0.85730000 1.680200e+00 NA NA NA
## sum 107.03190000 -5.553608e+02 NA NA NA
## median 17.92965000 -9.281700e+01 NA NA NA
## mean 17.83865000 -9.256013e+01 NA NA NA
## SE.mean 0.15006970 2.595921e-01 NA NA NA
## CI.mean.0.95 0.38576645 6.673029e-01 NA NA NA
## var 0.13512549 4.043285e-01 NA NA NA
## std.dev 0.36759419 6.358683e-01 NA NA NA
## coef.var 0.02060662 -6.869786e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 5.000000 NA
## nbr.null NA NA NA 4 4.000000 NA
## nbr.na NA NA NA 2 1.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 18.000000 NA
## range NA NA NA 0 18.000000 NA
## sum NA NA NA 0 18.000000 NA
## median NA NA NA 0 0.000000 NA
## mean NA NA NA 0 3.600000 NA
## SE.mean NA NA NA 0 3.600000 NA
## CI.mean.0.95 NA NA NA 0 9.995202 NA
## var NA NA NA 0 64.800000 NA
## std.dev NA NA NA 0 8.049845 NA
## coef.var NA NA NA NaN 2.236068 NA
## source_link prop ypos
## nbr.val NA 6.0000000 6.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 5.4415091 19.2009191
## max NA 38.4018381 94.7136108
## range NA 32.9603290 75.5126918
## sum NA 100.0000000 382.9492292
## median NA 11.5247265 69.2386521
## mean NA 16.6666667 63.8248715
## SE.mean NA 4.9113884 11.6575332
## CI.mean.0.95 NA 12.6251258 29.9666431
## var NA 144.7304161 815.3884811
## std.dev NA 12.0303955 28.5550080
## coef.var NA 0.7218237 0.4473962
boxplot(data, horizontal=TRUE, col='green')

Gráfico para The Federal District (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "The Federal District")
knitr::kable(head(df_MX))
| 975 |
1/22/09 |
NA |
NA |
Mexico |
MX |
The Federal District |
12294193 |
Mexico City |
0.15208 |
NA |
19.4271 |
-99.1276 |
(19.427099999999999, -99.127600000000001) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
2 |
NA |
NA |
| 1167 |
9/15/09 |
NA |
NA |
Mexico |
MX |
The Federal District |
228927 |
Magdalena Contreras |
3.49173 |
NA |
19.3161 |
-99.2398 |
(19.316099999999999, -99.239800000000002) |
Landslide |
Mudslide |
Medium |
Downpour |
NA |
NA |
3 |
NA |
http://edition.cnn.com/2009/WORLD/americas/09/16/mexico.mudslide.deaths/ |
| 2191 |
8/2/10 |
NA |
NA |
Mexico |
MX |
The Federal District |
12294193 |
Mexico City |
0.15208 |
NA |
19.4270 |
-99.1276 |
(19.427, -99.127600000000001) |
Landslide |
Mudslide |
Medium |
Construction |
NA |
NA |
2 |
NA |
NA |
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Magdalena Contreras 3.491730 3.491730 91.987123 91.987123
## Mexico City 0.152080 3.643810 4.006439 95.993561
## Mexico City 0.152080 3.795890 4.006439 100.000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 22
## 1 |
## 2 |
## 3 | 5
head(df_MX)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 975 1/22/09 <NA> <NA> Mexico MX The F~ 12294193
## 2 2191 8/2/10 <NA> <NA> Mexico MX The F~ 12294193
## 3 1167 9/15/09 <NA> <NA> Mexico MX The F~ 228927
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
| 975 |
1/22/09 |
NA |
NA |
Mexico |
MX |
The Federal District |
12294193 |
Mexico City |
0.15208 |
NA |
19.4271 |
-99.1276 |
(19.427099999999999, -99.127600000000001) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
2 |
NA |
NA |
4.006438 |
2.003219 |
| 2191 |
8/2/10 |
NA |
NA |
Mexico |
MX |
The Federal District |
12294193 |
Mexico City |
0.15208 |
NA |
19.4270 |
-99.1276 |
(19.427, -99.127600000000001) |
Landslide |
Mudslide |
Medium |
Construction |
NA |
NA |
2 |
NA |
NA |
4.006438 |
6.009658 |
| 1167 |
9/15/09 |
NA |
NA |
Mexico |
MX |
The Federal District |
228927 |
Magdalena Contreras |
3.49173 |
NA |
19.3161 |
-99.2398 |
(19.316099999999999, -99.239800000000002) |
Landslide |
Mudslide |
Medium |
Downpour |
NA |
NA |
3 |
NA |
http://edition.cnn.com/2009/WORLD/americas/09/16/mexico.mudslide.deaths/ |
91.987123 |
54.006439 |
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 22
## 1 |
## 2 |
## 3 | 5
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 22
## 0 |
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.15208 |
2 |
66.7 |
66.7 |
66.7 |
66.7 |
| 3.49173 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 2 1 3
## $ % : num 66.7 33.3 100
## $ val% : num 66.7 33.3 100
## $ %cum : num 66.7 100 100
## $ val%cum: num 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.15208 2.15208 4.15208
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.152,2.15] |
0 |
0 |
0 |
| (2.15,4.15] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.152,2.15]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.152,2.15] |
0 |
| (2.15,4.15] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 9.750000e+02 NA NA NA NA NA
## max 2.191000e+03 NA NA NA NA NA
## range 1.216000e+03 NA NA NA NA NA
## sum 4.333000e+03 NA NA NA NA NA
## median 1.167000e+03 NA NA NA NA NA
## mean 1.444333e+03 NA NA NA NA NA
## SE.mean 3.774252e+02 NA NA NA NA NA
## CI.mean.0.95 1.623930e+03 NA NA NA NA NA
## var 4.273493e+05 NA NA NA NA NA
## std.dev 6.537196e+02 NA NA NA NA NA
## coef.var 4.526099e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.000000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000000
## min NA 2.289270e+05 NA 0.152080 NA 19.316100000
## max NA 1.229419e+07 NA 3.491730 NA 19.427100000
## range NA 1.206527e+07 NA 3.339650 NA 0.111000000
## sum NA 2.481731e+07 NA 3.795890 NA 58.170200000
## median NA 1.229419e+07 NA 0.152080 NA 19.427000000
## mean NA 8.272438e+06 NA 1.265297 NA 19.390066667
## SE.mean NA 4.021755e+06 NA 1.113217 NA 0.036983345
## CI.mean.0.95 NA 1.730422e+07 NA 4.789785 NA 0.159126489
## var NA 4.852355e+13 NA 3.717754 NA 0.004103303
## std.dev NA 6.965885e+06 NA 1.928148 NA 0.064057032
## coef.var NA 8.420595e-01 NA 1.523870 NA 0.003303600
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.923980e+01 NA NA NA
## max -9.912760e+01 NA NA NA
## range 1.122000e-01 NA NA NA
## sum -2.974950e+02 NA NA NA
## median -9.912760e+01 NA NA NA
## mean -9.916500e+01 NA NA NA
## SE.mean 3.740000e-02 NA NA NA
## CI.mean.0.95 1.609192e-01 NA NA NA
## var 4.196280e-03 NA NA NA
## std.dev 6.477870e-02 NA NA NA
## coef.var -6.532416e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 3.0000000 NA
## nbr.null NA NA NA 0 0.0000000 NA
## nbr.na NA NA NA 3 0.0000000 NA
## min NA NA NA Inf 2.0000000 NA
## max NA NA NA -Inf 3.0000000 NA
## range NA NA NA -Inf 1.0000000 NA
## sum NA NA NA 0 7.0000000 NA
## median NA NA NA NA 2.0000000 NA
## mean NA NA NA NaN 2.3333333 NA
## SE.mean NA NA NA NA 0.3333333 NA
## CI.mean.0.95 NA NA NA NaN 1.4342176 NA
## var NA NA NA NA 0.3333333 NA
## std.dev NA NA NA NA 0.5773503 NA
## coef.var NA NA NA NA 0.2474358 NA
## source_link prop ypos
## nbr.val NA 3.000000 3.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 4.006439 2.003219
## max NA 91.987123 54.006439
## range NA 87.980684 52.003219
## sum NA 100.000000 62.019316
## median NA 4.006439 6.009658
## mean NA 33.333333 20.673105
## SE.mean NA 29.326895 16.706747
## CI.mean.0.95 NA 126.183444 71.883332
## var NA 2580.200274 837.346221
## std.dev NA 50.795672 28.936935
## coef.var NA 1.523870 1.399738
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Veracruz (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Veracruz")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Cruz Verde 2.47800 2.47800 56.15024 56.15024
## Chocaman 1.93516 4.41316 43.84976 100.00000
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 18 | 4
## 20 |
## 22 |
## 24 | 8
head(df_MX)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7445 9/17/15 <NA> <NA> Mexico MX Veracruz 1005
## 2 6688 1/5/14 Night <NA> Mexico MX Veracruz 9277
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 18 | 4
## 20 |
## 22 |
## 24 | 8
stem(df_MX$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 19 | 4
## 20 |
## 21 |
## 22 |
## 23 |
## 24 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.93516 |
1 |
50 |
50 |
50 |
50 |
| 2.478 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.93516 2.93516
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ distance: Factor w/ 1 level "(1.94,2.94]": 1
## $ Freq : int 1
## $ Rel_Freq: num 1
## $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.688000e+03 NA NA NA NA NA
## max 7.445000e+03 NA NA NA NA NA
## range 7.570000e+02 NA NA NA NA NA
## sum 1.413300e+04 NA NA NA NA NA
## median 7.066500e+03 NA NA NA NA NA
## mean 7.066500e+03 NA NA NA NA NA
## SE.mean 3.785000e+02 NA NA NA NA NA
## CI.mean.0.95 4.809298e+03 NA NA NA NA NA
## var 2.865245e+05 NA NA NA NA NA
## std.dev 5.352798e+02 NA NA NA NA NA
## coef.var 7.574893e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.005000e+03 NA 1.9351600 NA
## max NA 9.277000e+03 NA 2.4780000 NA
## range NA 8.272000e+03 NA 0.5428400 NA
## sum NA 1.028200e+04 NA 4.4131600 NA
## median NA 5.141000e+03 NA 2.2065800 NA
## mean NA 5.141000e+03 NA 2.2065800 NA
## SE.mean NA 4.136000e+03 NA 0.2714200 NA
## CI.mean.0.95 NA 5.255286e+04 NA 3.4487181 NA
## var NA 3.421299e+07 NA 0.1473376 NA
## std.dev NA 5.849187e+03 NA 0.3838458 NA
## coef.var NA 1.137753e+00 NA 0.1739551 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 19.029400000 -9.713170e+01 NA NA NA
## max 19.133100000 -9.703550e+01 NA NA NA
## range 0.103700000 9.620000e-02 NA NA NA
## sum 38.162500000 -1.941672e+02 NA NA NA
## median 19.081250000 -9.708360e+01 NA NA NA
## mean 19.081250000 -9.708360e+01 NA NA NA
## SE.mean 0.051850000 4.810000e-02 NA NA NA
## CI.mean.0.95 0.658816716 6.111684e-01 NA NA NA
## var 0.005376845 4.627220e-03 NA NA NA
## std.dev 0.073326973 6.802367e-02 NA NA NA
## coef.var 0.003842881 -7.006711e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2.000000 NA
## nbr.null NA NA NA 2 1.000000 NA
## nbr.na NA NA NA 0 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 6.000000 NA
## range NA NA NA 0 6.000000 NA
## sum NA NA NA 0 6.000000 NA
## median NA NA NA 0 3.000000 NA
## mean NA NA NA 0 3.000000 NA
## SE.mean NA NA NA 0 3.000000 NA
## CI.mean.0.95 NA NA NA 0 38.118614 NA
## var NA NA NA 0 18.000000 NA
## std.dev NA NA NA 0 4.242641 NA
## coef.var NA NA NA NaN 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.0000000 2.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 43.8497584 28.0751208
## max NA 56.1502416 78.0751208
## range NA 12.3004831 50.0000000
## sum NA 100.0000000 106.1502416
## median NA 50.0000000 53.0751208
## mean NA 50.0000000 53.0751208
## SE.mean NA 6.1502416 25.0000000
## CI.mean.0.95 NA 78.1462283 317.6551184
## var NA 75.6509423 1250.0000000
## std.dev NA 8.6977550 35.3553391
## coef.var NA 0.1739551 0.6661377
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Veracruz-Llave (Mexico)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_MX <- subset(df, country_name == "Mexico")
knitr::kable(head(df_MX))
library(dplyr)
df_MX <- subset(df, state == "Veracruz-Llave")
knitr::kable(head(df_MX))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_MX, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_MX <- df_MX %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_MX$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_MX, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_MX$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 3.73160 |
| 1.27837 |
| 4.51820 |
| 9.51003 |
| 0.09971 |
| 8.28739 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_MX$distance
names(distance) <- df_MX$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage
## Laguna Chica (Pueblo Nuevo) 9.5100300 9.5100300 29.4329486
## El Hatito 8.2873900 17.7974200 25.6489542
## Tetlaxco 4.5182000 22.3156200 13.9835467
## Yecuatla 3.7316000 26.0472200 11.5490688
## Altotonga 2.8538200 28.9010400 8.8323946
## Coatzintla 1.5298300 30.4308700 4.7347283
## Xalapa de Enríquez 1.2783700 31.7092400 3.9564753
## Altotonga 0.5018800 32.2111200 1.5532872
## Heroica Coscomatepec de Bravo 0.0997100 32.3108300 0.3085962
##
## Pareto chart analysis for distance
## Cum.Percent.
## Laguna Chica (Pueblo Nuevo) 29.4329486
## El Hatito 55.0819029
## Tetlaxco 69.0654496
## Yecuatla 80.6145184
## Altotonga 89.4469130
## Coatzintla 94.1816413
## Xalapa de Enríquez 98.1381165
## Altotonga 99.6914038
## Heroica Coscomatepec de Bravo 100.0000000
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 1535
## 2 | 97
## 4 | 5
## 6 |
## 8 | 35
head(df_MX)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 5406 8/26/13 <NA> <NA> Mexico MX Verac~ 3198
## 2 3684 7/1/11 <NA> <NA> Mexico MX Verac~ 425148
## 3 5542 9/16/13 <NA> <NA> Mexico MX Verac~ 1543
## 4 115 7/4/07 <NA> <NA> Mexico MX Verac~ 1947
## 5 5486 9/9/13 Night <NA> Mexico MX Verac~ 12920
## 6 2438 9/17/10 <NA> <NA> Mexico MX Verac~ 1324
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_MX))
stem(df_MX$"distance")
##
## The decimal point is at the |
##
## 0 | 1535
## 2 | 97
## 4 | 5
## 6 |
## 8 | 35
stem(df_MX$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 15
## 1 | 35
## 2 | 9
## 3 | 7
## 4 | 5
## 5 |
## 6 |
## 7 |
## 8 | 3
## 9 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.09971 |
1 |
11.1 |
11.1 |
11.1 |
11.1 |
| 0.50188 |
1 |
11.1 |
11.1 |
22.2 |
22.2 |
| 1.27837 |
1 |
11.1 |
11.1 |
33.3 |
33.3 |
| 1.52983 |
1 |
11.1 |
11.1 |
44.4 |
44.4 |
| 2.85382 |
1 |
11.1 |
11.1 |
55.6 |
55.6 |
| 3.7316 |
1 |
11.1 |
11.1 |
66.7 |
66.7 |
| 4.5182 |
1 |
11.1 |
11.1 |
77.8 |
77.8 |
| 8.28739 |
1 |
11.1 |
11.1 |
88.9 |
88.9 |
| 9.51003 |
1 |
11.1 |
11.1 |
100.0 |
100.0 |
| Total |
9 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 10 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 9
## $ % : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ val% : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ %cum : num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
## $ val%cum: num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.09971 |
1 |
| 0.50188 |
1 |
| 1.27837 |
1 |
| 1.52983 |
1 |
| 2.85382 |
1 |
| 3.7316 |
1 |
| 4.5182 |
1 |
| 8.28739 |
1 |
| 9.51003 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.09971 2.09971 4.09971 6.09971 8.09971 10.09971
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0997,2.1] |
3 |
0.375 |
3 |
| (2.1,4.1] |
2 |
0.250 |
5 |
| (4.1,6.1] |
1 |
0.125 |
6 |
| (6.1,8.1] |
0 |
0.000 |
6 |
| (8.1,10.1] |
2 |
0.250 |
8 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.0997,2.1]",..: 1 2 3 4 5
## $ Freq : int 3 2 1 0 2
## $ Rel_Freq: num 0.375 0.25 0.125 0 0.25
## $ Cum_Freq: int 3 5 6 6 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0997,2.1] |
3 |
| (2.1,4.1] |
2 |
| (4.1,6.1] |
1 |
| (6.1,8.1] |
0 |
| (8.1,10.1] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_MX)
## id date time continent_code country_name country_code
## nbr.val 9.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.150000e+02 NA NA NA NA NA
## max 5.542000e+03 NA NA NA NA NA
## range 5.427000e+03 NA NA NA NA NA
## sum 3.901100e+04 NA NA NA NA NA
## median 5.405000e+03 NA NA NA NA NA
## mean 4.334556e+03 NA NA NA NA NA
## SE.mean 6.398549e+02 NA NA NA NA NA
## CI.mean.0.95 1.475508e+03 NA NA NA NA NA
## var 3.684729e+06 NA NA NA NA NA
## std.dev 1.919565e+03 NA NA NA NA NA
## coef.var 4.428516e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 9.000000e+00 NA 9.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.324000e+03 NA 0.0997100 NA
## max NA 4.251480e+05 NA 9.5100300 NA
## range NA 4.238240e+05 NA 9.4103200 NA
## sum NA 5.082870e+05 NA 32.3108300 NA
## median NA 1.292000e+04 NA 2.8538200 NA
## mean NA 5.647633e+04 NA 3.5900922 NA
## SE.mean NA 4.619791e+04 NA 1.1170543 NA
## CI.mean.0.95 NA 1.065326e+05 NA 2.5759318 NA
## var NA 1.920822e+10 NA 11.2302922 NA
## std.dev NA 1.385937e+05 NA 3.3511628 NA
## coef.var NA 2.454014e+00 NA 0.9334476 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 9.00000000 9.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 18.53690000 -9.746470e+01 NA NA NA
## max 20.50040000 -9.631490e+01 NA NA NA
## range 1.96350000 1.149800e+00 NA NA NA
## sum 175.38730000 -8.729500e+02 NA NA NA
## median 19.54260000 -9.704740e+01 NA NA NA
## mean 19.48747778 -9.699444e+01 NA NA NA
## SE.mean 0.19084238 1.114103e-01 NA NA NA
## CI.mean.0.95 0.44008331 2.569127e-01 NA NA NA
## var 0.32778731 1.117104e-01 NA NA NA
## std.dev 0.57252713 3.342310e-01 NA NA NA
## coef.var 0.02937923 -3.445878e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 2.0000000 9.0000000
## nbr.null NA NA NA 0.0000000 1.0000000
## nbr.na NA NA NA 7.0000000 0.0000000
## min NA NA NA 2.0000000 0.0000000
## max NA NA NA 8.0000000 14.0000000
## range NA NA NA 6.0000000 14.0000000
## sum NA NA NA 10.0000000 61.0000000
## median NA NA NA 5.0000000 7.0000000
## mean NA NA NA 5.0000000 6.7777778
## SE.mean NA NA NA 3.0000000 1.8240506
## CI.mean.0.95 NA NA NA 38.1186142 4.2062682
## var NA NA NA 18.0000000 29.9444444
## std.dev NA NA NA 4.2426407 5.4721517
## coef.var NA NA NA 0.8485281 0.8073666
## source_name source_link prop ypos
## nbr.val NA NA 9.0000000 9.0000000
## nbr.null NA NA 0.0000000 0.0000000
## nbr.na NA NA 0.0000000 0.0000000
## min NA NA 0.3085962 5.7745344
## max NA NA 29.4329486 99.2233564
## range NA NA 29.1243524 93.4488220
## sum NA NA 100.0000000 497.6369997
## median NA NA 8.8323946 59.0763376
## mean NA NA 11.1111111 55.2930000
## SE.mean NA NA 3.4572132 11.8683450
## CI.mean.0.95 NA NA 7.9723479 27.3684527
## var NA NA 107.5709066 1267.7185196
## std.dev NA NA 10.3716395 35.6050350
## coef.var NA NA 0.9334476 0.6439339
boxplot(data, horizontal=TRUE, col='green')

Gráfico para El Salvador
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.23875 |
| 3.22235 |
| 0.49346 |
| 8.83210 |
| 1.15810 |
| 7.60946 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Miguel 1.006695e+01 1.006695e+01 8.974011e+00 8.974011e+00
## San Miguel 9.972270e+00 2.003922e+01 8.889610e+00 1.786362e+01
## La Libertad 9.875530e+00 2.991475e+01 8.803373e+00 2.666699e+01
## Santa Ana 8.832100e+00 3.874685e+01 7.873225e+00 3.454022e+01
## Cabañas 8.825250e+00 4.757210e+01 7.867118e+00 4.240734e+01
## San Vicente 7.609460e+00 5.518156e+01 6.783323e+00 4.919066e+01
## San Miguel 6.945360e+00 6.212692e+01 6.191323e+00 5.538198e+01
## San Vicente 5.907260e+00 6.803418e+01 5.265926e+00 6.064791e+01
## Ahuachapán 5.299010e+00 7.333319e+01 4.723712e+00 6.537162e+01
## La Libertad 4.964160e+00 7.829735e+01 4.425216e+00 6.979684e+01
## La Libertad 4.862190e+00 8.315954e+01 4.334316e+00 7.413115e+01
## La Libertad 4.677220e+00 8.783676e+01 4.169428e+00 7.830058e+01
## La Libertad 4.606550e+00 9.244331e+01 4.106430e+00 8.240701e+01
## Sonsonate 4.238750e+00 9.668206e+01 3.778561e+00 8.618557e+01
## San Vicente 4.031250e+00 1.007133e+02 3.593589e+00 8.977916e+01
## San Salvador 3.252270e+00 1.039656e+02 2.899181e+00 9.267834e+01
## Sonsonate 3.222350e+00 1.071879e+02 2.872509e+00 9.555085e+01
## San Salvador 3.017390e+00 1.102053e+02 2.689801e+00 9.824065e+01
## Santa Ana 1.158100e+00 1.113634e+02 1.032368e+00 9.927302e+01
## Sonsonate 4.934600e-01 1.118569e+02 4.398865e-01 9.971291e+01
## La Paz 3.193300e-01 1.121762e+02 2.846613e-01 9.999757e+01
## Ahuachapán 2.730000e-03 1.121789e+02 2.433612e-03 1.000000e+02
stem(df_ES$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000133344
## 0 | 5555567899
## 1 | 000
head(df_ES)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6681 10/15/14 <NA> <NA> El Salvador SV Sons~ 7358
## 2 6682 10/15/14 <NA> <NA> El Salvador SV Sons~ 15446
## 3 7442 10/19/15 <NA> <NA> El Salvador SV Sons~ 9936
## 4 6685 10/12/14 <NA> <NA> El Salvador SV Sant~ 5773
## 5 7438 7/18/15 <NA> <NA> El Salvador SV Sant~ 10095
## 6 1285 11/8/09 <NA> <NA> El Salvador SV San ~ 41504
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000133344
## 0 | 5555567899
## 1 | 000
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0352
## 2 | 023
## 4 | 02679039
## 6 | 96
## 8 | 889
## 10 | 01
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.00273 |
1 |
4.5 |
4.5 |
4.5 |
4.5 |
| 0.31933 |
1 |
4.5 |
4.5 |
9.1 |
9.1 |
| 0.49346 |
1 |
4.5 |
4.5 |
13.6 |
13.6 |
| 1.1581 |
1 |
4.5 |
4.5 |
18.2 |
18.2 |
| 3.01739 |
1 |
4.5 |
4.5 |
22.7 |
22.7 |
| 3.22235 |
1 |
4.5 |
4.5 |
27.3 |
27.3 |
| 3.25227 |
1 |
4.5 |
4.5 |
31.8 |
31.8 |
| 4.03125 |
1 |
4.5 |
4.5 |
36.4 |
36.4 |
| 4.23875 |
1 |
4.5 |
4.5 |
40.9 |
40.9 |
| 4.60655 |
1 |
4.5 |
4.5 |
45.5 |
45.5 |
| 4.67722 |
1 |
4.5 |
4.5 |
50.0 |
50.0 |
| 4.86219 |
1 |
4.5 |
4.5 |
54.5 |
54.5 |
| 4.96416 |
1 |
4.5 |
4.5 |
59.1 |
59.1 |
| 5.29901 |
1 |
4.5 |
4.5 |
63.6 |
63.6 |
| 5.90726 |
1 |
4.5 |
4.5 |
68.2 |
68.2 |
| 6.94536 |
1 |
4.5 |
4.5 |
72.7 |
72.7 |
| 7.60946 |
1 |
4.5 |
4.5 |
77.3 |
77.3 |
| 8.82525 |
1 |
4.5 |
4.5 |
81.8 |
81.8 |
| 8.8321 |
1 |
4.5 |
4.5 |
86.4 |
86.4 |
| 9.87553 |
1 |
4.5 |
4.5 |
90.9 |
90.9 |
| 9.97227 |
1 |
4.5 |
4.5 |
95.5 |
95.5 |
| 10.06695 |
1 |
4.5 |
4.5 |
100.0 |
100.0 |
| Total |
22 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 23 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
## $ val% : num 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
## $ %cum : num 4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
## $ val%cum: num 4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.00273 |
1 |
| 0.31933 |
1 |
| 0.49346 |
1 |
| 1.1581 |
1 |
| 3.01739 |
1 |
| 3.22235 |
1 |
| 3.25227 |
1 |
| 4.03125 |
1 |
| 4.23875 |
1 |
| 4.60655 |
1 |
| 4.67722 |
1 |
| 4.86219 |
1 |
| 4.96416 |
1 |
| 5.29901 |
1 |
| 5.90726 |
1 |
| 6.94536 |
1 |
| 7.60946 |
1 |
| 8.82525 |
1 |
| 8.8321 |
1 |
| 9.87553 |
1 |
| 9.97227 |
1 |
| 10.06695 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.00273 3.00273 6.00273 9.00273 12.00273
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.00273,3] |
3 |
0.1428571 |
3 |
| (3,6] |
11 |
0.5238095 |
14 |
| (6,9] |
4 |
0.1904762 |
18 |
| (9,12] |
3 |
0.1428571 |
21 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.00273,3]",..: 1 2 3 4
## $ Freq : int 3 11 4 3
## $ Rel_Freq: num 0.143 0.524 0.19 0.143
## $ Cum_Freq: int 3 14 18 21
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.00273,3] |
3 |
| (3,6] |
11 |
| (6,9] |
4 |
| (9,12] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 2.200000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.300000e+02 NA NA NA NA NA
## max 7.442000e+03 NA NA NA NA NA
## range 7.212000e+03 NA NA NA NA NA
## sum 1.086050e+05 NA NA NA NA NA
## median 6.680000e+03 NA NA NA NA NA
## mean 4.936591e+03 NA NA NA NA NA
## SE.mean 5.803080e+02 NA NA NA NA NA
## CI.mean.0.95 1.206817e+03 NA NA NA NA NA
## var 7.408663e+06 NA NA NA NA NA
## std.dev 2.721886e+03 NA NA NA NA NA
## coef.var 5.513696e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.200000e+01 NA 22.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.654000e+03 NA 0.0027300 NA
## max NA 1.246940e+05 NA 10.0669500 NA
## range NA 1.220400e+05 NA 10.0642200 NA
## sum NA 9.383360e+05 NA 112.1789400 NA
## median NA 2.643100e+04 NA 4.7697050 NA
## mean NA 4.265164e+04 NA 5.0990427 NA
## SE.mean NA 9.849090e+03 NA 0.6720603 NA
## CI.mean.0.95 NA 2.048230e+04 NA 1.3976259 NA
## var NA 2.134100e+09 NA 9.9366315 NA
## std.dev NA 4.619633e+04 NA 3.1522423 NA
## coef.var NA 1.083108e+00 NA 0.6182028 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 22.00000000 2.200000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 13.28170000 -8.990440e+01 NA NA NA
## max 14.00070000 -8.808430e+01 NA NA NA
## range 0.71900000 1.820100e+00 NA NA NA
## sum 301.25530000 -1.961688e+03 NA NA NA
## median 13.71515000 -8.926030e+01 NA NA NA
## mean 13.69342273 -8.916765e+01 NA NA NA
## SE.mean 0.03809807 1.112458e-01 NA NA NA
## CI.mean.0.95 0.07922927 2.313484e-01 NA NA NA
## var 0.03193218 2.722640e-01 NA NA NA
## std.dev 0.17869578 5.217892e-01 NA NA NA
## coef.var 0.01304975 -5.851777e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 14.0000000 18.000000
## nbr.null NA NA NA 10.0000000 13.000000
## nbr.na NA NA NA 8.0000000 4.000000
## min NA NA NA 0.0000000 0.000000
## max NA NA NA 2.0000000 32.000000
## range NA NA NA 2.0000000 32.000000
## sum NA NA NA 6.0000000 63.000000
## median NA NA NA 0.0000000 0.000000
## mean NA NA NA 0.4285714 3.500000
## SE.mean NA NA NA 0.2020305 2.107022
## CI.mean.0.95 NA NA NA 0.4364604 4.445428
## var NA NA NA 0.5714286 79.911765
## std.dev NA NA NA 0.7559289 8.939338
## coef.var NA NA NA 1.7638342 2.554097
## source_name source_link prop ypos
## nbr.val NA NA 2.200000e+01 22.0000000
## nbr.null NA NA 0.000000e+00 0.0000000
## nbr.na NA NA 0.000000e+00 0.0000000
## min NA NA 2.433612e-03 1.8892806
## max NA NA 8.974011e+00 97.6381440
## range NA NA 8.971577e+00 95.7488634
## sum NA NA 1.000000e+02 1038.4834890
## median NA NA 4.251872e+00 44.1153638
## mean NA NA 4.545455e+00 47.2037950
## SE.mean NA NA 5.990967e-01 6.6246536
## CI.mean.0.95 NA NA 1.245890e+00 13.7767213
## var NA NA 7.896171e+00 965.4927696
## std.dev NA NA 2.810013e+00 31.0723795
## coef.var NA NA 6.182028e-01 0.6582602
boxplot(data, horizontal=TRUE, col='green')

Gráfico para La Libertad (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "La Libertad")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.96416 |
| 4.60655 |
| 4.67722 |
| 9.87553 |
| 9.64894 |
| 0.00442 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santa Tecla 9.87553000 9.87553000 25.55844469 25.55844469
## Parcoy 9.64894000 19.52447000 24.97201662 50.53046131
## Santa Tecla 4.96416000 24.48863000 12.84753414 63.37799545
## Antiguo Cuscatlán 4.86219000 29.35082000 12.58362986 75.96162531
## Santa Tecla 4.67722000 34.02804000 12.10491677 88.06654208
## Santa Tecla 4.60655000 38.63459000 11.92201871 99.98856078
## Aricapampa 0.00442000 38.63901000 0.01143922 100.00000000
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 |
## 4 | 6790
## 6 |
## 8 | 69
head(df_ES)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 564 6/2/08 <NA> <NA> El Salvador SV La L~ 124694
## 2 6686 10/12/14 <NA> <NA> El Salvador SV La L~ 124694
## 3 7440 11/3/15 1:00 <NA> El Salvador SV La L~ 124694
## 4 7441 11/4/15 <NA> <NA> El Salvador SV La L~ 124694
## 5 1004 4/14/09 <NA> SA Peru PE La L~ 0
## 6 1005 4/14/09 <NA> SA Peru PE La L~ 0
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 |
## 4 | 6790
## 6 |
## 8 | 69
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0
## 1 |
## 2 |
## 3 |
## 4 | 679
## 5 | 0
## 6 |
## 7 |
## 8 |
## 9 | 69
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.00442 |
1 |
14.3 |
14.3 |
14.3 |
14.3 |
| 4.60655 |
1 |
14.3 |
14.3 |
28.6 |
28.6 |
| 4.67722 |
1 |
14.3 |
14.3 |
42.9 |
42.9 |
| 4.86219 |
1 |
14.3 |
14.3 |
57.1 |
57.1 |
| 4.96416 |
1 |
14.3 |
14.3 |
71.4 |
71.4 |
| 9.64894 |
1 |
14.3 |
14.3 |
85.7 |
85.7 |
| 9.87553 |
1 |
14.3 |
14.3 |
100.0 |
100.0 |
| Total |
7 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 8 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 7
## $ % : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ val% : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ %cum : num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
## $ val%cum: num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.00442 |
1 |
| 4.60655 |
1 |
| 4.67722 |
1 |
| 4.86219 |
1 |
| 4.96416 |
1 |
| 9.64894 |
1 |
| 9.87553 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.00442 4.00442 8.00442 12.00442
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.00442,4] |
0 |
0.0000000 |
0 |
| (4,8] |
4 |
0.6666667 |
4 |
| (8,12] |
2 |
0.3333333 |
6 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.00442,4]",..: 1 2 3
## $ Freq : int 0 4 2
## $ Rel_Freq: num 0 0.667 0.333
## $ Cum_Freq: int 0 4 6
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.00442,4] |
0 |
| (4,8] |
4 |
| (8,12] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 7.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 5.640000e+02 NA NA NA NA NA
## max 7.441000e+03 NA NA NA NA NA
## range 6.877000e+03 NA NA NA NA NA
## sum 2.542600e+04 NA NA NA NA NA
## median 1.286000e+03 NA NA NA NA NA
## mean 3.632286e+03 NA NA NA NA NA
## SE.mean 1.263594e+03 NA NA NA NA NA
## CI.mean.0.95 3.091903e+03 NA NA NA NA NA
## var 1.117669e+07 NA NA NA NA NA
## std.dev 3.343155e+03 NA NA NA NA NA
## coef.var 9.203998e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 7.000000e+00 NA 7.0000000 NA 7.000000
## nbr.null NA 2.000000e+00 NA 0.0000000 NA 0.000000
## nbr.na NA 0.000000e+00 NA 0.0000000 NA 0.000000
## min NA 0.000000e+00 NA 0.0044200 NA -7.958900
## max NA 1.246940e+05 NA 9.8755300 NA 13.720500
## range NA 1.246940e+05 NA 9.8711100 NA 21.679400
## sum NA 5.325430e+05 NA 38.6390100 NA 52.794400
## median NA 1.246940e+05 NA 4.8621900 NA 13.714700
## mean NA 7.607757e+04 NA 5.5198586 NA 7.542057
## SE.mean NA 2.330952e+04 NA 1.2791994 NA 3.982602
## CI.mean.0.95 NA 5.703633e+04 NA 3.1300883 NA 9.745075
## var NA 3.803334e+09 NA 11.4544585 NA 111.027808
## std.dev NA 6.167118e+04 NA 3.3844436 NA 10.536973
## coef.var NA 8.106355e-01 NA 0.6131395 NA 1.397095
## longitude geolocation hazard_type landslide_type
## nbr.val 7.00000000 NA NA NA
## nbr.null 0.00000000 NA NA NA
## nbr.na 0.00000000 NA NA NA
## min -89.36250000 NA NA NA
## max -77.52390000 NA NA NA
## range 11.83860000 NA NA NA
## sum -601.71290000 NA NA NA
## median -89.26850000 NA NA NA
## mean -85.95898571 NA NA NA
## SE.mean 2.15312466 NA NA NA
## CI.mean.0.95 5.26850626 NA NA NA
## var 32.45162074 NA NA NA
## std.dev 5.69663240 NA NA NA
## coef.var -0.06627152 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3.0000000 6.000000 NA
## nbr.null NA NA NA 2.0000000 4.000000 NA
## nbr.na NA NA NA 4.0000000 1.000000 NA
## min NA NA NA 0.0000000 0.000000 NA
## max NA NA NA 1.0000000 12.000000 NA
## range NA NA NA 1.0000000 12.000000 NA
## sum NA NA NA 1.0000000 16.000000 NA
## median NA NA NA 0.0000000 0.000000 NA
## mean NA NA NA 0.3333333 2.666667 NA
## SE.mean NA NA NA 0.3333333 1.977653 NA
## CI.mean.0.95 NA NA NA 1.4342176 5.083719 NA
## var NA NA NA 0.3333333 23.466667 NA
## std.dev NA NA NA 0.5773503 4.844241 NA
## coef.var NA NA NA 1.7320508 1.816590 NA
## source_link prop ypos
## nbr.val NA 7.00000000 7.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 0.01143922 6.4237671
## max NA 25.55844469 93.7081851
## range NA 25.54700547 87.2844180
## sum NA 100.00000000 361.7457720
## median NA 12.58362986 49.6536920
## mean NA 14.28571429 51.6779674
## SE.mean NA 3.31064238 13.0483680
## CI.mean.0.95 NA 8.10085008 31.9282063
## var NA 76.72247089 1191.8193525
## std.dev NA 8.75913642 34.5227367
## coef.var NA 0.61313955 0.6680359
boxplot(data, horizontal=TRUE, col='green')

Gráfico para La Paz (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "La Paz")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San José 4.691330 4.691330 93.626987 93.626987
## San Pedro Masahuat 0.319330 5.010660 6.373013 100.000000
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 1 |
## 2 |
## 3 |
## 4 | 7
head(df_ES)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6683 10/15/14 <NA> <NA> El Salvador SV La Paz 2654
## 2 7460 9/25/15 <NA> <NA> Honduras HN La Paz 1463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 1 |
## 2 |
## 3 |
## 4 | 7
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 3
## 0 |
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 |
## 4 |
## 4 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.31933 |
1 |
50 |
50 |
50 |
50 |
| 4.69133 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.31933 3.31933 6.31933
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.319,3.32] |
0 |
0 |
0 |
| (3.32,6.32] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.319,3.32]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.319,3.32] |
0 |
| (3.32,6.32] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.683000e+03 NA NA NA NA NA
## max 7.460000e+03 NA NA NA NA NA
## range 7.770000e+02 NA NA NA NA NA
## sum 1.414300e+04 NA NA NA NA NA
## median 7.071500e+03 NA NA NA NA NA
## mean 7.071500e+03 NA NA NA NA NA
## SE.mean 3.885000e+02 NA NA NA NA NA
## CI.mean.0.95 4.936361e+03 NA NA NA NA NA
## var 3.018645e+05 NA NA NA NA NA
## std.dev 5.494220e+02 NA NA NA NA NA
## coef.var 7.769525e-02 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 2.000000e+00 NA 2.000000 NA 2.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 1.463000e+03 NA 0.319330 NA 13.54610000
## max NA 2.654000e+03 NA 4.691330 NA 14.28010000
## range NA 1.191000e+03 NA 4.372000 NA 0.73400000
## sum NA 4.117000e+03 NA 5.010660 NA 27.82620000
## median NA 2.058500e+03 NA 2.505330 NA 13.91310000
## mean NA 2.058500e+03 NA 2.505330 NA 13.91310000
## SE.mean NA 5.955000e+02 NA 2.186000 NA 0.36700000
## CI.mean.0.95 NA 7.566545e+03 NA 27.775764 NA 4.66317714
## var NA 7.092405e+05 NA 9.557192 NA 0.26937800
## std.dev NA 8.421642e+02 NA 3.091471 NA 0.51901638
## coef.var NA 4.091155e-01 NA 1.233958 NA 0.03730415
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.904010e+01 NA NA NA
## max -8.793690e+01 NA NA NA
## range 1.103200e+00 NA NA NA
## sum -1.769770e+02 NA NA NA
## median -8.848850e+01 NA NA NA
## mean -8.848850e+01 NA NA NA
## SE.mean 5.516000e-01 NA NA NA
## CI.mean.0.95 7.008743e+00 NA NA NA
## var 6.085251e-01 NA NA NA
## std.dev 7.800802e-01 NA NA NA
## coef.var -8.815611e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2 NA
## nbr.null NA NA NA 2 2 NA
## nbr.na NA NA NA 0 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 2.000000 2.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 6.373013 3.186506
## max NA 93.626987 53.186506
## range NA 87.253975 50.000000
## sum NA 100.000000 56.373013
## median NA 50.000000 28.186506
## mean NA 50.000000 28.186506
## SE.mean NA 43.626987 25.000000
## CI.mean.0.95 NA 554.333432 317.655118
## var NA 3806.628035 1250.000000
## std.dev NA 61.697877 35.355339
## coef.var NA 1.233958 1.254336
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San Miguel (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "San Miguel")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Rafael Oriente 10.06695 10.06695 37.30631 37.30631
## Chirilagua 9.97227 20.03922 36.95544 74.26175
## Chirilagua 6.94536 26.98458 25.73825 100.00000
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 6 | 9
## 7 |
## 8 |
## 9 |
## 10 | 01
head(df_ES)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6675 5/22/14 Night <NA> El Salvador SV San Miguel 19095
## 2 6251 10/13/14 Night <NA> El Salvador SV San Miguel 6393
## 3 6684 10/15/14 <NA> <NA> El Salvador SV San Miguel 6393
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 6 | 9
## 7 |
## 8 |
## 9 |
## 10 | 01
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 6 | 9
## 7 |
## 7 |
## 8 |
## 8 |
## 9 |
## 9 |
## 10 | 01
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 6.94536 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 9.97227 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 10.06695 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 6.94536 |
1 |
| 9.97227 |
1 |
| 10.06695 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 6.94536 8.94536 10.94536
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (6.95,8.95] |
0 |
0 |
0 |
| (8.95,10.9] |
2 |
1 |
2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(6.95,8.95]",..: 1 2
## $ Freq : int 0 2
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (6.95,8.95] |
0 |
| (8.95,10.9] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.251000e+03 NA NA NA NA NA
## max 6.684000e+03 NA NA NA NA NA
## range 4.330000e+02 NA NA NA NA NA
## sum 1.961000e+04 NA NA NA NA NA
## median 6.675000e+03 NA NA NA NA NA
## mean 6.536667e+03 NA NA NA NA NA
## SE.mean 1.428570e+02 NA NA NA NA NA
## CI.mean.0.95 6.146639e+02 NA NA NA NA NA
## var 6.122433e+04 NA NA NA NA NA
## std.dev 2.474355e+02 NA NA NA NA NA
## coef.var 3.785347e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 6.393000e+03 NA 6.9453600 NA
## max NA 1.909500e+04 NA 10.0669500 NA
## range NA 1.270200e+04 NA 3.1215900 NA
## sum NA 3.188100e+04 NA 26.9845800 NA
## median NA 6.393000e+03 NA 9.9722700 NA
## mean NA 1.062700e+04 NA 8.9948600 NA
## SE.mean NA 4.234000e+03 NA 1.0251144 NA
## CI.mean.0.95 NA 1.821743e+04 NA 4.4107114 NA
## var NA 5.378027e+07 NA 3.1525788 NA
## std.dev NA 7.333503e+03 NA 1.7755503 NA
## coef.var NA 6.900822e-01 NA 0.1973961 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.000000000 3.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 13.281700000 -8.827560e+01 NA NA NA
## max 13.437900000 -8.808430e+01 NA NA NA
## range 0.156200000 1.913000e-01 NA NA NA
## sum 40.012600000 -2.644852e+02 NA NA NA
## median 13.293000000 -8.812530e+01 NA NA NA
## mean 13.337533333 -8.816173e+01 NA NA NA
## SE.mean 0.050289241 5.815056e-02 NA NA NA
## CI.mean.0.95 0.216377141 2.502017e-01 NA NA NA
## var 0.007587023 1.014446e-02 NA NA NA
## std.dev 0.087103521 1.007197e-01 NA NA NA
## coef.var 0.006530707 -1.142443e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3 3 NA
## nbr.null NA NA NA 3 3 NA
## nbr.na NA NA NA 0 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 25.7382550 18.6531530
## max NA 37.3063060 81.5222805
## range NA 11.5680511 62.8691275
## sum NA 100.0000000 150.3508671
## median NA 36.9554390 50.1754335
## mean NA 33.3333333 50.1169557
## SE.mean NA 3.7988897 18.1487774
## CI.mean.0.95 NA 16.3453031 78.0878866
## var NA 43.2946886 988.1343623
## std.dev NA 6.5798699 31.4346045
## coef.var NA 0.1973961 0.6272249
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San Salvador (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "San Salvador")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Antiguo Cuscatlán 3.25227 3.25227 51.87315 51.87315
## Apopa 3.01739 6.26966 48.12685 100.00000
stem(df_ES$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 30 | 2
## 30 |
## 31 |
## 31 |
## 32 |
## 32 | 5
head(df_ES)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4008 10/10/11 <NA> <NA> El Salvador SV San ~ 112158
## 2 6687 10/12/14 <NA> <NA> El Salvador SV San ~ 33767
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 30 | 2
## 30 |
## 31 |
## 31 |
## 32 |
## 32 | 5
stem(df_ES$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 30 | 2
## 30 |
## 31 |
## 31 |
## 32 |
## 32 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 3.01739 |
1 |
50 |
50 |
50 |
50 |
| 3.25227 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 3.01739 4.01739
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ distance: Factor w/ 1 level "(3.02,4.02]": 1
## $ Freq : int 1
## $ Rel_Freq: num 1
## $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 4.008000e+03 NA NA NA NA NA
## max 6.687000e+03 NA NA NA NA NA
## range 2.679000e+03 NA NA NA NA NA
## sum 1.069500e+04 NA NA NA NA NA
## median 5.347500e+03 NA NA NA NA NA
## mean 5.347500e+03 NA NA NA NA NA
## SE.mean 1.339500e+03 NA NA NA NA NA
## CI.mean.0.95 1.701996e+04 NA NA NA NA NA
## var 3.588521e+06 NA NA NA NA NA
## std.dev 1.894339e+03 NA NA NA NA NA
## coef.var 3.542476e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.00000000 NA
## nbr.null NA 0.000000e+00 NA 0.00000000 NA
## nbr.na NA 0.000000e+00 NA 0.00000000 NA
## min NA 3.376700e+04 NA 3.01739000 NA
## max NA 1.121580e+05 NA 3.25227000 NA
## range NA 7.839100e+04 NA 0.23488000 NA
## sum NA 1.459250e+05 NA 6.26966000 NA
## median NA 7.296250e+04 NA 3.13483000 NA
## mean NA 7.296250e+04 NA 3.13483000 NA
## SE.mean NA 3.919550e+04 NA 0.11744000 NA
## CI.mean.0.95 NA 4.980260e+05 NA 1.49221668 NA
## var NA 3.072574e+09 NA 0.02758431 NA
## std.dev NA 5.543081e+04 NA 0.16608524 NA
## coef.var NA 7.597164e-01 NA 0.05298062 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 13.689100000 -8.923610e+01 NA NA NA
## max 13.787400000 -8.916000e+01 NA NA NA
## range 0.098300000 7.610000e-02 NA NA NA
## sum 27.476500000 -1.783961e+02 NA NA NA
## median 13.738250000 -8.919805e+01 NA NA NA
## mean 13.738250000 -8.919805e+01 NA NA NA
## SE.mean 0.049150000 3.805000e-02 NA NA NA
## CI.mean.0.95 0.624509963 4.834711e-01 NA NA NA
## var 0.004831445 2.895605e-03 NA NA NA
## std.dev 0.069508597 5.381083e-02 NA NA NA
## coef.var 0.005059494 -6.032736e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2.000000 NA
## nbr.null NA NA NA 1 1.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 32.000000 NA
## range NA NA NA 0 32.000000 NA
## sum NA NA NA 0 32.000000 NA
## median NA NA NA 0 16.000000 NA
## mean NA NA NA 0 16.000000 NA
## SE.mean NA NA NA NA 16.000000 NA
## CI.mean.0.95 NA NA NA NaN 203.299276 NA
## var NA NA NA NA 512.000000 NA
## std.dev NA NA NA NA 22.627417 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.00000000 2.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 48.12685217 24.0634261
## max NA 51.87314783 74.0634261
## range NA 3.74629565 50.0000000
## sum NA 100.00000000 98.1268522
## median NA 50.00000000 49.0634261
## mean NA 50.00000000 49.0634261
## SE.mean NA 1.87314783 25.0000000
## CI.mean.0.95 NA 23.80059978 317.6551184
## var NA 7.01736556 1250.0000000
## std.dev NA 2.64903106 35.3553391
## coef.var NA 0.05298062 0.7206048
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Santa Ana (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "Santa Ana")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Coatepeque 8.83210 8.83210 88.40764 88.40764
## Ciudad Arce 1.15810 9.99020 11.59236 100.00000
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 2
## 2 |
## 4 |
## 6 |
## 8 | 8
head(df_ES)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6685 10/12/14 <NA> <NA> El Salvador SV Santa Ana 5773
## 2 7438 7/18/15 <NA> <NA> El Salvador SV Santa Ana 10095
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 2
## 2 |
## 4 |
## 6 |
## 8 | 8
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 1 | 2
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.1581 |
1 |
50 |
50 |
50 |
50 |
| 8.8321 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.1581 5.1581 9.1581
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.16,5.16] |
0 |
0 |
0 |
| (5.16,9.16] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(1.16,5.16]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.16,5.16] |
0 |
| (5.16,9.16] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.685000e+03 NA NA NA NA NA
## max 7.438000e+03 NA NA NA NA NA
## range 7.530000e+02 NA NA NA NA NA
## sum 1.412300e+04 NA NA NA NA NA
## median 7.061500e+03 NA NA NA NA NA
## mean 7.061500e+03 NA NA NA NA NA
## SE.mean 3.765000e+02 NA NA NA NA NA
## CI.mean.0.95 4.783886e+03 NA NA NA NA NA
## var 2.835045e+05 NA NA NA NA NA
## std.dev 5.324514e+02 NA NA NA NA NA
## coef.var 7.540203e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 5.773000e+03 NA 1.158100 NA
## max NA 1.009500e+04 NA 8.832100 NA
## range NA 4.322000e+03 NA 7.674000 NA
## sum NA 1.586800e+04 NA 9.990200 NA
## median NA 7.934000e+03 NA 4.995100 NA
## mean NA 7.934000e+03 NA 4.995100 NA
## SE.mean NA 2.161000e+03 NA 3.837000 NA
## CI.mean.0.95 NA 2.745811e+04 NA 48.753708 NA
## var NA 9.339842e+06 NA 29.445138 NA
## std.dev NA 3.056116e+03 NA 5.426337 NA
## coef.var NA 3.851923e-01 NA 1.086332 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 13.846400000 -8.946910e+01 NA NA NA
## max 14.000700000 -8.945020e+01 NA NA NA
## range 0.154300000 1.890000e-02 NA NA NA
## sum 27.847100000 -1.789193e+02 NA NA NA
## median 13.923550000 -8.945965e+01 NA NA NA
## mean 13.923550000 -8.945965e+01 NA NA NA
## SE.mean 0.077150000 9.450000e-03 NA NA NA
## CI.mean.0.95 0.980283695 1.200736e-01 NA NA NA
## var 0.011904245 1.786050e-04 NA NA NA
## std.dev 0.109106576 1.336432e-02 NA NA NA
## coef.var 0.007836118 -1.493893e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2.000000 2.000000 NA
## nbr.null NA NA NA 1.000000 1.000000 NA
## nbr.na NA NA NA 0.000000 0.000000 NA
## min NA NA NA 0.000000 0.000000 NA
## max NA NA NA 2.000000 3.000000 NA
## range NA NA NA 2.000000 3.000000 NA
## sum NA NA NA 2.000000 3.000000 NA
## median NA NA NA 1.000000 1.500000 NA
## mean NA NA NA 1.000000 1.500000 NA
## SE.mean NA NA NA 1.000000 1.500000 NA
## CI.mean.0.95 NA NA NA 12.706205 19.059307 NA
## var NA NA NA 2.000000 4.500000 NA
## std.dev NA NA NA 1.414214 2.121320 NA
## coef.var NA NA NA 1.414214 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.000000 2.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 11.592361 44.2038197
## max NA 88.407639 94.2038197
## range NA 76.815279 50.0000000
## sum NA 100.000000 138.4076395
## median NA 50.000000 69.2038197
## mean NA 50.000000 69.2038197
## SE.mean NA 38.407639 25.0000000
## CI.mean.0.95 NA 488.015331 317.6551184
## var NA 2950.293542 1250.0000000
## std.dev NA 54.316605 35.3553391
## coef.var NA 1.086332 0.5108871
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Sonsonate (El Salvador)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_ES <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_ES))
library(dplyr)
df_ES <- subset(df, state == "Sonsonate")
knitr::kable(head(df_ES))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_ES, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_ES <- df_ES %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_ES$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_ES, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_ES$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_ES$distance
names(distance) <- df_ES$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Nahuizalco 4.238750 4.238750 53.287045 53.287045
## Sonzacate 3.222350 7.461100 40.509469 93.796514
## Juayúa 0.493460 7.954560 6.203486 100.000000
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 |
## 2 |
## 3 | 2
## 4 | 2
head(df_ES)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6682 10/15/14 <NA> <NA> El Salvador SV Sonsonate 15446
## 2 6681 10/15/14 <NA> <NA> El Salvador SV Sonsonate 7358
## 3 7442 10/19/15 <NA> <NA> El Salvador SV Sonsonate 9936
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_ES))
stem(df_ES$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 |
## 2 |
## 3 | 2
## 4 | 2
stem(df_ES$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 |
## 0 | 5
## 1 |
## 1 |
## 2 |
## 2 |
## 3 | 2
## 3 |
## 4 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.49346 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 3.22235 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 4.23875 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.49346 |
1 |
| 3.22235 |
1 |
| 4.23875 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.49346 2.49346 4.49346
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.493,2.49] |
0 |
0 |
0 |
| (2.49,4.49] |
2 |
1 |
2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.493,2.49]",..: 1 2
## $ Freq : int 0 2
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.493,2.49] |
0 |
| (2.49,4.49] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_ES)
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.681000e+03 NA NA NA NA NA
## max 7.442000e+03 NA NA NA NA NA
## range 7.610000e+02 NA NA NA NA NA
## sum 2.080500e+04 NA NA NA NA NA
## median 6.682000e+03 NA NA NA NA NA
## mean 6.935000e+03 NA NA NA NA NA
## SE.mean 2.535002e+02 NA NA NA NA NA
## CI.mean.0.95 1.090723e+03 NA NA NA NA NA
## var 1.927870e+05 NA NA NA NA NA
## std.dev 4.390752e+02 NA NA NA NA NA
## coef.var 6.331293e-02 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.000000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000000
## min NA 7.358000e+03 NA 0.493460 NA 13.713500000
## max NA 1.544600e+04 NA 4.238750 NA 13.845700000
## range NA 8.088000e+03 NA 3.745290 NA 0.132200000
## sum NA 3.274000e+04 NA 7.954560 NA 41.348700000
## median NA 9.936000e+03 NA 3.222350 NA 13.789500000
## mean NA 1.091333e+04 NA 2.651520 NA 13.782900000
## SE.mean NA 2.385395e+03 NA 1.118211 NA 0.038305265
## CI.mean.0.95 NA 1.026352e+04 NA 4.811272 NA 0.164814253
## var NA 1.707032e+07 NA 3.751184 NA 0.004401880
## std.dev NA 4.131625e+03 NA 1.936797 NA 0.066346665
## coef.var NA 3.785850e-01 NA 0.730448 NA 0.004813694
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.977390e+01 NA NA NA
## max -8.969380e+01 NA NA NA
## range 8.010000e-02 NA NA NA
## sum -2.692122e+02 NA NA NA
## median -8.974450e+01 NA NA NA
## mean -8.973740e+01 NA NA NA
## SE.mean 2.339380e-02 NA NA NA
## CI.mean.0.95 1.006554e-01 NA NA NA
## var 1.641810e-03 NA NA NA
## std.dev 4.051925e-02 NA NA NA
## coef.var -4.515314e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3.0000000 3 NA
## nbr.null NA NA NA 2.0000000 3 NA
## nbr.na NA NA NA 0.0000000 0 NA
## min NA NA NA 0.0000000 0 NA
## max NA NA NA 1.0000000 0 NA
## range NA NA NA 1.0000000 0 NA
## sum NA NA NA 1.0000000 0 NA
## median NA NA NA 0.0000000 0 NA
## mean NA NA NA 0.3333333 0 NA
## SE.mean NA NA NA 0.3333333 0 NA
## CI.mean.0.95 NA NA NA 1.4342176 0 NA
## var NA NA NA 0.3333333 0 NA
## std.dev NA NA NA 0.5773503 0 NA
## coef.var NA NA NA 1.7320508 NaN NA
## source_link prop ypos
## nbr.val NA 3.000000 3.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 6.203486 20.2547344
## max NA 53.287045 96.8982571
## range NA 47.083560 76.6435227
## sum NA 100.000000 184.3059830
## median NA 40.509469 67.1529915
## mean NA 33.333333 61.4353277
## SE.mean NA 14.057478 22.3090128
## CI.mean.0.95 NA 60.484447 95.9879347
## var NA 592.838087 1493.0761531
## std.dev NA 24.348267 38.6403436
## coef.var NA 0.730448 0.6289597
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Guatemala
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "El Salvador")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.23875 |
| 3.22235 |
| 0.49346 |
| 8.83210 |
| 1.15810 |
| 7.60946 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Miguel 1.006695e+01 1.006695e+01 8.974011e+00 8.974011e+00
## San Miguel 9.972270e+00 2.003922e+01 8.889610e+00 1.786362e+01
## La Libertad 9.875530e+00 2.991475e+01 8.803373e+00 2.666699e+01
## Santa Ana 8.832100e+00 3.874685e+01 7.873225e+00 3.454022e+01
## Cabañas 8.825250e+00 4.757210e+01 7.867118e+00 4.240734e+01
## San Vicente 7.609460e+00 5.518156e+01 6.783323e+00 4.919066e+01
## San Miguel 6.945360e+00 6.212692e+01 6.191323e+00 5.538198e+01
## San Vicente 5.907260e+00 6.803418e+01 5.265926e+00 6.064791e+01
## Ahuachapán 5.299010e+00 7.333319e+01 4.723712e+00 6.537162e+01
## La Libertad 4.964160e+00 7.829735e+01 4.425216e+00 6.979684e+01
## La Libertad 4.862190e+00 8.315954e+01 4.334316e+00 7.413115e+01
## La Libertad 4.677220e+00 8.783676e+01 4.169428e+00 7.830058e+01
## La Libertad 4.606550e+00 9.244331e+01 4.106430e+00 8.240701e+01
## Sonsonate 4.238750e+00 9.668206e+01 3.778561e+00 8.618557e+01
## San Vicente 4.031250e+00 1.007133e+02 3.593589e+00 8.977916e+01
## San Salvador 3.252270e+00 1.039656e+02 2.899181e+00 9.267834e+01
## Sonsonate 3.222350e+00 1.071879e+02 2.872509e+00 9.555085e+01
## San Salvador 3.017390e+00 1.102053e+02 2.689801e+00 9.824065e+01
## Santa Ana 1.158100e+00 1.113634e+02 1.032368e+00 9.927302e+01
## Sonsonate 4.934600e-01 1.118569e+02 4.398865e-01 9.971291e+01
## La Paz 3.193300e-01 1.121762e+02 2.846613e-01 9.999757e+01
## Ahuachapán 2.730000e-03 1.121789e+02 2.433612e-03 1.000000e+02
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000133344
## 0 | 5555567899
## 1 | 000
head(df_GT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6681 10/15/14 <NA> <NA> El Salvador SV Sons~ 7358
## 2 6682 10/15/14 <NA> <NA> El Salvador SV Sons~ 15446
## 3 7442 10/19/15 <NA> <NA> El Salvador SV Sons~ 9936
## 4 6685 10/12/14 <NA> <NA> El Salvador SV Sant~ 5773
## 5 7438 7/18/15 <NA> <NA> El Salvador SV Sant~ 10095
## 6 1285 11/8/09 <NA> <NA> El Salvador SV San ~ 41504
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000133344
## 0 | 5555567899
## 1 | 000
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0352
## 2 | 023
## 4 | 02679039
## 6 | 96
## 8 | 889
## 10 | 01
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.00273 |
1 |
4.5 |
4.5 |
4.5 |
4.5 |
| 0.31933 |
1 |
4.5 |
4.5 |
9.1 |
9.1 |
| 0.49346 |
1 |
4.5 |
4.5 |
13.6 |
13.6 |
| 1.1581 |
1 |
4.5 |
4.5 |
18.2 |
18.2 |
| 3.01739 |
1 |
4.5 |
4.5 |
22.7 |
22.7 |
| 3.22235 |
1 |
4.5 |
4.5 |
27.3 |
27.3 |
| 3.25227 |
1 |
4.5 |
4.5 |
31.8 |
31.8 |
| 4.03125 |
1 |
4.5 |
4.5 |
36.4 |
36.4 |
| 4.23875 |
1 |
4.5 |
4.5 |
40.9 |
40.9 |
| 4.60655 |
1 |
4.5 |
4.5 |
45.5 |
45.5 |
| 4.67722 |
1 |
4.5 |
4.5 |
50.0 |
50.0 |
| 4.86219 |
1 |
4.5 |
4.5 |
54.5 |
54.5 |
| 4.96416 |
1 |
4.5 |
4.5 |
59.1 |
59.1 |
| 5.29901 |
1 |
4.5 |
4.5 |
63.6 |
63.6 |
| 5.90726 |
1 |
4.5 |
4.5 |
68.2 |
68.2 |
| 6.94536 |
1 |
4.5 |
4.5 |
72.7 |
72.7 |
| 7.60946 |
1 |
4.5 |
4.5 |
77.3 |
77.3 |
| 8.82525 |
1 |
4.5 |
4.5 |
81.8 |
81.8 |
| 8.8321 |
1 |
4.5 |
4.5 |
86.4 |
86.4 |
| 9.87553 |
1 |
4.5 |
4.5 |
90.9 |
90.9 |
| 9.97227 |
1 |
4.5 |
4.5 |
95.5 |
95.5 |
| 10.06695 |
1 |
4.5 |
4.5 |
100.0 |
100.0 |
| Total |
22 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 23 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
## $ val% : num 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 ...
## $ %cum : num 4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
## $ val%cum: num 4.5 9.1 13.6 18.2 22.7 27.3 31.8 36.4 40.9 45.5 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.00273 |
1 |
| 0.31933 |
1 |
| 0.49346 |
1 |
| 1.1581 |
1 |
| 3.01739 |
1 |
| 3.22235 |
1 |
| 3.25227 |
1 |
| 4.03125 |
1 |
| 4.23875 |
1 |
| 4.60655 |
1 |
| 4.67722 |
1 |
| 4.86219 |
1 |
| 4.96416 |
1 |
| 5.29901 |
1 |
| 5.90726 |
1 |
| 6.94536 |
1 |
| 7.60946 |
1 |
| 8.82525 |
1 |
| 8.8321 |
1 |
| 9.87553 |
1 |
| 9.97227 |
1 |
| 10.06695 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.00273 3.00273 6.00273 9.00273 12.00273
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.00273,3] |
3 |
0.1428571 |
3 |
| (3,6] |
11 |
0.5238095 |
14 |
| (6,9] |
4 |
0.1904762 |
18 |
| (9,12] |
3 |
0.1428571 |
21 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.00273,3]",..: 1 2 3 4
## $ Freq : int 3 11 4 3
## $ Rel_Freq: num 0.143 0.524 0.19 0.143
## $ Cum_Freq: int 3 14 18 21
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.00273,3] |
3 |
| (3,6] |
11 |
| (6,9] |
4 |
| (9,12] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## id date time continent_code country_name country_code
## nbr.val 2.200000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.300000e+02 NA NA NA NA NA
## max 7.442000e+03 NA NA NA NA NA
## range 7.212000e+03 NA NA NA NA NA
## sum 1.086050e+05 NA NA NA NA NA
## median 6.680000e+03 NA NA NA NA NA
## mean 4.936591e+03 NA NA NA NA NA
## SE.mean 5.803080e+02 NA NA NA NA NA
## CI.mean.0.95 1.206817e+03 NA NA NA NA NA
## var 7.408663e+06 NA NA NA NA NA
## std.dev 2.721886e+03 NA NA NA NA NA
## coef.var 5.513696e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.200000e+01 NA 22.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.654000e+03 NA 0.0027300 NA
## max NA 1.246940e+05 NA 10.0669500 NA
## range NA 1.220400e+05 NA 10.0642200 NA
## sum NA 9.383360e+05 NA 112.1789400 NA
## median NA 2.643100e+04 NA 4.7697050 NA
## mean NA 4.265164e+04 NA 5.0990427 NA
## SE.mean NA 9.849090e+03 NA 0.6720603 NA
## CI.mean.0.95 NA 2.048230e+04 NA 1.3976259 NA
## var NA 2.134100e+09 NA 9.9366315 NA
## std.dev NA 4.619633e+04 NA 3.1522423 NA
## coef.var NA 1.083108e+00 NA 0.6182028 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 22.00000000 2.200000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 13.28170000 -8.990440e+01 NA NA NA
## max 14.00070000 -8.808430e+01 NA NA NA
## range 0.71900000 1.820100e+00 NA NA NA
## sum 301.25530000 -1.961688e+03 NA NA NA
## median 13.71515000 -8.926030e+01 NA NA NA
## mean 13.69342273 -8.916765e+01 NA NA NA
## SE.mean 0.03809807 1.112458e-01 NA NA NA
## CI.mean.0.95 0.07922927 2.313484e-01 NA NA NA
## var 0.03193218 2.722640e-01 NA NA NA
## std.dev 0.17869578 5.217892e-01 NA NA NA
## coef.var 0.01304975 -5.851777e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 14.0000000 18.000000
## nbr.null NA NA NA 10.0000000 13.000000
## nbr.na NA NA NA 8.0000000 4.000000
## min NA NA NA 0.0000000 0.000000
## max NA NA NA 2.0000000 32.000000
## range NA NA NA 2.0000000 32.000000
## sum NA NA NA 6.0000000 63.000000
## median NA NA NA 0.0000000 0.000000
## mean NA NA NA 0.4285714 3.500000
## SE.mean NA NA NA 0.2020305 2.107022
## CI.mean.0.95 NA NA NA 0.4364604 4.445428
## var NA NA NA 0.5714286 79.911765
## std.dev NA NA NA 0.7559289 8.939338
## coef.var NA NA NA 1.7638342 2.554097
## source_name source_link prop ypos
## nbr.val NA NA 2.200000e+01 22.0000000
## nbr.null NA NA 0.000000e+00 0.0000000
## nbr.na NA NA 0.000000e+00 0.0000000
## min NA NA 2.433612e-03 1.8892806
## max NA NA 8.974011e+00 97.6381440
## range NA NA 8.971577e+00 95.7488634
## sum NA NA 1.000000e+02 1038.4834890
## median NA NA 4.251872e+00 44.1153638
## mean NA NA 4.545455e+00 47.2037950
## SE.mean NA NA 5.990967e-01 6.6246536
## CI.mean.0.95 NA NA 1.245890e+00 13.7767213
## var NA NA 7.896171e+00 965.4927696
## std.dev NA NA 2.810013e+00 31.0723795
## coef.var NA NA 6.182028e-01 0.6582602
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Alta Verapaz (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Alta Verapaz")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Lanquín 13.39817 13.39817 64.27831 64.27831
## Cahabón 5.14479 18.54296 24.68235 88.96066
## Senahú 2.30104 20.84400 11.03934 100.00000
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 | 5
## 1 | 3
head(df_GT)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2354 8/28/10 <NA> <NA> Guatemala GT Alta~ 5633
## 2 198 8/21/07 <NA> <NA> Guatemala GT Alta~ 2006
## 3 7433 10/15/15 <NA> <NA> Guatemala GT Alta~ 4671
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 | 5
## 1 | 3
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 3
## 4 | 1
## 6 |
## 8 |
## 10 |
## 12 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.30104 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 5.14479 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 13.39817 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 2.30104 |
1 |
| 5.14479 |
1 |
| 13.39817 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.30104 6.30104 10.30104 14.30104
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (2.3,6.3] |
1 |
0.5 |
1 |
| (6.3,10.3] |
0 |
0.0 |
1 |
| (10.3,14.3] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(2.3,6.3]","(6.3,10.3]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (2.3,6.3] |
1 |
| (6.3,10.3] |
0 |
| (10.3,14.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.980000e+02 NA NA NA NA NA
## max 7.433000e+03 NA NA NA NA NA
## range 7.235000e+03 NA NA NA NA NA
## sum 9.985000e+03 NA NA NA NA NA
## median 2.354000e+03 NA NA NA NA NA
## mean 3.328333e+03 NA NA NA NA NA
## SE.mean 2.144629e+03 NA NA NA NA NA
## CI.mean.0.95 9.227594e+03 NA NA NA NA NA
## var 1.379830e+07 NA NA NA NA NA
## std.dev 3.714606e+03 NA NA NA NA NA
## coef.var 1.116056e+00 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.006000e+03 NA 2.3010400 NA
## max NA 5.633000e+03 NA 13.3981700 NA
## range NA 3.627000e+03 NA 11.0971300 NA
## sum NA 1.231000e+04 NA 20.8440000 NA
## median NA 4.671000e+03 NA 5.1447900 NA
## mean NA 4.103333e+03 NA 6.9480000 NA
## SE.mean NA 1.084814e+03 NA 3.3279247 NA
## CI.mean.0.95 NA 4.667579e+03 NA 14.3189043 NA
## var NA 3.530466e+06 NA 33.2252483 NA
## std.dev NA 1.878954e+03 NA 5.7641347 NA
## coef.var NA 4.579091e-01 NA 0.8296106 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.000000000 3.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 15.416800000 -9.008530e+01 NA NA NA
## max 15.604600000 -8.973210e+01 NA NA NA
## range 0.187800000 3.532000e-01 NA NA NA
## sum 46.599300000 -2.696381e+02 NA NA NA
## median 15.577900000 -8.982070e+01 NA NA NA
## mean 15.533100000 -8.987937e+01 NA NA NA
## SE.mean 0.058658588 1.060957e-01 NA NA NA
## CI.mean.0.95 0.252387536 4.564930e-01 NA NA NA
## var 0.010322490 3.376889e-02 NA NA NA
## std.dev 0.101599656 1.837631e-01 NA NA NA
## coef.var 0.006540849 -2.044553e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2.0000000 NA
## nbr.null NA NA NA 1 0.0000000 NA
## nbr.na NA NA NA 2 1.0000000 NA
## min NA NA NA 0 1.0000000 NA
## max NA NA NA 0 2.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 3.0000000 NA
## median NA NA NA 0 1.5000000 NA
## mean NA NA NA 0 1.5000000 NA
## SE.mean NA NA NA NA 0.5000000 NA
## CI.mean.0.95 NA NA NA NaN 6.3531024 NA
## var NA NA NA NA 0.5000000 NA
## std.dev NA NA NA NA 0.7071068 NA
## coef.var NA NA NA NA 0.4714045 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 11.0393399 5.5196699
## max NA 64.2783055 87.6588227
## range NA 53.2389656 82.1391528
## sum NA 100.0000000 136.3569852
## median NA 24.6823546 43.1784926
## mean NA 33.3333333 45.4523284
## SE.mean NA 15.9658640 23.7387717
## CI.mean.0.95 NA 68.6955683 102.1396910
## var NA 764.7264402 1690.5878507
## std.dev NA 27.6536876 41.1167588
## coef.var NA 0.8296106 0.9046128
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Chimaltenango (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Chimaltenango")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.99952 |
| 5.31511 |
| 5.52205 |
| 1.36473 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Patzicía 5.522050 5.522050 41.829244 41.829244
## San José Poaquil 5.315110 10.837160 40.261684 82.090928
## Chimaltenango 1.364730 12.201890 10.337759 92.428688
## Santa Apolonia 0.999520 13.201410 7.571312 100.000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 |
## 1 | 04
## 2 |
## 3 |
## 4 |
## 5 | 35
head(df_GT)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1921 5/30/10 <NA> <NA> Guatemala GT Chim~ 2396
## 2 852 10/18/08 <NA> <NA> Guatemala GT Chim~ 5987
## 3 7413 9/27/15 <NA> <NA> Guatemala GT Chim~ 16494
## 4 2388 9/4/10 <NA> <NA> Guatemala GT Chim~ 82370
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 |
## 1 | 04
## 2 |
## 3 |
## 4 |
## 5 | 35
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 |
## 1 | 04
## 1 |
## 2 |
## 2 |
## 3 |
## 3 |
## 4 |
## 4 |
## 5 | 3
## 5 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.99952 |
1 |
25 |
25 |
25 |
25 |
| 1.36473 |
1 |
25 |
25 |
50 |
50 |
| 5.31511 |
1 |
25 |
25 |
75 |
75 |
| 5.52205 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.99952 |
1 |
| 1.36473 |
1 |
| 5.31511 |
1 |
| 5.52205 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.99952 2.99952 4.99952 6.99952
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1,3] |
1 |
0.3333333 |
1 |
| (3,5] |
0 |
0.0000000 |
1 |
| (5,7] |
2 |
0.6666667 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(1,3]","(3,5]",..: 1 2 3
## $ Freq : int 1 0 2
## $ Rel_Freq: num 0.333 0 0.667
## $ Cum_Freq: int 1 1 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.520000e+02 NA NA NA NA NA
## max 7.413000e+03 NA NA NA NA NA
## range 6.561000e+03 NA NA NA NA NA
## sum 1.257400e+04 NA NA NA NA NA
## median 2.154500e+03 NA NA NA NA NA
## mean 3.143500e+03 NA NA NA NA NA
## SE.mean 1.459020e+03 NA NA NA NA NA
## CI.mean.0.95 4.643254e+03 NA NA NA NA NA
## var 8.514963e+06 NA NA NA NA NA
## std.dev 2.918041e+03 NA NA NA NA NA
## coef.var 9.282777e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 2.396000e+03 NA 0.999520 NA
## max NA 8.237000e+04 NA 5.522050 NA
## range NA 7.997400e+04 NA 4.522530 NA
## sum NA 1.072470e+05 NA 13.201410 NA
## median NA 1.124050e+04 NA 3.339920 NA
## mean NA 2.681175e+04 NA 3.300352 NA
## SE.mean NA 1.875938e+04 NA 1.225957 NA
## CI.mean.0.95 NA 5.970073e+04 NA 3.901543 NA
## var NA 1.407658e+09 NA 6.011884 NA
## std.dev NA 3.751877e+04 NA 2.451914 NA
## coef.var NA 1.399341e+00 NA 0.742925 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000000 4.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 14.651000000 -9.096810e+01 NA NA NA
## max 14.866700000 -9.082670e+01 NA NA NA
## range 0.215700000 1.414000e-01 NA NA NA
## sum 58.962400000 -3.636782e+02 NA NA NA
## median 14.722350000 -9.094170e+01 NA NA NA
## mean 14.740600000 -9.091955e+01 NA NA NA
## SE.mean 0.051660188 3.317814e-02 NA NA NA
## CI.mean.0.95 0.164405774 1.055877e-01 NA NA NA
## var 0.010675100 4.403157e-03 NA NA NA
## std.dev 0.103320376 6.635629e-02 NA NA NA
## coef.var 0.007009238 -7.298352e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 3.0000000 NA
## nbr.null NA NA NA 1 1.0000000 NA
## nbr.na NA NA NA 3 1.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 11.0000000 NA
## range NA NA NA 0 11.0000000 NA
## sum NA NA NA 0 21.0000000 NA
## median NA NA NA 0 10.0000000 NA
## mean NA NA NA 0 7.0000000 NA
## SE.mean NA NA NA NA 3.5118846 NA
## CI.mean.0.95 NA NA NA NaN 15.1104198 NA
## var NA NA NA NA 37.0000000 NA
## std.dev NA NA NA NA 6.0827625 NA
## coef.var NA NA NA NA 0.8689661 NA
## source_link prop ypos
## nbr.val NA 4.000000 4.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 7.571312 3.7856562
## max NA 41.829244 94.8311203
## range NA 34.257932 91.0454641
## sum NA 100.000000 195.0665497
## median NA 25.299722 48.2248866
## mean NA 25.000000 48.7666374
## SE.mean NA 9.286562 20.3882673
## CI.mean.0.95 NA 29.553986 64.8845658
## var NA 344.960969 1662.7257673
## std.dev NA 18.573125 40.7765345
## coef.var NA 0.742925 0.8361564
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Escuintla (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Escuintla")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Nueva Concepción 8.254650 8.254650 71.521032 71.521032
## Palín 3.101500 11.356150 26.872427 98.393459
## Palín 0.185420 11.541570 1.606541 100.000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 2
## 2 | 1
## 4 |
## 6 |
## 8 | 3
head(df_GT)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 563 6/1/08 <NA> <NA> Guatemala GT Escuintla 31329
## 2 1914 5/29/10 <NA> <NA> Guatemala GT Escuintla 31329
## 3 7434 5/4/15 13:32 <NA> Guatemala GT Escuintla 11121
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 2
## 2 | 1
## 4 |
## 6 |
## 8 | 3
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 2
## 1 |
## 2 |
## 3 | 1
## 4 |
## 5 |
## 6 |
## 7 |
## 8 | 3
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.18542 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 3.1015 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 8.25465 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.18542 |
1 |
| 3.1015 |
1 |
| 8.25465 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.18542 3.18542 6.18542 9.18542
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.185,3.19] |
1 |
0.5 |
1 |
| (3.19,6.19] |
0 |
0.0 |
1 |
| (6.19,9.19] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.185,3.19]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.185,3.19] |
1 |
| (3.19,6.19] |
0 |
| (6.19,9.19] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 5.630000e+02 NA NA NA NA NA
## max 7.434000e+03 NA NA NA NA NA
## range 6.871000e+03 NA NA NA NA NA
## sum 9.911000e+03 NA NA NA NA NA
## median 1.914000e+03 NA NA NA NA NA
## mean 3.303667e+03 NA NA NA NA NA
## SE.mean 2.101669e+03 NA NA NA NA NA
## CI.mean.0.95 9.042753e+03 NA NA NA NA NA
## var 1.325104e+07 NA NA NA NA NA
## std.dev 3.640198e+03 NA NA NA NA NA
## coef.var 1.101866e+00 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 1.112100e+04 NA 0.185420 NA 14.22020000
## max NA 3.132900e+04 NA 8.254650 NA 14.42260000
## range NA 2.020800e+04 NA 8.069230 NA 0.20240000
## sum NA 7.377900e+04 NA 11.541570 NA 43.04670000
## median NA 3.132900e+04 NA 3.101500 NA 14.40390000
## mean NA 2.459300e+04 NA 3.847190 NA 14.34890000
## SE.mean NA 6.736000e+03 NA 2.359036 NA 0.06457603
## CI.mean.0.95 NA 2.898267e+04 NA 10.150114 NA 0.27784822
## var NA 1.361211e+08 NA 16.695158 NA 0.01251019
## std.dev NA 1.166709e+04 NA 4.085971 NA 0.11184896
## coef.var NA 4.744071e-01 NA 1.062066 NA 0.00779495
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.122640e+01 NA NA NA
## max -9.067550e+01 NA NA NA
## range 5.509000e-01 NA NA NA
## sum -2.726005e+02 NA NA NA
## median -9.069860e+01 NA NA NA
## mean -9.086683e+01 NA NA NA
## SE.mean 1.799070e-01 NA NA NA
## CI.mean.0.95 7.740772e-01 NA NA NA
## var 9.709954e-02 NA NA NA
## std.dev 3.116080e-01 NA NA NA
## coef.var -3.429282e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 3.0000000 NA
## nbr.null NA NA NA 1 2.0000000 NA
## nbr.na NA NA NA 2 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 1.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.3333333 NA
## SE.mean NA NA NA NA 0.3333333 NA
## CI.mean.0.95 NA NA NA NaN 1.4342176 NA
## var NA NA NA NA 0.3333333 NA
## std.dev NA NA NA NA 0.5773503 NA
## coef.var NA NA NA NA 1.7320508 NA
## source_link prop ypos
## nbr.val NA 3.000000 3.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 1.606541 13.4362136
## max NA 71.521032 64.2394839
## range NA 69.914492 50.8032703
## sum NA 100.000000 105.3513950
## median NA 26.872427 27.6756975
## mean NA 33.333333 35.1171317
## SE.mean NA 20.439476 15.1302597
## CI.mean.0.95 NA 87.943967 65.1002531
## var NA 1253.316518 686.7742743
## std.dev NA 35.402211 26.2063785
## coef.var NA 1.062066 0.7462562
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Guatemala (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Guatemala")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.65744 |
| 2.81128 |
| 2.70053 |
| 0.94245 |
| 3.96161 |
| 4.74385 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Fraijanes 6.1921800 6.1921800 7.1121567 7.1121567
## Chinautla 5.9453500 12.1375300 6.8286550 13.9408116
## San José Pinula 4.7438500 16.8813800 5.4486472 19.3894589
## Petapa 4.2072600 21.0886400 4.8323357 24.2217945
## Guatemala City 4.0793000 25.1679400 4.6853646 28.9071591
## Santa Catarina Pinula 3.9616100 29.1295500 4.5501893 33.4573483
## Chinautla 3.8564800 32.9860300 4.4294400 37.8867884
## Guatemala City 3.8031200 36.7891500 4.3681523 42.2549407
## Mixco 3.6474900 40.4366400 4.1894002 46.4443409
## Guatemala City 3.4934100 43.9300500 4.0124285 50.4567693
## Guatemala City 3.2567500 47.1868000 3.7406077 54.1973771
## Guatemala City 3.1261400 50.3129400 3.5905929 57.7879699
## Guatemala City 3.0031400 53.3160800 3.4493187 61.2372886
## Santa Catarina Pinula 2.8112800 56.1273600 3.2289539 64.4662425
## Guatemala City 2.7911300 58.9184900 3.2058102 67.6720527
## Santa Catarina Pinula 2.7005300 61.6190200 3.1017497 70.7738024
## Chinautla 2.6635800 64.2826000 3.0593100 73.8331124
## Guatemala City 2.5962000 66.8788000 2.9819193 76.8150318
## Chinautla 2.3637600 69.2425600 2.7149455 79.5299773
## Mixco 2.1041800 71.3467400 2.4167995 81.9467768
## Guatemala City 2.0842500 73.4309900 2.3939085 84.3406854
## Amatitlán 2.0289100 75.4599000 2.3303466 86.6710320
## Mixco 1.8700900 77.3299900 2.1479306 88.8189626
## Guatemala City 1.8386300 79.1686200 2.1117966 90.9307592
## Guatemala City 1.7014700 80.8700900 1.9542586 92.8850178
## Guatemala City 1.6829000 82.5529900 1.9329297 94.8179475
## Guatemala City 1.5738100 84.1268000 1.8076321 96.6255796
## Santa Catarina Pinula 0.9424500 85.0692500 1.0824705 97.7080501
## San José Pinula 0.8904000 85.9596500 1.0226874 98.7307375
## Villa Canales 0.6574400 86.6170900 0.7551163 99.4858538
## Chinautla 0.4476400 87.0647300 0.5141462 100.0000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 4799
## 1 | 67789
## 2 | 011467788
## 3 | 0135689
## 4 | 0127
## 5 | 9
## 6 | 2
head(df_GT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7435 11/24/15 <NA> <NA> Guatemala GT Guatemala 122194
## 2 2350 8/28/10 <NA> <NA> Guatemala GT Guatemala 67994
## 3 6661 9/23/14 14:30 <NA> Guatemala GT Guatemala 67994
## 4 7343 10/1/15 21:30 <NA> Guatemala GT Guatemala 67994
## 5 7345 10/19/15 <NA> <NA> Guatemala GT Guatemala 67994
## 6 165 8/9/07 <NA> <NA> Guatemala GT Guatemala 47247
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 4799
## 1 | 67789
## 2 | 011467788
## 3 | 0135689
## 4 | 0127
## 5 | 9
## 6 | 2
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4
## 0 | 799
## 1 |
## 1 | 67789
## 2 | 0114
## 2 | 67788
## 3 | 013
## 3 | 5689
## 4 | 012
## 4 | 7
## 5 |
## 5 | 9
## 6 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.44764 |
1 |
3.2 |
3.2 |
3.2 |
3.2 |
| 0.65744 |
1 |
3.2 |
3.2 |
6.5 |
6.5 |
| 0.8904 |
1 |
3.2 |
3.2 |
9.7 |
9.7 |
| 0.94245 |
1 |
3.2 |
3.2 |
12.9 |
12.9 |
| 1.57381 |
1 |
3.2 |
3.2 |
16.1 |
16.1 |
| 1.6829 |
1 |
3.2 |
3.2 |
19.4 |
19.4 |
| 1.70147 |
1 |
3.2 |
3.2 |
22.6 |
22.6 |
| 1.83863 |
1 |
3.2 |
3.2 |
25.8 |
25.8 |
| 1.87009 |
1 |
3.2 |
3.2 |
29.0 |
29.0 |
| 2.02891 |
1 |
3.2 |
3.2 |
32.3 |
32.3 |
| 2.08425 |
1 |
3.2 |
3.2 |
35.5 |
35.5 |
| 2.10418 |
1 |
3.2 |
3.2 |
38.7 |
38.7 |
| 2.36376 |
1 |
3.2 |
3.2 |
41.9 |
41.9 |
| 2.5962 |
1 |
3.2 |
3.2 |
45.2 |
45.2 |
| 2.66358 |
1 |
3.2 |
3.2 |
48.4 |
48.4 |
| 2.70053 |
1 |
3.2 |
3.2 |
51.6 |
51.6 |
| 2.79113 |
1 |
3.2 |
3.2 |
54.8 |
54.8 |
| 2.81128 |
1 |
3.2 |
3.2 |
58.1 |
58.1 |
| 3.00314 |
1 |
3.2 |
3.2 |
61.3 |
61.3 |
| 3.12614 |
1 |
3.2 |
3.2 |
64.5 |
64.5 |
| 3.25675 |
1 |
3.2 |
3.2 |
67.7 |
67.7 |
| 3.49341 |
1 |
3.2 |
3.2 |
71.0 |
71.0 |
| 3.64749 |
1 |
3.2 |
3.2 |
74.2 |
74.2 |
| 3.80312 |
1 |
3.2 |
3.2 |
77.4 |
77.4 |
| 3.85648 |
1 |
3.2 |
3.2 |
80.6 |
80.6 |
| 3.96161 |
1 |
3.2 |
3.2 |
83.9 |
83.9 |
| 4.0793 |
1 |
3.2 |
3.2 |
87.1 |
87.1 |
| 4.20726 |
1 |
3.2 |
3.2 |
90.3 |
90.3 |
| 4.74385 |
1 |
3.2 |
3.2 |
93.5 |
93.5 |
| 5.94535 |
1 |
3.2 |
3.2 |
96.8 |
96.8 |
| 6.19218 |
1 |
3.2 |
3.2 |
100.0 |
100.0 |
| Total |
31 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 32 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 ...
## $ val% : num 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 3.2 ...
## $ %cum : num 3.2 6.5 9.7 12.9 16.1 19.4 22.6 25.8 29 32.3 ...
## $ val%cum: num 3.2 6.5 9.7 12.9 16.1 19.4 22.6 25.8 29 32.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.44764 |
1 |
| 0.65744 |
1 |
| 0.8904 |
1 |
| 0.94245 |
1 |
| 1.57381 |
1 |
| 1.6829 |
1 |
| 1.70147 |
1 |
| 1.83863 |
1 |
| 1.87009 |
1 |
| 2.02891 |
1 |
| 2.08425 |
1 |
| 2.10418 |
1 |
| 2.36376 |
1 |
| 2.5962 |
1 |
| 2.66358 |
1 |
| 2.70053 |
1 |
| 2.79113 |
1 |
| 2.81128 |
1 |
| 3.00314 |
1 |
| 3.12614 |
1 |
| 3.25675 |
1 |
| 3.49341 |
1 |
| 3.64749 |
1 |
| 3.80312 |
1 |
| 3.85648 |
1 |
| 3.96161 |
1 |
| 4.0793 |
1 |
| 4.20726 |
1 |
| 4.74385 |
1 |
| 5.94535 |
1 |
| 6.19218 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.44764 2.44764 4.44764 6.44764
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.448,2.45] |
12 |
0.4 |
12 |
| (2.45,4.45] |
15 |
0.5 |
27 |
| (4.45,6.45] |
3 |
0.1 |
30 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.448,2.45]",..: 1 2 3
## $ Freq : int 12 15 3
## $ Rel_Freq: num 0.4 0.5 0.1
## $ Cum_Freq: int 12 27 30
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.448,2.45] |
12 |
| (2.45,4.45] |
15 |
| (4.45,6.45] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## id date time continent_code country_name country_code
## nbr.val 3.100000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.650000e+02 NA NA NA NA NA
## max 7.435000e+03 NA NA NA NA NA
## range 7.270000e+03 NA NA NA NA NA
## sum 1.638690e+05 NA NA NA NA NA
## median 7.345000e+03 NA NA NA NA NA
## mean 5.286097e+03 NA NA NA NA NA
## SE.mean 5.051486e+02 NA NA NA NA NA
## CI.mean.0.95 1.031651e+03 NA NA NA NA NA
## var 7.910428e+06 NA NA NA NA NA
## std.dev 2.812548e+03 NA NA NA NA NA
## coef.var 5.320652e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.100000e+01 NA 31.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.849200e+04 NA 0.4476400 NA
## max NA 9.949380e+05 NA 6.1921800 NA
## range NA 9.664460e+05 NA 5.7445400 NA
## sum NA 1.556974e+07 NA 87.0647300 NA
## median NA 4.730800e+05 NA 2.7005300 NA
## mean NA 5.022497e+05 NA 2.8085397 NA
## SE.mean NA 7.918482e+04 NA 0.2511060 NA
## CI.mean.0.95 NA 1.617170e+05 NA 0.5128270 NA
## var NA 1.943773e+11 NA 1.9546816 NA
## std.dev NA 4.408824e+05 NA 1.3980993 NA
## coef.var NA 8.778152e-01 NA 0.4978029 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.100000e+01 3.100000e+01 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.442360e+01 -9.063190e+01 NA NA NA
## max 1.469690e+01 -9.041910e+01 NA NA NA
## range 2.733000e-01 2.128000e-01 NA NA NA
## sum 4.528442e+02 -2.805914e+03 NA NA NA
## median 1.462290e+01 -9.050650e+01 NA NA NA
## mean 1.460788e+01 -9.051336e+01 NA NA NA
## SE.mean 1.173315e-02 8.643431e-03 NA NA NA
## CI.mean.0.95 2.396228e-02 1.765224e-02 NA NA NA
## var 4.267668e-03 2.315976e-03 NA NA NA
## std.dev 6.532739e-02 4.812459e-02 NA NA NA
## coef.var 4.472066e-03 -5.316849e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 20.0000000 31.000000
## nbr.null NA NA NA 18.0000000 21.000000
## nbr.na NA NA NA 11.0000000 0.000000
## min NA NA NA 0.0000000 0.000000
## max NA NA NA 6.0000000 280.000000
## range NA NA NA 6.0000000 280.000000
## sum NA NA NA 9.0000000 337.000000
## median NA NA NA 0.0000000 0.000000
## mean NA NA NA 0.4500000 10.870968
## SE.mean NA NA NA 0.3282730 8.997938
## CI.mean.0.95 NA NA NA 0.6870833 18.376241
## var NA NA NA 2.1552632 2509.849462
## std.dev NA NA NA 1.4680815 50.098398
## coef.var NA NA NA 3.2624032 4.608458
## source_name source_link prop ypos
## nbr.val NA NA 31.0000000 31.0000000
## nbr.null NA NA 0.0000000 0.0000000
## nbr.na NA NA 0.0000000 0.0000000
## min NA NA 0.5141462 0.3775582
## max NA NA 7.1121567 98.8348267
## range NA NA 6.5980105 98.4572685
## sum NA NA 100.0000000 1491.4174144
## median NA NA 3.1017497 49.4240435
## mean NA NA 3.2258065 48.1102392
## SE.mean NA NA 0.2884130 5.3801852
## CI.mean.0.95 NA NA 0.5890180 10.9878040
## var NA NA 2.5786447 897.3381627
## std.dev NA NA 1.6058159 29.9556032
## coef.var NA NA 0.4978029 0.6226451
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Huehuetenango (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Huehuetenango")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.58358 |
| 0.91108 |
| 0.03280 |
| 3.30989 |
| 0.45507 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Cuilco 3.3098900 3.3098900 52.6012250 52.6012250
## San Sebastián Huehuetenango 1.5835800 4.8934700 25.1664701 77.7676951
## San Pedro Necta 0.9110800 5.8045500 14.4790081 92.2467032
## Barillas 0.4550700 6.2596200 7.2320347 99.4787379
## Malacatancito 0.0328000 6.2924200 0.5212621 100.0000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 059
## 1 | 6
## 2 |
## 3 | 3
head(df_GT)
## # A tibble: 5 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 867 10/28/08 <NA> <NA> Guatemala GT Hueh~ 1311
## 2 6040 5/30/14 18:00 <NA> Guatemala GT Hueh~ 3554
## 3 2352 8/28/10 <NA> <NA> Guatemala GT Hueh~ 2121
## 4 6666 9/27/14 <NA> <NA> Guatemala GT Hueh~ 1713
## 5 3981 9/20/11 15:00 <NA> Guatemala GT Hueh~ 14100
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 059
## 1 | 6
## 2 |
## 3 | 3
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0
## 0 | 59
## 1 |
## 1 | 6
## 2 |
## 2 |
## 3 | 3
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.0328 |
1 |
20 |
20 |
20 |
20 |
| 0.45507 |
1 |
20 |
20 |
40 |
40 |
| 0.91108 |
1 |
20 |
20 |
60 |
60 |
| 1.58358 |
1 |
20 |
20 |
80 |
80 |
| 3.30989 |
1 |
20 |
20 |
100 |
100 |
| Total |
5 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.0328 |
1 |
| 0.45507 |
1 |
| 0.91108 |
1 |
| 1.58358 |
1 |
| 3.30989 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.0328 2.0328 4.0328
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0328,2.03] |
3 |
0.75 |
3 |
| (2.03,4.03] |
1 |
0.25 |
4 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.0328,2.03]",..: 1 2
## $ Freq : int 3 1
## $ Rel_Freq: num 0.75 0.25
## $ Cum_Freq: int 3 4
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0328,2.03] |
3 |
| (2.03,4.03] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## id date time continent_code country_name country_code
## nbr.val 5.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.670000e+02 NA NA NA NA NA
## max 6.666000e+03 NA NA NA NA NA
## range 5.799000e+03 NA NA NA NA NA
## sum 1.990600e+04 NA NA NA NA NA
## median 3.981000e+03 NA NA NA NA NA
## mean 3.981200e+03 NA NA NA NA NA
## SE.mean 1.090856e+03 NA NA NA NA NA
## CI.mean.0.95 3.028702e+03 NA NA NA NA NA
## var 5.949836e+06 NA NA NA NA NA
## std.dev 2.439229e+03 NA NA NA NA NA
## coef.var 6.126868e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 5.000000e+00 NA 5.0000000 NA 5.00000000
## nbr.null NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## min NA 1.311000e+03 NA 0.0328000 NA 15.21640000
## max NA 1.410000e+04 NA 3.3098900 NA 15.80760000
## range NA 1.278900e+04 NA 3.2770900 NA 0.59120000
## sum NA 2.279900e+04 NA 6.2924200 NA 77.28930000
## median NA 2.121000e+03 NA 0.9110800 NA 15.39750000
## mean NA 4.559800e+03 NA 1.2584840 NA 15.45786000
## SE.mean NA 2.414889e+03 NA 0.5736906 NA 0.09800952
## CI.mean.0.95 NA 6.704806e+03 NA 1.5928205 NA 0.27211805
## var NA 2.915844e+07 NA 1.6456047 NA 0.04802933
## std.dev NA 5.399856e+03 NA 1.2828113 NA 0.21915594
## coef.var NA 1.184231e+00 NA 1.0193306 NA 0.01417764
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.199550e+01 NA NA NA
## max -9.131480e+01 NA NA NA
## range 6.807000e-01 NA NA NA
## sum -4.581902e+02 NA NA NA
## median -9.159600e+01 NA NA NA
## mean -9.163804e+01 NA NA NA
## SE.mean 1.151922e-01 NA NA NA
## CI.mean.0.95 3.198247e-01 NA NA NA
## var 6.634618e-02 NA NA NA
## std.dev 2.575775e-01 NA NA NA
## coef.var -2.810814e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2.000000 4.000000 NA
## nbr.null NA NA NA 1.000000 2.000000 NA
## nbr.na NA NA NA 3.000000 1.000000 NA
## min NA NA NA 0.000000 0.000000 NA
## max NA NA NA 7.000000 15.000000 NA
## range NA NA NA 7.000000 15.000000 NA
## sum NA NA NA 7.000000 21.000000 NA
## median NA NA NA 3.500000 3.000000 NA
## mean NA NA NA 3.500000 5.250000 NA
## SE.mean NA NA NA 3.500000 3.544362 NA
## CI.mean.0.95 NA NA NA 44.471717 11.279741 NA
## var NA NA NA 24.500000 50.250000 NA
## std.dev NA NA NA 4.949747 7.088723 NA
## coef.var NA NA NA 1.414214 1.350233 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.5212621 12.5832351
## max NA 52.6012250 96.3839826
## range NA 52.0799629 83.8007476
## sum NA 100.0000000 247.7466539
## median NA 14.4790081 39.9061093
## mean NA 20.0000000 49.5493308
## SE.mean NA 9.1171701 14.5483379
## CI.mean.0.95 NA 25.3133222 40.3926615
## var NA 415.6139506 1058.2706780
## std.dev NA 20.3866120 32.5310725
## coef.var NA 1.0193306 0.6565391
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Quetzaltenang (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Quetzaltenango")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Coatepeque 1.81216 1.81216 51.59924 51.59924
## Colomba 0.92729 2.73945 26.40355 78.00278
## Almolonga 0.77254 3.51199 21.99722 100.00000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 89
## 1 |
## 1 | 8
head(df_GT)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2385 9/4/10 <NA> <NA> Guatemala GT Quetz~ 19115
## 2 7439 10/6/15 <NA> <NA> Guatemala GT Quetz~ 45654
## 3 1904 5/26/10 <NA> <NA> Guatemala GT Quetz~ 11913
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 89
## 1 |
## 1 | 8
stem(df_GT$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 6 | 7
## 8 | 3
## 10 |
## 12 |
## 14 |
## 16 |
## 18 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.77254 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 0.92729 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 1.81216 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.77254 |
1 |
| 0.92729 |
1 |
| 1.81216 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.77254 1.77254 2.77254
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.773,1.77] |
1 |
0.5 |
1 |
| (1.77,2.77] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.773,1.77]",..: 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.773,1.77] |
1 |
| (1.77,2.77] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.904000e+03 NA NA NA NA NA
## max 7.439000e+03 NA NA NA NA NA
## range 5.535000e+03 NA NA NA NA NA
## sum 1.172800e+04 NA NA NA NA NA
## median 2.385000e+03 NA NA NA NA NA
## mean 3.909333e+03 NA NA NA NA NA
## SE.mean 1.770287e+03 NA NA NA NA NA
## CI.mean.0.95 7.616931e+03 NA NA NA NA NA
## var 9.401750e+06 NA NA NA NA NA
## std.dev 3.066227e+03 NA NA NA NA NA
## coef.var 7.843351e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.191300e+04 NA 0.7725400 NA
## max NA 4.565400e+04 NA 1.8121600 NA
## range NA 3.374100e+04 NA 1.0396200 NA
## sum NA 7.668200e+04 NA 3.5119900 NA
## median NA 1.911500e+04 NA 0.9272900 NA
## mean NA 2.556067e+04 NA 1.1706633 NA
## SE.mean NA 1.025953e+04 NA 0.3238443 NA
## CI.mean.0.95 NA 4.414319e+04 NA 1.3933895 NA
## var NA 3.157737e+08 NA 0.3146254 NA
## std.dev NA 1.777002e+04 NA 0.5609148 NA
## coef.var NA 6.952097e-01 NA 0.4791427 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.000000000 3.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 14.708500000 -9.187080e+01 NA NA NA
## max 14.812300000 -9.149440e+01 NA NA NA
## range 0.103800000 3.764000e-01 NA NA NA
## sum 44.236700000 -2.750966e+02 NA NA NA
## median 14.715900000 -9.173140e+01 NA NA NA
## mean 14.745566667 -9.169887e+01 NA NA NA
## SE.mean 0.033434978 1.098682e-01 NA NA NA
## CI.mean.0.95 0.143859101 4.727246e-01 NA NA NA
## var 0.003353693 3.621305e-02 NA NA NA
## std.dev 0.057911081 1.902973e-01 NA NA NA
## coef.var 0.003927355 -2.075241e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 3.0000000 NA
## nbr.null NA NA NA 1 1.0000000 NA
## nbr.na NA NA NA 2 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 4.0000000 NA
## range NA NA NA 0 4.0000000 NA
## sum NA NA NA 0 8.0000000 NA
## median NA NA NA 0 4.0000000 NA
## mean NA NA NA 0 2.6666667 NA
## SE.mean NA NA NA NA 1.3333333 NA
## CI.mean.0.95 NA NA NA NaN 5.7368703 NA
## var NA NA NA NA 5.3333333 NA
## std.dev NA NA NA NA 2.3094011 NA
## coef.var NA NA NA NA 0.8660254 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 21.9972153 13.2017745
## max NA 51.5992358 89.0013924
## range NA 29.6020205 75.7996179
## sum NA 100.0000000 154.4063337
## median NA 26.4035490 52.2031669
## mean NA 33.3333333 51.4687779
## SE.mean NA 9.2211051 21.8845456
## CI.mean.0.95 NA 39.6752130 94.1616000
## var NA 255.0863379 1436.8000131
## std.dev NA 15.9714225 37.9051449
## coef.var NA 0.4791427 0.7364687
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Quiché (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Quiché")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 22.56101 |
| 8.93658 |
| 0.35171 |
| 3.98185 |
| 2.27725 |
| 21.83272 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San Luis Ixcán 22.5610100 22.5610100 33.5030438 33.5030438
## Chicamán 21.8327200 44.3937300 32.4215350 65.9245788
## San Juan Cotzal 8.9365800 53.3303100 13.2707991 79.1953779
## Chajul 7.3990600 60.7293700 10.9875857 90.1829636
## Chinique 3.9818500 64.7112200 5.9130374 96.0960009
## Chichicastenango 2.2772500 66.9884700 3.3817106 99.4777115
## Sacapulas 0.3517100 67.3401800 0.5222885 100.0000000
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 024
## 0 | 79
## 1 |
## 1 |
## 2 | 23
head(df_GT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6664 3/12/14 Night <NA> Guatemala GT Quiché 7850
## 2 1924 5/30/10 <NA> <NA> Guatemala GT Quiché 11046
## 3 2408 9/10/10 <NA> <NA> Guatemala GT Quiché 12088
## 4 6668 6/24/14 <NA> <NA> Guatemala GT Quiché 2693
## 5 7427 9/27/15 <NA> <NA> Guatemala GT Quiché 79759
## 6 2386 9/4/10 <NA> <NA> Guatemala GT Quiché 2090
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 024
## 0 | 79
## 1 |
## 1 |
## 2 | 23
stem(df_GT$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 024
## 0 | 79
## 1 |
## 1 |
## 2 | 23
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.35171 |
1 |
14.3 |
14.3 |
14.3 |
14.3 |
| 2.27725 |
1 |
14.3 |
14.3 |
28.6 |
28.6 |
| 3.98185 |
1 |
14.3 |
14.3 |
42.9 |
42.9 |
| 7.39906 |
1 |
14.3 |
14.3 |
57.1 |
57.1 |
| 8.93658 |
1 |
14.3 |
14.3 |
71.4 |
71.4 |
| 21.83272 |
1 |
14.3 |
14.3 |
85.7 |
85.7 |
| 22.56101 |
1 |
14.3 |
14.3 |
100.0 |
100.0 |
| Total |
7 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 8 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 7
## $ % : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ val% : num 14.3 14.3 14.3 14.3 14.3 14.3 14.3 100
## $ %cum : num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
## $ val%cum: num 14.3 28.6 42.9 57.1 71.4 85.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.35171 |
1 |
| 2.27725 |
1 |
| 3.98185 |
1 |
| 7.39906 |
1 |
| 8.93658 |
1 |
| 21.83272 |
1 |
| 22.56101 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.35171 8.35171 16.35171 24.35171
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.352,8.35] |
3 |
0.5000000 |
3 |
| (8.35,16.4] |
1 |
0.1666667 |
4 |
| (16.4,24.4] |
2 |
0.3333333 |
6 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.352,8.35]",..: 1 2 3
## $ Freq : int 3 1 2
## $ Rel_Freq: num 0.5 0.167 0.333
## $ Cum_Freq: int 3 4 6
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.352,8.35] |
3 |
| (8.35,16.4] |
1 |
| (16.4,24.4] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## id date time continent_code country_name country_code
## nbr.val 7.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.924000e+03 NA NA NA NA NA
## max 7.427000e+03 NA NA NA NA NA
## range 5.503000e+03 NA NA NA NA NA
## sum 3.183800e+04 NA NA NA NA NA
## median 4.361000e+03 NA NA NA NA NA
## mean 4.548286e+03 NA NA NA NA NA
## SE.mean 8.922242e+02 NA NA NA NA NA
## CI.mean.0.95 2.183194e+03 NA NA NA NA NA
## var 5.572448e+06 NA NA NA NA NA
## std.dev 2.360603e+03 NA NA NA NA NA
## coef.var 5.190094e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 7.000000e+00 NA 7.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.090000e+03 NA 0.3517100 NA
## max NA 7.975900e+04 NA 22.5610100 NA
## range NA 7.766900e+04 NA 22.2093000 NA
## sum NA 1.271830e+05 NA 67.3401800 NA
## median NA 1.104600e+04 NA 7.3990600 NA
## mean NA 1.816900e+04 NA 9.6200257 NA
## SE.mean NA 1.038310e+04 NA 3.4282642 NA
## CI.mean.0.95 NA 2.540654e+04 NA 8.3886604 NA
## var NA 7.546621e+08 NA 82.2709699 NA
## std.dev NA 2.747111e+04 NA 9.0703346 NA
## coef.var NA 1.511977e+00 NA 0.9428597 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 7.00000000 7.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 14.92330000 -9.113520e+01 NA NA NA
## max 15.73180000 -9.085000e+01 NA NA NA
## range 0.80850000 2.852000e-01 NA NA NA
## sum 107.56980000 -6.369058e+02 NA NA NA
## median 15.44460000 -9.097780e+01 NA NA NA
## mean 15.36711429 -9.098654e+01 NA NA NA
## SE.mean 0.10867388 3.841296e-02 NA NA NA
## CI.mean.0.95 0.26591540 9.399313e-02 NA NA NA
## var 0.08267008 1.032889e-02 NA NA NA
## std.dev 0.28752405 1.016311e-01 NA NA NA
## coef.var 0.01871035 -1.116991e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3.0000000 7.0000000 NA
## nbr.null NA NA NA 2.0000000 1.0000000 NA
## nbr.na NA NA NA 4.0000000 0.0000000 NA
## min NA NA NA 0.0000000 0.0000000 NA
## max NA NA NA 1.0000000 5.0000000 NA
## range NA NA NA 1.0000000 5.0000000 NA
## sum NA NA NA 1.0000000 14.0000000 NA
## median NA NA NA 0.0000000 2.0000000 NA
## mean NA NA NA 0.3333333 2.0000000 NA
## SE.mean NA NA NA 0.3333333 0.6172134 NA
## CI.mean.0.95 NA NA NA 1.4342176 1.5102668 NA
## var NA NA NA 0.3333333 2.6666667 NA
## std.dev NA NA NA 0.5773503 1.6329932 NA
## coef.var NA NA NA 1.7320508 0.8164966 NA
## source_link prop ypos
## nbr.val NA 7.0000000 7.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.5222885 16.7515219
## max NA 33.5030438 94.5062071
## range NA 32.9807553 77.7546852
## sum NA 100.0000000 376.3854804
## median NA 10.9875857 50.2526501
## mean NA 14.2857143 53.7693543
## SE.mean NA 5.0909639 9.3054494
## CI.mean.0.95 NA 12.4571399 22.7696144
## var NA 181.4253926 606.1397184
## std.dev NA 13.4694244 24.6199049
## coef.var NA 0.9428597 0.4578799
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Sacatepéquez (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Sacatepéquez")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santa María De Jesús 6.135270 6.135270 74.655032 74.655032
## Santa Catarina Barahona 1.452000 7.587270 17.668188 92.323221
## Jocotenango 0.630890 8.218160 7.676779 100.000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 65
## 2 |
## 4 |
## 6 | 1
head(df_GT)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1917 5/29/10 <NA> <NA> Guatemala GT Sacat~ 15529
## 2 7432 9/23/15 <NA> <NA> Guatemala GT Sacat~ 3214
## 3 2387 9/4/10 <NA> <NA> Guatemala GT Sacat~ 17918
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 65
## 2 |
## 4 |
## 6 | 1
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 1 | 5
## 2 |
## 3 |
## 4 |
## 5 |
## 6 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.63089 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 1.452 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 6.13527 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.63089 |
1 |
| 1.452 |
1 |
| 6.13527 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.63089 2.63089 4.63089 6.63089
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.631,2.63] |
1 |
0.5 |
1 |
| (2.63,4.63] |
0 |
0.0 |
1 |
| (4.63,6.63] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.631,2.63]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.631,2.63] |
1 |
| (2.63,4.63] |
0 |
| (4.63,6.63] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.917000e+03 NA NA NA NA NA
## max 7.432000e+03 NA NA NA NA NA
## range 5.515000e+03 NA NA NA NA NA
## sum 1.173600e+04 NA NA NA NA NA
## median 2.387000e+03 NA NA NA NA NA
## mean 3.912000e+03 NA NA NA NA NA
## SE.mean 1.765222e+03 NA NA NA NA NA
## CI.mean.0.95 7.595137e+03 NA NA NA NA NA
## var 9.348025e+06 NA NA NA NA NA
## std.dev 3.057454e+03 NA NA NA NA NA
## coef.var 7.815578e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.000000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000000
## min NA 3.214000e+03 NA 0.630890 NA 14.465100000
## max NA 1.791800e+04 NA 6.135270 NA 14.583200000
## range NA 1.470400e+04 NA 5.504380 NA 0.118100000
## sum NA 3.666100e+04 NA 8.218160 NA 43.604100000
## median NA 1.552900e+04 NA 1.452000 NA 14.555800000
## mean NA 1.222033e+04 NA 2.739387 NA 14.534700000
## SE.mean NA 4.555669e+03 NA 1.714407 NA 0.035687580
## CI.mean.0.95 NA 1.960146e+04 NA 7.376498 NA 0.153551262
## var NA 6.226236e+07 NA 8.817573 NA 0.003820810
## std.dev NA 7.890650e+03 NA 2.969440 NA 0.061812701
## coef.var NA 6.456984e-01 NA 1.083980 NA 0.004252768
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -9.079980e+01 NA NA NA
## max -9.073790e+01 NA NA NA
## range 6.190000e-02 NA NA NA
## sum -2.722803e+02 NA NA NA
## median -9.074260e+01 NA NA NA
## mean -9.076010e+01 NA NA NA
## SE.mean 1.989631e-02 NA NA NA
## CI.mean.0.95 8.560693e-02 NA NA NA
## var 1.187590e-03 NA NA NA
## std.dev 3.446143e-02 NA NA NA
## coef.var -3.796980e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 3.0000000 NA
## nbr.null NA NA NA 1 2.0000000 NA
## nbr.na NA NA NA 2 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 1.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.3333333 NA
## SE.mean NA NA NA NA 0.3333333 NA
## CI.mean.0.95 NA NA NA NaN 1.4342176 NA
## var NA NA NA NA 0.3333333 NA
## std.dev NA NA NA NA 0.5773503 NA
## coef.var NA NA NA NA 1.7320508 NA
## source_link prop ypos
## nbr.val NA 3.000000 3.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 7.676779 37.3275161
## max NA 74.655032 96.1616104
## range NA 66.978253 58.8340942
## sum NA 100.000000 216.9782530
## median NA 17.668188 83.4891265
## mean NA 33.333333 72.3260843
## SE.mean NA 20.861201 17.8775725
## CI.mean.0.95 NA 89.758505 76.9209861
## var NA 1305.569167 958.8227945
## std.dev NA 36.132661 30.9648639
## coef.var NA 1.083980 0.4281286
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San Marcos (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "San Marcos")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Pajapita 0.96647 0.96647 56.06755 56.06755
## Tacaná 0.75729 1.72376 43.93245 100.00000
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 7 | 6
## 8 |
## 8 |
## 9 |
## 9 | 7
head(df_GT)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6669 9/27/14 <NA> <NA> Guatemala GT San ~ 6438
## 2 5475 9/7/13 0:13:00 <NA> Guatemala GT San ~ 8164
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 7 | 6
## 8 |
## 8 |
## 9 |
## 9 | 7
stem(df_GT$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 7 | 6
## 8 |
## 8 |
## 9 |
## 9 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.75729 |
1 |
50 |
50 |
50 |
50 |
| 0.96647 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.75729 1.75729
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ distance: Factor w/ 1 level "(0.757,1.76]": 1
## $ Freq : int 1
## $ Rel_Freq: num 1
## $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 5.475000e+03 NA NA NA NA NA
## max 6.669000e+03 NA NA NA NA NA
## range 1.194000e+03 NA NA NA NA NA
## sum 1.214400e+04 NA NA NA NA NA
## median 6.072000e+03 NA NA NA NA NA
## mean 6.072000e+03 NA NA NA NA NA
## SE.mean 5.970000e+02 NA NA NA NA NA
## CI.mean.0.95 7.585604e+03 NA NA NA NA NA
## var 7.128180e+05 NA NA NA NA NA
## std.dev 8.442855e+02 NA NA NA NA NA
## coef.var 1.390457e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.00000000 NA
## nbr.null NA 0.000000e+00 NA 0.00000000 NA
## nbr.na NA 0.000000e+00 NA 0.00000000 NA
## min NA 6.438000e+03 NA 0.75729000 NA
## max NA 8.164000e+03 NA 0.96647000 NA
## range NA 1.726000e+03 NA 0.20918000 NA
## sum NA 1.460200e+04 NA 1.72376000 NA
## median NA 7.301000e+03 NA 0.86188000 NA
## mean NA 7.301000e+03 NA 0.86188000 NA
## SE.mean NA 8.630000e+02 NA 0.10459000 NA
## CI.mean.0.95 NA 1.096545e+04 NA 1.32894195 NA
## var NA 1.489538e+06 NA 0.02187814 NA
## std.dev NA 1.220466e+03 NA 0.14791260 NA
## coef.var NA 1.671643e-01 NA 0.17161623 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.00000000 2.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 14.72540000 -9.207380e+01 NA NA NA
## max 15.24220000 -9.203350e+01 NA NA NA
## range 0.51680000 4.030000e-02 NA NA NA
## sum 29.96760000 -1.841073e+02 NA NA NA
## median 14.98380000 -9.205365e+01 NA NA NA
## mean 14.98380000 -9.205365e+01 NA NA NA
## SE.mean 0.25840000 2.015000e-02 NA NA NA
## CI.mean.0.95 3.28328330 2.560300e-01 NA NA NA
## var 0.13354112 8.120450e-04 NA NA NA
## std.dev 0.36543278 2.849640e-02 NA NA NA
## coef.var 0.02438853 -3.095630e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2.000000 NA
## nbr.null NA NA NA 1 1.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 2.000000 NA
## range NA NA NA 0 2.000000 NA
## sum NA NA NA 0 2.000000 NA
## median NA NA NA 0 1.000000 NA
## mean NA NA NA 0 1.000000 NA
## SE.mean NA NA NA NA 1.000000 NA
## CI.mean.0.95 NA NA NA NaN 12.706205 NA
## var NA NA NA NA 2.000000 NA
## std.dev NA NA NA NA 1.414214 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.0000000 2.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 43.9324500 21.9662250
## max NA 56.0675500 71.9662250
## range NA 12.1351000 50.0000000
## sum NA 100.0000000 93.9324500
## median NA 50.0000000 46.9662250
## mean NA 50.0000000 46.9662250
## SE.mean NA 6.0675500 25.0000000
## CI.mean.0.95 NA 77.0955326 317.6551184
## var NA 73.6303262 1250.0000000
## std.dev NA 8.5808115 35.3553391
## coef.var NA 0.1716162 0.7527822
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Sololá (Guatemala)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_GT <- subset(df, country_name == "Guatemala")
knitr::kable(head(df_GT))
library(dplyr)
df_GT <- subset(df, state == "Sololá")
knitr::kable(head(df_GT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_GT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_GT <- df_GT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_GT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_GT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_GT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 7.03115 |
| 6.00513 |
| 0.50611 |
| 3.85753 |
| 3.18658 |
| 0.00359 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_GT$distance
names(distance) <- df_GT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Zunil 7.03115000 7.03115000 28.05305349 28.05305349
## Santa Cruz La Laguna 6.00513000 13.03628000 23.95941391 52.01246740
## San Pablo La Laguna 3.85753000 16.89381000 15.39086713 67.40333454
## San Antonio Palopó 3.18658000 20.08039000 12.71389448 80.11722902
## Nahualá 3.04642000 23.12681000 12.15468070 92.27190972
## San Andrés Semetabaj 0.75685000 23.88366000 3.01969856 95.29160828
## San Andrés Semetabaj 0.67040000 24.55406000 2.67477825 97.96638653
## San Pedro La Laguna 0.50611000 25.06017000 2.01929000 99.98567653
## San Andrés Semetabaj 0.00359000 25.06376000 0.01432347 100.00000000
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 0578
## 2 | 029
## 4 |
## 6 | 00
head(df_GT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6586 10/20/14 <NA> <NA> Guatemala GT Sololá 9986
## 2 1920 5/30/10 <NA> <NA> Guatemala GT Sololá 1422
## 3 1922 5/29/10 Night <NA> Guatemala GT Sololá 9681
## 4 2021 6/27/10 <NA> <NA> Guatemala GT Sololá 6186
## 5 7416 9/27/15 <NA> <NA> Guatemala GT Sololá 3588
## 6 2353 8/28/10 <NA> <NA> Guatemala GT Sololá 2605
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_GT))
stem(df_GT$"distance")
##
## The decimal point is at the |
##
## 0 | 0578
## 2 | 029
## 4 |
## 6 | 00
stem(df_GT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 0578
## 1 |
## 2 |
## 3 | 029
## 4 |
## 5 |
## 6 | 0
## 7 | 0
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.00359 |
1 |
11.1 |
11.1 |
11.1 |
11.1 |
| 0.50611 |
1 |
11.1 |
11.1 |
22.2 |
22.2 |
| 0.6704 |
1 |
11.1 |
11.1 |
33.3 |
33.3 |
| 0.75685 |
1 |
11.1 |
11.1 |
44.4 |
44.4 |
| 3.04642 |
1 |
11.1 |
11.1 |
55.6 |
55.6 |
| 3.18658 |
1 |
11.1 |
11.1 |
66.7 |
66.7 |
| 3.85753 |
1 |
11.1 |
11.1 |
77.8 |
77.8 |
| 6.00513 |
1 |
11.1 |
11.1 |
88.9 |
88.9 |
| 7.03115 |
1 |
11.1 |
11.1 |
100.0 |
100.0 |
| Total |
9 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 10 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 9
## $ % : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ val% : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ %cum : num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
## $ val%cum: num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.00359 |
1 |
| 0.50611 |
1 |
| 0.6704 |
1 |
| 0.75685 |
1 |
| 3.04642 |
1 |
| 3.18658 |
1 |
| 3.85753 |
1 |
| 6.00513 |
1 |
| 7.03115 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.00359 2.00359 4.00359 6.00359 8.00359
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.00359,2] |
3 |
0.375 |
3 |
| (2,4] |
3 |
0.375 |
6 |
| (4,6] |
0 |
0.000 |
6 |
| (6,8] |
2 |
0.250 |
8 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.00359,2]",..: 1 2 3 4
## $ Freq : int 3 3 0 2
## $ Rel_Freq: num 0.375 0.375 0 0.25
## $ Cum_Freq: int 3 6 6 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.00359,2] |
3 |
| (2,4] |
3 |
| (4,6] |
0 |
| (6,8] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_GT)
## id date time continent_code country_name country_code
## nbr.val 9.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.920000e+03 NA NA NA NA NA
## max 7.436000e+03 NA NA NA NA NA
## range 5.516000e+03 NA NA NA NA NA
## sum 3.945500e+04 NA NA NA NA NA
## median 2.384000e+03 NA NA NA NA NA
## mean 4.383889e+03 NA NA NA NA NA
## SE.mean 9.005946e+02 NA NA NA NA NA
## CI.mean.0.95 2.076775e+03 NA NA NA NA NA
## var 7.299636e+06 NA NA NA NA NA
## std.dev 2.701784e+03 NA NA NA NA NA
## coef.var 6.162984e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 9.000000e+00 NA 9.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.422000e+03 NA 0.0035900 NA
## max NA 2.769000e+04 NA 7.0311500 NA
## range NA 2.626800e+04 NA 7.0275600 NA
## sum NA 6.636800e+04 NA 25.0637600 NA
## median NA 3.588000e+03 NA 3.0464200 NA
## mean NA 7.374222e+03 NA 2.7848622 NA
## SE.mean NA 2.749460e+03 NA 0.8440434 NA
## CI.mean.0.95 NA 6.340266e+03 NA 1.9463675 NA
## var NA 6.803577e+07 NA 6.4116826 NA
## std.dev NA 8.248380e+03 NA 2.5321301 NA
## coef.var NA 1.118542e+00 NA 0.9092479 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 9.000000e+00 9.000000e+00 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.466850e+01 -9.132270e+01 NA NA NA
## max 1.482310e+01 -9.108730e+01 NA NA NA
## range 1.546000e-01 2.354000e-01 NA NA NA
## sum 1.327115e+02 -8.206534e+02 NA NA NA
## median 1.475680e+01 -9.113440e+01 NA NA NA
## mean 1.474572e+01 -9.118371e+01 NA NA NA
## SE.mean 1.648067e-02 2.814259e-02 NA NA NA
## CI.mean.0.95 3.800449e-02 6.489692e-02 NA NA NA
## var 2.444512e-03 7.128046e-03 NA NA NA
## std.dev 4.944201e-02 8.442776e-02 NA NA NA
## coef.var 3.352973e-03 -9.259083e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 9.000000 NA
## nbr.null NA NA NA 4 4.000000 NA
## nbr.na NA NA NA 5 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 25.000000 NA
## range NA NA NA 0 25.000000 NA
## sum NA NA NA 0 48.000000 NA
## median NA NA NA 0 1.000000 NA
## mean NA NA NA 0 5.333333 NA
## SE.mean NA NA NA 0 2.901149 NA
## CI.mean.0.95 NA NA NA 0 6.690062 NA
## var NA NA NA 0 75.750000 NA
## std.dev NA NA NA 0 8.703448 NA
## coef.var NA NA NA NaN 1.631896 NA
## source_link prop ypos
## nbr.val NA 9.00000000 9.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 0.01432347 14.0265267
## max NA 28.05305349 93.9226596
## range NA 28.03873002 79.8961329
## sum NA 100.00000000 590.4782044
## median NA 12.15468070 75.7795718
## mean NA 11.11111111 65.6086894
## SE.mean NA 3.36758473 8.6731921
## CI.mean.0.95 NA 7.76566431 20.0004168
## var NA 102.06564205 677.0183450
## std.dev NA 10.10275418 26.0195762
## coef.var NA 0.90924788 0.3965873
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Jamaica
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.57363 |
| 0.21825 |
| 6.51940 |
| 1.71217 |
| 4.53632 |
| 1.24727 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Saint Andrew 10.8446700 10.8446700 7.3695199 7.3695199
## Saint Andrew 9.8305100 20.6751800 6.6803451 14.0498650
## Clarendon 9.5395300 30.2147100 6.4826090 20.5324740
## Saint Mary 8.7463500 38.9610600 5.9436018 26.4760758
## Portland 7.8126900 46.7737500 5.3091310 31.7852069
## Portland 7.7902700 54.5640200 5.2938955 37.0791023
## Portland 7.5231700 62.0871900 5.1123871 42.1914894
## Saint Catherine 6.7126900 68.7998800 4.5616236 46.7531130
## Saint Thomas 6.5194000 75.3192800 4.4302729 51.1833859
## Saint Andrew 5.9873100 81.3065900 4.0686900 55.2520758
## Saint Catherine 5.8653000 87.1718900 3.9857778 59.2378536
## Saint Catherine 5.4638100 92.6357000 3.7129444 62.9507980
## Portland 5.0235100 97.6592100 3.4137375 66.3645355
## Portland 4.6873200 102.3465300 3.1852788 69.5498143
## St. Elizabeth 4.5736300 106.9201600 3.1080205 72.6578348
## Saint Thomas 4.5363200 111.4564800 3.0826664 75.7405013
## Portland 4.4694200 115.9259000 3.0372044 78.7777057
## Portland 4.1345400 120.0604400 2.8096359 81.5873416
## Saint Ann 3.9653300 124.0257700 2.6946489 84.2819905
## Saint Mary 3.7758000 127.8015700 2.5658534 86.8478439
## Portland 3.6079900 131.4095600 2.4518177 89.2996616
## Portland 3.1793000 134.5888600 2.1605005 91.4601621
## Portland 2.7888500 137.3777100 1.8951693 93.3553313
## Saint Andrew 2.1387800 139.5164900 1.4534128 94.8087441
## Saint Thomas 1.7121700 141.2286600 1.1635090 95.9722531
## Saint Mary 1.2472700 142.4759300 0.8475851 96.8198382
## Saint Ann 1.0819600 143.5578900 0.7352483 97.5550865
## Saint Andrew 0.9651400 144.5230300 0.6558631 98.2109496
## Portland 0.7177300 145.2407600 0.4877350 98.6986846
## Portland 0.5959900 145.8367500 0.4050063 99.1036910
## Clarendon 0.5618600 146.3986100 0.3818132 99.4855042
## Saint Ann 0.5388600 146.9374700 0.3661835 99.8516877
## Saint Thomas 0.2182500 147.1557200 0.1483123 100.0000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 256670127
## 2 | 18268
## 4 | 015567059
## 6 | 057588
## 8 | 758
## 10 | 8
head(df_JA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 304 10/12/07 <NA> <NA> Jamaica JM St. ~ 1371
## 2 314 10/17/07 <NA> <NA> Jamaica JM Sain~ 2382
## 3 339 10/31/07 <NA> <NA> Jamaica JM Sain~ 2634
## 4 774 9/4/08 <NA> <NA> Jamaica JM Sain~ 2382
## 5 1760 4/18/10 <NA> <NA> Jamaica JM Sain~ 2634
## 6 2517 9/29/10 <NA> <NA> Jamaica JM Sain~ 2046
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 256670127
## 2 | 18268
## 4 | 015567059
## 6 | 057588
## 8 | 758
## 10 | 8
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 25667
## 1 | 0127
## 2 | 18
## 3 | 268
## 4 | 015567
## 5 | 059
## 6 | 057
## 7 | 588
## 8 | 7
## 9 | 58
## 10 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.21825 |
1 |
3 |
3 |
3.0 |
3.0 |
| 0.53886 |
1 |
3 |
3 |
6.1 |
6.1 |
| 0.56186 |
1 |
3 |
3 |
9.1 |
9.1 |
| 0.59599 |
1 |
3 |
3 |
12.1 |
12.1 |
| 0.71773 |
1 |
3 |
3 |
15.2 |
15.2 |
| 0.96514 |
1 |
3 |
3 |
18.2 |
18.2 |
| 1.08196 |
1 |
3 |
3 |
21.2 |
21.2 |
| 1.24727 |
1 |
3 |
3 |
24.2 |
24.2 |
| 1.71217 |
1 |
3 |
3 |
27.3 |
27.3 |
| 2.13878 |
1 |
3 |
3 |
30.3 |
30.3 |
| 2.78885 |
1 |
3 |
3 |
33.3 |
33.3 |
| 3.1793 |
1 |
3 |
3 |
36.4 |
36.4 |
| 3.60799 |
1 |
3 |
3 |
39.4 |
39.4 |
| 3.7758 |
1 |
3 |
3 |
42.4 |
42.4 |
| 3.96533 |
1 |
3 |
3 |
45.5 |
45.5 |
| 4.13454 |
1 |
3 |
3 |
48.5 |
48.5 |
| 4.46942 |
1 |
3 |
3 |
51.5 |
51.5 |
| 4.53632 |
1 |
3 |
3 |
54.5 |
54.5 |
| 4.57363 |
1 |
3 |
3 |
57.6 |
57.6 |
| 4.68732 |
1 |
3 |
3 |
60.6 |
60.6 |
| 5.02351 |
1 |
3 |
3 |
63.6 |
63.6 |
| 5.46381 |
1 |
3 |
3 |
66.7 |
66.7 |
| 5.8653 |
1 |
3 |
3 |
69.7 |
69.7 |
| 5.98731 |
1 |
3 |
3 |
72.7 |
72.7 |
| 6.5194 |
1 |
3 |
3 |
75.8 |
75.8 |
| 6.71269 |
1 |
3 |
3 |
78.8 |
78.8 |
| 7.52317 |
1 |
3 |
3 |
81.8 |
81.8 |
| 7.79027 |
1 |
3 |
3 |
84.8 |
84.8 |
| 7.81269 |
1 |
3 |
3 |
87.9 |
87.9 |
| 8.74635 |
1 |
3 |
3 |
90.9 |
90.9 |
| 9.53953 |
1 |
3 |
3 |
93.9 |
93.9 |
| 9.83051 |
1 |
3 |
3 |
97.0 |
97.0 |
| 10.84467 |
1 |
3 |
3 |
100.0 |
100.0 |
| Total |
33 |
100 |
100 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 34 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 3 3 3 3 3 3 3 3 3 3 ...
## $ val% : num 3 3 3 3 3 3 3 3 3 3 ...
## $ %cum : num 3 6.1 9.1 12.1 15.2 18.2 21.2 24.2 27.3 30.3 ...
## $ val%cum: num 3 6.1 9.1 12.1 15.2 18.2 21.2 24.2 27.3 30.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.21825 |
1 |
| 0.53886 |
1 |
| 0.56186 |
1 |
| 0.59599 |
1 |
| 0.71773 |
1 |
| 0.96514 |
1 |
| 1.08196 |
1 |
| 1.24727 |
1 |
| 1.71217 |
1 |
| 2.13878 |
1 |
| 2.78885 |
1 |
| 3.1793 |
1 |
| 3.60799 |
1 |
| 3.7758 |
1 |
| 3.96533 |
1 |
| 4.13454 |
1 |
| 4.46942 |
1 |
| 4.53632 |
1 |
| 4.57363 |
1 |
| 4.68732 |
1 |
| 5.02351 |
1 |
| 5.46381 |
1 |
| 5.8653 |
1 |
| 5.98731 |
1 |
| 6.5194 |
1 |
| 6.71269 |
1 |
| 7.52317 |
1 |
| 7.79027 |
1 |
| 7.81269 |
1 |
| 8.74635 |
1 |
| 9.53953 |
1 |
| 9.83051 |
1 |
| 10.84467 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.21825 2.21825 4.21825 6.21825 8.21825 10.21825 12.21825
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.218,2.22] |
9 |
0.28125 |
9 |
| (2.22,4.22] |
6 |
0.18750 |
15 |
| (4.22,6.22] |
8 |
0.25000 |
23 |
| (6.22,8.22] |
5 |
0.15625 |
28 |
| (8.22,10.2] |
3 |
0.09375 |
31 |
| (10.2,12.2] |
1 |
0.03125 |
32 |
str(Freq_table)
## 'data.frame': 6 obs. of 4 variables:
## $ distance: Factor w/ 6 levels "(0.218,2.22]",..: 1 2 3 4 5 6
## $ Freq : int 9 6 8 5 3 1
## $ Rel_Freq: num 0.2812 0.1875 0.25 0.1562 0.0938 ...
## $ Cum_Freq: int 9 15 23 28 31 32
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.218,2.22] |
9 |
| (2.22,4.22] |
6 |
| (4.22,6.22] |
8 |
| (6.22,8.22] |
5 |
| (8.22,10.2] |
3 |
| (10.2,12.2] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## id date time continent_code country_name country_code
## nbr.val 3.300000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.740000e+02 NA NA NA NA NA
## max 7.474000e+03 NA NA NA NA NA
## range 7.300000e+03 NA NA NA NA NA
## sum 8.120500e+04 NA NA NA NA NA
## median 2.233000e+03 NA NA NA NA NA
## mean 2.460758e+03 NA NA NA NA NA
## SE.mean 3.719434e+02 NA NA NA NA NA
## CI.mean.0.95 7.576240e+02 NA NA NA NA NA
## var 4.565283e+06 NA NA NA NA NA
## std.dev 2.136652e+03 NA NA NA NA NA
## coef.var 8.682905e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.300000e+01 NA 33.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.088000e+03 NA 0.2182500 NA
## max NA 4.475500e+04 NA 10.8446700 NA
## range NA 4.366700e+04 NA 10.6264200 NA
## sum NA 2.342270e+05 NA 147.1557200 NA
## median NA 2.757000e+03 NA 4.4694200 NA
## mean NA 7.097788e+03 NA 4.4592642 NA
## SE.mean NA 1.474547e+03 NA 0.5248336 NA
## CI.mean.0.95 NA 3.003553e+03 NA 1.0690510 NA
## var NA 7.175151e+07 NA 9.0898595 NA
## std.dev NA 8.470626e+03 NA 3.0149394 NA
## coef.var NA 1.193418e+00 NA 0.6761069 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.300000e+01 3.300000e+01 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.793840e+01 -7.774050e+01 NA NA NA
## max 1.843130e+01 -7.627810e+01 NA NA NA
## range 4.929000e-01 1.462400e+00 NA NA NA
## sum 5.982112e+02 -2.532567e+03 NA NA NA
## median 1.812570e+01 -7.668520e+01 NA NA NA
## mean 1.812761e+01 -7.674444e+01 NA NA NA
## SE.mean 2.204383e-02 6.043166e-02 NA NA NA
## CI.mean.0.95 4.490182e-02 1.230953e-01 NA NA NA
## var 1.603571e-02 1.205155e-01 NA NA NA
## std.dev 1.266322e-01 3.471534e-01 NA NA NA
## coef.var 6.985596e-03 -4.523499e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 23.0000000 NA
## nbr.null NA NA NA 4 20.0000000 NA
## nbr.na NA NA NA 29 10.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 7.0000000 NA
## range NA NA NA 0 7.0000000 NA
## sum NA NA NA 0 10.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.4347826 NA
## SE.mean NA NA NA 0 0.3132520 NA
## CI.mean.0.95 NA NA NA 0 0.6496449 NA
## var NA NA NA 0 2.2569170 NA
## std.dev NA NA NA 0 1.5023039 NA
## coef.var NA NA NA NaN 3.4552990 NA
## source_link prop ypos
## nbr.val NA 33.0000000 33.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.1483123 1.5540103
## max NA 7.3695199 99.8090934
## range NA 7.2212076 98.2550831
## sum NA 100.0000000 1613.0748027
## median NA 3.0372044 46.7168011
## mean NA 3.0303030 48.8810546
## SE.mean NA 0.3566518 5.3099807
## CI.mean.0.95 NA 0.7264760 10.8160768
## var NA 4.1976176 930.4645430
## std.dev NA 2.0488088 30.5035169
## coef.var NA 0.6761069 0.6240356
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Clarendon (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Clarendon")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Chapelton 9.539530 9.539530 94.437795 94.437795
## May Pen 0.561860 10.101390 5.562205 100.000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 6
## 2 |
## 4 |
## 6 |
## 8 | 5
head(df_JA)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1762 4/18/10 <NA> <NA> Jamaica JM Clarendon 44755
## 2 1761 4/18/10 <NA> <NA> Jamaica JM Clarendon 4514
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 6
## 2 |
## 4 |
## 6 |
## 8 | 5
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 |
## 9 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.56186 |
1 |
50 |
50 |
50 |
50 |
| 9.53953 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.56186 5.56186 10.56186
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.562,5.56] |
0 |
0 |
0 |
| (5.56,10.6] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.562,5.56]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.562,5.56] |
0 |
| (5.56,10.6] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.761000e+03 NA NA NA NA NA
## max 1.762000e+03 NA NA NA NA NA
## range 1.000000e+00 NA NA NA NA NA
## sum 3.523000e+03 NA NA NA NA NA
## median 1.761500e+03 NA NA NA NA NA
## mean 1.761500e+03 NA NA NA NA NA
## SE.mean 5.000000e-01 NA NA NA NA NA
## CI.mean.0.95 6.353102e+00 NA NA NA NA NA
## var 5.000000e-01 NA NA NA NA NA
## std.dev 7.071068e-01 NA NA NA NA NA
## coef.var 4.014231e-04 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 4.514000e+03 NA 0.561860 NA
## max NA 4.475500e+04 NA 9.539530 NA
## range NA 4.024100e+04 NA 8.977670 NA
## sum NA 4.926900e+04 NA 10.101390 NA
## median NA 2.463450e+04 NA 5.050695 NA
## mean NA 2.463450e+04 NA 5.050695 NA
## SE.mean NA 2.012050e+04 NA 4.488835 NA
## CI.mean.0.95 NA 2.556552e+05 NA 57.036057 NA
## var NA 8.096690e+08 NA 40.299279 NA
## std.dev NA 2.845468e+04 NA 6.348171 NA
## coef.var NA 1.155075e+00 NA 1.256891 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 17.969300000 -7.733100e+01 NA NA NA
## max 18.143700000 -7.724330e+01 NA NA NA
## range 0.174400000 8.770000e-02 NA NA NA
## sum 36.113000000 -1.545743e+02 NA NA NA
## median 18.056500000 -7.728715e+01 NA NA NA
## mean 18.056500000 -7.728715e+01 NA NA NA
## SE.mean 0.087200000 4.385000e-02 NA NA NA
## CI.mean.0.95 1.107981053 5.571671e-01 NA NA NA
## var 0.015207680 3.845645e-03 NA NA NA
## std.dev 0.123319423 6.201326e-02 NA NA NA
## coef.var 0.006829642 -8.023748e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2 NA
## nbr.null NA NA NA 0 2 NA
## nbr.na NA NA NA 2 0 NA
## min NA NA NA Inf 0 NA
## max NA NA NA -Inf 0 NA
## range NA NA NA -Inf 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA NA 0 NA
## mean NA NA NA NaN 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link prop ypos
## nbr.val NA 2.000000 2.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 5.562205 2.781102
## max NA 94.437795 52.781102
## range NA 88.875590 50.000000
## sum NA 100.000000 55.562205
## median NA 50.000000 27.781102
## mean NA 50.000000 27.781102
## SE.mean NA 44.437795 25.000000
## CI.mean.0.95 NA 564.635724 317.655118
## var NA 3949.435284 1250.000000
## std.dev NA 62.844533 35.355339
## coef.var NA 1.256891 1.272640
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Portland (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Portland")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 7.79027 |
| 4.46942 |
| 3.60799 |
| 4.68732 |
| 5.02351 |
| 0.59599 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Moore Town 7.812690 7.812690 14.929435 14.929435
## Port Antonio 7.790270 15.602960 14.886593 29.816028
## Buff Bay 7.523170 23.126130 14.376185 44.192213
## Moore Town 5.023510 28.149640 9.599532 53.791746
## Port Antonio 4.687320 32.836960 8.957099 62.748845
## Port Antonio 4.469420 37.306380 8.540710 71.289555
## Manchioneal 4.134540 41.440920 7.900780 79.190335
## Port Antonio 3.607990 45.048910 6.894585 86.084920
## Moore Town 3.179300 48.228210 6.075392 92.160312
## Buff Bay 2.788850 51.017060 5.329273 97.489585
## Manchioneal 0.717730 51.734790 1.371526 98.861110
## Moore Town 0.595990 52.330780 1.138890 100.000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 67
## 2 | 826
## 4 | 1570
## 6 | 588
head(df_JA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 174 8/11/07 <NA> <NA> Jamaica JM Portland 14400
## 2 1764 4/18/10 <NA> <NA> Jamaica JM Portland 14400
## 3 6345 9/8/14 0:15 <NA> Jamaica JM Portland 14400
## 4 7474 12/1/15 <NA> <NA> Jamaica JM Portland 14400
## 5 1391 1/10/10 <NA> <NA> Jamaica JM Portland 1119
## 6 2418 9/13/10 Night <NA> Jamaica JM Portland 1119
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 67
## 2 | 826
## 4 | 1570
## 6 | 588
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 67
## 1 |
## 2 | 8
## 3 | 26
## 4 | 157
## 5 | 0
## 6 |
## 7 | 588
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.59599 |
1 |
8.3 |
8.3 |
8.3 |
8.3 |
| 0.71773 |
1 |
8.3 |
8.3 |
16.7 |
16.7 |
| 2.78885 |
1 |
8.3 |
8.3 |
25.0 |
25.0 |
| 3.1793 |
1 |
8.3 |
8.3 |
33.3 |
33.3 |
| 3.60799 |
1 |
8.3 |
8.3 |
41.7 |
41.7 |
| 4.13454 |
1 |
8.3 |
8.3 |
50.0 |
50.0 |
| 4.46942 |
1 |
8.3 |
8.3 |
58.3 |
58.3 |
| 4.68732 |
1 |
8.3 |
8.3 |
66.7 |
66.7 |
| 5.02351 |
1 |
8.3 |
8.3 |
75.0 |
75.0 |
| 7.52317 |
1 |
8.3 |
8.3 |
83.3 |
83.3 |
| 7.79027 |
1 |
8.3 |
8.3 |
91.7 |
91.7 |
| 7.81269 |
1 |
8.3 |
8.3 |
100.0 |
100.0 |
| Total |
12 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 13 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ val% : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ %cum : num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
## $ val%cum: num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.59599 |
1 |
| 0.71773 |
1 |
| 2.78885 |
1 |
| 3.1793 |
1 |
| 3.60799 |
1 |
| 4.13454 |
1 |
| 4.46942 |
1 |
| 4.68732 |
1 |
| 5.02351 |
1 |
| 7.52317 |
1 |
| 7.79027 |
1 |
| 7.81269 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.59599 2.59599 4.59599 6.59599 8.59599
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.596,2.6] |
1 |
0.0909091 |
1 |
| (2.6,4.6] |
5 |
0.4545455 |
6 |
| (4.6,6.6] |
2 |
0.1818182 |
8 |
| (6.6,8.6] |
3 |
0.2727273 |
11 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.596,2.6]",..: 1 2 3 4
## $ Freq : int 1 5 2 3
## $ Rel_Freq: num 0.0909 0.4545 0.1818 0.2727
## $ Cum_Freq: int 1 6 8 11
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.596,2.6] |
1 |
| (2.6,4.6] |
5 |
| (4.6,6.6] |
2 |
| (6.6,8.6] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## id date time continent_code country_name country_code
## nbr.val 1.200000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.740000e+02 NA NA NA NA NA
## max 7.474000e+03 NA NA NA NA NA
## range 7.300000e+03 NA NA NA NA NA
## sum 3.885800e+04 NA NA NA NA NA
## median 2.623000e+03 NA NA NA NA NA
## mean 3.238167e+03 NA NA NA NA NA
## SE.mean 6.142102e+02 NA NA NA NA NA
## CI.mean.0.95 1.351868e+03 NA NA NA NA NA
## var 4.527050e+06 NA NA NA NA NA
## std.dev 2.127687e+03 NA NA NA NA NA
## coef.var 6.570652e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.200000e+01 NA 12.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.119000e+03 NA 0.5959900 NA
## max NA 1.440000e+04 NA 7.8126900 NA
## range NA 1.328100e+04 NA 7.2167000 NA
## sum NA 7.223200e+04 NA 52.3307800 NA
## median NA 2.539000e+03 NA 4.3019800 NA
## mean NA 6.019333e+03 NA 4.3608983 NA
## SE.mean NA 1.795694e+03 NA 0.7079935 NA
## CI.mean.0.95 NA 3.952296e+03 NA 1.5582832 NA
## var NA 3.869421e+07 NA 6.0150573 NA
## std.dev NA 6.220467e+03 NA 2.4525614 NA
## coef.var NA 1.033415e+00 NA 0.5623982 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 1.200000e+01 1.200000e+01 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.800830e+01 -7.668520e+01 NA NA NA
## max 1.821060e+01 -7.627810e+01 NA NA NA
## range 2.023000e-01 4.071000e-01 NA NA NA
## sum 2.173922e+02 -9.175819e+02 NA NA NA
## median 1.812125e+01 -7.645255e+01 NA NA NA
## mean 1.811602e+01 -7.646516e+01 NA NA NA
## SE.mean 1.703378e-02 3.541084e-02 NA NA NA
## CI.mean.0.95 3.749110e-02 7.793873e-02 NA NA NA
## var 3.481796e-03 1.504713e-02 NA NA NA
## std.dev 5.900675e-02 1.226667e-01 NA NA NA
## coef.var 3.257159e-03 -1.604217e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 9 NA
## nbr.null NA NA NA 2 9 NA
## nbr.na NA NA NA 10 3 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 12.0000000 12.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 1.1388900 7.4432963
## max NA 14.9294354 92.8119073
## range NA 13.7905454 85.3686110
## sum NA 100.0000000 631.0918928
## median NA 8.2207450 53.4650449
## mean NA 8.3333333 52.5909911
## SE.mean NA 1.3529198 7.7590755
## CI.mean.0.95 NA 2.9777564 17.0776100
## var NA 21.9647038 722.4390327
## std.dev NA 4.6866517 26.8782260
## coef.var NA 0.5623982 0.5110804
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Andrew (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Saint Andrew")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 5.98731 |
| 10.84467 |
| 2.13878 |
| 9.83051 |
| 0.96514 |
| 2.64873 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Stony Hill 10.844670 10.844670 33.455570 33.455570
## Mavis Bank 9.830510 20.675180 30.326909 63.782479
## Stony Hill 5.987310 26.662490 18.470721 82.253200
## Calibishie 2.648730 29.311220 8.171274 90.424474
## Stony Hill 2.138780 31.450000 6.598090 97.022564
## Gordon Town 0.965140 32.415140 2.977436 100.000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 | 16
## 4 |
## 6 | 0
## 8 | 8
## 10 | 8
head(df_JA)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2233 8/11/10 <NA> <NA> Jamaica JM Sain~ 8551
## 2 2518 9/29/10 <NA> <NA> Jamaica JM Sain~ 8551
## 3 2523 9/30/10 Earl~ <NA> Jamaica JM Sain~ 8551
## 4 756 8/28/08 <NA> <NA> Jamaica JM Sain~ 1821
## 5 341 10/31/07 <NA> <NA> Jamaica JM Sain~ 1088
## 6 2547 10/5/10 <NA> <NA> Dominica DM Sain~ 1020
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 0
## 2 | 16
## 4 |
## 6 | 0
## 8 | 8
## 10 | 8
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 |
## 1 | 0
## 2 | 16
## 3 |
## 4 |
## 5 |
## 6 | 0
## 7 |
## 8 |
## 9 | 8
## 10 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.96514 |
1 |
16.7 |
16.7 |
16.7 |
16.7 |
| 2.13878 |
1 |
16.7 |
16.7 |
33.3 |
33.3 |
| 2.64873 |
1 |
16.7 |
16.7 |
50.0 |
50.0 |
| 5.98731 |
1 |
16.7 |
16.7 |
66.7 |
66.7 |
| 9.83051 |
1 |
16.7 |
16.7 |
83.3 |
83.3 |
| 10.84467 |
1 |
16.7 |
16.7 |
100.0 |
100.0 |
| Total |
6 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 7 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 6
## $ % : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ val% : num 16.7 16.7 16.7 16.7 16.7 16.7 100
## $ %cum : num 16.7 33.3 50 66.7 83.3 100 100
## $ val%cum: num 16.7 33.3 50 66.7 83.3 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.96514 |
1 |
| 2.13878 |
1 |
| 2.64873 |
1 |
| 5.98731 |
1 |
| 9.83051 |
1 |
| 10.84467 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.96514 4.96514 8.96514 12.96514
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.965,4.97] |
2 |
0.4 |
2 |
| (4.97,8.97] |
1 |
0.2 |
3 |
| (8.97,13] |
2 |
0.4 |
5 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.965,4.97]",..: 1 2 3
## $ Freq : int 2 1 2
## $ Rel_Freq: num 0.4 0.2 0.4
## $ Cum_Freq: int 2 3 5
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.965,4.97] |
2 |
| (4.97,8.97] |
1 |
| (8.97,13] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 6.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.410000e+02 NA NA NA NA NA
## max 2.547000e+03 NA NA NA NA NA
## range 2.206000e+03 NA NA NA NA NA
## sum 1.091800e+04 NA NA NA NA NA
## median 2.375500e+03 NA NA NA NA NA
## mean 1.819667e+03 NA NA NA NA NA
## SE.mean 4.082502e+02 NA NA NA NA NA
## CI.mean.0.95 1.049441e+03 NA NA NA NA NA
## var 1.000009e+06 NA NA NA NA NA
## std.dev 1.000005e+03 NA NA NA NA NA
## coef.var 5.495538e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 6.000000e+00 NA 6.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 1.020000e+03 NA 0.965140 NA
## max NA 8.551000e+03 NA 10.844670 NA
## range NA 7.531000e+03 NA 9.879530 NA
## sum NA 2.958200e+04 NA 32.415140 NA
## median NA 5.186000e+03 NA 4.318020 NA
## mean NA 4.930333e+03 NA 5.402523 NA
## SE.mean NA 1.623267e+03 NA 1.707745 NA
## CI.mean.0.95 NA 4.172741e+03 NA 4.389898 NA
## var NA 1.580997e+07 NA 17.498354 NA
## std.dev NA 3.976176e+03 NA 4.183103 NA
## coef.var NA 8.064720e-01 NA 0.774287 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 6.00000000 6.00000000 NA NA NA
## nbr.null 0.00000000 0.00000000 NA NA NA
## nbr.na 0.00000000 0.00000000 NA NA NA
## min 15.59180000 -76.82260000 NA NA NA
## max 18.16860000 -61.37310000 NA NA NA
## range 2.57680000 15.44950000 NA NA NA
## sum 106.02440000 -445.06580000 NA NA NA
## median 18.05050000 -76.74255000 NA NA NA
## mean 17.67073333 -74.17763333 NA NA NA
## SE.mean 0.41633213 2.56117804 NA NA NA
## CI.mean.0.95 1.07021581 6.58371775 NA NA NA
## var 1.03999465 39.35779778 NA NA NA
## std.dev 1.01980128 6.27357934 NA NA NA
## coef.var 0.05771132 -0.08457508 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 5.000000 NA
## nbr.null NA NA NA 0 3.000000 NA
## nbr.na NA NA NA 6 1.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 7.000000 NA
## range NA NA NA -Inf 7.000000 NA
## sum NA NA NA 0 9.000000 NA
## median NA NA NA NA 0.000000 NA
## mean NA NA NA NaN 1.800000 NA
## SE.mean NA NA NA NA 1.356466 NA
## CI.mean.0.95 NA NA NA NaN 3.766153 NA
## var NA NA NA NA 9.200000 NA
## std.dev NA NA NA NA 3.033150 NA
## coef.var NA NA NA NA 1.685083 NA
## source_link prop ypos
## nbr.val NA 6.000000 6.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 2.977436 9.2353604
## max NA 33.455570 95.9143629
## range NA 30.478135 86.6790025
## sum NA 100.000000 359.6014085
## median NA 13.320998 64.4565857
## mean NA 16.666667 59.9335681
## SE.mean NA 5.268356 13.6780214
## CI.mean.0.95 NA 13.542739 35.1604732
## var NA 166.533419 1122.5296089
## std.dev NA 12.904783 33.5041730
## coef.var NA 0.774287 0.5590218
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Ann (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Saint Ann")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 3.965330 3.965330 70.985025 70.985025
## Saint Ann<U+043D><U+045E><U+0434>‰<U+0435><U+0434>‹<U+045E>s Bay 1.081960 5.047290 19.368617 90.353642
## Ocho Rios 0.538860 5.586150 9.646358 100.000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 | 1
## 2 |
## 3 |
## 4 | 0
head(df_JA)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 354 11/12/07 <NA> <NA> Jamaica JM Saint Ann 13671
## 2 7473 12/1/15 <NA> <NA> Jamaica JM Saint Ann 13671
## 3 501 4/16/08 <NA> <NA> Jamaica JM Saint Ann 9450
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 | 1
## 2 |
## 3 |
## 4 | 0
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 5
## 1 | 1
## 1 |
## 2 |
## 2 |
## 3 |
## 3 |
## 4 | 0
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.53886 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 1.08196 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 3.96533 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.53886 |
1 |
| 1.08196 |
1 |
| 3.96533 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.53886 2.53886 4.53886
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.539,2.54] |
1 |
0.5 |
1 |
| (2.54,4.54] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.539,2.54]",..: 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.539,2.54] |
1 |
| (2.54,4.54] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.540000e+02 NA NA NA NA NA
## max 7.473000e+03 NA NA NA NA NA
## range 7.119000e+03 NA NA NA NA NA
## sum 8.328000e+03 NA NA NA NA NA
## median 5.010000e+02 NA NA NA NA NA
## mean 2.776000e+03 NA NA NA NA NA
## SE.mean 2.348883e+03 NA NA NA NA NA
## CI.mean.0.95 1.010643e+04 NA NA NA NA NA
## var 1.655176e+07 NA NA NA NA NA
## std.dev 4.068385e+03 NA NA NA NA NA
## coef.var 1.465557e+00 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 9.450000e+03 NA 0.5388600 NA
## max NA 1.367100e+04 NA 3.9653300 NA
## range NA 4.221000e+03 NA 3.4264700 NA
## sum NA 3.679200e+04 NA 5.5861500 NA
## median NA 1.367100e+04 NA 1.0819600 NA
## mean NA 1.226400e+04 NA 1.8620500 NA
## SE.mean NA 1.407000e+03 NA 1.0632622 NA
## CI.mean.0.95 NA 6.053832e+03 NA 4.5748480 NA
## var NA 5.938947e+06 NA 3.3915795 NA
## std.dev NA 2.436995e+03 NA 1.8416241 NA
## coef.var NA 1.987113e-01 NA 0.9890304 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.840000e+01 -7.720000e+01 NA NA NA
## max 1.843130e+01 -7.710070e+01 NA NA NA
## range 3.130000e-02 9.930000e-02 NA NA NA
## sum 5.523670e+01 -2.314926e+02 NA NA NA
## median 1.840540e+01 -7.719190e+01 NA NA NA
## mean 1.841223e+01 -7.716420e+01 NA NA NA
## SE.mean 9.659940e-03 3.183599e-02 NA NA NA
## CI.mean.0.95 4.156337e-02 1.369792e-01 NA NA NA
## var 2.799433e-04 3.040590e-03 NA NA NA
## std.dev 1.673151e-02 5.514155e-02 NA NA NA
## coef.var 9.087169e-04 -7.146001e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 1 NA
## nbr.null NA NA NA 1 1 NA
## nbr.na NA NA NA 2 2 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA NA NA NA
## CI.mean.0.95 NA NA NA NaN NaN NA
## var NA NA NA NA NA NA
## std.dev NA NA NA NA NA NA
## coef.var NA NA NA NA NA NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 9.6463575 35.4925127
## max NA 70.9850255 95.1768212
## range NA 61.3386680 59.6843085
## sum NA 100.0000000 211.3386680
## median NA 19.3686170 80.6693340
## mean NA 33.3333333 70.4462227
## SE.mean NA 19.0338998 17.9716274
## CI.mean.0.95 NA 81.8962609 77.3256717
## var NA 1086.8680228 968.9381746
## std.dev NA 32.9676815 31.1277718
## coef.var NA 0.9890304 0.4418657
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Catherine (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Saint Catherine")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Bog Walk 6.71269 6.71269 37.20632 37.20632
## Bog Walk 5.86530 12.57799 32.50951 69.71583
## Riversdale 5.46381 18.04180 30.28417 100.00000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 5 |
## 5 | 59
## 6 |
## 6 | 7
head(df_JA)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 340 10/31/07 <NA> <NA> Jamaica JM Sain~ 4085
## 2 2519 9/29/10 <NA> <NA> Jamaica JM Sain~ 12873
## 3 7472 12/1/15 <NA> <NA> Jamaica JM Sain~ 12873
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 5 |
## 5 | 59
## 6 |
## 6 | 7
stem(df_JA$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 54 | 6
## 56 |
## 58 | 7
## 60 |
## 62 |
## 64 |
## 66 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 5.46381 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 5.8653 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 6.71269 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 5.46381 |
1 |
| 5.8653 |
1 |
| 6.71269 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 5.46381 6.46381 7.46381
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (5.46,6.46] |
1 |
0.5 |
1 |
| (6.46,7.46] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(5.46,6.46]",..: 1 2
## $ Freq : int 1 1
## $ Rel_Freq: num 0.5 0.5
## $ Cum_Freq: int 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (5.46,6.46] |
1 |
| (6.46,7.46] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.400000e+02 NA NA NA NA NA
## max 7.472000e+03 NA NA NA NA NA
## range 7.132000e+03 NA NA NA NA NA
## sum 1.033100e+04 NA NA NA NA NA
## median 2.519000e+03 NA NA NA NA NA
## mean 3.443667e+03 NA NA NA NA NA
## SE.mean 2.110104e+03 NA NA NA NA NA
## CI.mean.0.95 9.079043e+03 NA NA NA NA NA
## var 1.335761e+07 NA NA NA NA NA
## std.dev 3.654807e+03 NA NA NA NA NA
## coef.var 1.061313e+00 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 4.085000e+03 NA 5.4638100 NA
## max NA 1.287300e+04 NA 6.7126900 NA
## range NA 8.788000e+03 NA 1.2488800 NA
## sum NA 2.983100e+04 NA 18.0418000 NA
## median NA 1.287300e+04 NA 5.8653000 NA
## mean NA 9.943667e+03 NA 6.0139333 NA
## SE.mean NA 2.929333e+03 NA 0.3681006 NA
## CI.mean.0.95 NA 1.260390e+04 NA 1.5838092 NA
## var NA 2.574298e+07 NA 0.4064942 NA
## std.dev NA 5.073754e+03 NA 0.6375690 NA
## coef.var NA 5.102498e-01 NA 0.1060153 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.000000000 3.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 18.067700000 -7.704760e+01 NA NA NA
## max 18.215200000 -7.694290e+01 NA NA NA
## range 0.147500000 1.047000e-01 NA NA NA
## sum 54.374700000 -2.309564e+02 NA NA NA
## median 18.091800000 -7.696590e+01 NA NA NA
## mean 18.124900000 -7.698547e+01 NA NA NA
## SE.mean 0.045682856 3.176824e-02 NA NA NA
## CI.mean.0.95 0.196557465 1.366877e-01 NA NA NA
## var 0.006260770 3.027663e-03 NA NA NA
## std.dev 0.079125028 5.502421e-02 NA NA NA
## coef.var 0.004365543 -7.147350e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2 NA
## nbr.null NA NA NA 1 2 NA
## nbr.na NA NA NA 2 1 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 30.2841734 15.1420867
## max NA 37.2063209 81.3968396
## range NA 6.9221475 66.2547529
## sum NA 100.0000000 143.0778525
## median NA 32.5095057 46.5389263
## mean NA 33.3333333 47.6926175
## SE.mean NA 2.0402655 19.1347966
## CI.mean.0.95 NA 8.7785541 82.3303848
## var NA 12.4880503 1098.4213215
## std.dev NA 3.5338436 33.1424399
## coef.var NA 0.1060153 0.6949176
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Saint Thomas (Jamaica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_JA <- subset(df, country_name == "Jamaica")
knitr::kable(head(df_JA))
library(dplyr)
df_JA <- subset(df, state == "Saint Thomas")
knitr::kable(head(df_JA))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_JA, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_JA <- df_JA %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_JA$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_JA, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_JA$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 6.51940 |
| 4.53632 |
| 0.21825 |
| 1.71217 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_JA$distance
names(distance) <- df_JA$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Easington 6.519400 6.519400 50.202755 50.202755
## Easington 4.536320 11.055720 34.932012 85.134767
## Bath 1.712170 12.767890 13.184595 98.319362
## Bath 0.218250 12.986140 1.680638 100.000000
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 27
## 2 |
## 4 | 5
## 6 | 5
head(df_JA)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 339 10/31/07 <NA> <NA> Jamaica JM Sain~ 2634
## 2 1760 4/18/10 <NA> <NA> Jamaica JM Sain~ 2634
## 3 314 10/17/07 <NA> <NA> Jamaica JM Sain~ 2382
## 4 774 9/4/08 <NA> <NA> Jamaica JM Sain~ 2382
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_JA))
stem(df_JA$"distance")
##
## The decimal point is at the |
##
## 0 | 27
## 2 |
## 4 | 5
## 6 | 5
stem(df_JA$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 2
## 1 | 7
## 2 |
## 3 |
## 4 | 5
## 5 |
## 6 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.21825 |
1 |
25 |
25 |
25 |
25 |
| 1.71217 |
1 |
25 |
25 |
50 |
50 |
| 4.53632 |
1 |
25 |
25 |
75 |
75 |
| 6.5194 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.21825 |
1 |
| 1.71217 |
1 |
| 4.53632 |
1 |
| 6.5194 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.21825 3.21825 6.21825 9.21825
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.218,3.22] |
1 |
0.3333333 |
1 |
| (3.22,6.22] |
1 |
0.3333333 |
2 |
| (6.22,9.22] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.218,3.22]",..: 1 2 3
## $ Freq : int 1 1 1
## $ Rel_Freq: num 0.333 0.333 0.333
## $ Cum_Freq: int 1 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.218,3.22] |
1 |
| (3.22,6.22] |
1 |
| (6.22,9.22] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_JA)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.140000e+02 NA NA NA NA NA
## max 1.760000e+03 NA NA NA NA NA
## range 1.446000e+03 NA NA NA NA NA
## sum 3.187000e+03 NA NA NA NA NA
## median 5.565000e+02 NA NA NA NA NA
## mean 7.967500e+02 NA NA NA NA NA
## SE.mean 3.380028e+02 NA NA NA NA NA
## CI.mean.0.95 1.075676e+03 NA NA NA NA NA
## var 4.569836e+05 NA NA NA NA NA
## std.dev 6.760056e+02 NA NA NA NA NA
## coef.var 8.484539e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 2.382000e+03 NA 0.2182500 NA
## max NA 2.634000e+03 NA 6.5194000 NA
## range NA 2.520000e+02 NA 6.3011500 NA
## sum NA 1.003200e+04 NA 12.9861400 NA
## median NA 2.508000e+03 NA 3.1242450 NA
## mean NA 2.508000e+03 NA 3.2465350 NA
## SE.mean NA 7.274613e+01 NA 1.4112635 NA
## CI.mean.0.95 NA 2.315107e+02 NA 4.4912704 NA
## var NA 2.116800e+04 NA 7.9666589 NA
## std.dev NA 1.454923e+02 NA 2.8225270 NA
## coef.var NA 5.801127e-02 NA 0.8693968 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.793840e+01 -7.664790e+01 NA NA NA
## max 1.796470e+01 -7.633300e+01 NA NA NA
## range 2.630000e-02 3.149000e-01 NA NA NA
## sum 7.180110e+01 -3.059144e+02 NA NA NA
## median 1.794900e+01 -7.646675e+01 NA NA NA
## mean 1.795028e+01 -7.647860e+01 NA NA NA
## SE.mean 5.434055e-03 8.031387e-02 NA NA NA
## CI.mean.0.95 1.729359e-02 2.555946e-01 NA NA NA
## var 1.181158e-04 2.580127e-02 NA NA NA
## std.dev 1.086811e-02 1.606277e-01 NA NA NA
## coef.var 6.054565e-04 -2.100297e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.0000000 NA
## nbr.null NA NA NA 0 1.0000000 NA
## nbr.na NA NA NA 4 2.0000000 NA
## min NA NA NA Inf 0.0000000 NA
## max NA NA NA -Inf 1.0000000 NA
## range NA NA NA -Inf 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA NA 0.5000000 NA
## mean NA NA NA NaN 0.5000000 NA
## SE.mean NA NA NA NA 0.5000000 NA
## CI.mean.0.95 NA NA NA NaN 6.3531024 NA
## var NA NA NA NA 0.5000000 NA
## std.dev NA NA NA NA 0.7071068 NA
## coef.var NA NA NA NA 1.4142136 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 1.6806380 25.1013773
## max NA 50.2027546 93.4077024
## range NA 48.5221167 68.3063251
## sum NA 100.0000000 272.1529261
## median NA 24.0583037 76.8219232
## mean NA 25.0000000 68.0382315
## SE.mean NA 10.8674596 15.2999203
## CI.mean.0.95 NA 34.5851067 48.6911750
## var NA 472.4067134 936.3502494
## std.dev NA 21.7349192 30.5998407
## coef.var NA 0.8693968 0.4497448
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Haiti
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HT <- subset(df, country_name == "Haiti")
knitr::kable(head(df_HT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HT <- df_HT %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_HT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HT, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.41574 |
| 0.19079 |
| 0.51272 |
| 2.72168 |
| 1.80063 |
| 3.50201 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HT$distance
names(distance) <- df_HT$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Artibonite 17.29836000 17.29836000 20.57571552 20.57571552
## Ouest 12.13199000 29.43035000 14.43052260 35.00623813
## Artibonite 8.70343000 38.13378000 10.35238599 45.35862412
## Centre 7.86436000 45.99814000 9.35434539 54.71296951
## Ouest 7.67473000 53.67287000 9.12878800 63.84175751
## Nord 5.23459000 58.90746000 6.22633791 70.06809542
## Artibonite 4.72379000 63.63125000 5.61876150 75.68685693
## Sud-Est 4.41574000 68.04699000 5.25234820 80.93920513
## Ouest 3.50201000 71.54900000 4.16550248 85.10470761
## Ouest 2.72168000 74.27068000 3.23733079 88.34203840
## Ouest 2.63565000 76.90633000 3.13500150 91.47703990
## Ouest 1.80063000 78.70696000 2.14177822 93.61881812
## Nord 1.58489000 80.29185000 1.88516401 95.50398214
## Ouest 1.33931000 81.63116000 1.59305631 97.09703845
## Ouest 1.31659000 82.94775000 1.56603177 98.66307021
## Ouest 0.51272000 83.46047000 0.60986018 99.27293039
## Nord 0.27505000 83.73552000 0.32716110 99.60009149
## Sud-Est 0.19079000 83.92631000 0.22693716 99.82702866
## Ouest 0.11071000 84.03702000 0.13168517 99.95871383
## Nord 0.03471000 84.07173000 0.04128617 100.00000000
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0000111223344
## 0 | 55889
## 1 | 2
## 1 | 7
head(df_HT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 747 8/26/08 <NA> <NA> Haiti HT Sud-Est 137966
## 2 3563 6/2/11 <NA> <NA> Haiti HT Sud-Est 137966
## 3 303 10/12/07 <NA> <NA> Haiti HT Ouest 3951
## 4 334 10/29/07 <NA> <NA> Haiti HT Ouest 1234742
## 5 506 4/20/08 <NA> <NA> Haiti HT Ouest 1234742
## 6 748 8/26/08 <NA> <NA> Haiti HT Ouest 1234742
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HT))
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0000111223344
## 0 | 55889
## 1 | 2
## 1 | 7
stem(df_HT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 012353368
## 2 | 675
## 4 | 472
## 6 | 79
## 8 | 7
## 10 |
## 12 | 1
## 14 |
## 16 | 3
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.03471 |
1 |
5 |
5 |
5 |
5 |
| 0.11071 |
1 |
5 |
5 |
10 |
10 |
| 0.19079 |
1 |
5 |
5 |
15 |
15 |
| 0.27505 |
1 |
5 |
5 |
20 |
20 |
| 0.51272 |
1 |
5 |
5 |
25 |
25 |
| 1.31659 |
1 |
5 |
5 |
30 |
30 |
| 1.33931 |
1 |
5 |
5 |
35 |
35 |
| 1.58489 |
1 |
5 |
5 |
40 |
40 |
| 1.80063 |
1 |
5 |
5 |
45 |
45 |
| 2.63565 |
1 |
5 |
5 |
50 |
50 |
| 2.72168 |
1 |
5 |
5 |
55 |
55 |
| 3.50201 |
1 |
5 |
5 |
60 |
60 |
| 4.41574 |
1 |
5 |
5 |
65 |
65 |
| 4.72379 |
1 |
5 |
5 |
70 |
70 |
| 5.23459 |
1 |
5 |
5 |
75 |
75 |
| 7.67473 |
1 |
5 |
5 |
80 |
80 |
| 7.86436 |
1 |
5 |
5 |
85 |
85 |
| 8.70343 |
1 |
5 |
5 |
90 |
90 |
| 12.13199 |
1 |
5 |
5 |
95 |
95 |
| 17.29836 |
1 |
5 |
5 |
100 |
100 |
| Total |
20 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 21 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 5 5 5 5 5 5 5 5 5 5 ...
## $ val% : num 5 5 5 5 5 5 5 5 5 5 ...
## $ %cum : num 5 10 15 20 25 30 35 40 45 50 ...
## $ val%cum: num 5 10 15 20 25 30 35 40 45 50 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.03471 |
1 |
| 0.11071 |
1 |
| 0.19079 |
1 |
| 0.27505 |
1 |
| 0.51272 |
1 |
| 1.31659 |
1 |
| 1.33931 |
1 |
| 1.58489 |
1 |
| 1.80063 |
1 |
| 2.63565 |
1 |
| 2.72168 |
1 |
| 3.50201 |
1 |
| 4.41574 |
1 |
| 4.72379 |
1 |
| 5.23459 |
1 |
| 7.67473 |
1 |
| 7.86436 |
1 |
| 8.70343 |
1 |
| 12.13199 |
1 |
| 17.29836 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.03471 4.03471 8.03471 12.03471 16.03471 20.03471
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0347,4.03] |
11 |
0.5789474 |
11 |
| (4.03,8.03] |
5 |
0.2631579 |
16 |
| (8.03,12] |
1 |
0.0526316 |
17 |
| (12,16] |
1 |
0.0526316 |
18 |
| (16,20] |
1 |
0.0526316 |
19 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.0347,4.03]",..: 1 2 3 4 5
## $ Freq : int 11 5 1 1 1
## $ Rel_Freq: num 0.5789 0.2632 0.0526 0.0526 0.0526
## $ Cum_Freq: int 11 16 17 18 19
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0347,4.03] |
11 |
| (4.03,8.03] |
5 |
| (8.03,12] |
1 |
| (12,16] |
1 |
| (16,20] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HT)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.970000e+02 NA NA NA NA NA
## max 6.722000e+03 NA NA NA NA NA
## range 6.425000e+03 NA NA NA NA NA
## sum 4.809400e+04 NA NA NA NA NA
## median 2.017000e+03 NA NA NA NA NA
## mean 2.404700e+03 NA NA NA NA NA
## SE.mean 4.506116e+02 NA NA NA NA NA
## CI.mean.0.95 9.431408e+02 NA NA NA NA NA
## var 4.061016e+06 NA NA NA NA NA
## std.dev 2.015196e+03 NA NA NA NA NA
## coef.var 8.380240e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+01 NA 20.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 3.951000e+03 NA 0.034710 NA
## max NA 1.234742e+06 NA 17.298360 NA
## range NA 1.230791e+06 NA 17.263650 NA
## sum NA 6.625032e+06 NA 84.071730 NA
## median NA 1.363905e+05 NA 2.678665 NA
## mean NA 3.312516e+05 NA 4.203587 NA
## SE.mean NA 9.247531e+04 NA 1.019568 NA
## CI.mean.0.95 NA 1.935530e+05 NA 2.133979 NA
## var NA 1.710337e+11 NA 20.790360 NA
## std.dev NA 4.135622e+05 NA 4.559645 NA
## coef.var NA 1.248483e+00 NA 1.084703 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 20.00000000 2.000000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 18.23480000 -7.275400e+01 NA NA NA
## max 19.76980000 -7.197470e+01 NA NA NA
## range 1.53500000 7.793000e-01 NA NA NA
## sum 378.03620000 -1.448005e+03 NA NA NA
## median 18.53150000 -7.240515e+01 NA NA NA
## mean 18.90181000 -7.240024e+01 NA NA NA
## SE.mean 0.12729585 4.201268e-02 NA NA NA
## CI.mean.0.95 0.26643329 8.793354e-02 NA NA NA
## var 0.32408469 3.530130e-02 NA NA NA
## std.dev 0.56928437 1.878864e-01 NA NA NA
## coef.var 0.03011798 -2.595107e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 2.0000000 18.000000
## nbr.null NA NA NA 1.0000000 1.000000
## nbr.na NA NA NA 18.0000000 2.000000
## min NA NA NA 0.0000000 0.000000
## max NA NA NA 1.0000000 26.000000
## range NA NA NA 1.0000000 26.000000
## sum NA NA NA 1.0000000 128.000000
## median NA NA NA 0.5000000 3.000000
## mean NA NA NA 0.5000000 7.111111
## SE.mean NA NA NA 0.5000000 2.035452
## CI.mean.0.95 NA NA NA 6.3531024 4.294428
## var NA NA NA 0.5000000 74.575163
## std.dev NA NA NA 0.7071068 8.635691
## coef.var NA NA NA 1.4142136 1.214394
## source_name source_link prop ypos
## nbr.val NA NA 20.00000000 20.000000
## nbr.null NA NA 0.00000000 0.000000
## nbr.na NA NA 0.00000000 0.000000
## min NA NA 0.04128617 2.626174
## max NA NA 20.57571552 89.712142
## range NA NA 20.53442935 87.085968
## sum NA NA 100.00000000 741.305853
## median NA NA 3.18616615 43.143272
## mean NA NA 5.00000000 37.065293
## SE.mean NA NA 1.21273530 5.707890
## CI.mean.0.95 NA NA 2.53828416 11.946751
## var NA NA 29.41453837 651.600189
## std.dev NA NA 5.42351716 25.526461
## coef.var NA NA 1.08470343 0.688689
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Artibonite (Haiti)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HT <- subset(df, country_name == "Haiti")
knitr::kable(head(df_HT))
library(dplyr)
df_HT <- subset(df, state == "Artibonite")
knitr::kable(head(df_HT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HT <- df_HT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HT$distance
names(distance) <- df_HT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Saint-Marc 17.29836 17.29836 56.29954 56.29954
## Gros Morne 8.70343 26.00179 28.32633 84.62587
## Gonaïves 4.72379 30.72558 15.37413 100.00000
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 |
## 0 | 59
## 1 |
## 1 | 7
head(df_HT)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1140 9/7/09 Early~ <NA> Haiti HT Arti~ 66226
## 2 297 10/8/07 <NA> <NA> Haiti HT Arti~ 7294
## 3 771 9/3/08 <NA> <NA> Haiti HT Arti~ 84961
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HT))
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 |
## 0 | 59
## 1 |
## 1 | 7
stem(df_HT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 4 | 7
## 6 |
## 8 | 7
## 10 |
## 12 |
## 14 |
## 16 | 3
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 4.72379 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 8.70343 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 17.29836 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 4.72379 |
1 |
| 8.70343 |
1 |
| 17.29836 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 4.72379 9.72379 14.72379 19.72379
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (4.72,9.72] |
1 |
0.5 |
1 |
| (9.72,14.7] |
0 |
0.0 |
1 |
| (14.7,19.7] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(4.72,9.72]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (4.72,9.72] |
1 |
| (9.72,14.7] |
0 |
| (14.7,19.7] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.970000e+02 NA NA NA NA NA
## max 1.140000e+03 NA NA NA NA NA
## range 8.430000e+02 NA NA NA NA NA
## sum 2.208000e+03 NA NA NA NA NA
## median 7.710000e+02 NA NA NA NA NA
## mean 7.360000e+02 NA NA NA NA NA
## SE.mean 2.439816e+02 NA NA NA NA NA
## CI.mean.0.95 1.049768e+03 NA NA NA NA NA
## var 1.785810e+05 NA NA NA NA NA
## std.dev 4.225885e+02 NA NA NA NA NA
## coef.var 5.741691e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 7.294000e+03 NA 4.7237900 NA
## max NA 8.496100e+04 NA 17.2983600 NA
## range NA 7.766700e+04 NA 12.5745700 NA
## sum NA 1.584810e+05 NA 30.7255800 NA
## median NA 6.622600e+04 NA 8.7034300 NA
## mean NA 5.282700e+04 NA 10.2418600 NA
## SE.mean NA 2.340008e+04 NA 3.7105717 NA
## CI.mean.0.95 NA 1.006824e+05 NA 15.9653016 NA
## var NA 1.642691e+09 NA 41.3050278 NA
## std.dev NA 4.053012e+04 NA 6.4268988 NA
## coef.var NA 7.672236e-01 NA 0.6275129 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.00000000 3.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 18.95230000 -7.275400e+01 NA NA NA
## max 19.69900000 -7.264800e+01 NA NA NA
## range 0.74670000 1.060000e-01 NA NA NA
## sum 58.08130000 -2.181073e+02 NA NA NA
## median 19.43000000 -7.270530e+01 NA NA NA
## mean 19.36043333 -7.270243e+01 NA NA NA
## SE.mean 0.21834213 3.063312e-02 NA NA NA
## CI.mean.0.95 0.93945038 1.318037e-01 NA NA NA
## var 0.14301986 2.815163e-03 NA NA NA
## std.dev 0.37817967 5.305811e-02 NA NA NA
## coef.var 0.01953364 -7.297983e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 0.000000 NA
## nbr.na NA NA NA 3 1.000000 NA
## min NA NA NA Inf 1.000000 NA
## max NA NA NA -Inf 26.000000 NA
## range NA NA NA -Inf 25.000000 NA
## sum NA NA NA 0 27.000000 NA
## median NA NA NA NA 13.500000 NA
## mean NA NA NA NaN 13.500000 NA
## SE.mean NA NA NA NA 12.500000 NA
## CI.mean.0.95 NA NA NA NaN 158.827559 NA
## var NA NA NA NA 312.500000 NA
## std.dev NA NA NA NA 17.677670 NA
## coef.var NA NA NA NA 1.309457 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 15.3741280 28.1497697
## max NA 56.2995393 92.3129360
## range NA 40.9254113 64.1631663
## sum NA 100.0000000 190.9254113
## median NA 28.3263327 70.4627057
## mean NA 33.3333333 63.6418038
## SE.mean NA 12.0764905 18.8336711
## CI.mean.0.95 NA 51.9609446 81.0347465
## var NA 437.5248651 1064.1215052
## std.dev NA 20.9170950 32.6208753
## coef.var NA 0.6275129 0.5125699
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Nord (Haiti)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HT <- subset(df, country_name == "Haiti")
knitr::kable(head(df_HT))
library(dplyr)
df_HT <- subset(df, state == "Nord")
knitr::kable(head(df_HT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HT <- df_HT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 5.23459 |
| 1.58489 |
| 0.03471 |
| 0.27505 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HT$distance
names(distance) <- df_HT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Okap 5.2345900 5.2345900 73.4242360 73.4242360
## Okap 1.5848900 6.8194800 22.2308409 95.6550768
## Cap-Haïtien 0.2750500 7.0945300 3.8580550 99.5131318
## Limbé 0.0347100 7.1292400 0.4868682 100.0000000
stem(df_HT$"distance")
##
## The decimal point is at the |
##
## 0 | 036
## 2 |
## 4 | 2
head(df_HT)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6713 11/1/14 <NA> <NA> Haiti HT Nord 134815
## 2 6722 5/27/14 <NA> <NA> Haiti HT Nord 134815
## 3 4312 4/8/12 <NA> <NA> Haiti HT Nord 32645
## 4 1506 2/15/10 12:00 <NA> Haiti HT Nord 134815
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HT))
stem(df_HT$"distance")
##
## The decimal point is at the |
##
## 0 | 036
## 2 |
## 4 | 2
stem(df_HT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 03
## 1 | 6
## 2 |
## 3 |
## 4 |
## 5 | 2
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.03471 |
1 |
25 |
25 |
25 |
25 |
| 0.27505 |
1 |
25 |
25 |
50 |
50 |
| 1.58489 |
1 |
25 |
25 |
75 |
75 |
| 5.23459 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.03471 |
1 |
| 0.27505 |
1 |
| 1.58489 |
1 |
| 5.23459 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.03471 2.03471 4.03471 6.03471
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0347,2.03] |
2 |
0.6666667 |
2 |
| (2.03,4.03] |
0 |
0.0000000 |
2 |
| (4.03,6.03] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.0347,2.03]",..: 1 2 3
## $ Freq : int 2 0 1
## $ Rel_Freq: num 0.667 0 0.333
## $ Cum_Freq: int 2 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0347,2.03] |
2 |
| (2.03,4.03] |
0 |
| (4.03,6.03] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HT)
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.506000e+03 NA NA NA NA NA
## max 6.722000e+03 NA NA NA NA NA
## range 5.216000e+03 NA NA NA NA NA
## sum 1.925300e+04 NA NA NA NA NA
## median 5.512500e+03 NA NA NA NA NA
## mean 4.813250e+03 NA NA NA NA NA
## SE.mean 1.239675e+03 NA NA NA NA NA
## CI.mean.0.95 3.945199e+03 NA NA NA NA NA
## var 6.147177e+06 NA NA NA NA NA
## std.dev 2.479350e+03 NA NA NA NA NA
## coef.var 5.151094e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 4.000000e+00 NA 4.000000 NA 4.000000e+00
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000e+00
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000e+00
## min NA 3.264500e+04 NA 0.034710 NA 1.970410e+01
## max NA 1.348150e+05 NA 5.234590 NA 1.976980e+01
## range NA 1.021700e+05 NA 5.199880 NA 6.570000e-02
## sum NA 4.370900e+05 NA 7.129240 NA 7.897490e+01
## median NA 1.348150e+05 NA 0.929970 NA 1.975050e+01
## mean NA 1.092725e+05 NA 1.782310 NA 1.974373e+01
## SE.mean NA 2.554250e+04 NA 1.200109 NA 1.414905e-02
## CI.mean.0.95 NA 8.128763e+04 NA 3.819284 NA 4.502859e-02
## var NA 2.609677e+09 NA 5.761050 NA 8.007825e-04
## std.dev NA 5.108500e+04 NA 2.400219 NA 2.829810e-02
## coef.var NA 4.675010e-01 NA 1.346690 NA 1.433271e-03
## longitude geolocation hazard_type landslide_type
## nbr.val 4.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.240060e+01 NA NA NA
## max -7.220600e+01 NA NA NA
## range 1.946000e-01 NA NA NA
## sum -2.890303e+02 NA NA NA
## median -7.221185e+01 NA NA NA
## mean -7.225758e+01 NA NA NA
## SE.mean 4.771454e-02 NA NA NA
## CI.mean.0.95 1.518490e-01 NA NA NA
## var 9.106709e-03 NA NA NA
## std.dev 9.542908e-02 NA NA NA
## coef.var -1.320679e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2.0000000 4.0000000 NA
## nbr.null NA NA NA 1.0000000 0.0000000 NA
## nbr.na NA NA NA 2.0000000 0.0000000 NA
## min NA NA NA 0.0000000 1.0000000 NA
## max NA NA NA 1.0000000 4.0000000 NA
## range NA NA NA 1.0000000 3.0000000 NA
## sum NA NA NA 1.0000000 10.0000000 NA
## median NA NA NA 0.5000000 2.5000000 NA
## mean NA NA NA 0.5000000 2.5000000 NA
## SE.mean NA NA NA 0.5000000 0.6454972 NA
## CI.mean.0.95 NA NA NA 6.3531024 2.0542603 NA
## var NA NA NA 0.5000000 1.6666667 NA
## std.dev NA NA NA 0.7071068 1.2909944 NA
## coef.var NA NA NA 1.4142136 0.5163978 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 0.4868682 36.712118
## max NA 73.4242360 98.070973
## range NA 72.9373678 61.358855
## sum NA 100.0000000 315.221258
## median NA 13.0444479 90.219084
## mean NA 25.0000000 78.805314
## SE.mean NA 16.8336230 14.341266
## CI.mean.0.95 NA 53.5721012 45.640308
## var NA 1133.4834473 822.687604
## std.dev NA 33.6672459 28.682531
## coef.var NA 1.3466898 0.363967
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Ouest (Haiti)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HT <- subset(df, country_name == "Haiti")
knitr::kable(head(df_HT))
library(dplyr)
df_HT <- subset(df, state == "Ouest")
knitr::kable(head(df_HT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HT <- df_HT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 2.72168 |
| 1.80063 |
| 3.50201 |
| 0.11071 |
| 1.33931 |
| 7.67473 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HT$distance
names(distance) <- df_HT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Carrefour 12.1319900 12.1319900 35.9508766 35.9508766
## Léogâne 7.6747300 19.8067200 22.7426227 58.6934993
## Port-au-Prince 3.5020100 23.3087300 10.3775497 69.0710490
## Port-au-Prince 2.7216800 26.0304100 8.0651881 77.1362371
## Carrefour 2.6356500 28.6660600 7.8102544 84.9464915
## Port-au-Prince 1.8006300 30.4666900 5.3358292 90.2823207
## Pétionville 1.3393100 31.8060000 3.9687939 94.2511146
## Carrefour 1.3165900 33.1225900 3.9014675 98.1525821
## Cabaret 0.5127200 33.6353100 1.5193495 99.6719317
## Pétionville 0.1107100 33.7460200 0.3280683 100.0000000
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 01112334
## 0 | 8
## 1 | 2
head(df_HT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 334 10/29/07 <NA> <NA> Haiti HT Ouest 1234742
## 2 506 4/20/08 <NA> <NA> Haiti HT Ouest 1234742
## 3 748 8/26/08 <NA> <NA> Haiti HT Ouest 1234742
## 4 3576 6/7/11 <NA> <NA> Haiti HT Ouest 283052
## 5 4289 3/30/12 Late night <NA> Haiti HT Ouest 283052
## 6 2604 10/17/10 <NA> <NA> Haiti HT Ouest 134190
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HT))
stem(df_HT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 01112334
## 0 | 8
## 1 | 2
stem(df_HT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 15338
## 2 | 675
## 4 |
## 6 | 7
## 8 |
## 10 |
## 12 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.11071 |
1 |
10 |
10 |
10 |
10 |
| 0.51272 |
1 |
10 |
10 |
20 |
20 |
| 1.31659 |
1 |
10 |
10 |
30 |
30 |
| 1.33931 |
1 |
10 |
10 |
40 |
40 |
| 1.80063 |
1 |
10 |
10 |
50 |
50 |
| 2.63565 |
1 |
10 |
10 |
60 |
60 |
| 2.72168 |
1 |
10 |
10 |
70 |
70 |
| 3.50201 |
1 |
10 |
10 |
80 |
80 |
| 7.67473 |
1 |
10 |
10 |
90 |
90 |
| 12.13199 |
1 |
10 |
10 |
100 |
100 |
| Total |
10 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 11 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 10 10 10 10 10 10 10 10 10 10 ...
## $ val% : num 10 10 10 10 10 10 10 10 10 10 ...
## $ %cum : num 10 20 30 40 50 60 70 80 90 100 ...
## $ val%cum: num 10 20 30 40 50 60 70 80 90 100 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.11071 |
1 |
| 0.51272 |
1 |
| 1.31659 |
1 |
| 1.33931 |
1 |
| 1.80063 |
1 |
| 2.63565 |
1 |
| 2.72168 |
1 |
| 3.50201 |
1 |
| 7.67473 |
1 |
| 12.13199 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.11071 3.11071 6.11071 9.11071 12.11071 15.11071
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.111,3.11] |
6 |
0.6666667 |
6 |
| (3.11,6.11] |
1 |
0.1111111 |
7 |
| (6.11,9.11] |
1 |
0.1111111 |
8 |
| (9.11,12.1] |
0 |
0.0000000 |
8 |
| (12.1,15.1] |
1 |
0.1111111 |
9 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.111,3.11]",..: 1 2 3 4 5
## $ Freq : int 6 1 1 0 1
## $ Rel_Freq: num 0.667 0.111 0.111 0 0.111
## $ Cum_Freq: int 6 7 8 8 9
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.111,3.11] |
6 |
| (3.11,6.11] |
1 |
| (6.11,9.11] |
1 |
| (9.11,12.1] |
0 |
| (12.1,15.1] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 1.000000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.030000e+02 NA NA NA NA NA
## max 4.289000e+03 NA NA NA NA NA
## range 3.986000e+03 NA NA NA NA NA
## sum 1.875900e+04 NA NA NA NA NA
## median 1.897000e+03 NA NA NA NA NA
## mean 1.875900e+03 NA NA NA NA NA
## SE.mean 4.542708e+02 NA NA NA NA NA
## CI.mean.0.95 1.027632e+03 NA NA NA NA NA
## var 2.063619e+06 NA NA NA NA NA
## std.dev 1.436530e+03 NA NA NA NA NA
## coef.var 7.657819e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.000000e+01 NA 10.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 3.951000e+03 NA 0.110710 NA
## max NA 1.234742e+06 NA 12.131990 NA
## range NA 1.230791e+06 NA 12.021280 NA
## sum NA 5.734939e+06 NA 33.746020 NA
## median NA 4.421560e+05 NA 2.218140 NA
## mean NA 5.734939e+05 NA 3.374602 NA
## SE.mean NA 1.509075e+05 NA 1.182606 NA
## CI.mean.0.95 NA 3.413764e+05 NA 2.675240 NA
## var NA 2.277307e+11 NA 13.985568 NA
## std.dev NA 4.772114e+05 NA 3.739728 NA
## coef.var NA 8.321123e-01 NA 1.108198 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 1.000000e+01 1.000000e+01 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.844680e+01 -7.257380e+01 NA NA NA
## max 1.873350e+01 -7.228530e+01 NA NA NA
## range 2.867000e-01 2.885000e-01 NA NA NA
## sum 1.852724e+02 -7.238491e+02 NA NA NA
## median 1.851405e+01 -7.237735e+01 NA NA NA
## mean 1.852724e+01 -7.238491e+01 NA NA NA
## SE.mean 2.447308e-02 2.769416e-02 NA NA NA
## CI.mean.0.95 5.536196e-02 6.264854e-02 NA NA NA
## var 5.989318e-03 7.669665e-03 NA NA NA
## std.dev 7.739069e-02 8.757663e-02 NA NA NA
## coef.var 4.177130e-03 -1.209874e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 9.0000000 NA
## nbr.null NA NA NA 0 0.0000000 NA
## nbr.na NA NA NA 10 1.0000000 NA
## min NA NA NA Inf 2.0000000 NA
## max NA NA NA -Inf 23.0000000 NA
## range NA NA NA -Inf 21.0000000 NA
## sum NA NA NA 0 65.0000000 NA
## median NA NA NA NA 4.0000000 NA
## mean NA NA NA NaN 7.2222222 NA
## SE.mean NA NA NA NA 2.2838672 NA
## CI.mean.0.95 NA NA NA NaN 5.2666072 NA
## var NA NA NA NA 46.9444444 NA
## std.dev NA NA NA NA 6.8516016 NA
## coef.var NA NA NA NA 0.9486833 NA
## source_link prop ypos
## nbr.val NA 10.0000000 10.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.3280683 4.0325941
## max NA 35.9508766 99.2403252
## range NA 35.6228083 95.2077312
## sum NA 100.0000000 442.1154554
## median NA 6.5730418 32.7688865
## mean NA 10.0000000 44.2115455
## SE.mean NA 3.5044308 10.8539511
## CI.mean.0.95 NA 7.9275733 24.5533433
## var NA 122.8103537 1178.0825517
## std.dev NA 11.0819833 34.3232072
## coef.var NA 1.1081983 0.7763404
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Trinidad and Tobago
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_TT <- subset(df, country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
| 224 |
9/1/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Tobago |
17000 |
Scarborough |
9.11607 |
NA |
11.2415 |
-60.6742 |
(11.2415, -60.674199999999999) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161197580 |
| 357 |
11/17/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
7.33295 |
NA |
11.2965 |
-60.6312 |
(11.2965, -60.6312) |
Landslide |
Landslide |
Medium |
Rain |
NA |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161237574 |
| 390 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Sangre Grande |
15968 |
Sangre Grande |
29.28864 |
NA |
10.8410 |
-61.0550 |
(10.840999999999999, -61.055) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
3 |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 391 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
8.62938 |
NA |
11.3000 |
-60.6440 |
(11.3, -60.643999999999998) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 392 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
2.66802 |
NA |
11.2670 |
-60.5660 |
(11.266999999999999, -60.566000000000003) |
Landslide |
Landslide |
Small |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 780 |
9/7/08 |
NA |
NA |
Trinidad and Tobago |
TT |
Diego Martin |
8140 |
Petit Valley |
10.61854 |
NA |
10.7603 |
-61.4578 |
(10.760300000000001, -61.457799999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://www.newsday.co.tt/news/0,85847.html |
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_TT <- df_TT %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_TT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_TT, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_TT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 2.64003 |
| 16.73194 |
| 13.75900 |
| 2.63186 |
| 9.11607 |
| 4.68038 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_TT$distance
names(distance) <- df_TT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Sangre Grande 33.51924000 33.51924000 6.44876727 6.44876727
## Sangre Grande 33.10893000 66.62817000 6.36982772 12.81859499
## Sangre Grande 33.10893000 99.73710000 6.36982772 19.18842271
## Sangre Grande 30.29383000 130.03093000 5.82823057 25.01665328
## Sangre Grande 29.28864000 159.31957000 5.63484205 30.65149533
## Sangre Grande 26.68822000 186.00779000 5.13454719 35.78604252
## Point Fortin 22.47289000 208.48068000 4.32355977 40.10960229
## Paradise 16.73194000 225.21262000 3.21905828 43.32866056
## Paradise 13.75900000 238.97162000 2.64709429 45.97575486
## Arima 13.34116000 252.31278000 2.56670605 48.54246090
## Petit Valley 11.75674000 264.06952000 2.26187945 50.80434035
## Petit Valley 10.61854000 274.68806000 2.04290113 52.84724148
## Marabella 9.42494000 284.11300000 1.81326440 54.66050588
## Scarborough 9.11607000 293.22907000 1.75384089 56.41434677
## Roxborough 8.62938000 301.85845000 1.66020659 58.07455336
## Tabaquite 8.55164000 310.41009000 1.64525019 59.71980356
## Roxborough 8.44112000 318.85121000 1.62398725 61.34379081
## Princes Town 8.41931000 327.27052000 1.61979122 62.96358203
## Roxborough 8.24676000 335.51728000 1.58659432 64.55017636
## Roxborough 7.87263000 343.38991000 1.51461545 66.06479180
## Roxborough 7.47816000 350.86807000 1.43872336 67.50351516
## Mucurapo 7.43310000 358.30117000 1.43005426 68.93356942
## Laventille 7.37181000 365.67298000 1.41826268 70.35183210
## Roxborough 7.33295000 373.00593000 1.41078640 71.76261850
## Mucurapo 7.24469000 380.25062000 1.39380606 73.15642455
## Petit Valley 6.95807000 387.20869000 1.33866323 74.49508778
## Tabaquite 6.91642000 394.12511000 1.33065019 75.82573797
## Scarborough 6.81393000 400.93904000 1.31093213 77.13667010
## Petit Valley 6.58396000 407.52300000 1.26668820 78.40335830
## Petit Valley 6.39375000 413.91675000 1.23009369 79.63345199
## Scarborough 6.35974000 420.27649000 1.22355051 80.85700250
## Petit Valley 5.91101000 426.18750000 1.13721933 81.99422184
## Petit Valley 5.73985000 431.92735000 1.10428986 83.09851169
## Roxborough 5.62092000 437.54827000 1.08140891 84.17992061
## Scarborough 4.68038000 442.22865000 0.90045840 85.08037901
## Petit Valley 4.66234000 446.89099000 0.89698769 85.97736670
## Petit Valley 4.64073000 451.53172000 0.89283014 86.87019684
## Petit Valley 4.50278000 456.03450000 0.86628994 87.73648678
## Petit Valley 4.33904000 460.37354000 0.83478799 88.57127477
## Peñal, 4.21321000 464.58675000 0.81057956 89.38185433
## Petit Valley 4.00979000 468.59654000 0.77144358 90.15329791
## Scarborough 3.88123000 472.47777000 0.74670992 90.90000783
## Siparia 3.75200000 476.22977000 0.72184736 91.62185518
## Scarborough 3.48176000 479.71153000 0.66985588 92.29171106
## Roxborough 3.36240000 483.07393000 0.64689220 92.93860327
## Petit Valley 3.33629000 486.41022000 0.64186890 93.58047217
## Scarborough 3.22335000 489.63357000 0.62014037 94.20061254
## Petit Valley 3.08955000 492.72312000 0.59439859 94.79501113
## Roxborough 2.66802000 495.39114000 0.51330042 95.30831155
## Paradise 2.64003000 498.03117000 0.50791543 95.81622697
## Tunapuna 2.63186000 500.66303000 0.50634360 96.32257057
## Peñal, 2.57071000 503.23374000 0.49457895 96.81714952
## Port-of-Spain 2.54016000 505.77390000 0.48870143 97.30585096
## Petit Valley 2.24772000 508.02162000 0.43243890 97.73828985
## Port-of-Spain 2.15046000 510.17208000 0.41372704 98.15201690
## Sangre Grande 2.00931000 512.18139000 0.38657119 98.53858809
## Petit Valley 1.84331000 514.02470000 0.35463445 98.89322254
## Petit Valley 1.83626000 515.86096000 0.35327810 99.24650064
## Port-of-Spain 1.07831000 516.93927000 0.20745608 99.45395672
## San Fernando 0.92162000 517.86089000 0.17731049 99.63126721
## Roxborough 0.91163000 518.77252000 0.17538851 99.80665573
## Tabaquite 0.61975000 519.39227000 0.11923372 99.92588944
## Laventille 0.38521000 519.77748000 0.07411056 100.00000000
stem(df_TT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0111122222333333333344444
## 0 | 55556666677777777788889999
## 1 | 1234
## 1 | 7
## 2 | 2
## 2 | 79
## 3 | 0334
head(df_TT)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2667 10/31/10 Morning <NA> Trinidad an~ TT Tuna~ 15067
## 2 4108 12/25/11 <NA> <NA> Trinidad an~ TT Tuna~ 15067
## 3 4374 5/30/12 <NA> <NA> Trinidad an~ TT Tuna~ 15067
## 4 4919 6/14/13 Morning <NA> Trinidad an~ TT Tuna~ 17758
## 5 224 9/1/07 <NA> <NA> Trinidad an~ TT Toba~ 17000
## 6 2669 10/31/10 <NA> <NA> Trinidad an~ TT Toba~ 17000
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_TT))
stem(df_TT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0111122222333333333344444
## 0 | 55556666677777777788889999
## 1 | 1234
## 1 | 7
## 2 | 2
## 2 | 79
## 3 | 0334
stem(df_TT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4699188
## 2 | 022566671234589
## 4 | 0235677679
## 6 | 446890234459
## 8 | 2446614
## 10 | 68
## 12 | 38
## 14 |
## 16 | 7
## 18 |
## 20 |
## 22 | 5
## 24 |
## 26 | 7
## 28 | 3
## 30 | 3
## 32 | 115
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 33.10893 |
2 |
3.2 |
3.2 |
3.2 |
3.2 |
| 0.38521 |
1 |
1.6 |
1.6 |
4.8 |
4.8 |
| 0.61975 |
1 |
1.6 |
1.6 |
6.3 |
6.3 |
| 0.91163 |
1 |
1.6 |
1.6 |
7.9 |
7.9 |
| 0.92162 |
1 |
1.6 |
1.6 |
9.5 |
9.5 |
| 1.07831 |
1 |
1.6 |
1.6 |
11.1 |
11.1 |
| 1.83626 |
1 |
1.6 |
1.6 |
12.7 |
12.7 |
| 1.84331 |
1 |
1.6 |
1.6 |
14.3 |
14.3 |
| 2.00931 |
1 |
1.6 |
1.6 |
15.9 |
15.9 |
| 2.15046 |
1 |
1.6 |
1.6 |
17.5 |
17.5 |
| 2.24772 |
1 |
1.6 |
1.6 |
19.0 |
19.0 |
| 2.54016 |
1 |
1.6 |
1.6 |
20.6 |
20.6 |
| 2.57071 |
1 |
1.6 |
1.6 |
22.2 |
22.2 |
| 2.63186 |
1 |
1.6 |
1.6 |
23.8 |
23.8 |
| 2.64003 |
1 |
1.6 |
1.6 |
25.4 |
25.4 |
| 2.66802 |
1 |
1.6 |
1.6 |
27.0 |
27.0 |
| 3.08955 |
1 |
1.6 |
1.6 |
28.6 |
28.6 |
| 3.22335 |
1 |
1.6 |
1.6 |
30.2 |
30.2 |
| 3.33629 |
1 |
1.6 |
1.6 |
31.7 |
31.7 |
| 3.3624 |
1 |
1.6 |
1.6 |
33.3 |
33.3 |
| 3.48176 |
1 |
1.6 |
1.6 |
34.9 |
34.9 |
| 3.752 |
1 |
1.6 |
1.6 |
36.5 |
36.5 |
| 3.88123 |
1 |
1.6 |
1.6 |
38.1 |
38.1 |
| 4.00979 |
1 |
1.6 |
1.6 |
39.7 |
39.7 |
| 4.21321 |
1 |
1.6 |
1.6 |
41.3 |
41.3 |
| 4.33904 |
1 |
1.6 |
1.6 |
42.9 |
42.9 |
| 4.50278 |
1 |
1.6 |
1.6 |
44.4 |
44.4 |
| 4.64073 |
1 |
1.6 |
1.6 |
46.0 |
46.0 |
| 4.66234 |
1 |
1.6 |
1.6 |
47.6 |
47.6 |
| 4.68038 |
1 |
1.6 |
1.6 |
49.2 |
49.2 |
| 5.62092 |
1 |
1.6 |
1.6 |
50.8 |
50.8 |
| 5.73985 |
1 |
1.6 |
1.6 |
52.4 |
52.4 |
| 5.91101 |
1 |
1.6 |
1.6 |
54.0 |
54.0 |
| 6.35974 |
1 |
1.6 |
1.6 |
55.6 |
55.6 |
| 6.39375 |
1 |
1.6 |
1.6 |
57.1 |
57.1 |
| 6.58396 |
1 |
1.6 |
1.6 |
58.7 |
58.7 |
| 6.81393 |
1 |
1.6 |
1.6 |
60.3 |
60.3 |
| 6.91642 |
1 |
1.6 |
1.6 |
61.9 |
61.9 |
| 6.95807 |
1 |
1.6 |
1.6 |
63.5 |
63.5 |
| 7.24469 |
1 |
1.6 |
1.6 |
65.1 |
65.1 |
| 7.33295 |
1 |
1.6 |
1.6 |
66.7 |
66.7 |
| 7.37181 |
1 |
1.6 |
1.6 |
68.3 |
68.3 |
| 7.4331 |
1 |
1.6 |
1.6 |
69.8 |
69.8 |
| 7.47816 |
1 |
1.6 |
1.6 |
71.4 |
71.4 |
| 7.87263 |
1 |
1.6 |
1.6 |
73.0 |
73.0 |
| 8.24676 |
1 |
1.6 |
1.6 |
74.6 |
74.6 |
| 8.41931 |
1 |
1.6 |
1.6 |
76.2 |
76.2 |
| 8.44112 |
1 |
1.6 |
1.6 |
77.8 |
77.8 |
| 8.55164 |
1 |
1.6 |
1.6 |
79.4 |
79.4 |
| 8.62938 |
1 |
1.6 |
1.6 |
81.0 |
81.0 |
| 9.11607 |
1 |
1.6 |
1.6 |
82.5 |
82.5 |
| 9.42494 |
1 |
1.6 |
1.6 |
84.1 |
84.1 |
| 10.61854 |
1 |
1.6 |
1.6 |
85.7 |
85.7 |
| 11.75674 |
1 |
1.6 |
1.6 |
87.3 |
87.3 |
| 13.34116 |
1 |
1.6 |
1.6 |
88.9 |
88.9 |
| 13.759 |
1 |
1.6 |
1.6 |
90.5 |
90.5 |
| 16.73194 |
1 |
1.6 |
1.6 |
92.1 |
92.1 |
| 22.47289 |
1 |
1.6 |
1.6 |
93.7 |
93.7 |
| 26.68822 |
1 |
1.6 |
1.6 |
95.2 |
95.2 |
| 29.28864 |
1 |
1.6 |
1.6 |
96.8 |
96.8 |
| 30.29383 |
1 |
1.6 |
1.6 |
98.4 |
98.4 |
| 33.51924 |
1 |
1.6 |
1.6 |
100.0 |
100.0 |
| Total |
63 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 63 obs. of 5 variables:
## $ n : num 2 1 1 1 1 1 1 1 1 1 ...
## $ % : num 3.2 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 ...
## $ val% : num 3.2 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 ...
## $ %cum : num 3.2 4.8 6.3 7.9 9.5 11.1 12.7 14.3 15.9 17.5 ...
## $ val%cum: num 3.2 4.8 6.3 7.9 9.5 11.1 12.7 14.3 15.9 17.5 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 33.10893 |
2 |
| 0.38521 |
1 |
| 0.61975 |
1 |
| 0.91163 |
1 |
| 0.92162 |
1 |
| 1.07831 |
1 |
| 1.83626 |
1 |
| 1.84331 |
1 |
| 2.00931 |
1 |
| 2.15046 |
1 |
| 2.24772 |
1 |
| 2.54016 |
1 |
| 2.57071 |
1 |
| 2.63186 |
1 |
| 2.64003 |
1 |
| 2.66802 |
1 |
| 3.08955 |
1 |
| 3.22335 |
1 |
| 3.33629 |
1 |
| 3.3624 |
1 |
| 3.48176 |
1 |
| 3.752 |
1 |
| 3.88123 |
1 |
| 4.00979 |
1 |
| 4.21321 |
1 |
| 4.33904 |
1 |
| 4.50278 |
1 |
| 4.64073 |
1 |
| 4.66234 |
1 |
| 4.68038 |
1 |
| 5.62092 |
1 |
| 5.73985 |
1 |
| 5.91101 |
1 |
| 6.35974 |
1 |
| 6.39375 |
1 |
| 6.58396 |
1 |
| 6.81393 |
1 |
| 6.91642 |
1 |
| 6.95807 |
1 |
| 7.24469 |
1 |
| 7.33295 |
1 |
| 7.37181 |
1 |
| 7.4331 |
1 |
| 7.47816 |
1 |
| 7.87263 |
1 |
| 8.24676 |
1 |
| 8.41931 |
1 |
| 8.44112 |
1 |
| 8.55164 |
1 |
| 8.62938 |
1 |
| 9.11607 |
1 |
| 9.42494 |
1 |
| 10.61854 |
1 |
| 11.75674 |
1 |
| 13.34116 |
1 |
| 13.759 |
1 |
| 16.73194 |
1 |
| 22.47289 |
1 |
| 26.68822 |
1 |
| 29.28864 |
1 |
| 30.29383 |
1 |
| 33.51924 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.38521 5.38521 10.38521 15.38521 20.38521 25.38521 30.38521 35.38521
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.385,5.39] |
28 |
0.4516129 |
28 |
| (5.39,10.4] |
22 |
0.3548387 |
50 |
| (10.4,15.4] |
4 |
0.0645161 |
54 |
| (15.4,20.4] |
1 |
0.0161290 |
55 |
| (20.4,25.4] |
1 |
0.0161290 |
56 |
| (25.4,30.4] |
3 |
0.0483871 |
59 |
| (30.4,35.4] |
3 |
0.0483871 |
62 |
str(Freq_table)
## 'data.frame': 7 obs. of 4 variables:
## $ distance: Factor w/ 7 levels "(0.385,5.39]",..: 1 2 3 4 5 6 7
## $ Freq : int 28 22 4 1 1 3 3
## $ Rel_Freq: num 0.4516 0.3548 0.0645 0.0161 0.0161 ...
## $ Cum_Freq: int 28 50 54 55 56 59 62
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.385,5.39] |
28 |
| (5.39,10.4] |
22 |
| (10.4,15.4] |
4 |
| (15.4,20.4] |
1 |
| (20.4,25.4] |
1 |
| (25.4,30.4] |
3 |
| (30.4,35.4] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_TT)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 6.300000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.240000e+02 NA NA NA NA NA
## max 6.301000e+03 NA NA NA NA NA
## range 6.077000e+03 NA NA NA NA NA
## sum 1.939570e+05 NA NA NA NA NA
## median 2.759000e+03 NA NA NA NA NA
## mean 3.078683e+03 NA NA NA NA NA
## SE.mean 1.907885e+02 NA NA NA NA NA
## CI.mean.0.95 3.813809e+02 NA NA NA NA NA
## var 2.293217e+06 NA NA NA NA NA
## std.dev 1.514337e+03 NA NA NA NA NA
## coef.var 4.918783e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 6.300000e+01 NA 63.000000 NA
## nbr.null NA 1.000000e+01 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 0.000000e+00 NA 0.385210 NA
## max NA 1.572580e+05 NA 33.519240 NA
## range NA 1.572580e+05 NA 33.134030 NA
## sum NA 1.091655e+06 NA 519.777480 NA
## median NA 8.140000e+03 NA 5.911010 NA
## mean NA 1.732786e+04 NA 8.250436 NA
## SE.mean NA 3.563658e+03 NA 1.064793 NA
## CI.mean.0.95 NA 7.123651e+03 NA 2.128491 NA
## var NA 8.000786e+08 NA 71.428394 NA
## std.dev NA 2.828566e+04 NA 8.451532 NA
## coef.var NA 1.632381e+00 NA 1.024374 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 63.00000000 6.300000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 10.08760000 -6.186420e+01 NA NA NA
## max 11.31760000 -6.054170e+01 NA NA NA
## range 1.23000000 1.322500e+00 NA NA NA
## sum 679.61780000 -3.856206e+03 NA NA NA
## median 10.71420000 -6.139170e+01 NA NA NA
## mean 10.78758413 -6.120962e+01 NA NA NA
## SE.mean 0.04203106 4.867603e-02 NA NA NA
## CI.mean.0.95 0.08401889 9.730199e-02 NA NA NA
## var 0.11129642 1.492694e-01 NA NA NA
## std.dev 0.33361118 3.863540e-01 NA NA NA
## coef.var 0.03092548 -6.311982e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 44.00000000 NA
## nbr.null NA NA NA 1 40.00000000 NA
## nbr.na NA NA NA 62 19.00000000 NA
## min NA NA NA 0 0.00000000 NA
## max NA NA NA 0 3.00000000 NA
## range NA NA NA 0 3.00000000 NA
## sum NA NA NA 0 6.00000000 NA
## median NA NA NA 0 0.00000000 NA
## mean NA NA NA 0 0.13636364 NA
## SE.mean NA NA NA NA 0.07687686 NA
## CI.mean.0.95 NA NA NA NaN 0.15503697 NA
## var NA NA NA NA 0.26004228 NA
## std.dev NA NA NA NA 0.50994341 NA
## coef.var NA NA NA NA 3.73958502 NA
## source_link prop ypos
## nbr.val NA 63.00000000 63.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 0.07411056 0.2539577
## max NA 6.44876727 99.8226828
## range NA 6.37465671 99.5687251
## sum NA 100.00000000 3733.7070298
## median NA 1.13721933 66.3125093
## mean NA 1.58730159 59.2651909
## SE.mean NA 0.20485554 3.8579049
## CI.mean.0.95 NA 0.40950038 7.7118419
## var NA 2.64384485 937.6560850
## std.dev NA 1.62599042 30.6211705
## coef.var NA 1.02437397 0.5166805
boxplot(data, horizontal=TRUE, col='green')

Gráfico para City of Port of Spain (Trinidad and Tobago)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_TT <- subset(df, country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
| 224 |
9/1/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Tobago |
17000 |
Scarborough |
9.11607 |
NA |
11.2415 |
-60.6742 |
(11.2415, -60.674199999999999) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161197580 |
| 357 |
11/17/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
7.33295 |
NA |
11.2965 |
-60.6312 |
(11.2965, -60.6312) |
Landslide |
Landslide |
Medium |
Rain |
NA |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161237574 |
| 390 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Sangre Grande |
15968 |
Sangre Grande |
29.28864 |
NA |
10.8410 |
-61.0550 |
(10.840999999999999, -61.055) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
3 |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 391 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
8.62938 |
NA |
11.3000 |
-60.6440 |
(11.3, -60.643999999999998) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 392 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
2.66802 |
NA |
11.2670 |
-60.5660 |
(11.266999999999999, -60.566000000000003) |
Landslide |
Landslide |
Small |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 780 |
9/7/08 |
NA |
NA |
Trinidad and Tobago |
TT |
Diego Martin |
8140 |
Petit Valley |
10.61854 |
NA |
10.7603 |
-61.4578 |
(10.760300000000001, -61.457799999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://www.newsday.co.tt/news/0,85847.html |
library(dplyr)
df_TT <- subset(df, state == "City of Port of Spain")
knitr::kable(head(df_TT))
| 891 |
11/16/08 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
1.07831 |
NA |
10.6760 |
-61.5160 |
(10.676, -61.515999999999998) |
Landslide |
Complex |
Medium |
Downpour |
NA |
NA |
1 |
NA |
http://www.newsday.co.tt/news/0,90097.html |
| 2157 |
7/29/10 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
4342 |
Mucurapo |
7.43310 |
NA |
10.6871 |
-61.6002 |
(10.687099999999999, -61.600200000000001) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.i955fm.com/News.aspx?id=8920 |
| 2158 |
7/29/10 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
2.15046 |
NA |
10.6738 |
-61.5006 |
(10.6738, -61.500599999999999) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.i955fm.com/News.aspx?id=8920 |
| 2199 |
8/5/10 |
13:45:00 |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
4342 |
Mucurapo |
7.24469 |
NA |
10.6857 |
-61.5989 |
(10.685700000000001, -61.5989) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
NA |
| 4558 |
9/16/12 |
Early morning |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
2.54016 |
NA |
10.6771 |
-61.4982 |
(10.677099999999999, -61.498199999999997) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://92.f9.78ae.static.theplanet.com/news/2012-09-16/morvant-landslide-leaves-motorists-frustrated |
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_TT <- df_TT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_TT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_TT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_TT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.07831 |
| 2.15046 |
| 2.54016 |
| 7.43310 |
| 7.24469 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_TT$distance
names(distance) <- df_TT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Mucurapo 7.433100 7.433100 36.353508 36.353508
## Mucurapo 7.244690 14.677790 35.432040 71.785548
## Port-of-Spain 2.540160 17.217950 12.423313 84.208861
## Port-of-Spain 2.150460 19.368410 10.517384 94.726245
## Port-of-Spain 1.078310 20.446720 5.273755 100.000000
stem(df_TT$"distance")
##
## The decimal point is at the |
##
## 0 | 1
## 2 | 25
## 4 |
## 6 | 24
head(df_TT)
## # A tibble: 5 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 891 11/16/08 <NA> <NA> Trinidad an~ TT City~ 49031
## 2 2158 7/29/10 <NA> <NA> Trinidad an~ TT City~ 49031
## 3 4558 9/16/12 Earl~ <NA> Trinidad an~ TT City~ 49031
## 4 2157 7/29/10 <NA> <NA> Trinidad an~ TT City~ 4342
## 5 2199 8/5/10 13:4~ <NA> Trinidad an~ TT City~ 4342
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_TT))
| 891 |
11/16/08 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
1.07831 |
NA |
10.6760 |
-61.5160 |
(10.676, -61.515999999999998) |
Landslide |
Complex |
Medium |
Downpour |
NA |
NA |
1 |
NA |
http://www.newsday.co.tt/news/0,90097.html |
5.273755 |
2.636878 |
| 2158 |
7/29/10 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
2.15046 |
NA |
10.6738 |
-61.5006 |
(10.6738, -61.500599999999999) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.i955fm.com/News.aspx?id=8920 |
10.517384 |
10.532447 |
| 4558 |
9/16/12 |
Early morning |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
49031 |
Port-of-Spain |
2.54016 |
NA |
10.6771 |
-61.4982 |
(10.677099999999999, -61.498199999999997) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://92.f9.78ae.static.theplanet.com/news/2012-09-16/morvant-landslide-leaves-motorists-frustrated |
12.423313 |
22.002796 |
| 2157 |
7/29/10 |
NA |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
4342 |
Mucurapo |
7.43310 |
NA |
10.6871 |
-61.6002 |
(10.687099999999999, -61.600200000000001) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.i955fm.com/News.aspx?id=8920 |
36.353508 |
46.391206 |
| 2199 |
8/5/10 |
13:45:00 |
NA |
Trinidad and Tobago |
TT |
City of Port of Spain |
4342 |
Mucurapo |
7.24469 |
NA |
10.6857 |
-61.5989 |
(10.685700000000001, -61.5989) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
NA |
35.432040 |
82.283980 |
stem(df_TT$"distance")
##
## The decimal point is at the |
##
## 0 | 1
## 2 | 25
## 4 |
## 6 | 24
stem(df_TT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 1 | 1
## 2 | 25
## 3 |
## 4 |
## 5 |
## 6 |
## 7 | 24
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.07831 |
1 |
20 |
20 |
20 |
20 |
| 2.15046 |
1 |
20 |
20 |
40 |
40 |
| 2.54016 |
1 |
20 |
20 |
60 |
60 |
| 7.24469 |
1 |
20 |
20 |
80 |
80 |
| 7.4331 |
1 |
20 |
20 |
100 |
100 |
| Total |
5 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 1.07831 |
1 |
| 2.15046 |
1 |
| 2.54016 |
1 |
| 7.24469 |
1 |
| 7.4331 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.07831 4.07831 7.07831 10.07831
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.08,4.08] |
2 |
0.5 |
2 |
| (4.08,7.08] |
0 |
0.0 |
2 |
| (7.08,10.1] |
2 |
0.5 |
4 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(1.08,4.08]",..: 1 2 3
## $ Freq : int 2 0 2
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 2 2 4
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.08,4.08] |
2 |
| (4.08,7.08] |
0 |
| (7.08,10.1] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 5.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.910000e+02 NA NA NA NA NA
## max 4.558000e+03 NA NA NA NA NA
## range 3.667000e+03 NA NA NA NA NA
## sum 1.196300e+04 NA NA NA NA NA
## median 2.158000e+03 NA NA NA NA NA
## mean 2.392600e+03 NA NA NA NA NA
## SE.mean 5.954740e+02 NA NA NA NA NA
## CI.mean.0.95 1.653301e+03 NA NA NA NA NA
## var 1.772946e+06 NA NA NA NA NA
## std.dev 1.331520e+03 NA NA NA NA NA
## coef.var 5.565160e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 5.000000e+00 NA 5.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 4.342000e+03 NA 1.0783100 NA
## max NA 4.903100e+04 NA 7.4331000 NA
## range NA 4.468900e+04 NA 6.3547900 NA
## sum NA 1.557770e+05 NA 20.4467200 NA
## median NA 4.903100e+04 NA 2.5401600 NA
## mean NA 3.115540e+04 NA 4.0893440 NA
## SE.mean NA 1.094652e+04 NA 1.3483783 NA
## CI.mean.0.95 NA 3.039242e+04 NA 3.7436983 NA
## var NA 5.991320e+08 NA 9.0906201 NA
## std.dev NA 2.447717e+04 NA 3.0150655 NA
## coef.var NA 7.856479e-01 NA 0.7372981 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.067380e+01 -6.160020e+01 NA NA NA
## max 1.068710e+01 -6.149820e+01 NA NA NA
## range 1.330000e-02 1.020000e-01 NA NA NA
## sum 5.339970e+01 -3.077139e+02 NA NA NA
## median 1.067710e+01 -6.151600e+01 NA NA NA
## mean 1.067994e+01 -6.154278e+01 NA NA NA
## SE.mean 2.699370e-03 2.337756e-02 NA NA NA
## CI.mean.0.95 7.494653e-03 6.490652e-02 NA NA NA
## var 3.643300e-05 2.732552e-03 NA NA NA
## std.dev 6.035975e-03 5.227382e-02 NA NA NA
## coef.var 5.651694e-04 -8.493899e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 4.0000000 NA
## nbr.null NA NA NA 0 3.0000000 NA
## nbr.na NA NA NA 5 1.0000000 NA
## min NA NA NA Inf 0.0000000 NA
## max NA NA NA -Inf 1.0000000 NA
## range NA NA NA -Inf 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA NA 0.0000000 NA
## mean NA NA NA NaN 0.2500000 NA
## SE.mean NA NA NA NA 0.2500000 NA
## CI.mean.0.95 NA NA NA NaN 0.7956116 NA
## var NA NA NA NA 0.2500000 NA
## std.dev NA NA NA NA 0.5000000 NA
## coef.var NA NA NA NA 2.0000000 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 5.2737554 2.636878
## max NA 36.3535080 82.283980
## range NA 31.0797526 79.647102
## sum NA 100.0000000 163.847307
## median NA 12.4233129 22.002796
## mean NA 20.0000000 32.769461
## SE.mean NA 6.5945946 14.414607
## CI.mean.0.95 NA 18.3095299 40.021365
## var NA 217.4433905 1038.904493
## std.dev NA 14.7459618 32.232041
## coef.var NA 0.7372981 0.983600
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Tunapuna/Piarco (Trinidad and Tobago)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_TT <- subset(df, country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
| 224 |
9/1/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Tobago |
17000 |
Scarborough |
9.11607 |
NA |
11.2415 |
-60.6742 |
(11.2415, -60.674199999999999) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161197580 |
| 357 |
11/17/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
7.33295 |
NA |
11.2965 |
-60.6312 |
(11.2965, -60.6312) |
Landslide |
Landslide |
Medium |
Rain |
NA |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161237574 |
| 390 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Sangre Grande |
15968 |
Sangre Grande |
29.28864 |
NA |
10.8410 |
-61.0550 |
(10.840999999999999, -61.055) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
3 |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 391 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
8.62938 |
NA |
11.3000 |
-60.6440 |
(11.3, -60.643999999999998) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 392 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
2.66802 |
NA |
11.2670 |
-60.5660 |
(11.266999999999999, -60.566000000000003) |
Landslide |
Landslide |
Small |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 780 |
9/7/08 |
NA |
NA |
Trinidad and Tobago |
TT |
Diego Martin |
8140 |
Petit Valley |
10.61854 |
NA |
10.7603 |
-61.4578 |
(10.760300000000001, -61.457799999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://www.newsday.co.tt/news/0,85847.html |
library(dplyr)
df_TT <- subset(df, state == "Tunapuna/Piarco")
knitr::kable(head(df_TT))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_TT <- df_TT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_TT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_TT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_TT$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 2.63186 |
| 2.64003 |
| 16.73194 |
| 13.75900 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_TT$distance
names(distance) <- df_TT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Paradise 16.731940 16.731940 46.785839 46.785839
## Paradise 13.759000 30.490940 38.472906 85.258745
## Paradise 2.640030 33.130970 7.382050 92.640795
## Tunapuna 2.631860 35.762830 7.359205 100.000000
stem(df_TT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 33
## 0 |
## 1 | 4
## 1 | 7
head(df_TT)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4919 6/14/13 Morning <NA> Trinidad an~ TT Tuna~ 17758
## 2 2667 10/31/10 Morning <NA> Trinidad an~ TT Tuna~ 15067
## 3 4108 12/25/11 <NA> <NA> Trinidad an~ TT Tuna~ 15067
## 4 4374 5/30/12 <NA> <NA> Trinidad an~ TT Tuna~ 15067
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_TT))
stem(df_TT$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 33
## 0 |
## 1 | 4
## 1 | 7
stem(df_TT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 66
## 4 |
## 6 |
## 8 |
## 10 |
## 12 | 8
## 14 |
## 16 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.63186 |
1 |
25 |
25 |
25 |
25 |
| 2.64003 |
1 |
25 |
25 |
50 |
50 |
| 13.759 |
1 |
25 |
25 |
75 |
75 |
| 16.73194 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 2.63186 |
1 |
| 2.64003 |
1 |
| 13.759 |
1 |
| 16.73194 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.63186 7.63186 12.63186 17.63186
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (2.63,7.63] |
1 |
0.3333333 |
1 |
| (7.63,12.6] |
0 |
0.0000000 |
1 |
| (12.6,17.6] |
2 |
0.6666667 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(2.63,7.63]",..: 1 2 3
## $ Freq : int 1 0 2
## $ Rel_Freq: num 0.333 0 0.667
## $ Cum_Freq: int 1 1 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (2.63,7.63] |
1 |
| (7.63,12.6] |
0 |
| (12.6,17.6] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.667000e+03 NA NA NA NA NA
## max 4.919000e+03 NA NA NA NA NA
## range 2.252000e+03 NA NA NA NA NA
## sum 1.606800e+04 NA NA NA NA NA
## median 4.241000e+03 NA NA NA NA NA
## mean 4.017000e+03 NA NA NA NA NA
## SE.mean 4.806102e+02 NA NA NA NA NA
## CI.mean.0.95 1.529516e+03 NA NA NA NA NA
## var 9.239447e+05 NA NA NA NA NA
## std.dev 9.612204e+02 NA NA NA NA NA
## coef.var 2.392881e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.506700e+04 NA 2.6318600 NA
## max NA 1.775800e+04 NA 16.7319400 NA
## range NA 2.691000e+03 NA 14.1000800 NA
## sum NA 6.295900e+04 NA 35.7628300 NA
## median NA 1.506700e+04 NA 8.1995150 NA
## mean NA 1.573975e+04 NA 8.9407075 NA
## SE.mean NA 6.727500e+02 NA 3.6902952 NA
## CI.mean.0.95 NA 2.140991e+03 NA 11.7441662 NA
## var NA 1.810370e+06 NA 54.4731134 NA
## std.dev NA 1.345500e+03 NA 7.3805903 NA
## coef.var NA 8.548420e-02 NA 0.8255041 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000000 4.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 10.650100000 -6.140780e+01 NA NA NA
## max 10.790100000 -6.129840e+01 NA NA NA
## range 0.140000000 1.094000e-01 NA NA NA
## sum 42.875400000 -2.454898e+02 NA NA NA
## median 10.717600000 -6.139180e+01 NA NA NA
## mean 10.718850000 -6.137245e+01 NA NA NA
## SE.mean 0.035509823 2.521539e-02 NA NA NA
## CI.mean.0.95 0.113008104 8.024662e-02 NA NA NA
## var 0.005043790 2.543263e-03 NA NA NA
## std.dev 0.071019645 5.043078e-02 NA NA NA
## coef.var 0.006625678 -8.217169e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 3 NA
## nbr.null NA NA NA 0 3 NA
## nbr.na NA NA NA 4 1 NA
## min NA NA NA Inf 0 NA
## max NA NA NA -Inf 0 NA
## range NA NA NA -Inf 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA NA 0 NA
## mean NA NA NA NaN 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 7.3592051 3.679603
## max NA 46.7858388 80.763547
## range NA 39.4266337 77.083944
## sum NA 100.0000000 133.627554
## median NA 22.9274781 24.592202
## mean NA 25.0000000 33.406889
## SE.mean NA 10.3188007 17.436867
## CI.mean.0.95 NA 32.8390292 55.491893
## var NA 425.9105928 1216.177346
## std.dev NA 20.6376014 34.873734
## coef.var NA 0.8255041 1.043908
boxplot(data, horizontal=TRUE, col='green')

Gráfico para City of San Fernando (Trinidad and Tobago)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_TT <- subset(df, country_name == "Trinidad and Tobago")
knitr::kable(head(df_TT))
| 224 |
9/1/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Tobago |
17000 |
Scarborough |
9.11607 |
NA |
11.2415 |
-60.6742 |
(11.2415, -60.674199999999999) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161197580 |
| 357 |
11/17/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
7.33295 |
NA |
11.2965 |
-60.6312 |
(11.2965, -60.6312) |
Landslide |
Landslide |
Medium |
Rain |
NA |
NA |
NA |
Trinadad Express |
http://www.trinidadexpress.com/index.pl/article_news?id=161237574 |
| 390 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Sangre Grande |
15968 |
Sangre Grande |
29.28864 |
NA |
10.8410 |
-61.0550 |
(10.840999999999999, -61.055) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
3 |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 391 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
8.62938 |
NA |
11.3000 |
-60.6440 |
(11.3, -60.643999999999998) |
Landslide |
Landslide |
Medium |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 392 |
12/11/07 |
NA |
NA |
Trinidad and Tobago |
TT |
Eastern Tobago |
0 |
Roxborough |
2.66802 |
NA |
11.2670 |
-60.5660 |
(11.266999999999999, -60.566000000000003) |
Landslide |
Landslide |
Small |
Tropical cyclone |
Tropical Storm Olga |
NA |
NA |
Trinidad and Tobago’s Newsday |
http://www.newsday.co.tt/news/0,69681.html |
| 780 |
9/7/08 |
NA |
NA |
Trinidad and Tobago |
TT |
Diego Martin |
8140 |
Petit Valley |
10.61854 |
NA |
10.7603 |
-61.4578 |
(10.760300000000001, -61.457799999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
http://www.newsday.co.tt/news/0,85847.html |
library(dplyr)
df_TT <- subset(df, state == "City of San Fernando")
knitr::kable(head(df_TT))
| 1117 |
8/21/09 |
NA |
NA |
Trinidad and Tobago |
TT |
City of San Fernando |
26700 |
Marabella |
9.42494 |
NA |
10.3717 |
-61.3917 |
(10.371700000000001, -61.3917) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.trinidadexpress.com/index.pl/article_news?id=161522186 |
| 2300 |
8/21/10 |
Morning |
NA |
Trinidad and Tobago |
TT |
City of San Fernando |
55419 |
San Fernando |
0.92162 |
NA |
10.2753 |
-61.4689 |
(10.2753, -61.468899999999998) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
0 |
NA |
NA |
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_TT, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_TT <- df_TT %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_TT$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_TT, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_TT$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_TT$distance
names(distance) <- df_TT$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Marabella 9.424940 9.424940 91.092498 91.092498
## San Fernando 0.921620 10.346560 8.907502 100.000000
stem(df_TT$"distance")
##
## The decimal point is at the |
##
## 0 | 9
## 2 |
## 4 |
## 6 |
## 8 | 4
head(df_TT)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2300 8/21/10 Morning <NA> Trinidad an~ TT City~ 55419
## 2 1117 8/21/09 <NA> <NA> Trinidad an~ TT City~ 26700
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_TT))
| 2300 |
8/21/10 |
Morning |
NA |
Trinidad and Tobago |
TT |
City of San Fernando |
55419 |
San Fernando |
0.92162 |
NA |
10.2753 |
-61.4689 |
(10.2753, -61.468899999999998) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
0 |
NA |
NA |
8.907502 |
4.453751 |
| 1117 |
8/21/09 |
NA |
NA |
Trinidad and Tobago |
TT |
City of San Fernando |
26700 |
Marabella |
9.42494 |
NA |
10.3717 |
-61.3917 |
(10.371700000000001, -61.3917) |
Landslide |
Landslide |
Small |
Downpour |
NA |
NA |
0 |
NA |
http://www.trinidadexpress.com/index.pl/article_news?id=161522186 |
91.092498 |
54.453751 |
stem(df_TT$"distance")
##
## The decimal point is at the |
##
## 0 | 9
## 2 |
## 4 |
## 6 |
## 8 | 4
stem(df_TT$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 9
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 |
## 7 |
## 8 |
## 9 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.92162 |
1 |
50 |
50 |
50 |
50 |
| 9.42494 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.92162 5.92162 10.92162
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.922,5.92] |
0 |
0 |
0 |
| (5.92,10.9] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.922,5.92]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.922,5.92] |
0 |
| (5.92,10.9] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_TT)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.117000e+03 NA NA NA NA NA
## max 2.300000e+03 NA NA NA NA NA
## range 1.183000e+03 NA NA NA NA NA
## sum 3.417000e+03 NA NA NA NA NA
## median 1.708500e+03 NA NA NA NA NA
## mean 1.708500e+03 NA NA NA NA NA
## SE.mean 5.915000e+02 NA NA NA NA NA
## CI.mean.0.95 7.515720e+03 NA NA NA NA NA
## var 6.997445e+05 NA NA NA NA NA
## std.dev 8.365073e+02 NA NA NA NA NA
## coef.var 4.896151e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 2.670000e+04 NA 0.921620 NA
## max NA 5.541900e+04 NA 9.424940 NA
## range NA 2.871900e+04 NA 8.503320 NA
## sum NA 8.211900e+04 NA 10.346560 NA
## median NA 4.105950e+04 NA 5.173280 NA
## mean NA 4.105950e+04 NA 5.173280 NA
## SE.mean NA 1.435950e+04 NA 4.251660 NA
## CI.mean.0.95 NA 1.824547e+05 NA 54.022462 NA
## var NA 4.123905e+08 NA 36.153226 NA
## std.dev NA 2.030740e+04 NA 6.012755 NA
## coef.var NA 4.945847e-01 NA 1.162271 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 10.275300000 -6.146890e+01 NA NA NA
## max 10.371700000 -6.139170e+01 NA NA NA
## range 0.096400000 7.720000e-02 NA NA NA
## sum 20.647000000 -1.228606e+02 NA NA NA
## median 10.323500000 -6.143030e+01 NA NA NA
## mean 10.323500000 -6.143030e+01 NA NA NA
## SE.mean 0.048200000 3.860000e-02 NA NA NA
## CI.mean.0.95 0.612439068 4.904595e-01 NA NA NA
## var 0.004646480 2.979920e-03 NA NA NA
## std.dev 0.068165094 5.458864e-02 NA NA NA
## coef.var 0.006602905 -8.886273e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2 NA
## nbr.null NA NA NA 0 2 NA
## nbr.na NA NA NA 2 0 NA
## min NA NA NA Inf 0 NA
## max NA NA NA -Inf 0 NA
## range NA NA NA -Inf 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA NA 0 NA
## mean NA NA NA NaN 0 NA
## SE.mean NA NA NA NA 0 NA
## CI.mean.0.95 NA NA NA NaN 0 NA
## var NA NA NA NA 0 NA
## std.dev NA NA NA NA 0 NA
## coef.var NA NA NA NA NaN NA
## source_link prop ypos
## nbr.val NA 2.000000 2.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 8.907502 4.453751
## max NA 91.092498 54.453751
## range NA 82.184997 50.000000
## sum NA 100.000000 58.907502
## median NA 50.000000 29.453751
## mean NA 50.000000 29.453751
## SE.mean NA 41.092498 25.000000
## CI.mean.0.95 NA 522.129697 317.655118
## var NA 3377.186846 1250.000000
## std.dev NA 58.113569 35.355339
## coef.var NA 1.162271 1.200368
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Dominican Republic
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_DR <- subset(df, country_name == "Dominican Republic")
knitr::kable(head(df_DR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_DR <- df_DR %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_DR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DR, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 3.98059 |
| 1.10868 |
| 4.86398 |
| 4.31327 |
| 2.72462 |
| 1.72138 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_DR$distance
names(distance) <- df_DR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Agustina 5.710580 5.710580 16.167021 16.167021
## Pedro García 4.863980 10.574560 13.770242 29.937264
## Tamboril 4.313270 14.887830 12.211146 42.148410
## Santo Domingo Este 3.980590 18.868420 11.269308 53.417718
## Río Verde Abajo 3.726370 22.594790 10.549595 63.967313
## San José de Las Matas 2.724620 25.319410 7.713576 71.680888
## Bajos de Haina 1.721380 27.040790 4.873338 76.554226
## San Carlos 1.702980 28.743770 4.821247 81.375473
## Luperón 1.548850 30.292620 4.384895 85.760367
## Puerto Plata 1.196360 31.488980 3.386973 89.147340
## Santiago de los Caballeros 1.108680 32.597660 3.138745 92.286085
## Altamira 0.885000 33.482660 2.505492 94.791577
## Sabana de La Mar 0.752840 34.235500 2.131339 96.922916
## Santo Domingo 0.557210 34.792710 1.577498 98.500413
## Constanza 0.529690 35.322400 1.499587 100.000000
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 568912577
## 2 | 77
## 4 | 0397
head(df_DR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6706 8/3/14 <NA> <NA> Dominican R~ DO Sant~ 0
## 2 388 12/11/07 <NA> <NA> Dominican R~ DO Sant~ 1200000
## 3 984 2/12/09 <NA> <NA> Dominican R~ DO Sant~ 1457
## 4 1178 9/20/09 <NA> <NA> Dominican R~ DO Sant~ 23304
## 5 3569 6/3/11 <NA> <NA> Dominican R~ DO Sant~ 9853
## 6 333 10/29/07 <NA> <NA> Dominican R~ DO San ~ 66784
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_DR))
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 568912577
## 2 | 77
## 4 | 0397
stem(df_DR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 5689
## 1 | 12577
## 2 | 7
## 3 | 7
## 4 | 039
## 5 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.52969 |
1 |
6.7 |
6.7 |
6.7 |
6.7 |
| 0.55721 |
1 |
6.7 |
6.7 |
13.3 |
13.3 |
| 0.75284 |
1 |
6.7 |
6.7 |
20.0 |
20.0 |
| 0.885 |
1 |
6.7 |
6.7 |
26.7 |
26.7 |
| 1.10868 |
1 |
6.7 |
6.7 |
33.3 |
33.3 |
| 1.19636 |
1 |
6.7 |
6.7 |
40.0 |
40.0 |
| 1.54885 |
1 |
6.7 |
6.7 |
46.7 |
46.7 |
| 1.70298 |
1 |
6.7 |
6.7 |
53.3 |
53.3 |
| 1.72138 |
1 |
6.7 |
6.7 |
60.0 |
60.0 |
| 2.72462 |
1 |
6.7 |
6.7 |
66.7 |
66.7 |
| 3.72637 |
1 |
6.7 |
6.7 |
73.3 |
73.3 |
| 3.98059 |
1 |
6.7 |
6.7 |
80.0 |
80.0 |
| 4.31327 |
1 |
6.7 |
6.7 |
86.7 |
86.7 |
| 4.86398 |
1 |
6.7 |
6.7 |
93.3 |
93.3 |
| 5.71058 |
1 |
6.7 |
6.7 |
100.0 |
100.0 |
| Total |
15 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 16 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
## $ val% : num 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
## $ %cum : num 6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
## $ val%cum: num 6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.52969 |
1 |
| 0.55721 |
1 |
| 0.75284 |
1 |
| 0.885 |
1 |
| 1.10868 |
1 |
| 1.19636 |
1 |
| 1.54885 |
1 |
| 1.70298 |
1 |
| 1.72138 |
1 |
| 2.72462 |
1 |
| 3.72637 |
1 |
| 3.98059 |
1 |
| 4.31327 |
1 |
| 4.86398 |
1 |
| 5.71058 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.52969 2.52969 4.52969 6.52969
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.53,2.53] |
8 |
0.5714286 |
8 |
| (2.53,4.53] |
4 |
0.2857143 |
12 |
| (4.53,6.53] |
2 |
0.1428571 |
14 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.53,2.53]",..: 1 2 3
## $ Freq : int 8 4 2
## $ Rel_Freq: num 0.571 0.286 0.143
## $ Cum_Freq: int 8 12 14
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.53,2.53] |
8 |
| (2.53,4.53] |
4 |
| (4.53,6.53] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_DR)
## id date time continent_code country_name country_code
## nbr.val 1.500000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.240000e+02 NA NA NA NA NA
## max 6.707000e+03 NA NA NA NA NA
## range 6.583000e+03 NA NA NA NA NA
## sum 3.542100e+04 NA NA NA NA NA
## median 1.177000e+03 NA NA NA NA NA
## mean 2.361400e+03 NA NA NA NA NA
## SE.mean 6.027818e+02 NA NA NA NA NA
## CI.mean.0.95 1.292838e+03 NA NA NA NA NA
## var 5.450188e+06 NA NA NA NA NA
## std.dev 2.334564e+03 NA NA NA NA NA
## coef.var 9.886355e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.500000e+01 NA 15.0000000 NA
## nbr.null NA 1.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.5296900 NA
## max NA 2.201941e+06 NA 5.7105800 NA
## range NA 2.201941e+06 NA 5.1808900 NA
## sum NA 3.729279e+06 NA 35.3224000 NA
## median NA 1.345600e+04 NA 1.7029800 NA
## mean NA 2.486186e+05 NA 2.3548267 NA
## SE.mean NA 1.601963e+05 NA 0.4456851 NA
## CI.mean.0.95 NA 3.435870e+05 NA 0.9558995 NA
## var NA 3.849430e+11 NA 2.9795286 NA
## std.dev NA 6.204378e+05 NA 1.7261311 NA
## coef.var NA 2.495540e+00 NA 0.7330183 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 15.00000000 1.500000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 18.42700000 -7.096300e+01 NA NA NA
## max 19.90530000 -6.938220e+01 NA NA NA
## range 1.47830000 1.580800e+00 NA NA NA
## sum 286.98100000 -1.055795e+03 NA NA NA
## median 19.30500000 -7.060000e+01 NA NA NA
## mean 19.13206667 -7.038631e+01 NA NA NA
## SE.mean 0.13659378 1.250746e-01 NA NA NA
## CI.mean.0.95 0.29296451 2.682583e-01 NA NA NA
## var 0.27986790 2.346547e-01 NA NA NA
## std.dev 0.52902542 4.844117e-01 NA NA NA
## coef.var 0.02765124 -6.882187e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 11.000000 NA
## nbr.null NA NA NA 2 4.000000 NA
## nbr.na NA NA NA 13 4.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 68.000000 NA
## range NA NA NA 0 68.000000 NA
## sum NA NA NA 0 100.000000 NA
## median NA NA NA 0 1.000000 NA
## mean NA NA NA 0 9.090909 NA
## SE.mean NA NA NA 0 6.092401 NA
## CI.mean.0.95 NA NA NA 0 13.574716 NA
## var NA NA NA 0 408.290909 NA
## std.dev NA NA NA 0 20.206210 NA
## coef.var NA NA NA NaN 2.222683 NA
## source_link prop ypos
## nbr.val NA 15.0000000 15.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 1.4995867 5.6346539
## max NA 16.1670215 99.2112512
## range NA 14.6674348 93.5765973
## sum NA 100.0000000 830.1472720
## median NA 4.8212466 57.1753335
## mean NA 6.6666667 55.3431515
## SE.mean NA 1.2617635 7.1708290
## CI.mean.0.95 NA 2.7062135 15.3798986
## var NA 23.8807055 771.3118270
## std.dev NA 4.8867889 27.7725013
## coef.var NA 0.7330183 0.5018236
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Distrito Nacional (Dominican Republic)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_DR <- subset(df, country_name == "Dominican Republic")
knitr::kable(head(df_DR))
library(dplyr)
df_DR <- subset(df, state == "Distrito Nacional")
knitr::kable(head(df_DR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_DR <- df_DR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_DR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_DR$distance
names(distance) <- df_DR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Agustina 5.710580 5.710580 71.644019 71.644019
## San Carlos 1.702980 7.413560 21.365314 93.009333
## Santo Domingo 0.557210 7.970770 6.990667 100.000000
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 67
## 2 |
## 4 | 7
head(df_DR)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 3736 7/6/11 <NA> <NA> Dominican Re~ DO Dist~ 2201941
## 2 124 7/13/07 Night <NA> Dominican Re~ DO Dist~ 13456
## 3 746 8/26/08 <NA> <NA> Dominican Re~ DO Dist~ 10457
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_DR))
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 67
## 2 |
## 4 | 7
stem(df_DR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 1 | 7
## 2 |
## 3 |
## 4 |
## 5 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.55721 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 1.70298 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 5.71058 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.55721 |
1 |
| 1.70298 |
1 |
| 5.71058 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.55721 2.55721 4.55721 6.55721
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.557,2.56] |
1 |
0.5 |
1 |
| (2.56,4.56] |
0 |
0.0 |
1 |
| (4.56,6.56] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.557,2.56]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.557,2.56] |
1 |
| (2.56,4.56] |
0 |
| (4.56,6.56] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_DR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.240000e+02 NA NA NA NA NA
## max 3.736000e+03 NA NA NA NA NA
## range 3.612000e+03 NA NA NA NA NA
## sum 4.606000e+03 NA NA NA NA NA
## median 7.460000e+02 NA NA NA NA NA
## mean 1.535333e+03 NA NA NA NA NA
## SE.mean 1.114887e+03 NA NA NA NA NA
## CI.mean.0.95 4.796973e+03 NA NA NA NA NA
## var 3.728921e+06 NA NA NA NA NA
## std.dev 1.931042e+03 NA NA NA NA NA
## coef.var 1.257734e+00 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.000000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000000
## min NA 1.045700e+04 NA 0.557210 NA 18.475700000
## max NA 2.201941e+06 NA 5.710580 NA 18.550000000
## range NA 2.191484e+06 NA 5.153370 NA 0.074300000
## sum NA 2.225854e+06 NA 7.970770 NA 55.525700000
## median NA 1.345600e+04 NA 1.702980 NA 18.500000000
## mean NA 7.419513e+05 NA 2.656923 NA 18.508566667
## SE.mean NA 7.299953e+05 NA 1.562243 NA 0.021872078
## CI.mean.0.95 NA 3.140916e+06 NA 6.721790 NA 0.094107954
## var NA 1.598680e+12 NA 7.321812 NA 0.001435163
## std.dev NA 1.264389e+06 NA 2.705885 NA 0.037883550
## coef.var NA 1.704140e+00 NA 1.018428 NA 0.002046812
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -6.998330e+01 NA NA NA
## max -6.991400e+01 NA NA NA
## range 6.930000e-02 NA NA NA
## sum -2.098173e+02 NA NA NA
## median -6.992000e+01 NA NA NA
## mean -6.993910e+01 NA NA NA
## SE.mean 2.216777e-02 NA NA NA
## CI.mean.0.95 9.538021e-02 NA NA NA
## var 1.474230e-03 NA NA NA
## std.dev 3.839570e-02 NA NA NA
## coef.var -5.489877e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 0.000000 NA
## nbr.na NA NA NA 3 1.000000 NA
## min NA NA NA Inf 1.000000 NA
## max NA NA NA -Inf 8.000000 NA
## range NA NA NA -Inf 7.000000 NA
## sum NA NA NA 0 9.000000 NA
## median NA NA NA NA 4.500000 NA
## mean NA NA NA NaN 4.500000 NA
## SE.mean NA NA NA NA 3.500000 NA
## CI.mean.0.95 NA NA NA NaN 44.471717 NA
## var NA NA NA NA 24.500000 NA
## std.dev NA NA NA NA 4.949747 NA
## coef.var NA NA NA NA 1.099944 NA
## source_link prop ypos
## nbr.val NA 3.000000 3.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 6.990667 3.495334
## max NA 71.644019 64.177990
## range NA 64.653352 60.682657
## sum NA 100.000000 85.346648
## median NA 21.365314 17.673324
## mean NA 33.333333 28.448883
## SE.mean NA 19.599652 18.327399
## CI.mean.0.95 NA 84.330497 78.856435
## var NA 1152.439111 1007.680707
## std.dev NA 33.947594 31.743987
## coef.var NA 1.018428 1.115825
boxplot(data, horizontal=TRUE, col='green')

Gráfico para La Vega (Dominican Republic)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_DR <- subset(df, country_name == "Dominican Republic")
knitr::kable(head(df_DR))
library(dplyr)
df_DR <- subset(df, state == "La Vega")
knitr::kable(head(df_DR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_DR <- df_DR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_DR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_DR$distance
names(distance) <- df_DR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Río Verde Abajo 3.72637 3.72637 87.55445 87.55445
## Constanza 0.52969 4.25606 12.44555 100.00000
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 |
## 2 |
## 3 | 7
head(df_DR)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 343 11/1/07 <NA> <NA> Dominican R~ DO La V~ 3613
## 2 4051 11/18/11 <NA> <NA> Dominican R~ DO La V~ 29481
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_DR))
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 0 | 5
## 1 |
## 2 |
## 3 | 7
stem(df_DR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 5
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.52969 |
1 |
50 |
50 |
50 |
50 |
| 3.72637 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.52969 2.52969 4.52969
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.53,2.53] |
0 |
0 |
0 |
| (2.53,4.53] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.53,2.53]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.53,2.53] |
0 |
| (2.53,4.53] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_DR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.430000e+02 NA NA NA NA NA
## max 4.051000e+03 NA NA NA NA NA
## range 3.708000e+03 NA NA NA NA NA
## sum 4.394000e+03 NA NA NA NA NA
## median 2.197000e+03 NA NA NA NA NA
## mean 2.197000e+03 NA NA NA NA NA
## SE.mean 1.854000e+03 NA NA NA NA NA
## CI.mean.0.95 2.355730e+04 NA NA NA NA NA
## var 6.874632e+06 NA NA NA NA NA
## std.dev 2.621952e+03 NA NA NA NA NA
## coef.var 1.193424e+00 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 2.000000e+00 NA 2.000000 NA 2.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 3.613000e+03 NA 0.529690 NA 18.90450000
## max NA 2.948100e+04 NA 3.726370 NA 19.30500000
## range NA 2.586800e+04 NA 3.196680 NA 0.40050000
## sum NA 3.309400e+04 NA 4.256060 NA 38.20950000
## median NA 1.654700e+04 NA 2.128030 NA 19.10475000
## mean NA 1.654700e+04 NA 2.128030 NA 19.10475000
## SE.mean NA 1.293400e+04 NA 1.598340 NA 0.20025000
## CI.mean.0.95 NA 1.643421e+05 NA 20.308835 NA 2.54441750
## var NA 3.345767e+08 NA 5.109382 NA 0.08020013
## std.dev NA 1.829144e+04 NA 2.260394 NA 0.28319627
## coef.var NA 1.105423e+00 NA 1.062200 NA 0.01482334
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.074400e+01 NA NA NA
## max -7.060000e+01 NA NA NA
## range 1.440000e-01 NA NA NA
## sum -1.413440e+02 NA NA NA
## median -7.067200e+01 NA NA NA
## mean -7.067200e+01 NA NA NA
## SE.mean 7.200000e-02 NA NA NA
## CI.mean.0.95 9.148467e-01 NA NA NA
## var 1.036800e-02 NA NA NA
## std.dev 1.018234e-01 NA NA NA
## coef.var -1.440788e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 2 0.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 68.000000 NA
## range NA NA NA -Inf 68.000000 NA
## sum NA NA NA 0 68.000000 NA
## median NA NA NA NA 34.000000 NA
## mean NA NA NA NaN 34.000000 NA
## SE.mean NA NA NA NA 34.000000 NA
## CI.mean.0.95 NA NA NA NaN 432.010961 NA
## var NA NA NA NA 2312.000000 NA
## std.dev NA NA NA NA 48.083261 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.00000 2.0000000
## nbr.null NA 0.00000 0.0000000
## nbr.na NA 0.00000 0.0000000
## min NA 12.44555 43.7772259
## max NA 87.55445 93.7772259
## range NA 75.10890 50.0000000
## sum NA 100.00000 137.5544518
## median NA 50.00000 68.7772259
## mean NA 50.00000 68.7772259
## SE.mean NA 37.55445 25.0000000
## CI.mean.0.95 NA 477.17455 317.6551184
## var NA 2820.67370 1250.0000000
## std.dev NA 53.11002 35.3553391
## coef.var NA 1.06220 0.5140559
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puerto Plata (Dominican Republic)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_DR <- subset(df, country_name == "Dominican Republic")
knitr::kable(head(df_DR))
library(dplyr)
df_DR <- subset(df, state == "Puerto Plata")
knitr::kable(head(df_DR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_DR <- df_DR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_DR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_DR$distance
names(distance) <- df_DR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Luperón 1.54885 1.54885 42.66558 42.66558
## Puerto Plata 1.19636 2.74521 32.95567 75.62125
## Altamira 0.88500 3.63021 24.37875 100.00000
stem(df_DR$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 8 | 9
## 10 |
## 12 | 0
## 14 | 5
head(df_DR)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4655 12/5/12 <NA> <NA> Dominican Re~ DO Puer~ 146000
## 2 6707 11/7/14 <NA> <NA> Dominican Re~ DO Puer~ 4393
## 3 1177 9/20/09 <NA> <NA> Dominican Re~ DO Puer~ 4563
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_DR))
stem(df_DR$"distance")
##
## The decimal point is 1 digit(s) to the left of the |
##
## 8 | 9
## 10 |
## 12 | 0
## 14 | 5
stem(df_DR$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the left of the |
##
## 8 | 9
## 9 |
## 10 |
## 11 |
## 12 | 0
## 13 |
## 14 |
## 15 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.885 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 1.19636 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 1.54885 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.885 |
1 |
| 1.19636 |
1 |
| 1.54885 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.885 1.885
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ distance: Factor w/ 1 level "(0.885,1.88]": 1
## $ Freq : int 2
## $ Rel_Freq: num 1
## $ Cum_Freq: int 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_DR)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 1.177000e+03 NA NA NA NA NA
## max 6.707000e+03 NA NA NA NA NA
## range 5.530000e+03 NA NA NA NA NA
## sum 1.253900e+04 NA NA NA NA NA
## median 4.655000e+03 NA NA NA NA NA
## mean 4.179667e+03 NA NA NA NA NA
## SE.mean 1.613968e+03 NA NA NA NA NA
## CI.mean.0.95 6.944345e+03 NA NA NA NA NA
## var 7.814681e+06 NA NA NA NA NA
## std.dev 2.795475e+03 NA NA NA NA NA
## coef.var 6.688273e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.0000000 NA 3.00000000
## nbr.null NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.0000000 NA 0.00000000
## min NA 4.393000e+03 NA 0.8850000 NA 19.67500000
## max NA 1.460000e+05 NA 1.5488500 NA 19.90530000
## range NA 1.416070e+05 NA 0.6638500 NA 0.23030000
## sum NA 1.549560e+05 NA 3.6302100 NA 59.36300000
## median NA 4.563000e+03 NA 1.1963600 NA 19.78270000
## mean NA 5.165200e+04 NA 1.2100700 NA 19.78766667
## SE.mean NA 4.717403e+04 NA 0.1917596 NA 0.06652825
## CI.mean.0.95 NA 2.029734e+05 NA 0.8250748 NA 0.28624795
## var NA 6.676166e+09 NA 0.1103152 NA 0.01327802
## std.dev NA 8.170781e+04 NA 0.3321373 NA 0.11523031
## coef.var NA 1.581891e+00 NA 0.2744777 NA 0.00582334
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -7.096300e+01 NA NA NA
## max -7.068710e+01 NA NA NA
## range 2.759000e-01 NA NA NA
## sum -2.124863e+02 NA NA NA
## median -7.083620e+01 NA NA NA
## mean -7.082877e+01 NA NA NA
## SE.mean 7.973214e-02 NA NA NA
## CI.mean.0.95 3.430597e-01 NA NA NA
## var 1.907164e-02 NA NA NA
## std.dev 1.381001e-01 NA NA NA
## coef.var -1.949774e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2.000000 NA
## nbr.null NA NA NA 1 1.000000 NA
## nbr.na NA NA NA 2 1.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 2.000000 NA
## range NA NA NA 0 2.000000 NA
## sum NA NA NA 0 2.000000 NA
## median NA NA NA 0 1.000000 NA
## mean NA NA NA 0 1.000000 NA
## SE.mean NA NA NA NA 1.000000 NA
## CI.mean.0.95 NA NA NA NaN 12.706205 NA
## var NA NA NA NA 2.000000 NA
## std.dev NA NA NA NA 1.414214 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 24.3787549 16.4778346
## max NA 42.6655758 87.8106225
## range NA 18.2868209 71.3327879
## sum NA 100.0000000 158.5769143
## median NA 32.9556692 54.2884571
## mean NA 33.3333333 52.8589714
## SE.mean NA 5.2823267 20.6044027
## CI.mean.0.95 NA 22.7280175 88.6535895
## var NA 83.7089270 1273.6242298
## std.dev NA 9.1492583 35.6878723
## coef.var NA 0.2744777 0.6751526
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Santiago (Dominican Republic)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_DR <- subset(df, country_name == "Dominican Republic")
knitr::kable(head(df_DR))
library(dplyr)
df_DR <- subset(df, state == "Santiago")
knitr::kable(head(df_DR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_DR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_DR <- df_DR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_DR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_DR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_DR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.31327 |
| 1.10868 |
| 2.72462 |
| 4.86398 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_DR$distance
names(distance) <- df_DR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Pedro García 4.863980 4.863980 37.384891 37.384891
## Tamboril 4.313270 9.177250 33.152096 70.536987
## San José de Las Matas 2.724620 11.901870 20.941620 91.478608
## Santiago de los Caballeros 1.108680 13.010550 8.521392 100.000000
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 1 | 1
## 2 | 7
## 3 |
## 4 | 39
head(df_DR)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1178 9/20/09 <NA> <NA> Dominican R~ DO Sant~ 23304
## 2 388 12/11/07 <NA> <NA> Dominican R~ DO Sant~ 1200000
## 3 3569 6/3/11 <NA> <NA> Dominican R~ DO Sant~ 9853
## 4 984 2/12/09 <NA> <NA> Dominican R~ DO Sant~ 1457
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_DR))
stem(df_DR$"distance")
##
## The decimal point is at the |
##
## 1 | 1
## 2 | 7
## 3 |
## 4 | 39
stem(df_DR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 1 | 1
## 1 |
## 2 |
## 2 | 7
## 3 |
## 3 |
## 4 | 3
## 4 | 9
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.10868 |
1 |
25 |
25 |
25 |
25 |
| 2.72462 |
1 |
25 |
25 |
50 |
50 |
| 4.31327 |
1 |
25 |
25 |
75 |
75 |
| 4.86398 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 1.10868 |
1 |
| 2.72462 |
1 |
| 4.31327 |
1 |
| 4.86398 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.10868 3.10868 5.10868
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.11,3.11] |
1 |
0.3333333 |
1 |
| (3.11,5.11] |
2 |
0.6666667 |
3 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(1.11,3.11]",..: 1 2
## $ Freq : int 1 2
## $ Rel_Freq: num 0.333 0.667
## $ Cum_Freq: int 1 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.11,3.11] |
1 |
| (3.11,5.11] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_DR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.880000e+02 NA NA NA NA NA
## max 3.569000e+03 NA NA NA NA NA
## range 3.181000e+03 NA NA NA NA NA
## sum 6.119000e+03 NA NA NA NA NA
## median 1.081000e+03 NA NA NA NA NA
## mean 1.529750e+03 NA NA NA NA NA
## SE.mean 7.002205e+02 NA NA NA NA NA
## CI.mean.0.95 2.228414e+03 NA NA NA NA NA
## var 1.961235e+06 NA NA NA NA NA
## std.dev 1.400441e+03 NA NA NA NA NA
## coef.var 9.154705e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.457000e+03 NA 1.1086800 NA
## max NA 1.200000e+06 NA 4.8639800 NA
## range NA 1.198543e+06 NA 3.7553000 NA
## sum NA 1.234614e+06 NA 13.0105500 NA
## median NA 1.657850e+04 NA 3.5189450 NA
## mean NA 3.086535e+05 NA 3.2526375 NA
## SE.mean NA 2.971496e+05 NA 0.8464003 NA
## CI.mean.0.95 NA 9.456625e+05 NA 2.6936236 NA
## var NA 3.531914e+11 NA 2.8655741 NA
## std.dev NA 5.942991e+05 NA 1.6928007 NA
## coef.var NA 1.925457e+00 NA 0.5204394 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.000000000 4.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 19.355600000 -7.091890e+01 NA NA NA
## max 19.550000000 -7.058660e+01 NA NA NA
## range 0.194400000 3.323000e-01 NA NA NA
## sum 77.877300000 -2.828515e+02 NA NA NA
## median 19.485850000 -7.067300e+01 NA NA NA
## mean 19.469325000 -7.071287e+01 NA NA NA
## SE.mean 0.042711657 7.296329e-02 NA NA NA
## CI.mean.0.95 0.135927554 2.322018e-01 NA NA NA
## var 0.007297143 2.129457e-02 NA NA NA
## std.dev 0.085423314 1.459266e-01 NA NA NA
## coef.var 0.004387585 -2.063649e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 3.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 4 1.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 17.000000 NA
## range NA NA NA -Inf 17.000000 NA
## sum NA NA NA 0 18.000000 NA
## median NA NA NA NA 1.000000 NA
## mean NA NA NA NaN 6.000000 NA
## SE.mean NA NA NA NA 5.507571 NA
## CI.mean.0.95 NA NA NA NaN 23.697163 NA
## var NA NA NA NA 91.000000 NA
## std.dev NA NA NA NA 9.539392 NA
## coef.var NA NA NA NA 1.589899 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 8.5213923 16.576048
## max NA 37.3848915 81.307554
## range NA 28.8634992 64.731506
## sum NA 100.0000000 187.440692
## median NA 27.0468581 44.778545
## mean NA 25.0000000 46.860173
## SE.mean NA 6.5054923 13.604305
## CI.mean.0.95 NA 20.7033799 43.294969
## var NA 169.2857203 740.308432
## std.dev NA 13.0109846 27.208610
## coef.var NA 0.5204394 0.580634
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Honduras
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.31238 |
| 6.66574 |
| 2.91594 |
| 2.87349 |
| 2.00805 |
| 5.79867 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Colón 36.3762900 36.3762900 21.8907391 21.8907391
## Comayagua 17.2861300 53.6624200 10.4025496 32.2932888
## Choluteca 11.6723700 65.3347900 7.0242679 39.3175567
## Comayagua 8.5258400 73.8606300 5.1307305 44.4482872
## Comayagua 7.2857500 81.1463800 4.3844618 48.8327489
## Yoro 6.6657400 87.8121200 4.0113485 52.8440974
## Copán 5.8972100 93.7093300 3.5488579 56.3929554
## Ocotepeque 5.7986700 99.5080000 3.4895580 59.8825133
## Francisco Morazán 4.7579100 104.2659100 2.8632432 62.7457566
## La Paz 4.6913300 108.9572400 2.8231763 65.5689329
## Comayagua 4.5336200 113.4908600 2.7282687 68.2972016
## Choluteca 3.6959600 117.1868200 2.2241767 70.5213783
## Francisco Morazán 3.6396200 120.8264400 2.1902721 72.7116504
## Francisco Morazán 3.5439900 124.3704300 2.1327233 74.8443736
## Cortés 3.5373700 127.9078000 2.1287395 76.9731131
## Francisco Morazán 3.2528100 131.1606100 1.9574953 78.9306084
## Francisco Morazán 3.1298600 134.2904700 1.8835057 80.8141140
## Francisco Morazán 2.9923900 137.2828600 1.8007782 82.6148922
## Santa Bárbara 2.9159400 140.1988000 1.7547716 84.3696639
## Francisco Morazán 2.9132600 143.1120600 1.7531588 86.1228227
## Santa Bárbara 2.8734900 145.9855500 1.7292258 87.8520485
## Francisco Morazán 2.2144200 148.1999700 1.3326068 89.1846553
## Francisco Morazán 2.0083000 150.2082700 1.2085667 90.3932220
## Ocotepeque 2.0080500 152.2163200 1.2084162 91.6016382
## El Paraíso 1.9005200 154.1168400 1.1437062 92.7453444
## Francisco Morazán 1.8589700 155.9758100 1.1187020 93.8640463
## Copán 1.3909500 157.3667600 0.8370541 94.7011005
## Francisco Morazán 1.3058300 158.6725900 0.7858301 95.4869306
## Francisco Morazán 1.2440400 159.9166300 0.7486458 96.2355763
## Francisco Morazán 1.2363900 161.1530200 0.7440421 96.9796184
## Francisco Morazán 0.9837700 162.1367900 0.5920189 97.5716373
## Cortés 0.9705700 163.1073600 0.5840754 98.1557127
## Francisco Morazán 0.9155200 164.0228800 0.5509470 98.7066598
## Copán 0.7441400 164.7670200 0.4478130 99.1544727
## Copán 0.4339100 165.2009300 0.2611209 99.4155937
## Choluteca 0.3698700 165.5708000 0.2225826 99.6381762
## Yoro 0.3123800 165.8831800 0.1879859 99.8261621
## Copán 0.2888700 166.1720500 0.1738379 100.0000000
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000011111111222223333334444
## 0 | 55566779
## 1 | 2
## 1 | 7
## 2 |
## 2 |
## 3 |
## 3 | 6
head(df_HD)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6202 5/20/14 <NA> <NA> Honduras HN Yoro 15774
## 2 7467 1/22/15 <NA> <NA> Honduras HN Yoro 2188
## 3 6691 10/14/14 Night <NA> Honduras HN Sant~ 1759
## 4 7464 9/28/15 Morning <NA> Honduras HN Sant~ 1811
## 5 6672 10/13/14 <NA> <NA> Honduras HN Ocot~ 2389
## 6 7462 9/25/15 <NA> <NA> Honduras HN Ocot~ 1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000011111111222223333334444
## 0 | 55566779
## 1 | 2
## 1 | 7
## 2 |
## 2 |
## 3 |
## 3 | 6
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 33447900223499
## 2 | 0029990135567
## 4 | 57889
## 6 | 73
## 8 | 5
## 10 | 7
## 12 |
## 14 |
## 16 | 3
## 18 |
## 20 |
## 22 |
## 24 |
## 26 |
## 28 |
## 30 |
## 32 |
## 34 |
## 36 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.28887 |
1 |
2.6 |
2.6 |
2.6 |
2.6 |
| 0.31238 |
1 |
2.6 |
2.6 |
5.3 |
5.3 |
| 0.36987 |
1 |
2.6 |
2.6 |
7.9 |
7.9 |
| 0.43391 |
1 |
2.6 |
2.6 |
10.5 |
10.5 |
| 0.74414 |
1 |
2.6 |
2.6 |
13.2 |
13.2 |
| 0.91552 |
1 |
2.6 |
2.6 |
15.8 |
15.8 |
| 0.97057 |
1 |
2.6 |
2.6 |
18.4 |
18.4 |
| 0.98377 |
1 |
2.6 |
2.6 |
21.1 |
21.1 |
| 1.23639 |
1 |
2.6 |
2.6 |
23.7 |
23.7 |
| 1.24404 |
1 |
2.6 |
2.6 |
26.3 |
26.3 |
| 1.30583 |
1 |
2.6 |
2.6 |
28.9 |
28.9 |
| 1.39095 |
1 |
2.6 |
2.6 |
31.6 |
31.6 |
| 1.85897 |
1 |
2.6 |
2.6 |
34.2 |
34.2 |
| 1.90052 |
1 |
2.6 |
2.6 |
36.8 |
36.8 |
| 2.00805 |
1 |
2.6 |
2.6 |
39.5 |
39.5 |
| 2.0083 |
1 |
2.6 |
2.6 |
42.1 |
42.1 |
| 2.21442 |
1 |
2.6 |
2.6 |
44.7 |
44.7 |
| 2.87349 |
1 |
2.6 |
2.6 |
47.4 |
47.4 |
| 2.91326 |
1 |
2.6 |
2.6 |
50.0 |
50.0 |
| 2.91594 |
1 |
2.6 |
2.6 |
52.6 |
52.6 |
| 2.99239 |
1 |
2.6 |
2.6 |
55.3 |
55.3 |
| 3.12986 |
1 |
2.6 |
2.6 |
57.9 |
57.9 |
| 3.25281 |
1 |
2.6 |
2.6 |
60.5 |
60.5 |
| 3.53737 |
1 |
2.6 |
2.6 |
63.2 |
63.2 |
| 3.54399 |
1 |
2.6 |
2.6 |
65.8 |
65.8 |
| 3.63962 |
1 |
2.6 |
2.6 |
68.4 |
68.4 |
| 3.69596 |
1 |
2.6 |
2.6 |
71.1 |
71.1 |
| 4.53362 |
1 |
2.6 |
2.6 |
73.7 |
73.7 |
| 4.69133 |
1 |
2.6 |
2.6 |
76.3 |
76.3 |
| 4.75791 |
1 |
2.6 |
2.6 |
78.9 |
78.9 |
| 5.79867 |
1 |
2.6 |
2.6 |
81.6 |
81.6 |
| 5.89721 |
1 |
2.6 |
2.6 |
84.2 |
84.2 |
| 6.66574 |
1 |
2.6 |
2.6 |
86.8 |
86.8 |
| 7.28575 |
1 |
2.6 |
2.6 |
89.5 |
89.5 |
| 8.52584 |
1 |
2.6 |
2.6 |
92.1 |
92.1 |
| 11.67237 |
1 |
2.6 |
2.6 |
94.7 |
94.7 |
| 17.28613 |
1 |
2.6 |
2.6 |
97.4 |
97.4 |
| 36.37629 |
1 |
2.6 |
2.6 |
100.0 |
100.0 |
| Total |
38 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 39 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
## $ val% : num 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
## $ %cum : num 2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
## $ val%cum: num 2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.28887 |
1 |
| 0.31238 |
1 |
| 0.36987 |
1 |
| 0.43391 |
1 |
| 0.74414 |
1 |
| 0.91552 |
1 |
| 0.97057 |
1 |
| 0.98377 |
1 |
| 1.23639 |
1 |
| 1.24404 |
1 |
| 1.30583 |
1 |
| 1.39095 |
1 |
| 1.85897 |
1 |
| 1.90052 |
1 |
| 2.00805 |
1 |
| 2.0083 |
1 |
| 2.21442 |
1 |
| 2.87349 |
1 |
| 2.91326 |
1 |
| 2.91594 |
1 |
| 2.99239 |
1 |
| 3.12986 |
1 |
| 3.25281 |
1 |
| 3.53737 |
1 |
| 3.54399 |
1 |
| 3.63962 |
1 |
| 3.69596 |
1 |
| 4.53362 |
1 |
| 4.69133 |
1 |
| 4.75791 |
1 |
| 5.79867 |
1 |
| 5.89721 |
1 |
| 6.66574 |
1 |
| 7.28575 |
1 |
| 8.52584 |
1 |
| 11.67237 |
1 |
| 17.28613 |
1 |
| 36.37629 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.28887 6.28887 12.28887 18.28887 24.28887 30.28887 36.28887 42.28887
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.289,6.29] |
31 |
0.8378378 |
31 |
| (6.29,12.3] |
4 |
0.1081081 |
35 |
| (12.3,18.3] |
1 |
0.0270270 |
36 |
| (18.3,24.3] |
0 |
0.0000000 |
36 |
| (24.3,30.3] |
0 |
0.0000000 |
36 |
| (30.3,36.3] |
0 |
0.0000000 |
36 |
| (36.3,42.3] |
1 |
0.0270270 |
37 |
str(Freq_table)
## 'data.frame': 7 obs. of 4 variables:
## $ distance: Factor w/ 7 levels "(0.289,6.29]",..: 1 2 3 4 5 6 7
## $ Freq : int 31 4 1 0 0 0 1
## $ Rel_Freq: num 0.838 0.108 0.027 0 0 ...
## $ Cum_Freq: int 31 35 36 36 36 36 37
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.289,6.29] |
31 |
| (6.29,12.3] |
4 |
| (12.3,18.3] |
1 |
| (18.3,24.3] |
0 |
| (24.3,30.3] |
0 |
| (30.3,36.3] |
0 |
| (36.3,42.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 3.800000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.540000e+02 NA NA NA NA NA
## max 7.485000e+03 NA NA NA NA NA
## range 6.631000e+03 NA NA NA NA NA
## sum 2.290210e+05 NA NA NA NA NA
## median 7.448500e+03 NA NA NA NA NA
## mean 6.026868e+03 NA NA NA NA NA
## SE.mean 3.671138e+02 NA NA NA NA NA
## CI.mean.0.95 7.438432e+02 NA NA NA NA NA
## var 5.121356e+06 NA NA NA NA NA
## std.dev 2.263041e+03 NA NA NA NA NA
## coef.var 3.754921e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.800000e+01 NA 38.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 1.043000e+03 NA 0.288870 NA
## max NA 8.508480e+05 NA 36.376290 NA
## range NA 8.498050e+05 NA 36.087420 NA
## sum NA 7.001138e+06 NA 166.172050 NA
## median NA 1.936000e+03 NA 2.914600 NA
## mean NA 1.842405e+05 NA 4.372949 NA
## SE.mean NA 5.663200e+04 NA 1.023393 NA
## CI.mean.0.95 NA 1.147473e+05 NA 2.073592 NA
## var NA 1.218729e+11 NA 39.798695 NA
## std.dev NA 3.491031e+05 NA 6.308621 NA
## coef.var NA 1.894823e+00 NA 1.442647 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 38.00000000 3.800000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 13.28610000 -8.916660e+01 NA NA NA
## max 15.52270000 -8.526500e+01 NA NA NA
## range 2.23660000 3.901600e+00 NA NA NA
## sum 546.00580000 -3.329927e+03 NA NA NA
## median 14.20695000 -8.728760e+01 NA NA NA
## mean 14.36857368 -8.762966e+01 NA NA NA
## SE.mean 0.08447952 1.301577e-01 NA NA NA
## CI.mean.0.95 0.17117178 2.637245e-01 NA NA NA
## var 0.27119802 6.437585e-01 NA NA NA
## std.dev 0.52076676 8.023457e-01 NA NA NA
## coef.var 0.03624346 -9.156096e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 29.0000000 38.0000000
## nbr.null NA NA NA 26.0000000 25.0000000
## nbr.na NA NA NA 9.0000000 0.0000000
## min NA NA NA 0.0000000 0.0000000
## max NA NA NA 3.0000000 29.0000000
## range NA NA NA 3.0000000 29.0000000
## sum NA NA NA 6.0000000 86.0000000
## median NA NA NA 0.0000000 0.0000000
## mean NA NA NA 0.2068966 2.2631579
## SE.mean NA NA NA 0.1253499 0.9774733
## CI.mean.0.95 NA NA NA 0.2567675 1.9805491
## var NA NA NA 0.4556650 36.3072546
## std.dev NA NA NA 0.6750296 6.0255502
## coef.var NA NA NA 3.2626433 2.6624524
## source_name source_link prop ypos
## nbr.val NA NA 38.0000000 3.800000e+01
## nbr.null NA NA 0.0000000 0.000000e+00
## nbr.na NA NA 0.0000000 0.000000e+00
## min NA NA 0.1738379 9.399294e-02
## max NA NA 21.8907391 9.988871e+01
## range NA NA 21.7169012 9.979472e+01
## sum NA NA 100.0000000 1.347990e+03
## median NA NA 1.7539652 3.219648e+01
## mean NA NA 2.6315789 3.547342e+01
## SE.mean NA NA 0.6158637 4.046533e+00
## CI.mean.0.95 NA NA 1.2478585 8.199055e+00
## var NA NA 14.4129500 6.222284e+02
## std.dev NA NA 3.7964391 2.494451e+01
## coef.var NA NA 1.4426469 7.031887e-01
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Choluteca (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Choluteca")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Duyure 11.672370 11.672370 74.165851 74.165851
## Ciudad Choluteca 3.695960 15.368330 23.484007 97.649858
## Corpus 0.369870 15.738200 2.350142 100.000000
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 |
## 1 | 2
head(df_HD)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7448 6/11/15 <NA> <NA> Honduras HN Choluteca 1164
## 2 7449 12/15/15 <NA> <NA> Honduras HN Choluteca 1199
## 3 6123 7/2/14 16:30 <NA> Honduras HN Choluteca 75872
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 |
## 1 | 2
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4
## 2 | 7
## 4 |
## 6 |
## 8 |
## 10 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.36987 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 3.69596 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 11.67237 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.36987 |
1 |
| 3.69596 |
1 |
| 11.67237 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.36987 4.36987 8.36987 12.36987
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.37,4.37] |
1 |
0.5 |
1 |
| (4.37,8.37] |
0 |
0.0 |
1 |
| (8.37,12.4] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.37,4.37]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.37,4.37] |
1 |
| (4.37,8.37] |
0 |
| (8.37,12.4] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.123000e+03 NA NA NA NA NA
## max 7.449000e+03 NA NA NA NA NA
## range 1.326000e+03 NA NA NA NA NA
## sum 2.102000e+04 NA NA NA NA NA
## median 7.448000e+03 NA NA NA NA NA
## mean 7.006667e+03 NA NA NA NA NA
## SE.mean 4.418334e+02 NA NA NA NA NA
## CI.mean.0.95 1.901056e+03 NA NA NA NA NA
## var 5.856503e+05 NA NA NA NA NA
## std.dev 7.652779e+02 NA NA NA NA NA
## coef.var 1.092214e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 3.000000e+00 NA 3.000000 NA 3.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 1.164000e+03 NA 0.369870 NA 13.28610000
## max NA 7.587200e+04 NA 11.672370 NA 13.58070000
## range NA 7.470800e+04 NA 11.302500 NA 0.29460000
## sum NA 7.823500e+04 NA 15.738200 NA 40.15430000
## median NA 1.199000e+03 NA 3.695960 NA 13.28750000
## mean NA 2.607833e+04 NA 5.246067 NA 13.38476667
## SE.mean NA 2.489684e+04 NA 3.353543 NA 0.09796750
## CI.mean.0.95 NA 1.071224e+05 NA 14.429130 NA 0.42152013
## var NA 1.859557e+09 NA 33.738750 NA 0.02879289
## std.dev NA 4.312258e+04 NA 5.808507 NA 0.16968469
## coef.var NA 1.653579e+00 NA 1.107212 NA 0.01267745
## longitude geolocation hazard_type landslide_type
## nbr.val 3.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.703290e+01 NA NA NA
## max -8.691010e+01 NA NA NA
## range 1.228000e-01 NA NA NA
## sum -2.609755e+02 NA NA NA
## median -8.703250e+01 NA NA NA
## mean -8.699183e+01 NA NA NA
## SE.mean 4.086683e-02 NA NA NA
## CI.mean.0.95 1.758358e-01 NA NA NA
## var 5.010293e-03 NA NA NA
## std.dev 7.078343e-02 NA NA NA
## coef.var -8.136790e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3.000000 3.000000 NA
## nbr.null NA NA NA 2.000000 1.000000 NA
## nbr.na NA NA NA 0.000000 0.000000 NA
## min NA NA NA 0.000000 0.000000 NA
## max NA NA NA 3.000000 8.000000 NA
## range NA NA NA 3.000000 8.000000 NA
## sum NA NA NA 3.000000 9.000000 NA
## median NA NA NA 0.000000 1.000000 NA
## mean NA NA NA 1.000000 3.000000 NA
## SE.mean NA NA NA 1.000000 2.516611 NA
## CI.mean.0.95 NA NA NA 4.302653 10.828105 NA
## var NA NA NA 3.000000 19.000000 NA
## std.dev NA NA NA 1.732051 4.358899 NA
## coef.var NA NA NA 1.732051 1.452966 NA
## source_link prop ypos
## nbr.val NA 3.000000 3.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 2.350142 37.0829256
## max NA 74.165851 88.2579965
## range NA 71.815710 51.1750708
## sum NA 100.000000 200.6818442
## median NA 23.484007 75.3409221
## mean NA 33.333333 66.8939481
## SE.mean NA 21.308300 15.3648462
## CI.mean.0.95 NA 91.682215 66.1095975
## var NA 1362.130955 708.2354967
## std.dev NA 36.907058 26.6126943
## coef.var NA 1.107212 0.3978341
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Colón (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Colón")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.09491 |
| 2.67409 |
| 4.74914 |
| 0.18619 |
| 2.28589 |
| 0.74760 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Cusuna 36.3762900 36.3762900 71.7525220 71.7525220
## Nuevo San Juan 4.7491400 41.1254300 9.3677165 81.1202386
## Portobelo 2.6740900 43.7995200 5.2746638 86.3949024
## María Chiquita 2.2858900 46.0854100 4.5089363 90.9038387
## El Giral 1.8033000 47.8887100 3.5570236 94.4608623
## Margarita 0.7674000 48.6561100 1.5137026 95.9745649
## Margarita 0.7476000 49.4037100 1.4746470 97.4492119
## Cativá 0.6394800 50.0431900 1.2613794 98.7105913
## Colón 0.2036500 50.2468400 0.4017012 99.1122925
## Nueva Providencia 0.1861900 50.4330300 0.3672613 99.4795538
## Colón 0.1689400 50.6019700 0.3332355 99.8127893
## Portobelo 0.0949100 50.6968800 0.1872107 100.0000000
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 00001112235
## 1 |
## 2 |
## 3 | 6
head(df_HD)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2795 12/9/10 Morning <NA> Panama PA Colón 1274
## 2 4879 5/28/13 <NA> <NA> Panama PA Colón 1274
## 3 6702 5/9/14 <NA> <NA> Panama PA Colón 1232
## 4 7450 9/7/15 <NA> <NA> Panama PA Colón 0
## 5 7451 7/2/15 <NA> <NA> Panama PA Colón 1146
## 6 4880 5/28/13 <NA> <NA> Panama PA Colón 3302
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 00001112235
## 1 |
## 2 |
## 3 | 6
stem(df_HD$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0000111223
## 0 | 5
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 | 6
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.09491 |
1 |
8.3 |
8.3 |
8.3 |
8.3 |
| 0.16894 |
1 |
8.3 |
8.3 |
16.7 |
16.7 |
| 0.18619 |
1 |
8.3 |
8.3 |
25.0 |
25.0 |
| 0.20365 |
1 |
8.3 |
8.3 |
33.3 |
33.3 |
| 0.63948 |
1 |
8.3 |
8.3 |
41.7 |
41.7 |
| 0.7476 |
1 |
8.3 |
8.3 |
50.0 |
50.0 |
| 0.7674 |
1 |
8.3 |
8.3 |
58.3 |
58.3 |
| 1.8033 |
1 |
8.3 |
8.3 |
66.7 |
66.7 |
| 2.28589 |
1 |
8.3 |
8.3 |
75.0 |
75.0 |
| 2.67409 |
1 |
8.3 |
8.3 |
83.3 |
83.3 |
| 4.74914 |
1 |
8.3 |
8.3 |
91.7 |
91.7 |
| 36.37629 |
1 |
8.3 |
8.3 |
100.0 |
100.0 |
| Total |
12 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 13 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ val% : num 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 8.3 ...
## $ %cum : num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
## $ val%cum: num 8.3 16.7 25 33.3 41.7 50 58.3 66.7 75 83.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.09491 |
1 |
| 0.16894 |
1 |
| 0.18619 |
1 |
| 0.20365 |
1 |
| 0.63948 |
1 |
| 0.7476 |
1 |
| 0.7674 |
1 |
| 1.8033 |
1 |
| 2.28589 |
1 |
| 2.67409 |
1 |
| 4.74914 |
1 |
| 36.37629 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.09491 8.09491 16.09491 24.09491 32.09491 40.09491
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.0949,8.09] |
10 |
0.9090909 |
10 |
| (8.09,16.1] |
0 |
0.0000000 |
10 |
| (16.1,24.1] |
0 |
0.0000000 |
10 |
| (24.1,32.1] |
0 |
0.0000000 |
10 |
| (32.1,40.1] |
1 |
0.0909091 |
11 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.0949,8.09]",..: 1 2 3 4 5
## $ Freq : int 10 0 0 0 1
## $ Rel_Freq: num 0.9091 0 0 0 0.0909
## $ Cum_Freq: int 10 10 10 10 11
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.0949,8.09] |
10 |
| (8.09,16.1] |
0 |
| (16.1,24.1] |
0 |
| (24.1,32.1] |
0 |
| (32.1,40.1] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 1.200000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.652000e+03 NA NA NA NA NA
## max 7.452000e+03 NA NA NA NA NA
## range 4.800000e+03 NA NA NA NA NA
## sum 6.181400e+04 NA NA NA NA NA
## median 4.880500e+03 NA NA NA NA NA
## mean 5.151167e+03 NA NA NA NA NA
## SE.mean 5.231425e+02 NA NA NA NA NA
## CI.mean.0.95 1.151429e+03 NA NA NA NA NA
## var 3.284136e+06 NA NA NA NA NA
## std.dev 1.812219e+03 NA NA NA NA NA
## coef.var 3.518074e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.200000e+01 NA 12.000000 NA
## nbr.null NA 1.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 0.000000e+00 NA 0.094910 NA
## max NA 7.664300e+04 NA 36.376290 NA
## range NA 7.664300e+04 NA 36.281380 NA
## sum NA 1.971740e+05 NA 50.696880 NA
## median NA 1.375500e+03 NA 0.757500 NA
## mean NA 1.643117e+04 NA 4.224740 NA
## SE.mean NA 8.446243e+03 NA 2.950226 NA
## CI.mean.0.95 NA 1.859006e+04 NA 6.493404 NA
## var NA 8.560683e+08 NA 104.446004 NA
## std.dev NA 2.925864e+04 NA 10.219883 NA
## coef.var NA 1.780680e+00 NA 2.419056 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 12.0000000 12.00000000 NA NA NA
## nbr.null 0.0000000 0.00000000 NA NA NA
## nbr.na 0.0000000 0.00000000 NA NA NA
## min 9.2332000 -85.26500000 NA NA NA
## max 15.5227000 -79.65050000 NA NA NA
## range 6.2895000 5.61450000 NA NA NA
## sum 118.6112000 -963.01940000 NA NA NA
## median 9.3590500 -79.81925000 NA NA NA
## mean 9.8842667 -80.25161667 NA NA NA
## SE.mean 0.5134407 0.45651767 NA NA NA
## CI.mean.0.95 1.1300754 1.00478862 NA NA NA
## var 3.1634562 2.50090061 NA NA NA
## std.dev 1.7786107 1.58142360 NA NA NA
## coef.var 0.1799436 -0.01970582 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 12.0000000 NA
## nbr.null NA NA NA 4 8.0000000 NA
## nbr.na NA NA NA 8 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 8.0000000 NA
## range NA NA NA 0 8.0000000 NA
## sum NA NA NA 0 15.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 1.2500000 NA
## SE.mean NA NA NA 0 0.7084447 NA
## CI.mean.0.95 NA NA NA 0 1.5592763 NA
## var NA NA NA 0 6.0227273 NA
## std.dev NA NA NA 0 2.4541245 NA
## coef.var NA NA NA NaN 1.9632996 NA
## source_link prop ypos
## nbr.val NA 12.0000000 1.200000e+01
## nbr.null NA 0.0000000 0.000000e+00
## nbr.na NA 0.0000000 0.000000e+00
## min NA 0.1872107 9.360537e-02
## max NA 71.7525220 9.936931e+01
## range NA 71.5653113 9.927570e+01
## sum NA 100.0000000 4.705863e+02
## median NA 1.4941748 2.119020e+01
## mean NA 8.3333333 3.921552e+01
## SE.mean NA 5.8193444 1.127004e+01
## CI.mean.0.95 NA 12.8082906 2.480520e+01
## var NA 406.3772270 1.524167e+03
## std.dev NA 20.1588002 3.904058e+01
## coef.var NA 2.4190560 9.955389e-01
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Comayagua (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Comayagua")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 17.28613 |
| 7.28575 |
| 4.53362 |
| 8.52584 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Libertad 17.28613 17.28613 45.93546 45.93546
## Concepción de Guasistagua 8.52584 25.81197 22.65622 68.59168
## El Sauce 7.28575 33.09772 19.36086 87.95254
## El Rancho 4.53362 37.63134 12.04746 100.00000
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 |
## 0 | 579
## 1 |
## 1 | 7
head(df_HD)
## # A tibble: 4 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7455 10/29/15 <NA> <NA> Honduras HN Comayagua 4673
## 2 7454 10/16/15 18:00 <NA> Honduras HN Comayagua 1389
## 3 2533 10/3/10 13:00 <NA> Honduras HN Comayagua 1484
## 4 7456 10/16/15 23:00 <NA> Honduras HN Comayagua 1470
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 |
## 0 | 579
## 1 |
## 1 | 7
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 4 | 5
## 6 | 3
## 8 | 5
## 10 |
## 12 |
## 14 |
## 16 | 3
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 4.53362 |
1 |
25 |
25 |
25 |
25 |
| 7.28575 |
1 |
25 |
25 |
50 |
50 |
| 8.52584 |
1 |
25 |
25 |
75 |
75 |
| 17.28613 |
1 |
25 |
25 |
100 |
100 |
| Total |
4 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 5 obs. of 5 variables:
## $ n : num 1 1 1 1 4
## $ % : num 25 25 25 25 100
## $ val% : num 25 25 25 25 100
## $ %cum : num 25 50 75 100 100
## $ val%cum: num 25 50 75 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 4.53362 |
1 |
| 7.28575 |
1 |
| 8.52584 |
1 |
| 17.28613 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 4.53362 9.53362 14.53362 19.53362
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (4.53,9.53] |
2 |
0.6666667 |
2 |
| (9.53,14.5] |
0 |
0.0000000 |
2 |
| (14.5,19.5] |
1 |
0.3333333 |
3 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(4.53,9.53]",..: 1 2 3
## $ Freq : int 2 0 1
## $ Rel_Freq: num 0.667 0 0.333
## $ Cum_Freq: int 2 2 3
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (4.53,9.53] |
2 |
| (9.53,14.5] |
0 |
| (14.5,19.5] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 4.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.533000e+03 NA NA NA NA NA
## max 7.456000e+03 NA NA NA NA NA
## range 4.923000e+03 NA NA NA NA NA
## sum 2.489800e+04 NA NA NA NA NA
## median 7.454500e+03 NA NA NA NA NA
## mean 6.224500e+03 NA NA NA NA NA
## SE.mean 1.230500e+03 NA NA NA NA NA
## CI.mean.0.95 3.916000e+03 NA NA NA NA NA
## var 6.056522e+06 NA NA NA NA NA
## std.dev 2.461000e+03 NA NA NA NA NA
## coef.var 3.953731e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 4.000000e+00 NA 4.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.389000e+03 NA 4.5336200 NA
## max NA 4.673000e+03 NA 17.2861300 NA
## range NA 3.284000e+03 NA 12.7525100 NA
## sum NA 9.016000e+03 NA 37.6313400 NA
## median NA 1.477000e+03 NA 7.9057950 NA
## mean NA 2.254000e+03 NA 9.4078350 NA
## SE.mean NA 8.066051e+02 NA 2.7553987 NA
## CI.mean.0.95 NA 2.566978e+03 NA 8.7689084 NA
## var NA 2.602447e+06 NA 30.3688880 NA
## std.dev NA 1.613210e+03 NA 5.5107974 NA
## coef.var NA 7.157100e-01 NA 0.5857668 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 4.00000000 4.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 14.38980000 -8.776540e+01 NA NA NA
## max 14.90640000 -8.759300e+01 NA NA NA
## range 0.51660000 1.724000e-01 NA NA NA
## sum 58.35410000 -3.508022e+02 NA NA NA
## median 14.52895000 -8.772190e+01 NA NA NA
## mean 14.58852500 -8.770055e+01 NA NA NA
## SE.mean 0.11244615 3.738755e-02 NA NA NA
## CI.mean.0.95 0.35785382 1.189839e-01 NA NA NA
## var 0.05057654 5.591317e-03 NA NA NA
## std.dev 0.22489229 7.477511e-02 NA NA NA
## coef.var 0.01541570 -8.526185e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 3 4.000000 NA
## nbr.null NA NA NA 3 2.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 5.000000 NA
## range NA NA NA 0 5.000000 NA
## sum NA NA NA 0 6.000000 NA
## median NA NA NA 0 0.500000 NA
## mean NA NA NA 0 1.500000 NA
## SE.mean NA NA NA 0 1.190238 NA
## CI.mean.0.95 NA NA NA 0 3.787869 NA
## var NA NA NA 0 5.666667 NA
## std.dev NA NA NA 0 2.380476 NA
## coef.var NA NA NA NaN 1.586984 NA
## source_link prop ypos
## nbr.val NA 4.0000000 4.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 12.0474583 22.9677312
## max NA 45.9354623 88.6718889
## range NA 33.8880040 65.7041578
## sum NA 100.0000000 238.5755596
## median NA 21.0085397 63.4679698
## mean NA 25.0000000 59.6438899
## SE.mean NA 7.3220850 13.9652032
## CI.mean.0.95 NA 23.3021423 44.4435093
## var NA 214.4517146 780.1076021
## std.dev NA 14.6441700 27.9304064
## coef.var NA 0.5857668 0.4682861
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Copán (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Copán")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.74414 |
| 0.28887 |
| 1.39095 |
| 5.89721 |
| 0.43391 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Lucerna 5.897210 5.897210 67.357580 67.357580
## Ojos de Agua 1.390950 7.288160 15.887348 83.244928
## Santa Rosa de Copán 0.744140 8.032300 8.499523 91.744450
## Corquín 0.433910 8.466210 4.956094 96.700544
## Santa Rosa de Copán 0.288870 8.755080 3.299456 100.000000
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 3474
## 2 |
## 4 | 9
head(df_HD)
## # A tibble: 5 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7457 9/6/15 <NA> <NA> Honduras HN Copán 27753
## 2 7458 9/6/15 <NA> <NA> Honduras HN Copán 27753
## 3 7459 11/21/15 22:30 <NA> Honduras HN Copán 1340
## 4 7461 9/24/15 <NA> <NA> Honduras HN Copán 1452
## 5 854 10/19/08 <NA> <NA> Honduras HN Copán 4752
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 3474
## 2 |
## 4 | 9
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 347
## 1 | 4
## 2 |
## 3 |
## 4 |
## 5 | 9
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.28887 |
1 |
20 |
20 |
20 |
20 |
| 0.43391 |
1 |
20 |
20 |
40 |
40 |
| 0.74414 |
1 |
20 |
20 |
60 |
60 |
| 1.39095 |
1 |
20 |
20 |
80 |
80 |
| 5.89721 |
1 |
20 |
20 |
100 |
100 |
| Total |
5 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.28887 |
1 |
| 0.43391 |
1 |
| 0.74414 |
1 |
| 1.39095 |
1 |
| 5.89721 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.28887 2.28887 4.28887 6.28887
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.289,2.29] |
3 |
0.75 |
3 |
| (2.29,4.29] |
0 |
0.00 |
3 |
| (4.29,6.29] |
1 |
0.25 |
4 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(0.289,2.29]",..: 1 2 3
## $ Freq : int 3 0 1
## $ Rel_Freq: num 0.75 0 0.25
## $ Cum_Freq: int 3 3 4
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.289,2.29] |
3 |
| (2.29,4.29] |
0 |
| (4.29,6.29] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 5.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.540000e+02 NA NA NA NA NA
## max 7.461000e+03 NA NA NA NA NA
## range 6.607000e+03 NA NA NA NA NA
## sum 3.068900e+04 NA NA NA NA NA
## median 7.458000e+03 NA NA NA NA NA
## mean 6.137800e+03 NA NA NA NA NA
## SE.mean 1.320950e+03 NA NA NA NA NA
## CI.mean.0.95 3.667546e+03 NA NA NA NA NA
## var 8.724547e+06 NA NA NA NA NA
## std.dev 2.953734e+03 NA NA NA NA NA
## coef.var 4.812367e-01 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 5.000000e+00 NA 5.000000 NA 5.000000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.000000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.000000000
## min NA 1.340000e+03 NA 0.288870 NA 14.563700000
## max NA 2.775300e+04 NA 5.897210 NA 14.769800000
## range NA 2.641300e+04 NA 5.608340 NA 0.206100000
## sum NA 6.305000e+04 NA 8.755080 NA 73.391900000
## median NA 4.752000e+03 NA 0.744140 NA 14.689300000
## mean NA 1.261000e+04 NA 1.751016 NA 14.678380000
## SE.mean NA 6.212419e+03 NA 1.053732 NA 0.042427531
## CI.mean.0.95 NA 1.724844e+04 NA 2.925628 NA 0.117797711
## var NA 1.929707e+08 NA 5.551753 NA 0.009000477
## std.dev NA 1.389139e+04 NA 2.356216 NA 0.094870844
## coef.var NA 1.101617e+00 NA 1.345628 NA 0.006463305
## longitude geolocation hazard_type landslide_type
## nbr.val 5.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.891440e+01 NA NA NA
## max -8.877310e+01 NA NA NA
## range 1.413000e-01 NA NA NA
## sum -4.441466e+02 NA NA NA
## median -8.880980e+01 NA NA NA
## mean -8.882932e+01 NA NA NA
## SE.mean 2.721322e-02 NA NA NA
## CI.mean.0.95 7.555601e-02 NA NA NA
## var 3.702797e-03 NA NA NA
## std.dev 6.085061e-02 NA NA NA
## coef.var -6.850285e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 4 5.000000 NA
## nbr.null NA NA NA 4 3.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 23.000000 NA
## range NA NA NA 0 23.000000 NA
## sum NA NA NA 0 24.000000 NA
## median NA NA NA 0 0.000000 NA
## mean NA NA NA 0 4.800000 NA
## SE.mean NA NA NA 0 4.554119 NA
## CI.mean.0.95 NA NA NA 0 12.644261 NA
## var NA NA NA 0 103.700000 NA
## std.dev NA NA NA 0 10.183320 NA
## coef.var NA NA NA NaN 2.121525 NA
## source_link prop ypos
## nbr.val NA 5.000000 5.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 3.299456 4.249761
## max NA 67.357580 97.521953
## range NA 64.058124 93.272192
## sum NA 100.000000 193.028733
## median NA 8.499523 19.742652
## mean NA 20.000000 38.605747
## SE.mean NA 12.035661 17.797515
## CI.mean.0.95 NA 33.416351 49.413822
## var NA 724.285634 1583.757624
## std.dev NA 26.912555 39.796452
## coef.var NA 1.345628 1.030843
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Cortés (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Cortés")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Los Caminos 3.53737 3.53737 78.46977 78.46977
## Agua Azul Rancho 0.97057 4.50794 21.53023 100.00000
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 |
## 1 | 0
## 2 |
## 3 | 5
head(df_HD)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 5415 8/29/13 0:05:00 <NA> Honduras HN Cortés 1146
## 2 6689 7/31/14 Night <NA> Honduras HN Cortés 1043
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 |
## 1 | 0
## 2 |
## 3 | 5
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 |
## 1 | 0
## 1 |
## 2 |
## 2 |
## 3 |
## 3 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.97057 |
1 |
50 |
50 |
50 |
50 |
| 3.53737 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.97057 2.97057 4.97057
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.971,2.97] |
0 |
0 |
0 |
| (2.97,4.97] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.971,2.97]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.971,2.97] |
0 |
| (2.97,4.97] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 5.415000e+03 NA NA NA NA NA
## max 6.689000e+03 NA NA NA NA NA
## range 1.274000e+03 NA NA NA NA NA
## sum 1.210400e+04 NA NA NA NA NA
## median 6.052000e+03 NA NA NA NA NA
## mean 6.052000e+03 NA NA NA NA NA
## SE.mean 6.370000e+02 NA NA NA NA NA
## CI.mean.0.95 8.093852e+03 NA NA NA NA NA
## var 8.115380e+05 NA NA NA NA NA
## std.dev 9.008540e+02 NA NA NA NA NA
## coef.var 1.488523e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.043000e+03 NA 0.9705700 NA
## max NA 1.146000e+03 NA 3.5373700 NA
## range NA 1.030000e+02 NA 2.5668000 NA
## sum NA 2.189000e+03 NA 4.5079400 NA
## median NA 1.094500e+03 NA 2.2539700 NA
## mean NA 1.094500e+03 NA 2.2539700 NA
## SE.mean NA 5.150000e+01 NA 1.2834000 NA
## CI.mean.0.95 NA 6.543695e+02 NA 16.3071432 NA
## var NA 5.304500e+03 NA 3.2942311 NA
## std.dev NA 7.283200e+01 NA 1.8150017 NA
## coef.var NA 6.654363e-02 NA 0.8052466 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 14.895500000 -8.794230e+01 NA NA NA
## max 14.951000000 -8.793380e+01 NA NA NA
## range 0.055500000 8.500000e-03 NA NA NA
## sum 29.846500000 -1.758761e+02 NA NA NA
## median 14.923250000 -8.793805e+01 NA NA NA
## mean 14.923250000 -8.793805e+01 NA NA NA
## SE.mean 0.027750000 4.250000e-03 NA NA NA
## CI.mean.0.95 0.352597181 5.400137e-02 NA NA NA
## var 0.001540125 3.612500e-05 NA NA NA
## std.dev 0.039244426 6.010408e-03 NA NA NA
## coef.var 0.002629751 -6.834820e-05 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2.000000 2.000000 NA
## nbr.null NA NA NA 1.000000 1.000000 NA
## nbr.na NA NA NA 0.000000 0.000000 NA
## min NA NA NA 0.000000 0.000000 NA
## max NA NA NA 2.000000 6.000000 NA
## range NA NA NA 2.000000 6.000000 NA
## sum NA NA NA 2.000000 6.000000 NA
## median NA NA NA 1.000000 3.000000 NA
## mean NA NA NA 1.000000 3.000000 NA
## SE.mean NA NA NA 1.000000 3.000000 NA
## CI.mean.0.95 NA NA NA 12.706205 38.118614 NA
## var NA NA NA 2.000000 18.000000 NA
## std.dev NA NA NA 1.414214 4.242641 NA
## coef.var NA NA NA 1.414214 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.0000000 2.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 21.5302333 39.2348833
## max NA 78.4697667 89.2348833
## range NA 56.9395334 50.0000000
## sum NA 100.0000000 128.4697667
## median NA 50.0000000 64.2348833
## mean NA 50.0000000 64.2348833
## SE.mean NA 28.4697667 25.0000000
## CI.mean.0.95 NA 361.7426842 317.6551184
## var NA 1621.0552294 1250.0000000
## std.dev NA 40.2623302 35.3553391
## coef.var NA 0.8052466 0.5504071
boxplot(data, horizontal=TRUE, col='green')

Gráfico para La Paz (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "La Paz")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San José 4.691330 4.691330 93.626987 93.626987
## San Pedro Masahuat 0.319330 5.010660 6.373013 100.000000
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 1 |
## 2 |
## 3 |
## 4 | 7
head(df_HD)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6683 10/15/14 <NA> <NA> El Salvador SV La Paz 2654
## 2 7460 9/25/15 <NA> <NA> Honduras HN La Paz 1463
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 3
## 1 |
## 2 |
## 3 |
## 4 | 7
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 3
## 0 |
## 1 |
## 1 |
## 2 |
## 2 |
## 3 |
## 3 |
## 4 |
## 4 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.31933 |
1 |
50 |
50 |
50 |
50 |
| 4.69133 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.31933 3.31933 6.31933
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.319,3.32] |
0 |
0 |
0 |
| (3.32,6.32] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(0.319,3.32]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.319,3.32] |
0 |
| (3.32,6.32] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.683000e+03 NA NA NA NA NA
## max 7.460000e+03 NA NA NA NA NA
## range 7.770000e+02 NA NA NA NA NA
## sum 1.414300e+04 NA NA NA NA NA
## median 7.071500e+03 NA NA NA NA NA
## mean 7.071500e+03 NA NA NA NA NA
## SE.mean 3.885000e+02 NA NA NA NA NA
## CI.mean.0.95 4.936361e+03 NA NA NA NA NA
## var 3.018645e+05 NA NA NA NA NA
## std.dev 5.494220e+02 NA NA NA NA NA
## coef.var 7.769525e-02 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 2.000000e+00 NA 2.000000 NA 2.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 1.463000e+03 NA 0.319330 NA 13.54610000
## max NA 2.654000e+03 NA 4.691330 NA 14.28010000
## range NA 1.191000e+03 NA 4.372000 NA 0.73400000
## sum NA 4.117000e+03 NA 5.010660 NA 27.82620000
## median NA 2.058500e+03 NA 2.505330 NA 13.91310000
## mean NA 2.058500e+03 NA 2.505330 NA 13.91310000
## SE.mean NA 5.955000e+02 NA 2.186000 NA 0.36700000
## CI.mean.0.95 NA 7.566545e+03 NA 27.775764 NA 4.66317714
## var NA 7.092405e+05 NA 9.557192 NA 0.26937800
## std.dev NA 8.421642e+02 NA 3.091471 NA 0.51901638
## coef.var NA 4.091155e-01 NA 1.233958 NA 0.03730415
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.904010e+01 NA NA NA
## max -8.793690e+01 NA NA NA
## range 1.103200e+00 NA NA NA
## sum -1.769770e+02 NA NA NA
## median -8.848850e+01 NA NA NA
## mean -8.848850e+01 NA NA NA
## SE.mean 5.516000e-01 NA NA NA
## CI.mean.0.95 7.008743e+00 NA NA NA
## var 6.085251e-01 NA NA NA
## std.dev 7.800802e-01 NA NA NA
## coef.var -8.815611e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2 NA
## nbr.null NA NA NA 2 2 NA
## nbr.na NA NA NA 0 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 2.000000 2.000000
## nbr.null NA 0.000000 0.000000
## nbr.na NA 0.000000 0.000000
## min NA 6.373013 3.186506
## max NA 93.626987 53.186506
## range NA 87.253975 50.000000
## sum NA 100.000000 56.373013
## median NA 50.000000 28.186506
## mean NA 50.000000 28.186506
## SE.mean NA 43.626987 25.000000
## CI.mean.0.95 NA 554.333432 317.655118
## var NA 3806.628035 1250.000000
## std.dev NA 61.697877 35.355339
## coef.var NA 1.233958 1.254336
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Francisco Morazán (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Francisco Morazán")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 1.30583 |
| 2.00830 |
| 2.99239 |
| 0.98377 |
| 1.24404 |
| 2.21442 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santa Lucía 4.757910 4.757910 13.217489 13.217489
## Río Abajo 3.639620 8.397530 10.110876 23.328364
## El Guapinol 3.543990 11.941520 9.845215 33.173580
## Tegucigalpa 3.252810 15.194330 9.036316 42.209896
## El Tablón 3.129860 18.324190 8.694761 50.904657
## Tegucigalpa 2.992390 21.316580 8.312869 59.217525
## Tegucigalpa 2.913260 24.229840 8.093045 67.310571
## Tegucigalpa 2.214420 26.444260 6.151666 73.462236
## Villa Nueva 2.008300 28.452560 5.579064 79.041300
## El Lolo 1.858970 30.311530 5.164224 84.205524
## Yaguacire 1.305830 31.617360 3.627600 87.833124
## Tegucigalpa 1.244040 32.861400 3.455947 91.289071
## Tegucigalpa 1.236390 34.097790 3.434695 94.723766
## Tegucigalpa 0.983770 35.081560 2.732916 97.456683
## Tegucigalpa 0.915520 35.997080 2.543317 100.000000
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 9
## 1 | 02239
## 2 | 029
## 3 | 01356
## 4 | 8
head(df_HD)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7483 9/28/15 Morning <NA> Honduras HN Fran~ 1449
## 2 7463 6/13/15 Morning <NA> Honduras HN Fran~ 2295
## 3 855 10/20/08 <NA> <NA> Honduras HN Fran~ 850848
## 4 2062 7/12/10 5:30:00 <NA> Honduras HN Fran~ 850848
## 5 2093 7/18/10 <NA> <NA> Honduras HN Fran~ 850848
## 6 2217 8/7/10 Overnight <NA> Honduras HN Fran~ 850848
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 0 | 9
## 1 | 02239
## 2 | 029
## 3 | 01356
## 4 | 8
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 9
## 1 | 0223
## 1 | 9
## 2 | 02
## 2 | 9
## 3 | 013
## 3 | 56
## 4 |
## 4 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.91552 |
1 |
6.7 |
6.7 |
6.7 |
6.7 |
| 0.98377 |
1 |
6.7 |
6.7 |
13.3 |
13.3 |
| 1.23639 |
1 |
6.7 |
6.7 |
20.0 |
20.0 |
| 1.24404 |
1 |
6.7 |
6.7 |
26.7 |
26.7 |
| 1.30583 |
1 |
6.7 |
6.7 |
33.3 |
33.3 |
| 1.85897 |
1 |
6.7 |
6.7 |
40.0 |
40.0 |
| 2.0083 |
1 |
6.7 |
6.7 |
46.7 |
46.7 |
| 2.21442 |
1 |
6.7 |
6.7 |
53.3 |
53.3 |
| 2.91326 |
1 |
6.7 |
6.7 |
60.0 |
60.0 |
| 2.99239 |
1 |
6.7 |
6.7 |
66.7 |
66.7 |
| 3.12986 |
1 |
6.7 |
6.7 |
73.3 |
73.3 |
| 3.25281 |
1 |
6.7 |
6.7 |
80.0 |
80.0 |
| 3.54399 |
1 |
6.7 |
6.7 |
86.7 |
86.7 |
| 3.63962 |
1 |
6.7 |
6.7 |
93.3 |
93.3 |
| 4.75791 |
1 |
6.7 |
6.7 |
100.0 |
100.0 |
| Total |
15 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 16 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
## $ val% : num 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
## $ %cum : num 6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
## $ val%cum: num 6.7 13.3 20 26.7 33.3 40 46.7 53.3 60 66.7 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.91552 |
1 |
| 0.98377 |
1 |
| 1.23639 |
1 |
| 1.24404 |
1 |
| 1.30583 |
1 |
| 1.85897 |
1 |
| 2.0083 |
1 |
| 2.21442 |
1 |
| 2.91326 |
1 |
| 2.99239 |
1 |
| 3.12986 |
1 |
| 3.25281 |
1 |
| 3.54399 |
1 |
| 3.63962 |
1 |
| 4.75791 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.91552 1.91552 2.91552 3.91552 4.91552
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.916,1.92] |
5 |
0.3571429 |
5 |
| (1.92,2.92] |
3 |
0.2142857 |
8 |
| (2.92,3.92] |
5 |
0.3571429 |
13 |
| (3.92,4.92] |
1 |
0.0714286 |
14 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.916,1.92]",..: 1 2 3 4
## $ Freq : int 5 3 5 1
## $ Rel_Freq: num 0.3571 0.2143 0.3571 0.0714
## $ Cum_Freq: int 5 8 13 14
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.916,1.92] |
5 |
| (1.92,2.92] |
3 |
| (2.92,3.92] |
5 |
| (3.92,4.92] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 1.500000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.550000e+02 NA NA NA NA NA
## max 7.485000e+03 NA NA NA NA NA
## range 6.630000e+03 NA NA NA NA NA
## sum 8.079700e+04 NA NA NA NA NA
## median 7.447000e+03 NA NA NA NA NA
## mean 5.386467e+03 NA NA NA NA NA
## SE.mean 6.991591e+02 NA NA NA NA NA
## CI.mean.0.95 1.499547e+03 NA NA NA NA NA
## var 7.332353e+06 NA NA NA NA NA
## std.dev 2.707832e+03 NA NA NA NA NA
## coef.var 5.027102e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 1.500000e+01 NA 15.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.121000e+03 NA 0.9155200 NA
## max NA 8.508480e+05 NA 4.7579100 NA
## range NA 8.497270e+05 NA 3.8423900 NA
## sum NA 6.819247e+06 NA 35.9970800 NA
## median NA 8.508480e+05 NA 2.2144200 NA
## mean NA 4.546165e+05 NA 2.3998053 NA
## SE.mean NA 1.132090e+05 NA 0.2996574 NA
## CI.mean.0.95 NA 2.428092e+05 NA 0.6427013 NA
## var NA 1.922443e+11 NA 1.3469187 NA
## std.dev NA 4.384567e+05 NA 1.1605683 NA
## coef.var NA 9.644541e-01 NA 0.4836093 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 1.500000e+01 1.500000e+01 NA NA NA
## nbr.null 0.000000e+00 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 0.000000e+00 NA NA NA
## min 1.376450e+01 -8.743400e+01 NA NA NA
## max 1.413380e+01 -8.715280e+01 NA NA NA
## range 3.693000e-01 2.812000e-01 NA NA NA
## sum 2.109138e+02 -1.308243e+03 NA NA NA
## median 1.408140e+01 -8.720870e+01 NA NA NA
## mean 1.406092e+01 -8.721617e+01 NA NA NA
## SE.mean 2.234844e-02 1.715587e-02 NA NA NA
## CI.mean.0.95 4.793264e-02 3.679568e-02 NA NA NA
## var 7.491792e-03 4.414858e-03 NA NA NA
## std.dev 8.655514e-02 6.644440e-02 NA NA NA
## coef.var 6.155724e-03 -7.618358e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 9.0000000 15.000000 NA
## nbr.null NA NA NA 8.0000000 10.000000 NA
## nbr.na NA NA NA 6.0000000 0.000000 NA
## min NA NA NA 0.0000000 0.000000 NA
## max NA NA NA 1.0000000 29.000000 NA
## range NA NA NA 1.0000000 29.000000 NA
## sum NA NA NA 1.0000000 40.000000 NA
## median NA NA NA 0.0000000 0.000000 NA
## mean NA NA NA 0.1111111 2.666667 NA
## SE.mean NA NA NA 0.1111111 1.918994 NA
## CI.mean.0.95 NA NA NA 0.2562227 4.115834 NA
## var NA NA NA 0.1111111 55.238095 NA
## std.dev NA NA NA 0.3333333 7.432234 NA
## coef.var NA NA NA 3.0000000 2.787088 NA
## source_link prop ypos
## nbr.val NA 15.0000000 15.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 2.5433174 1.8137999
## max NA 13.2174888 95.0773924
## range NA 10.6741713 93.2635925
## sum NA 100.0000000 645.2684218
## median NA 6.1516656 37.8129143
## mean NA 6.6666667 43.0178948
## SE.mean NA 0.8324493 7.7398433
## CI.mean.0.95 NA 1.7854262 16.6003128
## var NA 10.3945777 898.5776101
## std.dev NA 3.2240623 29.9762841
## coef.var NA 0.4836093 0.6968329
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Ocotepeque (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Ocotepeque")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## La Labor 5.79867 5.79867 74.27793 74.27793
## Sinuapa 2.00805 7.80672 25.72207 100.00000
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 2 | 0
## 3 |
## 4 |
## 5 | 8
head(df_HD)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6672 10/13/14 <NA> <NA> Honduras HN Ocotepeque 2389
## 2 7462 9/25/15 <NA> <NA> Honduras HN Ocotepeque 1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is at the |
##
## 2 | 0
## 3 |
## 4 |
## 5 | 8
stem(df_HD$"distance", scale = 2)
##
## The decimal point is at the |
##
## 2 | 0
## 2 |
## 3 |
## 3 |
## 4 |
## 4 |
## 5 |
## 5 | 8
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.00805 |
1 |
50 |
50 |
50 |
50 |
| 5.79867 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.00805 4.00805 6.00805
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (2.01,4.01] |
0 |
0 |
0 |
| (4.01,6.01] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(2.01,4.01]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (2.01,4.01] |
0 |
| (4.01,6.01] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.672000e+03 NA NA NA NA NA
## max 7.462000e+03 NA NA NA NA NA
## range 7.900000e+02 NA NA NA NA NA
## sum 1.413400e+04 NA NA NA NA NA
## median 7.067000e+03 NA NA NA NA NA
## mean 7.067000e+03 NA NA NA NA NA
## SE.mean 3.950000e+02 NA NA NA NA NA
## CI.mean.0.95 5.018951e+03 NA NA NA NA NA
## var 3.120500e+05 NA NA NA NA NA
## std.dev 5.586144e+02 NA NA NA NA NA
## coef.var 7.904547e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.416000e+03 NA 2.0080500 NA
## max NA 2.389000e+03 NA 5.7986700 NA
## range NA 9.730000e+02 NA 3.7906200 NA
## sum NA 3.805000e+03 NA 7.8067200 NA
## median NA 1.902500e+03 NA 3.9033600 NA
## mean NA 1.902500e+03 NA 3.9033600 NA
## SE.mean NA 4.865000e+02 NA 1.8953100 NA
## CI.mean.0.95 NA 6.181569e+03 NA 24.0821969 NA
## var NA 4.733645e+05 NA 7.1844000 NA
## std.dev NA 6.880149e+02 NA 2.6803731 NA
## coef.var NA 3.616373e-01 NA 0.6866836 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.000000000 2.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 14.457900000 -8.916660e+01 NA NA NA
## max 14.481000000 -8.905370e+01 NA NA NA
## range 0.023100000 1.129000e-01 NA NA NA
## sum 28.938900000 -1.782203e+02 NA NA NA
## median 14.469450000 -8.911015e+01 NA NA NA
## mean 14.469450000 -8.911015e+01 NA NA NA
## SE.mean 0.011550000 5.645000e-02 NA NA NA
## CI.mean.0.95 0.146756665 7.172653e-01 NA NA NA
## var 0.000266805 6.373205e-03 NA NA NA
## std.dev 0.016334167 7.983236e-02 NA NA NA
## coef.var 0.001128873 -8.958840e-04 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2 NA
## nbr.null NA NA NA 2 2 NA
## nbr.na NA NA NA 0 0 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 2.0000000 2.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 25.7220702 12.8610351
## max NA 74.2779298 62.8610351
## range NA 48.5558596 50.0000000
## sum NA 100.0000000 75.7220702
## median NA 50.0000000 37.8610351
## mean NA 50.0000000 37.8610351
## SE.mean NA 24.2779298 25.0000000
## CI.mean.0.95 NA 308.4803464 317.6551184
## var NA 1178.8357492 1250.0000000
## std.dev NA 34.3341776 35.3553391
## coef.var NA 0.6866836 0.9338186
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Santa Bárbara (Honduras)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_HD <- subset(df, country_name == "Honduras")
knitr::kable(head(df_HD))
library(dplyr)
df_HD <- subset(df, state == "Santa Bárbara")
knitr::kable(head(df_HD))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_HD, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_HD <- df_HD %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_HD$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_HD, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_HD$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_HD$distance
names(distance) <- df_HD$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Agualote 2.91594 2.91594 50.36662 50.36662
## Ilama 2.87349 5.78943 49.63338 100.00000
stem(df_HD$"distance")
##
## The decimal point is 2 digit(s) to the left of the |
##
## 287 | 3
## 288 |
## 289 |
## 290 |
## 291 | 6
head(df_HD)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7464 9/28/15 Morning <NA> Honduras HN Sant~ 1811
## 2 6691 10/14/14 Night <NA> Honduras HN Sant~ 1759
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_HD))
stem(df_HD$"distance")
##
## The decimal point is 2 digit(s) to the left of the |
##
## 287 | 3
## 288 |
## 289 |
## 290 |
## 291 | 6
stem(df_HD$"distance", scale = 2)
##
## The decimal point is 2 digit(s) to the left of the |
##
## 287 | 3
## 287 |
## 288 |
## 288 |
## 289 |
## 289 |
## 290 |
## 290 |
## 291 |
## 291 | 6
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 2.87349 |
1 |
50 |
50 |
50 |
50 |
| 2.91594 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 2.87349 3.87349
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
str(Freq_table)
## 'data.frame': 1 obs. of 4 variables:
## $ distance: Factor w/ 1 level "(2.87,3.87]": 1
## $ Freq : int 1
## $ Rel_Freq: num 1
## $ Cum_Freq: int 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_HD)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 6.691000e+03 NA NA NA NA NA
## max 7.464000e+03 NA NA NA NA NA
## range 7.730000e+02 NA NA NA NA NA
## sum 1.415500e+04 NA NA NA NA NA
## median 7.077500e+03 NA NA NA NA NA
## mean 7.077500e+03 NA NA NA NA NA
## SE.mean 3.865000e+02 NA NA NA NA NA
## CI.mean.0.95 4.910948e+03 NA NA NA NA NA
## var 2.987645e+05 NA NA NA NA NA
## std.dev 5.465935e+02 NA NA NA NA NA
## coef.var 7.722975e-02 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+00 NA 2.0000000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000000 NA
## min NA 1.759000e+03 NA 2.8734900000 NA
## max NA 1.811000e+03 NA 2.9159400000 NA
## range NA 5.200000e+01 NA 0.0424500000 NA
## sum NA 3.570000e+03 NA 5.7894300000 NA
## median NA 1.785000e+03 NA 2.8947150000 NA
## mean NA 1.785000e+03 NA 2.8947150000 NA
## SE.mean NA 2.600000e+01 NA 0.0212250000 NA
## CI.mean.0.95 NA 3.303613e+02 NA 0.2696891955 NA
## var NA 1.352000e+03 NA 0.0009010013 NA
## std.dev NA 3.676955e+01 NA 0.0300166829 NA
## coef.var NA 2.059919e-02 NA 0.0103694778 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 2.00000000 2.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 15.09090000 -8.855100e+01 NA NA NA
## max 15.30900000 -8.820720e+01 NA NA NA
## range 0.21810000 3.438000e-01 NA NA NA
## sum 30.39990000 -1.767582e+02 NA NA NA
## median 15.19995000 -8.837910e+01 NA NA NA
## mean 15.19995000 -8.837910e+01 NA NA NA
## SE.mean 0.10905000 1.719000e-01 NA NA NA
## CI.mean.0.95 1.38561163 2.184197e+00 NA NA NA
## var 0.02378380 5.909922e-02 NA NA NA
## std.dev 0.15421999 2.431033e-01 NA NA NA
## coef.var 0.01014609 -2.750688e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2.0000000 NA
## nbr.null NA NA NA 2 1.0000000 NA
## nbr.na NA NA NA 0 0.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 1.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA 0 0.5000000 NA
## mean NA NA NA 0 0.5000000 NA
## SE.mean NA NA NA 0 0.5000000 NA
## CI.mean.0.95 NA NA NA 0 6.3531024 NA
## var NA NA NA 0 0.5000000 NA
## std.dev NA NA NA 0 0.7071068 NA
## coef.var NA NA NA NaN 1.4142136 NA
## source_link prop ypos
## nbr.val NA 2.00000000 2.0000000
## nbr.null NA 0.00000000 0.0000000
## nbr.na NA 0.00000000 0.0000000
## min NA 49.63338360 24.8166918
## max NA 50.36661640 74.8166918
## range NA 0.73323281 50.0000000
## sum NA 100.00000000 99.6333836
## median NA 50.00000000 49.8166918
## mean NA 50.00000000 49.8166918
## SE.mean NA 0.36661640 25.0000000
## CI.mean.0.95 NA 4.65830307 317.6551184
## var NA 0.26881517 1250.0000000
## std.dev NA 0.51847389 35.3553391
## coef.var NA 0.01036948 0.7097087
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Nicaragua
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_NC <- subset(df, country_name == "Honduras")
knitr::kable(head(df_NC))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_NC <- df_NC %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_NC$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_NC, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.31238 |
| 6.66574 |
| 2.91594 |
| 2.87349 |
| 2.00805 |
| 5.79867 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_NC$distance
names(distance) <- df_NC$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Colón 36.3762900 36.3762900 21.8907391 21.8907391
## Comayagua 17.2861300 53.6624200 10.4025496 32.2932888
## Choluteca 11.6723700 65.3347900 7.0242679 39.3175567
## Comayagua 8.5258400 73.8606300 5.1307305 44.4482872
## Comayagua 7.2857500 81.1463800 4.3844618 48.8327489
## Yoro 6.6657400 87.8121200 4.0113485 52.8440974
## Copán 5.8972100 93.7093300 3.5488579 56.3929554
## Ocotepeque 5.7986700 99.5080000 3.4895580 59.8825133
## Francisco Morazán 4.7579100 104.2659100 2.8632432 62.7457566
## La Paz 4.6913300 108.9572400 2.8231763 65.5689329
## Comayagua 4.5336200 113.4908600 2.7282687 68.2972016
## Choluteca 3.6959600 117.1868200 2.2241767 70.5213783
## Francisco Morazán 3.6396200 120.8264400 2.1902721 72.7116504
## Francisco Morazán 3.5439900 124.3704300 2.1327233 74.8443736
## Cortés 3.5373700 127.9078000 2.1287395 76.9731131
## Francisco Morazán 3.2528100 131.1606100 1.9574953 78.9306084
## Francisco Morazán 3.1298600 134.2904700 1.8835057 80.8141140
## Francisco Morazán 2.9923900 137.2828600 1.8007782 82.6148922
## Santa Bárbara 2.9159400 140.1988000 1.7547716 84.3696639
## Francisco Morazán 2.9132600 143.1120600 1.7531588 86.1228227
## Santa Bárbara 2.8734900 145.9855500 1.7292258 87.8520485
## Francisco Morazán 2.2144200 148.1999700 1.3326068 89.1846553
## Francisco Morazán 2.0083000 150.2082700 1.2085667 90.3932220
## Ocotepeque 2.0080500 152.2163200 1.2084162 91.6016382
## El Paraíso 1.9005200 154.1168400 1.1437062 92.7453444
## Francisco Morazán 1.8589700 155.9758100 1.1187020 93.8640463
## Copán 1.3909500 157.3667600 0.8370541 94.7011005
## Francisco Morazán 1.3058300 158.6725900 0.7858301 95.4869306
## Francisco Morazán 1.2440400 159.9166300 0.7486458 96.2355763
## Francisco Morazán 1.2363900 161.1530200 0.7440421 96.9796184
## Francisco Morazán 0.9837700 162.1367900 0.5920189 97.5716373
## Cortés 0.9705700 163.1073600 0.5840754 98.1557127
## Francisco Morazán 0.9155200 164.0228800 0.5509470 98.7066598
## Copán 0.7441400 164.7670200 0.4478130 99.1544727
## Copán 0.4339100 165.2009300 0.2611209 99.4155937
## Choluteca 0.3698700 165.5708000 0.2225826 99.6381762
## Yoro 0.3123800 165.8831800 0.1879859 99.8261621
## Copán 0.2888700 166.1720500 0.1738379 100.0000000
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000011111111222223333334444
## 0 | 55566779
## 1 | 2
## 1 | 7
## 2 |
## 2 |
## 3 |
## 3 | 6
head(df_NC)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 6202 5/20/14 <NA> <NA> Honduras HN Yoro 15774
## 2 7467 1/22/15 <NA> <NA> Honduras HN Yoro 2188
## 3 6691 10/14/14 Night <NA> Honduras HN Sant~ 1759
## 4 7464 9/28/15 Morning <NA> Honduras HN Sant~ 1811
## 5 6672 10/13/14 <NA> <NA> Honduras HN Ocot~ 2389
## 6 7462 9/25/15 <NA> <NA> Honduras HN Ocot~ 1416
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_NC))
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 000011111111222223333334444
## 0 | 55566779
## 1 | 2
## 1 | 7
## 2 |
## 2 |
## 3 |
## 3 | 6
stem(df_NC$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 33447900223499
## 2 | 0029990135567
## 4 | 57889
## 6 | 73
## 8 | 5
## 10 | 7
## 12 |
## 14 |
## 16 | 3
## 18 |
## 20 |
## 22 |
## 24 |
## 26 |
## 28 |
## 30 |
## 32 |
## 34 |
## 36 | 4
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.28887 |
1 |
2.6 |
2.6 |
2.6 |
2.6 |
| 0.31238 |
1 |
2.6 |
2.6 |
5.3 |
5.3 |
| 0.36987 |
1 |
2.6 |
2.6 |
7.9 |
7.9 |
| 0.43391 |
1 |
2.6 |
2.6 |
10.5 |
10.5 |
| 0.74414 |
1 |
2.6 |
2.6 |
13.2 |
13.2 |
| 0.91552 |
1 |
2.6 |
2.6 |
15.8 |
15.8 |
| 0.97057 |
1 |
2.6 |
2.6 |
18.4 |
18.4 |
| 0.98377 |
1 |
2.6 |
2.6 |
21.1 |
21.1 |
| 1.23639 |
1 |
2.6 |
2.6 |
23.7 |
23.7 |
| 1.24404 |
1 |
2.6 |
2.6 |
26.3 |
26.3 |
| 1.30583 |
1 |
2.6 |
2.6 |
28.9 |
28.9 |
| 1.39095 |
1 |
2.6 |
2.6 |
31.6 |
31.6 |
| 1.85897 |
1 |
2.6 |
2.6 |
34.2 |
34.2 |
| 1.90052 |
1 |
2.6 |
2.6 |
36.8 |
36.8 |
| 2.00805 |
1 |
2.6 |
2.6 |
39.5 |
39.5 |
| 2.0083 |
1 |
2.6 |
2.6 |
42.1 |
42.1 |
| 2.21442 |
1 |
2.6 |
2.6 |
44.7 |
44.7 |
| 2.87349 |
1 |
2.6 |
2.6 |
47.4 |
47.4 |
| 2.91326 |
1 |
2.6 |
2.6 |
50.0 |
50.0 |
| 2.91594 |
1 |
2.6 |
2.6 |
52.6 |
52.6 |
| 2.99239 |
1 |
2.6 |
2.6 |
55.3 |
55.3 |
| 3.12986 |
1 |
2.6 |
2.6 |
57.9 |
57.9 |
| 3.25281 |
1 |
2.6 |
2.6 |
60.5 |
60.5 |
| 3.53737 |
1 |
2.6 |
2.6 |
63.2 |
63.2 |
| 3.54399 |
1 |
2.6 |
2.6 |
65.8 |
65.8 |
| 3.63962 |
1 |
2.6 |
2.6 |
68.4 |
68.4 |
| 3.69596 |
1 |
2.6 |
2.6 |
71.1 |
71.1 |
| 4.53362 |
1 |
2.6 |
2.6 |
73.7 |
73.7 |
| 4.69133 |
1 |
2.6 |
2.6 |
76.3 |
76.3 |
| 4.75791 |
1 |
2.6 |
2.6 |
78.9 |
78.9 |
| 5.79867 |
1 |
2.6 |
2.6 |
81.6 |
81.6 |
| 5.89721 |
1 |
2.6 |
2.6 |
84.2 |
84.2 |
| 6.66574 |
1 |
2.6 |
2.6 |
86.8 |
86.8 |
| 7.28575 |
1 |
2.6 |
2.6 |
89.5 |
89.5 |
| 8.52584 |
1 |
2.6 |
2.6 |
92.1 |
92.1 |
| 11.67237 |
1 |
2.6 |
2.6 |
94.7 |
94.7 |
| 17.28613 |
1 |
2.6 |
2.6 |
97.4 |
97.4 |
| 36.37629 |
1 |
2.6 |
2.6 |
100.0 |
100.0 |
| Total |
38 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 39 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
## $ val% : num 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 2.6 ...
## $ %cum : num 2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
## $ val%cum: num 2.6 5.3 7.9 10.5 13.2 15.8 18.4 21.1 23.7 26.3 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.28887 |
1 |
| 0.31238 |
1 |
| 0.36987 |
1 |
| 0.43391 |
1 |
| 0.74414 |
1 |
| 0.91552 |
1 |
| 0.97057 |
1 |
| 0.98377 |
1 |
| 1.23639 |
1 |
| 1.24404 |
1 |
| 1.30583 |
1 |
| 1.39095 |
1 |
| 1.85897 |
1 |
| 1.90052 |
1 |
| 2.00805 |
1 |
| 2.0083 |
1 |
| 2.21442 |
1 |
| 2.87349 |
1 |
| 2.91326 |
1 |
| 2.91594 |
1 |
| 2.99239 |
1 |
| 3.12986 |
1 |
| 3.25281 |
1 |
| 3.53737 |
1 |
| 3.54399 |
1 |
| 3.63962 |
1 |
| 3.69596 |
1 |
| 4.53362 |
1 |
| 4.69133 |
1 |
| 4.75791 |
1 |
| 5.79867 |
1 |
| 5.89721 |
1 |
| 6.66574 |
1 |
| 7.28575 |
1 |
| 8.52584 |
1 |
| 11.67237 |
1 |
| 17.28613 |
1 |
| 36.37629 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.28887 6.28887 12.28887 18.28887 24.28887 30.28887 36.28887 42.28887
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.289,6.29] |
31 |
0.8378378 |
31 |
| (6.29,12.3] |
4 |
0.1081081 |
35 |
| (12.3,18.3] |
1 |
0.0270270 |
36 |
| (18.3,24.3] |
0 |
0.0000000 |
36 |
| (24.3,30.3] |
0 |
0.0000000 |
36 |
| (30.3,36.3] |
0 |
0.0000000 |
36 |
| (36.3,42.3] |
1 |
0.0270270 |
37 |
str(Freq_table)
## 'data.frame': 7 obs. of 4 variables:
## $ distance: Factor w/ 7 levels "(0.289,6.29]",..: 1 2 3 4 5 6 7
## $ Freq : int 31 4 1 0 0 0 1
## $ Rel_Freq: num 0.838 0.108 0.027 0 0 ...
## $ Cum_Freq: int 31 35 36 36 36 36 37
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.289,6.29] |
31 |
| (6.29,12.3] |
4 |
| (12.3,18.3] |
1 |
| (18.3,24.3] |
0 |
| (24.3,30.3] |
0 |
| (30.3,36.3] |
0 |
| (36.3,42.3] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_NC)
## id date time continent_code country_name country_code
## nbr.val 3.800000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.540000e+02 NA NA NA NA NA
## max 7.485000e+03 NA NA NA NA NA
## range 6.631000e+03 NA NA NA NA NA
## sum 2.290210e+05 NA NA NA NA NA
## median 7.448500e+03 NA NA NA NA NA
## mean 6.026868e+03 NA NA NA NA NA
## SE.mean 3.671138e+02 NA NA NA NA NA
## CI.mean.0.95 7.438432e+02 NA NA NA NA NA
## var 5.121356e+06 NA NA NA NA NA
## std.dev 2.263041e+03 NA NA NA NA NA
## coef.var 3.754921e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.800000e+01 NA 38.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 1.043000e+03 NA 0.288870 NA
## max NA 8.508480e+05 NA 36.376290 NA
## range NA 8.498050e+05 NA 36.087420 NA
## sum NA 7.001138e+06 NA 166.172050 NA
## median NA 1.936000e+03 NA 2.914600 NA
## mean NA 1.842405e+05 NA 4.372949 NA
## SE.mean NA 5.663200e+04 NA 1.023393 NA
## CI.mean.0.95 NA 1.147473e+05 NA 2.073592 NA
## var NA 1.218729e+11 NA 39.798695 NA
## std.dev NA 3.491031e+05 NA 6.308621 NA
## coef.var NA 1.894823e+00 NA 1.442647 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 38.00000000 3.800000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 13.28610000 -8.916660e+01 NA NA NA
## max 15.52270000 -8.526500e+01 NA NA NA
## range 2.23660000 3.901600e+00 NA NA NA
## sum 546.00580000 -3.329927e+03 NA NA NA
## median 14.20695000 -8.728760e+01 NA NA NA
## mean 14.36857368 -8.762966e+01 NA NA NA
## SE.mean 0.08447952 1.301577e-01 NA NA NA
## CI.mean.0.95 0.17117178 2.637245e-01 NA NA NA
## var 0.27119802 6.437585e-01 NA NA NA
## std.dev 0.52076676 8.023457e-01 NA NA NA
## coef.var 0.03624346 -9.156096e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 29.0000000 38.0000000
## nbr.null NA NA NA 26.0000000 25.0000000
## nbr.na NA NA NA 9.0000000 0.0000000
## min NA NA NA 0.0000000 0.0000000
## max NA NA NA 3.0000000 29.0000000
## range NA NA NA 3.0000000 29.0000000
## sum NA NA NA 6.0000000 86.0000000
## median NA NA NA 0.0000000 0.0000000
## mean NA NA NA 0.2068966 2.2631579
## SE.mean NA NA NA 0.1253499 0.9774733
## CI.mean.0.95 NA NA NA 0.2567675 1.9805491
## var NA NA NA 0.4556650 36.3072546
## std.dev NA NA NA 0.6750296 6.0255502
## coef.var NA NA NA 3.2626433 2.6624524
## source_name source_link prop ypos
## nbr.val NA NA 38.0000000 3.800000e+01
## nbr.null NA NA 0.0000000 0.000000e+00
## nbr.na NA NA 0.0000000 0.000000e+00
## min NA NA 0.1738379 9.399294e-02
## max NA NA 21.8907391 9.988871e+01
## range NA NA 21.7169012 9.979472e+01
## sum NA NA 100.0000000 1.347990e+03
## median NA NA 1.7539652 3.219648e+01
## mean NA NA 2.6315789 3.547342e+01
## SE.mean NA NA 0.6158637 4.046533e+00
## CI.mean.0.95 NA NA 1.2478585 8.199055e+00
## var NA NA 14.4129500 6.222284e+02
## std.dev NA NA 3.7964391 2.494451e+01
## coef.var NA NA 1.4426469 7.031887e-01
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Masaya (Nicaragua)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_NC <- subset(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
| 229 |
9/4/07 |
NA |
NA |
Nicaragua |
NI |
Atlántico Norte |
6315 |
Bonanza |
54.90196 |
NA |
13.6670 |
-84.2435 |
(13.667, -84.243499999999997) |
Landslide |
Complex |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
United Nations Development Programme - Relief Web |
http://www.reliefweb.int/ |
| 826 |
10/3/08 |
NA |
NA |
Nicaragua |
NI |
Masaya |
5182 |
Tisma |
14.49301 |
NA |
12.1200 |
-85.8900 |
(12.12, -85.89) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
9 |
CBC |
http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html |
| 2289 |
8/20/10 |
NA |
NA |
Nicaragua |
NI |
Managua |
16469 |
El Crucero |
5.84054 |
NA |
12.0420 |
-86.2998 |
(12.042, -86.299800000000005) |
Landslide |
Mudslide |
Medium |
Downpour |
NA |
NA |
3 |
NA |
NA |
| 2330 |
8/25/10 |
NA |
NA |
Nicaragua |
NI |
Jinotega |
2367 |
San José de Bocay |
1.36745 |
NA |
13.5317 |
-85.5325 |
(13.531700000000001, -85.532499999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
NA |
| 6089 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.14242 |
Unknown |
12.3535 |
-84.8095 |
(12.3535, -84.8095) |
Landslide |
Landslide |
Small |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
| 6090 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.24511 |
Unknown |
12.3521 |
-84.8080 |
(12.3521, -84.808000000000007) |
Landslide |
Landslide |
Medium |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
library(dplyr)
df_NC <- subset(df, state == "Masaya")
knitr::kable(head(df_NC))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_NC <- df_NC %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_NC$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_NC, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_NC$distance
names(distance) <- df_NC$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Tisma 14.49301 14.49301 90.24116 90.24116
## San Juan de Oriente 1.56730 16.06031 9.75884 100.00000
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 |
## 1 | 4
head(df_NC)
## # A tibble: 2 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 826 10/3/08 <NA> <NA> Nicaragua NI Masaya 5182
## 2 7481 5/13/15 <NA> <NA> Nicaragua NI Masaya 2111
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_NC))
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 2
## 0 |
## 1 | 4
stem(df_NC$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 6
## 2 |
## 4 |
## 6 |
## 8 |
## 10 |
## 12 |
## 14 | 5
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.5673 |
1 |
50 |
50 |
50 |
50 |
| 14.49301 |
1 |
50 |
50 |
100 |
100 |
| Total |
2 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 3 obs. of 5 variables:
## $ n : num 1 1 2
## $ % : num 50 50 100
## $ val% : num 50 50 100
## $ %cum : num 50 100 100
## $ val%cum: num 50 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.5673 8.5673 15.5673
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.57,8.57] |
0 |
0 |
0 |
| (8.57,15.6] |
1 |
1 |
1 |
str(Freq_table)
## 'data.frame': 2 obs. of 4 variables:
## $ distance: Factor w/ 2 levels "(1.57,8.57]",..: 1 2
## $ Freq : int 0 1
## $ Rel_Freq: num 0 1
## $ Cum_Freq: int 0 1
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.57,8.57] |
0 |
| (8.57,15.6] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_NC)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 2.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 8.260000e+02 NA NA NA NA NA
## max 7.481000e+03 NA NA NA NA NA
## range 6.655000e+03 NA NA NA NA NA
## sum 8.307000e+03 NA NA NA NA NA
## median 4.153500e+03 NA NA NA NA NA
## mean 4.153500e+03 NA NA NA NA NA
## SE.mean 3.327500e+03 NA NA NA NA NA
## CI.mean.0.95 4.227990e+04 NA NA NA NA NA
## var 2.214451e+07 NA NA NA NA NA
## std.dev 4.705796e+03 NA NA NA NA NA
## coef.var 1.132971e+00 NA NA NA NA NA
## state population city distance location_description latitude
## nbr.val NA 2.000000e+00 NA 2.000000 NA 2.00000000
## nbr.null NA 0.000000e+00 NA 0.000000 NA 0.00000000
## nbr.na NA 0.000000e+00 NA 0.000000 NA 0.00000000
## min NA 2.111000e+03 NA 1.567300 NA 11.90130000
## max NA 5.182000e+03 NA 14.493010 NA 12.12000000
## range NA 3.071000e+03 NA 12.925710 NA 0.21870000
## sum NA 7.293000e+03 NA 16.060310 NA 24.02130000
## median NA 3.646500e+03 NA 8.030155 NA 12.01065000
## mean NA 3.646500e+03 NA 8.030155 NA 12.01065000
## SE.mean NA 1.535500e+03 NA 6.462855 NA 0.10935000
## CI.mean.0.95 NA 1.951038e+04 NA 82.118359 NA 1.38942349
## var NA 4.715521e+06 NA 83.536990 NA 0.02391484
## std.dev NA 2.171525e+03 NA 9.139857 NA 0.15464425
## coef.var NA 5.955094e-01 NA 1.138192 NA 0.01287559
## longitude geolocation hazard_type landslide_type
## nbr.val 2.000000e+00 NA NA NA
## nbr.null 0.000000e+00 NA NA NA
## nbr.na 0.000000e+00 NA NA NA
## min -8.606000e+01 NA NA NA
## max -8.589000e+01 NA NA NA
## range 1.700000e-01 NA NA NA
## sum -1.719500e+02 NA NA NA
## median -8.597500e+01 NA NA NA
## mean -8.597500e+01 NA NA NA
## SE.mean 8.500000e-02 NA NA NA
## CI.mean.0.95 1.080027e+00 NA NA NA
## var 1.445000e-02 NA NA NA
## std.dev 1.202082e-01 NA NA NA
## coef.var -1.398176e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 2.000000 NA
## nbr.null NA NA NA 1 1.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 9.000000 NA
## range NA NA NA 0 9.000000 NA
## sum NA NA NA 0 9.000000 NA
## median NA NA NA 0 4.500000 NA
## mean NA NA NA 0 4.500000 NA
## SE.mean NA NA NA NA 4.500000 NA
## CI.mean.0.95 NA NA NA NaN 57.177921 NA
## var NA NA NA NA 40.500000 NA
## std.dev NA NA NA NA 6.363961 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 2.000000 2.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 9.758840 45.1205799
## max NA 90.241160 95.1205799
## range NA 80.482319 50.0000000
## sum NA 100.000000 140.2411597
## median NA 50.000000 70.1205799
## mean NA 50.000000 70.1205799
## SE.mean NA 40.241160 25.0000000
## CI.mean.0.95 NA 511.312414 317.6551184
## var NA 3238.701873 1250.0000000
## std.dev NA 56.909594 35.3553391
## coef.var NA 1.138192 0.5042077
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Managua (Nicaragua)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_NC <- subset(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
| 229 |
9/4/07 |
NA |
NA |
Nicaragua |
NI |
Atlántico Norte |
6315 |
Bonanza |
54.90196 |
NA |
13.6670 |
-84.2435 |
(13.667, -84.243499999999997) |
Landslide |
Complex |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
United Nations Development Programme - Relief Web |
http://www.reliefweb.int/ |
| 826 |
10/3/08 |
NA |
NA |
Nicaragua |
NI |
Masaya |
5182 |
Tisma |
14.49301 |
NA |
12.1200 |
-85.8900 |
(12.12, -85.89) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
9 |
CBC |
http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html |
| 2289 |
8/20/10 |
NA |
NA |
Nicaragua |
NI |
Managua |
16469 |
El Crucero |
5.84054 |
NA |
12.0420 |
-86.2998 |
(12.042, -86.299800000000005) |
Landslide |
Mudslide |
Medium |
Downpour |
NA |
NA |
3 |
NA |
NA |
| 2330 |
8/25/10 |
NA |
NA |
Nicaragua |
NI |
Jinotega |
2367 |
San José de Bocay |
1.36745 |
NA |
13.5317 |
-85.5325 |
(13.531700000000001, -85.532499999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
NA |
| 6089 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.14242 |
Unknown |
12.3535 |
-84.8095 |
(12.3535, -84.8095) |
Landslide |
Landslide |
Small |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
| 6090 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.24511 |
Unknown |
12.3521 |
-84.8080 |
(12.3521, -84.808000000000007) |
Landslide |
Landslide |
Medium |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
library(dplyr)
df_NC <- subset(df, state == "Managua")
knitr::kable(head(df_NC))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_NC <- df_NC %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_NC$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_NC, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_NC$distance
names(distance) <- df_NC$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Terrabona 18.92056 18.92056 62.32717 62.32717
## El Crucero 5.84054 24.76110 19.23962 81.56679
## Ciudad Sandino 5.59574 30.35684 18.43321 100.00000
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 66
## 1 |
## 1 | 9
head(df_NC)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7477 6/12/15 <NA> <NA> Nicaragua NI Managua 1902
## 2 2289 8/20/10 <NA> <NA> Nicaragua NI Managua 16469
## 3 6270 10/16/14 Night <NA> Nicaragua NI Managua 70013
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_NC))
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 66
## 1 |
## 1 | 9
stem(df_NC$"distance", scale = 2)
##
## The decimal point is at the |
##
## 4 | 68
## 6 |
## 8 |
## 10 |
## 12 |
## 14 |
## 16 |
## 18 | 9
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 5.59574 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 5.84054 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 18.92056 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 5.59574 |
1 |
| 5.84054 |
1 |
| 18.92056 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 5.59574 10.59574 15.59574 20.59574
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (5.6,10.6] |
1 |
0.5 |
1 |
| (10.6,15.6] |
0 |
0.0 |
1 |
| (15.6,20.6] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(5.6,10.6]","(10.6,15.6]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (5.6,10.6] |
1 |
| (10.6,15.6] |
0 |
| (15.6,20.6] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_NC)
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.289000e+03 NA NA NA NA NA
## max 7.477000e+03 NA NA NA NA NA
## range 5.188000e+03 NA NA NA NA NA
## sum 1.603600e+04 NA NA NA NA NA
## median 6.270000e+03 NA NA NA NA NA
## mean 5.345333e+03 NA NA NA NA NA
## SE.mean 1.567386e+03 NA NA NA NA NA
## CI.mean.0.95 6.743916e+03 NA NA NA NA NA
## var 7.370092e+06 NA NA NA NA NA
## std.dev 2.714791e+03 NA NA NA NA NA
## coef.var 5.078807e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.902000e+03 NA 5.5957400 NA
## max NA 7.001300e+04 NA 18.9205600 NA
## range NA 6.811100e+04 NA 13.3248200 NA
## sum NA 8.838400e+04 NA 30.3568400 NA
## median NA 1.646900e+04 NA 5.8405400 NA
## mean NA 2.946133e+04 NA 10.1189467 NA
## SE.mean NA 2.070731e+04 NA 4.4013740 NA
## CI.mean.0.95 NA 8.909635e+04 NA 18.9375839 NA
## var NA 1.286378e+09 NA 58.1162797 NA
## std.dev NA 3.586611e+04 NA 7.6234034 NA
## coef.var NA 1.217396e+00 NA 0.7533791 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.00000000 3.00000000 NA NA NA
## nbr.null 0.00000000 0.00000000 NA NA NA
## nbr.na 0.00000000 0.00000000 NA NA NA
## min 12.04200000 -86.29980000 NA NA NA
## max 12.57690000 -86.04180000 NA NA NA
## range 0.53490000 0.25800000 NA NA NA
## sum 36.73260000 -258.58250000 NA NA NA
## median 12.11370000 -86.24090000 NA NA NA
## mean 12.24420000 -86.19416667 NA NA NA
## SE.mean 0.16763272 0.07805768 NA NA NA
## CI.mean.0.95 0.72126540 0.33585508 NA NA NA
## var 0.08430219 0.01827900 NA NA NA
## std.dev 0.29034839 0.13519986 NA NA NA
## coef.var 0.02371314 -0.00156855 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 3.000000 NA
## nbr.null NA NA NA 2 1.000000 NA
## nbr.na NA NA NA 1 0.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 9.000000 NA
## range NA NA NA 0 9.000000 NA
## sum NA NA NA 0 12.000000 NA
## median NA NA NA 0 3.000000 NA
## mean NA NA NA 0 4.000000 NA
## SE.mean NA NA NA 0 2.645751 NA
## CI.mean.0.95 NA NA NA 0 11.383749 NA
## var NA NA NA 0 21.000000 NA
## std.dev NA NA NA 0 4.582576 NA
## coef.var NA NA NA NaN 1.145644 NA
## source_link prop ypos
## nbr.val NA 3.0000000 3.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 18.4332098 31.1635862
## max NA 62.3271724 90.7833951
## range NA 43.8939626 59.6198089
## sum NA 100.0000000 193.8939626
## median NA 19.2396178 71.9469813
## mean NA 33.3333333 64.6313209
## SE.mean NA 14.4987885 17.5951657
## CI.mean.0.95 NA 62.3832518 75.7058878
## var NA 630.6446011 928.7695694
## std.dev NA 25.1126383 30.4757210
## coef.var NA 0.7533791 0.4715318
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Jinotega (Nicaragua)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_NC <- subset(df, country_name == "Nicaragua")
knitr::kable(head(df_NC))
| 229 |
9/4/07 |
NA |
NA |
Nicaragua |
NI |
Atlántico Norte |
6315 |
Bonanza |
54.90196 |
NA |
13.6670 |
-84.2435 |
(13.667, -84.243499999999997) |
Landslide |
Complex |
Medium |
Tropical cyclone |
Hurricane Felix |
NA |
NA |
United Nations Development Programme - Relief Web |
http://www.reliefweb.int/ |
| 826 |
10/3/08 |
NA |
NA |
Nicaragua |
NI |
Masaya |
5182 |
Tisma |
14.49301 |
NA |
12.1200 |
-85.8900 |
(12.12, -85.89) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
9 |
CBC |
http://www.cbc.ca/world/story/2008/10/04/nicaragua-flooding.html |
| 2289 |
8/20/10 |
NA |
NA |
Nicaragua |
NI |
Managua |
16469 |
El Crucero |
5.84054 |
NA |
12.0420 |
-86.2998 |
(12.042, -86.299800000000005) |
Landslide |
Mudslide |
Medium |
Downpour |
NA |
NA |
3 |
NA |
NA |
| 2330 |
8/25/10 |
NA |
NA |
Nicaragua |
NI |
Jinotega |
2367 |
San José de Bocay |
1.36745 |
NA |
13.5317 |
-85.5325 |
(13.531700000000001, -85.532499999999999) |
Landslide |
Landslide |
Medium |
Downpour |
NA |
NA |
NA |
NA |
NA |
| 6089 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.14242 |
Unknown |
12.3535 |
-84.8095 |
(12.3535, -84.8095) |
Landslide |
Landslide |
Small |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
| 6090 |
6/23/14 |
NA |
NA |
Nicaragua |
NI |
Chontales |
5827 |
Santo Domingo |
31.24511 |
Unknown |
12.3521 |
-84.8080 |
(12.3521, -84.808000000000007) |
Landslide |
Landslide |
Medium |
Continuous rain |
NA |
0 |
0 |
Wilfried Strauch |
NA |
library(dplyr)
df_NC <- subset(df, state == "Jinotega")
knitr::kable(head(df_NC))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_NC, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_NC <- df_NC %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_NC$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_NC, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_NC$distance, frequency=12, start=2008)
knitr::kable(head(data))
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_NC$distance
names(distance) <- df_NC$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Wiwilí 25.815140 25.815140 87.120921 87.120921
## Jinotega 2.448800 28.263940 8.264209 95.385130
## San José de Bocay 1.367450 29.631390 4.614870 100.000000
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 12
## 0 |
## 1 |
## 1 |
## 2 |
## 2 | 6
head(df_NC)
## # A tibble: 3 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 7470 10/8/15 <NA> <NA> Nicaragua NI Jinotega 6955
## 2 2330 8/25/10 <NA> <NA> Nicaragua NI Jinotega 2367
## 3 7471 2/19/16 <NA> <NA> Nicaragua NI Jinotega 51073
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_NC))
stem(df_NC$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 12
## 0 |
## 1 |
## 1 |
## 2 |
## 2 | 6
stem(df_NC$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 12
## 0 |
## 1 |
## 1 |
## 2 |
## 2 | 6
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 1.36745 |
1 |
33.3 |
33.3 |
33.3 |
33.3 |
| 2.4488 |
1 |
33.3 |
33.3 |
66.7 |
66.7 |
| 25.81514 |
1 |
33.3 |
33.3 |
100.0 |
100.0 |
| Total |
3 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 4 obs. of 5 variables:
## $ n : num 1 1 1 3
## $ % : num 33.3 33.3 33.3 100
## $ val% : num 33.3 33.3 33.3 100
## $ %cum : num 33.3 66.7 100 100
## $ val%cum: num 33.3 66.7 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 1.36745 |
1 |
| 2.4488 |
1 |
| 25.81514 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 1.36745 10.36745 19.36745 28.36745
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (1.37,10.4] |
1 |
0.5 |
1 |
| (10.4,19.4] |
0 |
0.0 |
1 |
| (19.4,28.4] |
1 |
0.5 |
2 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(1.37,10.4]",..: 1 2 3
## $ Freq : int 1 0 1
## $ Rel_Freq: num 0.5 0 0.5
## $ Cum_Freq: int 1 1 2
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (1.37,10.4] |
1 |
| (10.4,19.4] |
0 |
| (19.4,28.4] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_NC)
## id date time continent_code country_name country_code
## nbr.val 3.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.330000e+03 NA NA NA NA NA
## max 7.471000e+03 NA NA NA NA NA
## range 5.141000e+03 NA NA NA NA NA
## sum 1.727100e+04 NA NA NA NA NA
## median 7.470000e+03 NA NA NA NA NA
## mean 5.757000e+03 NA NA NA NA NA
## SE.mean 1.713500e+03 NA NA NA NA NA
## CI.mean.0.95 7.372596e+03 NA NA NA NA NA
## var 8.808247e+06 NA NA NA NA NA
## std.dev 2.967869e+03 NA NA NA NA NA
## coef.var 5.155236e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 3.000000e+00 NA 3.000000 NA
## nbr.null NA 0.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 2.367000e+03 NA 1.367450 NA
## max NA 5.107300e+04 NA 25.815140 NA
## range NA 4.870600e+04 NA 24.447690 NA
## sum NA 6.039500e+04 NA 29.631390 NA
## median NA 6.955000e+03 NA 2.448800 NA
## mean NA 2.013167e+04 NA 9.877130 NA
## SE.mean NA 1.552726e+04 NA 7.975117 NA
## CI.mean.0.95 NA 6.680839e+04 NA 34.314157 NA
## var NA 7.232870e+08 NA 190.807452 NA
## std.dev NA 2.689400e+04 NA 13.813307 NA
## coef.var NA 1.335905e+00 NA 1.398514 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 3.00000000 3.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 13.08050000 -8.599250e+01 NA NA NA
## max 13.81760000 -8.553250e+01 NA NA NA
## range 0.73710000 4.600000e-01 NA NA NA
## sum 40.42980000 -2.572130e+02 NA NA NA
## median 13.53170000 -8.568800e+01 NA NA NA
## mean 13.47660000 -8.573767e+01 NA NA NA
## SE.mean 0.21455855 1.350927e-01 NA NA NA
## CI.mean.0.95 0.92317092 5.812568e-01 NA NA NA
## var 0.13810611 5.475008e-02 NA NA NA
## std.dev 0.37162630 2.339874e-01 NA NA NA
## coef.var 0.02757567 -2.729108e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 2 2 NA
## nbr.null NA NA NA 2 2 NA
## nbr.na NA NA NA 1 1 NA
## min NA NA NA 0 0 NA
## max NA NA NA 0 0 NA
## range NA NA NA 0 0 NA
## sum NA NA NA 0 0 NA
## median NA NA NA 0 0 NA
## mean NA NA NA 0 0 NA
## SE.mean NA NA NA 0 0 NA
## CI.mean.0.95 NA NA NA 0 0 NA
## var NA NA NA 0 0 NA
## std.dev NA NA NA 0 0 NA
## coef.var NA NA NA NaN NaN NA
## source_link prop ypos
## nbr.val NA 3.000000 3.0000000
## nbr.null NA 0.000000 0.0000000
## nbr.na NA 0.000000 0.0000000
## min NA 4.614870 43.5604607
## max NA 87.120921 95.8678955
## range NA 82.506052 52.3074348
## sum NA 100.000000 228.8567124
## median NA 8.264209 89.4283562
## mean NA 33.333333 76.2855708
## SE.mean NA 26.914419 16.4678124
## CI.mean.0.95 NA 115.803400 70.8552781
## var NA 2173.157894 813.5665396
## std.dev NA 46.617142 28.5230878
## coef.var NA 1.398514 0.3738989
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Costa Rica
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill= state, y=distance, x=country_name)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=state, y=distance, x=country_name)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=country_name, y=distance, fill=state)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(state)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=country_name, y=prop, fill=state)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 4.57763 |
| 9.56251 |
| 1.85787 |
| 16.24937 |
| 12.85801 |
| 0.25254 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por estados"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San José 22.32368000 22.32368000 3.45492296 3.45492296
## Heredia 21.95470000 44.27838000 3.39781780 6.85274076
## San José 21.67452000 65.95290000 3.35445576 10.20719651
## Puntarenas 20.06633000 86.01923000 3.10556433 13.31276084
## Heredia 19.54581000 105.56504000 3.02500608 16.33776693
## Heredia 19.51432000 125.07936000 3.02013254 19.35789946
## Cartago 19.28722000 144.36658000 2.98498542 22.34288488
## Puntarenas 18.00524000 162.37182000 2.78657986 25.12946474
## Guanacaste 17.65521000 180.02703000 2.73240749 27.86187223
## Limón 17.23264000 197.25967000 2.66700847 30.52888070
## San José 16.24937000 213.50904000 2.51483275 33.04371345
## San José 15.64997000 229.15901000 2.42206664 35.46578009
## Heredia 15.05161000 244.21062000 2.32946150 37.79524159
## Heredia 14.81614000 259.02676000 2.29301900 40.08826059
## Puntarenas 13.48919000 272.51595000 2.08765366 42.17591425
## San José 12.85801000 285.37396000 1.98996913 44.16588338
## Guanacaste 12.33807000 297.71203000 1.90950065 46.07538402
## Guanacaste 12.21952000 309.93155000 1.89115326 47.96653728
## Guanacaste 12.18115000 322.11270000 1.88521493 49.85175221
## Alajuela 11.96524000 334.07794000 1.85179963 51.70355185
## Puntarenas 11.74074000 345.81868000 1.81705490 53.52060675
## San José 11.31047000 357.12915000 1.75046419 55.27107094
## San José 10.73752000 367.86667000 1.66179162 56.93286257
## Alajuela 10.32968000 378.19635000 1.59867229 58.53153486
## Guanacaste 10.21631000 388.41266000 1.58112659 60.11266145
## Heredia 10.01310000 398.42576000 1.54967681 61.66233825
## San José 10.01198000 408.43774000 1.54950347 63.21184172
## Heredia 9.85736000 418.29510000 1.52557371 64.73741544
## Alajuela 9.84213000 428.13723000 1.52321664 66.26063208
## Cartago 9.63616000 437.77339000 1.49133971 67.75197179
## Alajuela 9.61692000 447.39031000 1.48836203 69.24033382
## San José 9.56251000 456.95282000 1.47994127 70.72027510
## San José 9.53611000 466.48893000 1.47585548 72.19613057
## Cartago 9.53493000 476.02386000 1.47567286 73.67180343
## Puntarenas 8.92048000 484.94434000 1.38057754 75.05238097
## San José 8.39161000 493.33595000 1.29872701 76.35110797
## San José 8.27042000 501.60637000 1.27997104 77.63107902
## San José 8.21372000 509.82009000 1.27119587 78.90227489
## Puntarenas 7.87044000 517.69053000 1.21806816 80.12034305
## Alajuela 6.92174000 524.61227000 1.07124267 81.19158572
## Alajuela 6.88715000 531.49942000 1.06588935 82.25747506
## Alajuela 6.80061000 538.30003000 1.05249599 83.30997105
## San José 6.49523000 544.79526000 1.00523387 84.31520492
## Alajuela 5.96634000 550.76160000 0.92338024 85.23858516
## Alajuela 5.95519000 556.71679000 0.92165461 86.16023978
## Alajuela 5.57523000 562.29202000 0.86285013 87.02308991
## Alajuela 5.43516000 567.72718000 0.84117220 87.86426211
## Limón 5.36500000 573.09218000 0.83031390 88.69457601
## Cartago 5.15142000 578.24360000 0.79725920 89.49183521
## Alajuela 5.12667000 583.37027000 0.79342877 90.28526397
## Puntarenas 4.93053000 588.30080000 0.76307317 91.04833715
## San José 4.89954000 593.20034000 0.75827701 91.80661415
## Alajuela 4.87432000 598.07466000 0.75437384 92.56098799
## San José 4.57763000 602.65229000 0.70845663 93.26944461
## Alajuela 4.24199000 606.89428000 0.65651132 93.92595593
## Puntarenas 3.82425000 610.71853000 0.59185982 94.51781575
## San José 3.71407000 614.43260000 0.57480782 95.09262357
## San José 3.67691000 618.10951000 0.56905675 95.66168032
## Alajuela 3.21979000 621.32930000 0.49831060 96.15999092
## Alajuela 3.08916000 624.41846000 0.47809366 96.63808457
## Alajuela 3.08459000 627.50305000 0.47738638 97.11547096
## Cartago 3.07297000 630.57602000 0.47558801 97.59105897
## Cartago 2.94804000 633.52406000 0.45625323 98.04731220
## San José 2.92605000 636.45011000 0.45284995 98.50016215
## Alajuela 2.08469000 638.53480000 0.32263692 98.82279907
## San José 1.85787000 640.39267000 0.28753314 99.11033220
## Alajuela 1.47396000 641.86663000 0.22811733 99.33844953
## San José 1.16705000 643.03368000 0.18061842 99.51906795
## San José 0.72957000 643.76325000 0.11291186 99.63197981
## Alajuela 0.70048000 644.46373000 0.10840974 99.74038955
## San José 0.55804000 645.02177000 0.08636503 99.82675458
## Puntarenas 0.35225000 645.37402000 0.05451595 99.88127053
## Heredia 0.26208000 645.63610000 0.04056080 99.92183132
## San José 0.25254000 645.88864000 0.03908434 99.96091566
## San José 0.25254000 646.14118000 0.03908434 100.00000000
stem(df_CR$"distance")
##
## The decimal point is at the |
##
## 0 | 3334677259
## 2 | 1991112778
## 4 | 2699912446
## 6 | 0058999
## 8 | 23495566689
## 10 | 0023737
## 12 | 022395
## 14 | 816
## 16 | 227
## 18 | 0355
## 20 | 17
## 22 | 03
head(df_CR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 299 10/9/07 <NA> <NA> Costa Rica CR San José 3072
## 2 302 10/11/07 <NA> <NA> Costa Rica CR San José 26669
## 3 776 9/6/08 <NA> <NA> Costa Rica CR San José 10028
## 4 838 10/12/08 <NA> <NA> Costa Rica CR San José 34877
## 5 839 10/12/08 <NA> <NA> Costa Rica CR San José 8292
## 6 2526 10/1/10 <NA> <NA> Costa Rica CR San José 2833
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is at the |
##
## 0 | 3334677259
## 2 | 1991112778
## 4 | 2699912446
## 6 | 0058999
## 8 | 23495566689
## 10 | 0023737
## 12 | 022395
## 14 | 816
## 16 | 227
## 18 | 0355
## 20 | 17
## 22 | 03
stem(df_CR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 3334677259
## 2 | 1991112778
## 4 | 2699912446
## 6 | 0058999
## 8 | 23495566689
## 10 | 0023737
## 12 | 022395
## 14 | 816
## 16 | 227
## 18 | 0355
## 20 | 17
## 22 | 03
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.25254 |
2 |
2.7 |
2.7 |
2.7 |
2.7 |
| 0.26208 |
1 |
1.3 |
1.3 |
4.0 |
4.0 |
| 0.35225 |
1 |
1.3 |
1.3 |
5.3 |
5.3 |
| 0.55804 |
1 |
1.3 |
1.3 |
6.7 |
6.7 |
| 0.70048 |
1 |
1.3 |
1.3 |
8.0 |
8.0 |
| 0.72957 |
1 |
1.3 |
1.3 |
9.3 |
9.3 |
| 1.16705 |
1 |
1.3 |
1.3 |
10.7 |
10.7 |
| 1.47396 |
1 |
1.3 |
1.3 |
12.0 |
12.0 |
| 1.85787 |
1 |
1.3 |
1.3 |
13.3 |
13.3 |
| 2.08469 |
1 |
1.3 |
1.3 |
14.7 |
14.7 |
| 2.92605 |
1 |
1.3 |
1.3 |
16.0 |
16.0 |
| 2.94804 |
1 |
1.3 |
1.3 |
17.3 |
17.3 |
| 3.07297 |
1 |
1.3 |
1.3 |
18.7 |
18.7 |
| 3.08459 |
1 |
1.3 |
1.3 |
20.0 |
20.0 |
| 3.08916 |
1 |
1.3 |
1.3 |
21.3 |
21.3 |
| 3.21979 |
1 |
1.3 |
1.3 |
22.7 |
22.7 |
| 3.67691 |
1 |
1.3 |
1.3 |
24.0 |
24.0 |
| 3.71407 |
1 |
1.3 |
1.3 |
25.3 |
25.3 |
| 3.82425 |
1 |
1.3 |
1.3 |
26.7 |
26.7 |
| 4.24199 |
1 |
1.3 |
1.3 |
28.0 |
28.0 |
| 4.57763 |
1 |
1.3 |
1.3 |
29.3 |
29.3 |
| 4.87432 |
1 |
1.3 |
1.3 |
30.7 |
30.7 |
| 4.89954 |
1 |
1.3 |
1.3 |
32.0 |
32.0 |
| 4.93053 |
1 |
1.3 |
1.3 |
33.3 |
33.3 |
| 5.12667 |
1 |
1.3 |
1.3 |
34.7 |
34.7 |
| 5.15142 |
1 |
1.3 |
1.3 |
36.0 |
36.0 |
| 5.365 |
1 |
1.3 |
1.3 |
37.3 |
37.3 |
| 5.43516 |
1 |
1.3 |
1.3 |
38.7 |
38.7 |
| 5.57523 |
1 |
1.3 |
1.3 |
40.0 |
40.0 |
| 5.95519 |
1 |
1.3 |
1.3 |
41.3 |
41.3 |
| 5.96634 |
1 |
1.3 |
1.3 |
42.7 |
42.7 |
| 6.49523 |
1 |
1.3 |
1.3 |
44.0 |
44.0 |
| 6.80061 |
1 |
1.3 |
1.3 |
45.3 |
45.3 |
| 6.88715 |
1 |
1.3 |
1.3 |
46.7 |
46.7 |
| 6.92174 |
1 |
1.3 |
1.3 |
48.0 |
48.0 |
| 7.87044 |
1 |
1.3 |
1.3 |
49.3 |
49.3 |
| 8.21372 |
1 |
1.3 |
1.3 |
50.7 |
50.7 |
| 8.27042 |
1 |
1.3 |
1.3 |
52.0 |
52.0 |
| 8.39161 |
1 |
1.3 |
1.3 |
53.3 |
53.3 |
| 8.92048 |
1 |
1.3 |
1.3 |
54.7 |
54.7 |
| 9.53493 |
1 |
1.3 |
1.3 |
56.0 |
56.0 |
| 9.53611 |
1 |
1.3 |
1.3 |
57.3 |
57.3 |
| 9.56251 |
1 |
1.3 |
1.3 |
58.7 |
58.7 |
| 9.61692 |
1 |
1.3 |
1.3 |
60.0 |
60.0 |
| 9.63616 |
1 |
1.3 |
1.3 |
61.3 |
61.3 |
| 9.84213 |
1 |
1.3 |
1.3 |
62.7 |
62.7 |
| 9.85736 |
1 |
1.3 |
1.3 |
64.0 |
64.0 |
| 10.01198 |
1 |
1.3 |
1.3 |
65.3 |
65.3 |
| 10.0131 |
1 |
1.3 |
1.3 |
66.7 |
66.7 |
| 10.21631 |
1 |
1.3 |
1.3 |
68.0 |
68.0 |
| 10.32968 |
1 |
1.3 |
1.3 |
69.3 |
69.3 |
| 10.73752 |
1 |
1.3 |
1.3 |
70.7 |
70.7 |
| 11.31047 |
1 |
1.3 |
1.3 |
72.0 |
72.0 |
| 11.74074 |
1 |
1.3 |
1.3 |
73.3 |
73.3 |
| 11.96524 |
1 |
1.3 |
1.3 |
74.7 |
74.7 |
| 12.18115 |
1 |
1.3 |
1.3 |
76.0 |
76.0 |
| 12.21952 |
1 |
1.3 |
1.3 |
77.3 |
77.3 |
| 12.33807 |
1 |
1.3 |
1.3 |
78.7 |
78.7 |
| 12.85801 |
1 |
1.3 |
1.3 |
80.0 |
80.0 |
| 13.48919 |
1 |
1.3 |
1.3 |
81.3 |
81.3 |
| 14.81614 |
1 |
1.3 |
1.3 |
82.7 |
82.7 |
| 15.05161 |
1 |
1.3 |
1.3 |
84.0 |
84.0 |
| 15.64997 |
1 |
1.3 |
1.3 |
85.3 |
85.3 |
| 16.24937 |
1 |
1.3 |
1.3 |
86.7 |
86.7 |
| 17.23264 |
1 |
1.3 |
1.3 |
88.0 |
88.0 |
| 17.65521 |
1 |
1.3 |
1.3 |
89.3 |
89.3 |
| 18.00524 |
1 |
1.3 |
1.3 |
90.7 |
90.7 |
| 19.28722 |
1 |
1.3 |
1.3 |
92.0 |
92.0 |
| 19.51432 |
1 |
1.3 |
1.3 |
93.3 |
93.3 |
| 19.54581 |
1 |
1.3 |
1.3 |
94.7 |
94.7 |
| 20.06633 |
1 |
1.3 |
1.3 |
96.0 |
96.0 |
| 21.67452 |
1 |
1.3 |
1.3 |
97.3 |
97.3 |
| 21.9547 |
1 |
1.3 |
1.3 |
98.7 |
98.7 |
| 22.32368 |
1 |
1.3 |
1.3 |
100.0 |
100.0 |
| Total |
75 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 75 obs. of 5 variables:
## $ n : num 2 1 1 1 1 1 1 1 1 1 ...
## $ % : num 2.7 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 ...
## $ val% : num 2.7 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 1.3 ...
## $ %cum : num 2.7 4 5.3 6.7 8 9.3 10.7 12 13.3 14.7 ...
## $ val%cum: num 2.7 4 5.3 6.7 8 9.3 10.7 12 13.3 14.7 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.25254 |
2 |
| 0.26208 |
1 |
| 0.35225 |
1 |
| 0.55804 |
1 |
| 0.70048 |
1 |
| 0.72957 |
1 |
| 1.16705 |
1 |
| 1.47396 |
1 |
| 1.85787 |
1 |
| 2.08469 |
1 |
| 2.92605 |
1 |
| 2.94804 |
1 |
| 3.07297 |
1 |
| 3.08459 |
1 |
| 3.08916 |
1 |
| 3.21979 |
1 |
| 3.67691 |
1 |
| 3.71407 |
1 |
| 3.82425 |
1 |
| 4.24199 |
1 |
| 4.57763 |
1 |
| 4.87432 |
1 |
| 4.89954 |
1 |
| 4.93053 |
1 |
| 5.12667 |
1 |
| 5.15142 |
1 |
| 5.365 |
1 |
| 5.43516 |
1 |
| 5.57523 |
1 |
| 5.95519 |
1 |
| 5.96634 |
1 |
| 6.49523 |
1 |
| 6.80061 |
1 |
| 6.88715 |
1 |
| 6.92174 |
1 |
| 7.87044 |
1 |
| 8.21372 |
1 |
| 8.27042 |
1 |
| 8.39161 |
1 |
| 8.92048 |
1 |
| 9.53493 |
1 |
| 9.53611 |
1 |
| 9.56251 |
1 |
| 9.61692 |
1 |
| 9.63616 |
1 |
| 9.84213 |
1 |
| 9.85736 |
1 |
| 10.01198 |
1 |
| 10.0131 |
1 |
| 10.21631 |
1 |
| 10.32968 |
1 |
| 10.73752 |
1 |
| 11.31047 |
1 |
| 11.74074 |
1 |
| 11.96524 |
1 |
| 12.18115 |
1 |
| 12.21952 |
1 |
| 12.33807 |
1 |
| 12.85801 |
1 |
| 13.48919 |
1 |
| 14.81614 |
1 |
| 15.05161 |
1 |
| 15.64997 |
1 |
| 16.24937 |
1 |
| 17.23264 |
1 |
| 17.65521 |
1 |
| 18.00524 |
1 |
| 19.28722 |
1 |
| 19.51432 |
1 |
| 19.54581 |
1 |
| 20.06633 |
1 |
| 21.67452 |
1 |
| 21.9547 |
1 |
| 22.32368 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.25254 4.25254 8.25254 12.25254 16.25254 20.25254 24.25254
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.253,4.25] |
19 |
0.2602740 |
19 |
| (4.25,8.25] |
17 |
0.2328767 |
36 |
| (8.25,12.3] |
20 |
0.2739726 |
56 |
| (12.3,16.3] |
7 |
0.0958904 |
63 |
| (16.3,20.3] |
7 |
0.0958904 |
70 |
| (20.3,24.3] |
3 |
0.0410959 |
73 |
str(Freq_table)
## 'data.frame': 6 obs. of 4 variables:
## $ distance: Factor w/ 6 levels "(0.253,4.25]",..: 1 2 3 4 5 6
## $ Freq : int 19 17 20 7 7 3
## $ Rel_Freq: num 0.2603 0.2329 0.274 0.0959 0.0959 ...
## $ Cum_Freq: int 19 36 56 63 70 73
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.253,4.25] |
19 |
| (4.25,8.25] |
17 |
| (8.25,12.3] |
20 |
| (12.3,16.3] |
7 |
| (16.3,20.3] |
7 |
| (20.3,24.3] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## id date time continent_code country_name country_code
## nbr.val 7.500000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.490000e+02 NA NA NA NA NA
## max 7.496000e+03 NA NA NA NA NA
## range 7.247000e+03 NA NA NA NA NA
## sum 3.121850e+05 NA NA NA NA NA
## median 3.762000e+03 NA NA NA NA NA
## mean 4.162467e+03 NA NA NA NA NA
## SE.mean 2.838021e+02 NA NA NA NA NA
## CI.mean.0.95 5.654880e+02 NA NA NA NA NA
## var 6.040771e+06 NA NA NA NA NA
## std.dev 2.457798e+03 NA NA NA NA NA
## coef.var 5.904667e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 7.500000e+01 NA 75.0000000 NA
## nbr.null NA 4.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.2525400 NA
## max NA 3.350070e+05 NA 22.3236800 NA
## range NA 3.350070e+05 NA 22.0711400 NA
## sum NA 1.127522e+06 NA 646.1411800 NA
## median NA 6.784000e+03 NA 8.2137200 NA
## mean NA 1.503363e+04 NA 8.6152157 NA
## SE.mean NA 4.498709e+03 NA 0.6924979 NA
## CI.mean.0.95 NA 8.963873e+03 NA 1.3798322 NA
## var NA 1.517878e+09 NA 35.9665004 NA
## std.dev NA 3.895996e+04 NA 5.9972077 NA
## coef.var NA 2.591521e+00 NA 0.6961181 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 75.00000000 7.500000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 8.61170000 -8.535550e+01 NA NA NA
## max 10.89160000 -8.294180e+01 NA NA NA
## range 2.27990000 2.413700e+00 NA NA NA
## sum 740.80410000 -6.307925e+03 NA NA NA
## median 9.96430000 -8.408790e+01 NA NA NA
## mean 9.87738800 -8.410567e+01 NA NA NA
## SE.mean 0.05198610 5.429493e-02 NA NA NA
## CI.mean.0.95 0.10358456 1.081850e-01 NA NA NA
## var 0.20269158 2.210955e-01 NA NA NA
## std.dev 0.45021281 4.702079e-01 NA NA NA
## coef.var 0.04558015 -5.590680e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 27.0000000 60.0000000
## nbr.null NA NA NA 26.0000000 50.0000000
## nbr.na NA NA NA 48.0000000 15.0000000
## min NA NA NA 0.0000000 0.0000000
## max NA NA NA 3.0000000 23.0000000
## range NA NA NA 3.0000000 23.0000000
## sum NA NA NA 3.0000000 61.0000000
## median NA NA NA 0.0000000 0.0000000
## mean NA NA NA 0.1111111 1.0166667
## SE.mean NA NA NA 0.1111111 0.4750805
## CI.mean.0.95 NA NA NA 0.2283922 0.9506339
## var NA NA NA 0.3333333 13.5420904
## std.dev NA NA NA 0.5773503 3.6799579
## coef.var NA NA NA 5.1961524 3.6196308
## source_name source_link prop ypos
## nbr.val NA NA 75.00000000 75.0000000
## nbr.null NA NA 0.00000000 0.0000000
## nbr.na NA NA 0.00000000 0.0000000
## min NA NA 0.03908434 0.3542283
## max NA NA 3.45492296 99.8386815
## range NA NA 3.41583863 99.4844532
## sum NA NA 100.00000000 3898.8052162
## median NA NA 1.27119587 49.1708732
## mean NA NA 1.33333333 51.9840695
## SE.mean NA NA 0.10717439 3.7309554
## CI.mean.0.95 NA NA 0.21354964 7.4340909
## var NA NA 0.86147631 1044.0021043
## std.dev NA NA 0.92815748 32.3110214
## coef.var NA NA 0.69611811 0.6215562
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Heredia (Costa Rica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
library(dplyr)
df_CR <- subset(df, state == "Heredia")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 21.95470 |
| 9.85736 |
| 0.26208 |
| 10.01310 |
| 19.51432 |
| 14.81614 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Santo Domingo 21.954700 21.954700 19.776315 19.776315
## Ángeles 19.545810 41.500510 17.606440 37.382755
## Ángeles 19.514320 61.014830 17.578074 54.960829
## Ángeles 15.051610 76.066440 13.558162 68.518991
## Ángeles 14.816140 90.882580 13.346056 81.865047
## Dulce Nombre de Jesus 10.013100 100.895680 9.019582 90.884629
## Santo Domingo 9.857360 110.753040 8.879295 99.763924
## Heredia 0.262080 111.015120 0.236076 100.000000
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
head(df_CR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4358 5/13/12 <NA> <NA> Costa Rica CR Here~ 5745
## 2 5541 9/16/13 <NA> <NA> Costa Rica CR Here~ 5745
## 3 249 9/9/07 <NA> <NA> Costa Rica CR Here~ 21947
## 4 6696 12/13/14 Night <NA> Costa Rica CR Here~ 0
## 5 1786 4/27/10 Earl~ <NA> Costa Rica CR Here~ 1355
## 6 2598 10/15/10 <NA> <NA> Costa Rica CR Here~ 1355
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
stem(df_CR$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 0
## 0 |
## 1 | 00
## 1 | 55
## 2 | 002
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.26208 |
1 |
12.5 |
12.5 |
12.5 |
12.5 |
| 9.85736 |
1 |
12.5 |
12.5 |
25.0 |
25.0 |
| 10.0131 |
1 |
12.5 |
12.5 |
37.5 |
37.5 |
| 14.81614 |
1 |
12.5 |
12.5 |
50.0 |
50.0 |
| 15.05161 |
1 |
12.5 |
12.5 |
62.5 |
62.5 |
| 19.51432 |
1 |
12.5 |
12.5 |
75.0 |
75.0 |
| 19.54581 |
1 |
12.5 |
12.5 |
87.5 |
87.5 |
| 21.9547 |
1 |
12.5 |
12.5 |
100.0 |
100.0 |
| Total |
8 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 9 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 8
## $ % : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ val% : num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 100
## $ %cum : num 12.5 25 37.5 50 62.5 75 87.5 100 100
## $ val%cum: num 12.5 25 37.5 50 62.5 75 87.5 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.26208 |
1 |
| 9.85736 |
1 |
| 10.0131 |
1 |
| 14.81614 |
1 |
| 15.05161 |
1 |
| 19.51432 |
1 |
| 19.54581 |
1 |
| 21.9547 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.26208 6.26208 12.26208 18.26208 24.26208
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.262,6.26] |
0 |
0.0000000 |
0 |
| (6.26,12.3] |
2 |
0.2857143 |
2 |
| (12.3,18.3] |
2 |
0.2857143 |
4 |
| (18.3,24.3] |
3 |
0.4285714 |
7 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.262,6.26]",..: 1 2 3 4
## $ Freq : int 0 2 2 3
## $ Rel_Freq: num 0 0.286 0.286 0.429
## $ Cum_Freq: int 0 2 4 7
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.262,6.26] |
0 |
| (6.26,12.3] |
2 |
| (12.3,18.3] |
2 |
| (18.3,24.3] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 8.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.490000e+02 NA NA NA NA NA
## max 6.696000e+03 NA NA NA NA NA
## range 6.447000e+03 NA NA NA NA NA
## sum 2.744200e+04 NA NA NA NA NA
## median 3.107000e+03 NA NA NA NA NA
## mean 3.430250e+03 NA NA NA NA NA
## SE.mean 7.315967e+02 NA NA NA NA NA
## CI.mean.0.95 1.729951e+03 NA NA NA NA NA
## var 4.281870e+06 NA NA NA NA NA
## std.dev 2.069268e+03 NA NA NA NA NA
## coef.var 6.032412e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 8.000000e+00 NA 8.0000000 NA
## nbr.null NA 1.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 0.000000e+00 NA 0.2620800 NA
## max NA 2.194700e+04 NA 21.9547000 NA
## range NA 2.194700e+04 NA 21.6926200 NA
## sum NA 3.885700e+04 NA 111.0151200 NA
## median NA 1.355000e+03 NA 14.9338750 NA
## mean NA 4.857125e+03 NA 13.8768900 NA
## SE.mean NA 2.557523e+03 NA 2.4924134 NA
## CI.mean.0.95 NA 6.047580e+03 NA 5.8936213 NA
## var NA 5.232738e+07 NA 49.6969984 NA
## std.dev NA 7.233767e+03 NA 7.0496098 NA
## coef.var NA 1.489310e+00 NA 0.5080108 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 8.000000000 8.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 10.000000000 -8.414890e+01 NA NA NA
## max 10.205400000 -8.390410e+01 NA NA NA
## range 0.205400000 2.448000e-01 NA NA NA
## sum 81.063300000 -6.720410e+02 NA NA NA
## median 10.144250000 -8.397730e+01 NA NA NA
## mean 10.132912500 -8.400512e+01 NA NA NA
## SE.mean 0.022739522 2.987758e-02 NA NA NA
## CI.mean.0.95 0.053770426 7.064924e-02 NA NA NA
## var 0.004136687 7.141356e-03 NA NA NA
## std.dev 0.064317081 8.450655e-02 NA NA NA
## coef.var 0.006347344 -1.005969e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 6.0000000 NA
## nbr.null NA NA NA 1 5.0000000 NA
## nbr.na NA NA NA 7 2.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 2.0000000 NA
## range NA NA NA 0 2.0000000 NA
## sum NA NA NA 0 2.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.3333333 NA
## SE.mean NA NA NA NA 0.3333333 NA
## CI.mean.0.95 NA NA NA NaN 0.8568606 NA
## var NA NA NA NA 0.6666667 NA
## std.dev NA NA NA NA 0.8164966 NA
## coef.var NA NA NA NA 2.4494897 NA
## source_link prop ypos
## nbr.val NA 8.0000000 8.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.2360760 9.8881576
## max NA 19.7763152 93.2209189
## range NA 19.5402392 83.3327613
## sum NA 100.0000000 376.0014582
## median NA 13.4521090 40.0508913
## mean NA 12.5000000 47.0001823
## SE.mean NA 2.2451117 10.1053438
## CI.mean.0.95 NA 5.3088456 23.8953410
## var NA 40.3242124 816.9437832
## std.dev NA 6.3501348 28.5822285
## coef.var NA 0.5080108 0.6081302
boxplot(data, horizontal=TRUE, col='green')

Gráfico para San José (Costa Rica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
library(dplyr)
df_CR <- subset(df, state == "San José")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 6.49523 |
| 12.85801 |
| 8.27042 |
| 0.55804 |
| 0.72957 |
| 1.16705 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 200, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## San José 22.3236800 22.3236800 11.3956252 11.3956252
## San José 21.6745200 43.9982000 11.0642469 22.4598721
## San José 16.2493700 60.2475700 8.2948569 30.7547290
## San José 15.6499700 75.8975400 7.9888797 38.7436087
## San José 12.8580100 88.7555500 6.5636608 45.3072695
## San José 11.3104700 100.0660200 5.7736842 51.0809536
## San José 10.7375200 110.8035400 5.4812089 56.5621626
## San José 10.0119800 120.8155200 5.1108407 61.6730032
## San José 9.5625100 130.3780300 4.8813986 66.5544018
## San José 9.5361100 139.9141400 4.8679221 71.4223240
## San José 8.3916100 148.3057500 4.2836863 75.7060103
## San José 8.2704200 156.5761700 4.2218222 79.9278325
## San José 8.2137200 164.7898900 4.1928784 84.1207108
## San José 6.4952300 171.2851200 3.3156364 87.4363473
## San José 4.8995400 176.1846600 2.5010805 89.9374278
## San José 4.5776300 180.7622900 2.3367543 92.2741821
## San José 3.7140700 184.4763600 1.8959307 94.1701128
## San José 3.6769100 188.1532700 1.8769615 96.0470743
## San José 2.9260500 191.0793200 1.4936681 97.5407425
## San José 1.8578700 192.9371900 0.9483916 98.4891341
## San José 1.1670500 194.1042400 0.5957470 99.0848810
## San José 0.7295700 194.8338100 0.3724254 99.4573065
## San José 0.5580400 195.3918500 0.2848641 99.7421705
## San José 0.2525400 195.6443900 0.1289147 99.8710853
## San José 0.2525400 195.8969300 0.1289147 100.0000000
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 001112344
## 0 | 556888
## 1 | 000113
## 1 | 66
## 2 | 22
head(df_CR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2686 11/4/10 <NA> <NA> Costa Rica CR San José 22433
## 2 839 10/12/08 <NA> <NA> Costa Rica CR San José 8292
## 3 7496 11/11/15 <NA> <NA> Costa Rica CR San José 8292
## 4 2681 11/4/10 <NA> <NA> Costa Rica CR San José 4255
## 5 7444 10/29/15 <NA> <NA> Costa Rica CR San José 26047
## 6 7494 9/25/15 <NA> <NA> Costa Rica CR San José 335007
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 001112344
## 0 | 556888
## 1 | 000113
## 1 | 66
## 2 | 22
stem(df_CR$"distance", scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 001112344
## 0 | 556888
## 1 | 000113
## 1 | 66
## 2 | 22
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.25254 |
2 |
8 |
8 |
8 |
8 |
| 0.55804 |
1 |
4 |
4 |
12 |
12 |
| 0.72957 |
1 |
4 |
4 |
16 |
16 |
| 1.16705 |
1 |
4 |
4 |
20 |
20 |
| 1.85787 |
1 |
4 |
4 |
24 |
24 |
| 2.92605 |
1 |
4 |
4 |
28 |
28 |
| 3.67691 |
1 |
4 |
4 |
32 |
32 |
| 3.71407 |
1 |
4 |
4 |
36 |
36 |
| 4.57763 |
1 |
4 |
4 |
40 |
40 |
| 4.89954 |
1 |
4 |
4 |
44 |
44 |
| 6.49523 |
1 |
4 |
4 |
48 |
48 |
| 8.21372 |
1 |
4 |
4 |
52 |
52 |
| 8.27042 |
1 |
4 |
4 |
56 |
56 |
| 8.39161 |
1 |
4 |
4 |
60 |
60 |
| 9.53611 |
1 |
4 |
4 |
64 |
64 |
| 9.56251 |
1 |
4 |
4 |
68 |
68 |
| 10.01198 |
1 |
4 |
4 |
72 |
72 |
| 10.73752 |
1 |
4 |
4 |
76 |
76 |
| 11.31047 |
1 |
4 |
4 |
80 |
80 |
| 12.85801 |
1 |
4 |
4 |
84 |
84 |
| 15.64997 |
1 |
4 |
4 |
88 |
88 |
| 16.24937 |
1 |
4 |
4 |
92 |
92 |
| 21.67452 |
1 |
4 |
4 |
96 |
96 |
| 22.32368 |
1 |
4 |
4 |
100 |
100 |
| Total |
25 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 25 obs. of 5 variables:
## $ n : num 2 1 1 1 1 1 1 1 1 1 ...
## $ % : num 8 4 4 4 4 4 4 4 4 4 ...
## $ val% : num 8 4 4 4 4 4 4 4 4 4 ...
## $ %cum : num 8 12 16 20 24 28 32 36 40 44 ...
## $ val%cum: num 8 12 16 20 24 28 32 36 40 44 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.25254 |
2 |
| 0.55804 |
1 |
| 0.72957 |
1 |
| 1.16705 |
1 |
| 1.85787 |
1 |
| 2.92605 |
1 |
| 3.67691 |
1 |
| 3.71407 |
1 |
| 4.57763 |
1 |
| 4.89954 |
1 |
| 6.49523 |
1 |
| 8.21372 |
1 |
| 8.27042 |
1 |
| 8.39161 |
1 |
| 9.53611 |
1 |
| 9.56251 |
1 |
| 10.01198 |
1 |
| 10.73752 |
1 |
| 11.31047 |
1 |
| 12.85801 |
1 |
| 15.64997 |
1 |
| 16.24937 |
1 |
| 21.67452 |
1 |
| 22.32368 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.25254 5.25254 10.25254 15.25254 20.25254 25.25254
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.253,5.25] |
9 |
0.3913043 |
9 |
| (5.25,10.3] |
7 |
0.3043478 |
16 |
| (10.3,15.3] |
3 |
0.1304348 |
19 |
| (15.3,20.3] |
2 |
0.0869565 |
21 |
| (20.3,25.3] |
2 |
0.0869565 |
23 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.253,5.25]",..: 1 2 3 4 5
## $ Freq : int 9 7 3 2 2
## $ Rel_Freq: num 0.391 0.304 0.13 0.087 0.087
## $ Cum_Freq: int 9 16 19 21 23
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.253,5.25] |
9 |
| (5.25,10.3] |
7 |
| (10.3,15.3] |
3 |
| (15.3,20.3] |
2 |
| (20.3,25.3] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## id date time continent_code country_name country_code
## nbr.val 2.500000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 2.990000e+02 NA NA NA NA NA
## max 7.496000e+03 NA NA NA NA NA
## range 7.197000e+03 NA NA NA NA NA
## sum 1.039670e+05 NA NA NA NA NA
## median 2.692000e+03 NA NA NA NA NA
## mean 4.158680e+03 NA NA NA NA NA
## SE.mean 5.330458e+02 NA NA NA NA NA
## CI.mean.0.95 1.100153e+03 NA NA NA NA NA
## var 7.103446e+06 NA NA NA NA NA
## std.dev 2.665229e+03 NA NA NA NA NA
## coef.var 6.408834e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.500000e+01 NA 25.000000 NA
## nbr.null NA 3.000000e+00 NA 0.000000 NA
## nbr.na NA 0.000000e+00 NA 0.000000 NA
## min NA 0.000000e+00 NA 0.252540 NA
## max NA 3.350070e+05 NA 22.323680 NA
## range NA 3.350070e+05 NA 22.071140 NA
## sum NA 6.934110e+05 NA 195.896930 NA
## median NA 1.002800e+04 NA 8.213720 NA
## mean NA 2.773644e+04 NA 7.835877 NA
## SE.mean NA 1.308803e+04 NA 1.270388 NA
## CI.mean.0.95 NA 2.701236e+04 NA 2.621953 NA
## var NA 4.282410e+09 NA 40.347169 NA
## std.dev NA 6.544013e+04 NA 6.351942 NA
## coef.var NA 2.359356e+00 NA 0.810623 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 25.00000000 2.500000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 9.19220000 -8.439700e+01 NA NA NA
## max 10.14950000 -8.355650e+01 NA NA NA
## range 0.95730000 8.405000e-01 NA NA NA
## sum 242.83950000 -2.098785e+03 NA NA NA
## median 9.77890000 -8.396730e+01 NA NA NA
## mean 9.71358000 -8.395140e+01 NA NA NA
## SE.mean 0.06054644 4.905849e-02 NA NA NA
## CI.mean.0.95 0.12496171 1.012517e-01 NA NA NA
## var 0.09164678 6.016838e-02 NA NA NA
## std.dev 0.30273219 2.452924e-01 NA NA NA
## coef.var 0.03116587 -2.921839e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 10 21.000000 NA
## nbr.null NA NA NA 10 17.000000 NA
## nbr.na NA NA NA 15 4.000000 NA
## min NA NA NA 0 0.000000 NA
## max NA NA NA 0 23.000000 NA
## range NA NA NA 0 23.000000 NA
## sum NA NA NA 0 40.000000 NA
## median NA NA NA 0 0.000000 NA
## mean NA NA NA 0 1.904762 NA
## SE.mean NA NA NA 0 1.172918 NA
## CI.mean.0.95 NA NA NA 0 2.446664 NA
## var NA NA NA 0 28.890476 NA
## std.dev NA NA NA 0 5.374986 NA
## coef.var NA NA NA NaN 2.821868 NA
## source_link prop ypos
## nbr.val NA 25.0000000 25.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.1289147 1.6578182
## max NA 11.3956252 97.5593007
## range NA 11.2667105 95.9014825
## sum NA 100.0000000 1339.6995323
## median NA 4.1928784 66.5210348
## mean NA 4.0000000 53.5879813
## SE.mean NA 0.6484984 6.3076988
## CI.mean.0.95 NA 1.3384349 13.0184506
## var NA 10.5137540 994.6766161
## std.dev NA 3.2424919 31.5384942
## coef.var NA 0.8106230 0.5885367
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Alajuela (Costa Rica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
library(dplyr)
df_CR <- subset(df, state == "Alajuela")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Greens is 9
## Returning the palette you asked for with that many colors

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 0.70048 |
| 3.21979 |
| 5.43516 |
| 1.47396 |
| 9.61692 |
| 4.87432 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$state
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Alajuela 11.9652400 11.9652400 10.5708367 10.5708367
## Alajuela 10.3296800 22.2949200 9.1258813 19.6967180
## Alajuela 9.8421300 32.1370500 8.6951494 28.3918674
## Alajuela 9.6169200 41.7539700 8.4961849 36.8880523
## Alajuela 6.9217400 48.6757100 6.1150953 43.0031476
## Alajuela 6.8871500 55.5628600 6.0845364 49.0876840
## Alajuela 6.8006100 62.3634700 6.0080816 55.0957655
## Alajuela 5.9663400 68.3298100 5.2710356 60.3668011
## Alajuela 5.9551900 74.2850000 5.2611850 65.6279861
## Alajuela 5.5752300 79.8602300 4.9255047 70.5534908
## Alajuela 5.4351600 85.2953900 4.8017582 75.3552490
## Alajuela 5.1266700 90.4220600 4.5292189 79.8844679
## Alajuela 4.8743200 95.2963800 4.3062772 84.1907451
## Alajuela 4.2419900 99.5383700 3.7476376 87.9383828
## Alajuela 3.2197900 102.7581600 2.8445626 90.7829454
## Alajuela 3.0891600 105.8473200 2.7291559 93.5121013
## Alajuela 3.0845900 108.9319100 2.7251185 96.2372198
## Alajuela 2.0846900 111.0166000 1.8417447 98.0789646
## Alajuela 1.4739600 112.4905600 1.3021879 99.3811524
## Alajuela 0.7004800 113.1910400 0.6188476 100.0000000
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1123334
## 0 | 555666777
## 1 | 0002
head(df_CR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 3762 7/12/11 <NA> <NA> Costa Rica CR Alajuela 4185
## 2 7486 10/27/15 <NA> <NA> Costa Rica CR Alajuela 5745
## 3 2703 11/5/10 <NA> <NA> Costa Rica CR Alajuela 2107
## 4 2516 9/29/10 <NA> <NA> Costa Rica CR Alajuela 3624
## 5 2682 11/4/10 <NA> <NA> Costa Rica CR Alajuela 3624
## 6 5408 8/27/13 <NA> <NA> Costa Rica CR Alajuela 1015
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 1123334
## 0 | 555666777
## 1 | 0002
stem(df_CR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 75
## 2 | 1112
## 4 | 29146
## 6 | 00899
## 8 | 68
## 10 | 3
## 12 | 0
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.70048 |
1 |
5 |
5 |
5 |
5 |
| 1.47396 |
1 |
5 |
5 |
10 |
10 |
| 2.08469 |
1 |
5 |
5 |
15 |
15 |
| 3.08459 |
1 |
5 |
5 |
20 |
20 |
| 3.08916 |
1 |
5 |
5 |
25 |
25 |
| 3.21979 |
1 |
5 |
5 |
30 |
30 |
| 4.24199 |
1 |
5 |
5 |
35 |
35 |
| 4.87432 |
1 |
5 |
5 |
40 |
40 |
| 5.12667 |
1 |
5 |
5 |
45 |
45 |
| 5.43516 |
1 |
5 |
5 |
50 |
50 |
| 5.57523 |
1 |
5 |
5 |
55 |
55 |
| 5.95519 |
1 |
5 |
5 |
60 |
60 |
| 5.96634 |
1 |
5 |
5 |
65 |
65 |
| 6.80061 |
1 |
5 |
5 |
70 |
70 |
| 6.88715 |
1 |
5 |
5 |
75 |
75 |
| 6.92174 |
1 |
5 |
5 |
80 |
80 |
| 9.61692 |
1 |
5 |
5 |
85 |
85 |
| 9.84213 |
1 |
5 |
5 |
90 |
90 |
| 10.32968 |
1 |
5 |
5 |
95 |
95 |
| 11.96524 |
1 |
5 |
5 |
100 |
100 |
| Total |
20 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 21 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 1 ...
## $ % : num 5 5 5 5 5 5 5 5 5 5 ...
## $ val% : num 5 5 5 5 5 5 5 5 5 5 ...
## $ %cum : num 5 10 15 20 25 30 35 40 45 50 ...
## $ val%cum: num 5 10 15 20 25 30 35 40 45 50 ...
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.70048 |
1 |
| 1.47396 |
1 |
| 2.08469 |
1 |
| 3.08459 |
1 |
| 3.08916 |
1 |
| 3.21979 |
1 |
| 4.24199 |
1 |
| 4.87432 |
1 |
| 5.12667 |
1 |
| 5.43516 |
1 |
| 5.57523 |
1 |
| 5.95519 |
1 |
| 5.96634 |
1 |
| 6.80061 |
1 |
| 6.88715 |
1 |
| 6.92174 |
1 |
| 9.61692 |
1 |
| 9.84213 |
1 |
| 10.32968 |
1 |
| 11.96524 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.70048 3.70048 6.70048 9.70048 12.70048
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.7,3.7] |
5 |
0.2631579 |
5 |
| (3.7,6.7] |
7 |
0.3684211 |
12 |
| (6.7,9.7] |
4 |
0.2105263 |
16 |
| (9.7,12.7] |
3 |
0.1578947 |
19 |
str(Freq_table)
## 'data.frame': 4 obs. of 4 variables:
## $ distance: Factor w/ 4 levels "(0.7,3.7]","(3.7,6.7]",..: 1 2 3 4
## $ Freq : int 5 7 4 3
## $ Rel_Freq: num 0.263 0.368 0.211 0.158
## $ Cum_Freq: int 5 12 16 19
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.7,3.7] |
5 |
| (3.7,6.7] |
7 |
| (6.7,9.7] |
4 |
| (9.7,12.7] |
3 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## id date time continent_code country_name country_code
## nbr.val 2.000000e+01 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.010000e+02 NA NA NA NA NA
## max 7.488000e+03 NA NA NA NA NA
## range 7.187000e+03 NA NA NA NA NA
## sum 9.718800e+04 NA NA NA NA NA
## median 5.878000e+03 NA NA NA NA NA
## mean 4.859400e+03 NA NA NA NA NA
## SE.mean 5.261514e+02 NA NA NA NA NA
## CI.mean.0.95 1.101248e+03 NA NA NA NA NA
## var 5.536707e+06 NA NA NA NA NA
## std.dev 2.353021e+03 NA NA NA NA NA
## coef.var 4.842204e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 2.000000e+01 NA 20.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 1.015000e+03 NA 0.7004800 NA
## max NA 4.749400e+04 NA 11.9652400 NA
## range NA 4.647900e+04 NA 11.2647600 NA
## sum NA 1.924900e+05 NA 113.1910400 NA
## median NA 7.014000e+03 NA 5.5051950 NA
## mean NA 9.624500e+03 NA 5.6595520 NA
## SE.mean NA 2.281502e+03 NA 0.6812501 NA
## CI.mean.0.95 NA 4.775238e+03 NA 1.4258729 NA
## var NA 1.041050e+08 NA 9.2820347 NA
## std.dev NA 1.020319e+04 NA 3.0466432 NA
## coef.var NA 1.060126e+00 NA 0.5383188 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 20.00000000 2.000000e+01 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 9.91890000 -8.501410e+01 NA NA NA
## max 10.89160000 -8.418070e+01 NA NA NA
## range 0.97270000 8.334000e-01 NA NA NA
## sum 202.24760000 -1.688552e+03 NA NA NA
## median 10.04315000 -8.444405e+01 NA NA NA
## mean 10.11238000 -8.442758e+01 NA NA NA
## SE.mean 0.05493583 4.594981e-02 NA NA NA
## CI.mean.0.95 0.11498201 9.617405e-02 NA NA NA
## var 0.06035891 4.222770e-02 NA NA NA
## std.dev 0.24568050 2.054938e-01 NA NA NA
## coef.var 0.02429502 -2.433965e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities
## nbr.val NA NA NA 11.0000000 18.0000000
## nbr.null NA NA NA 10.0000000 15.0000000
## nbr.na NA NA NA 9.0000000 2.0000000
## min NA NA NA 0.0000000 0.0000000
## max NA NA NA 3.0000000 14.0000000
## range NA NA NA 3.0000000 14.0000000
## sum NA NA NA 3.0000000 16.0000000
## median NA NA NA 0.0000000 0.0000000
## mean NA NA NA 0.2727273 0.8888889
## SE.mean NA NA NA 0.2727273 0.7749716
## CI.mean.0.95 NA NA NA 0.6076742 1.6350471
## var NA NA NA 0.8181818 10.8104575
## std.dev NA NA NA 0.9045340 3.2879260
## coef.var NA NA NA 3.3166248 3.6989168
## source_name source_link prop ypos
## nbr.val NA NA 20.0000000 20.0000000
## nbr.null NA NA 0.0000000 0.0000000
## nbr.na NA NA 0.0000000 0.0000000
## min NA NA 0.6188476 0.3094238
## max NA NA 10.5708367 97.5372476
## range NA NA 9.9519891 97.2278239
## sum NA NA 100.0000000 984.6035428
## median NA NA 4.8636314 51.7474020
## mean NA NA 5.0000000 49.2301771
## SE.mean NA NA 0.6018587 7.4144621
## CI.mean.0.95 NA NA 1.2597047 15.5186476
## var NA NA 7.2446780 1099.4849742
## std.dev NA NA 2.6915940 33.1584827
## coef.var NA NA 0.5383188 0.6735398
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Puntarenas (Costa Rica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
library(dplyr)
df_CR <- subset(df, state == "Puntarenas")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 18.00524 |
| 13.48919 |
| 3.82425 |
| 8.92048 |
| 11.74074 |
| 7.87044 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Ciudad Cortés 20.0663300 20.0663300 22.4960244 22.4960244
## San Vito 18.0052400 38.0715700 20.1853711 42.6813955
## Parrita 13.4891900 51.5607600 15.1225036 57.8038990
## Golfito 11.7407400 63.3015000 13.1623457 70.9662447
## Miramar 8.9204800 72.2219800 10.0005998 80.9668445
## Golfito 7.8704400 80.0924200 8.8234176 89.7902622
## Corredor 4.9305300 85.0229500 5.5275341 95.3177962
## Miramar 3.8242500 88.8472000 4.2873022 99.6050985
## Buenos Aires 0.3522500 89.1994500 0.3949015 100.0000000
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 | 589
## 1 | 23
## 1 | 8
## 2 | 0
head(df_CR)
## # A tibble: 6 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1296 11/13/09 <NA> <NA> Costa Rica CR Puntarenas 3981
## 2 2689 11/4/10 <NA> <NA> Costa Rica CR Puntarenas 3734
## 3 323 10/24/07 <NA> <NA> Costa Rica CR Puntarenas 6540
## 4 848 10/16/08 <NA> <NA> Costa Rica CR Puntarenas 6540
## 5 845 10/15/08 <NA> <NA> Costa Rica CR Puntarenas 6777
## 6 2685 11/4/10 <NA> <NA> Costa Rica CR Puntarenas 6777
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 04
## 0 | 589
## 1 | 23
## 1 | 8
## 2 | 0
stem(df_CR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 0 | 4
## 2 | 8
## 4 | 9
## 6 | 9
## 8 | 9
## 10 | 7
## 12 | 5
## 14 |
## 16 |
## 18 | 0
## 20 | 1
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 0.35225 |
1 |
11.1 |
11.1 |
11.1 |
11.1 |
| 3.82425 |
1 |
11.1 |
11.1 |
22.2 |
22.2 |
| 4.93053 |
1 |
11.1 |
11.1 |
33.3 |
33.3 |
| 7.87044 |
1 |
11.1 |
11.1 |
44.4 |
44.4 |
| 8.92048 |
1 |
11.1 |
11.1 |
55.6 |
55.6 |
| 11.74074 |
1 |
11.1 |
11.1 |
66.7 |
66.7 |
| 13.48919 |
1 |
11.1 |
11.1 |
77.8 |
77.8 |
| 18.00524 |
1 |
11.1 |
11.1 |
88.9 |
88.9 |
| 20.06633 |
1 |
11.1 |
11.1 |
100.0 |
100.0 |
| Total |
9 |
100.0 |
100.0 |
100.0 |
100.0 |
str(table)
## Classes 'freqtab' and 'data.frame': 10 obs. of 5 variables:
## $ n : num 1 1 1 1 1 1 1 1 1 9
## $ % : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ val% : num 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 11.1 100
## $ %cum : num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
## $ val%cum: num 11.1 22.2 33.3 44.4 55.6 66.7 77.8 88.9 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 0.35225 |
1 |
| 3.82425 |
1 |
| 4.93053 |
1 |
| 7.87044 |
1 |
| 8.92048 |
1 |
| 11.74074 |
1 |
| 13.48919 |
1 |
| 18.00524 |
1 |
| 20.06633 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 0.35225 4.35225 8.35225 12.35225 16.35225 20.35225
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (0.352,4.35] |
1 |
0.125 |
1 |
| (4.35,8.35] |
2 |
0.250 |
3 |
| (8.35,12.4] |
2 |
0.250 |
5 |
| (12.4,16.4] |
1 |
0.125 |
6 |
| (16.4,20.4] |
2 |
0.250 |
8 |
str(Freq_table)
## 'data.frame': 5 obs. of 4 variables:
## $ distance: Factor w/ 5 levels "(0.352,4.35]",..: 1 2 3 4 5
## $ Freq : int 1 2 2 1 2
## $ Rel_Freq: num 0.125 0.25 0.25 0.125 0.25
## $ Cum_Freq: int 1 3 5 6 8
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (0.352,4.35] |
1 |
| (4.35,8.35] |
2 |
| (8.35,12.4] |
2 |
| (12.4,16.4] |
1 |
| (16.4,20.4] |
2 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 9.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 3.230000e+02 NA NA NA NA NA
## max 7.493000e+03 NA NA NA NA NA
## range 7.170000e+03 NA NA NA NA NA
## sum 2.155700e+04 NA NA NA NA NA
## median 2.685000e+03 NA NA NA NA NA
## mean 2.395222e+03 NA NA NA NA NA
## SE.mean 7.132643e+02 NA NA NA NA NA
## CI.mean.0.95 1.644790e+03 NA NA NA NA NA
## var 4.578713e+06 NA NA NA NA NA
## std.dev 2.139793e+03 NA NA NA NA NA
## coef.var 8.933588e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 9.000000e+00 NA 9.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 3.734000e+03 NA 0.3522500 NA
## max NA 1.168000e+04 NA 20.0663300 NA
## range NA 7.946000e+03 NA 19.7140800 NA
## sum NA 5.696300e+04 NA 89.1994500 NA
## median NA 6.540000e+03 NA 8.9204800 NA
## mean NA 6.329222e+03 NA 9.9110500 NA
## SE.mean NA 8.172298e+02 NA 2.1831653 NA
## CI.mean.0.95 NA 1.884535e+03 NA 5.0343882 NA
## var NA 6.010781e+06 NA 42.8958955 NA
## std.dev NA 2.451689e+03 NA 6.5494958 NA
## coef.var NA 3.873603e-01 NA 0.6608276 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 9.00000000 9.000000e+00 NA NA NA
## nbr.null 0.00000000 0.000000e+00 NA NA NA
## nbr.na 0.00000000 0.000000e+00 NA NA NA
## min 8.61170000 -8.480900e+01 NA NA NA
## max 10.11100000 -8.294180e+01 NA NA NA
## range 1.49930000 1.867200e+00 NA NA NA
## sum 82.72080000 -7.528426e+02 NA NA NA
## median 8.98960000 -8.332680e+01 NA NA NA
## mean 9.19120000 -8.364918e+01 NA NA NA
## SE.mean 0.19984316 2.553648e-01 NA NA NA
## CI.mean.0.95 0.46083916 5.888723e-01 NA NA NA
## var 0.35943561 5.869007e-01 NA NA NA
## std.dev 0.59952949 7.660945e-01 NA NA NA
## coef.var 0.06522864 -9.158422e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 1 6.0000000 NA
## nbr.null NA NA NA 1 5.0000000 NA
## nbr.na NA NA NA 8 3.0000000 NA
## min NA NA NA 0 0.0000000 NA
## max NA NA NA 0 1.0000000 NA
## range NA NA NA 0 1.0000000 NA
## sum NA NA NA 0 1.0000000 NA
## median NA NA NA 0 0.0000000 NA
## mean NA NA NA 0 0.1666667 NA
## SE.mean NA NA NA NA 0.1666667 NA
## CI.mean.0.95 NA NA NA NaN 0.4284303 NA
## var NA NA NA NA 0.1666667 NA
## std.dev NA NA NA NA 0.4082483 NA
## coef.var NA NA NA NA 2.4494897 NA
## source_link prop ypos
## nbr.val NA 9.0000000 9.0000000
## nbr.null NA 0.0000000 0.0000000
## nbr.na NA 0.0000000 0.0000000
## min NA 0.3949015 10.0926855
## max NA 22.4960244 99.8025492
## range NA 22.1011228 89.7098637
## sum NA 100.0000000 505.7380343
## median NA 10.0005998 56.1769495
## mean NA 11.1111111 56.1931149
## SE.mean NA 2.4475098 9.7228963
## CI.mean.0.95 NA 5.6439677 22.4210391
## var NA 53.9127377 850.8124161
## std.dev NA 7.3425294 29.1686890
## coef.var NA 0.6608276 0.5190794
boxplot(data, horizontal=TRUE, col='green')

Gráfico para Guanacaste (Costa Rica)
library(readr)
library(knitr)
df <- read_csv("https://raw.githubusercontent.com/lihkir/AnalisisEstadisticoUN/main/Data/catalog.csv")
## Rows: 1693 Columns: 23
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (16): date, time, continent_code, country_name, country_code, state/prov...
## dbl (7): id, population, distance, latitude, longitude, injuries, fatalities
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
colnames(df)[7] <- "state"
colnames(df)[9] <- "city"
df_CR <- subset(df, country_name == "Costa Rica")
knitr::kable(head(df_CR))
library(dplyr)
df_CR <- subset(df, state == "Guanacaste")
knitr::kable(head(df_CR))
Gráfico de barras agrupados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="dodge", stat="identity")

Gráfico de barras apilados
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(fill=city, y=distance, x=state)) +
geom_bar(position="stack", stat="identity")

Gráfico circular
library(ggplot2)
library(dplyr)
ggplot(df_CR, aes(x=state, y=distance, fill=city)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0)

library(ggplot2)
library(dplyr)
df_CR <- df_CR %>%
arrange(desc(city)) %>%
mutate(prop = distance / sum(df_CR$distance) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
require(scales)
ggplot(df_CR, aes(x=state, y = prop, fill=city)) +
geom_bar(stat="identity", width=1, color="black") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = percent(prop/100)), color = "black", size=3) +
scale_fill_brewer(palette="Set4")
## Warning in pal_name(palette, type): Unknown palette Set4

Grafico de series temporales
library(forecast)
data<- ts(df_CR$distance, frequency=12, start=2008)
knitr::kable(head(data))
| 10.21631 |
| 12.33807 |
| 12.21952 |
| 12.18115 |
| 17.65521 |
autoplot(data) + labs(title = "Gráfico de series temporales", x="date", y = "distancia", colour = "green") +theme_bw()

Diagrama de pareto
library(qcc)
distance <- df_CR$distance
names(distance) <- df_CR$city
pareto.chart(distance,
ylab="distance",
col = heat.colors(length(distance)),
cumperc = seq(0, 100, by = 10),
ylab2 = "Porcentaje acumulado",
main = "Grafico de Pareto por ciudades"
)

##
## Pareto chart analysis for distance
## Frequency Cum.Freq. Percentage Cum.Percent.
## Bagaces 17.65521 17.65521 27.32571 27.32571
## Tilarán 12.33807 29.99328 19.09615 46.42185
## Tilarán 12.21952 42.21280 18.91266 65.33451
## Tilarán 12.18115 54.39395 18.85328 84.18779
## Tilarán 10.21631 64.61026 15.81221 100.00000
stem(df_CR$"distance")
##
## The decimal point is at the |
##
## 10 | 2
## 12 | 223
## 14 |
## 16 | 7
head(df_CR)
## # A tibble: 5 x 25
## id date time continent_code country_name country_code state population
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2683 11/4/10 <NA> <NA> Costa Rica CR Guan~ 7301
## 2 4375 5/31/12 <NA> <NA> Costa Rica CR Guan~ 7301
## 3 5571 10/3/13 <NA> <NA> Costa Rica CR Guan~ 7301
## 4 5591 10/8/13 Morning <NA> Costa Rica CR Guan~ 7301
## 5 556 5/29/08 <NA> <NA> Costa Rica CR Guan~ 4108
## # ... with 17 more variables: city <chr>, distance <dbl>,
## # location_description <chr>, latitude <dbl>, longitude <dbl>,
## # geolocation <chr>, hazard_type <chr>, landslide_type <chr>,
## # landslide_size <chr>, trigger <chr>, storm_name <chr>, injuries <dbl>,
## # fatalities <dbl>, source_name <chr>, source_link <chr>, prop <dbl>,
## # ypos <dbl>
knitr::kable(head(df_CR))
stem(df_CR$"distance")
##
## The decimal point is at the |
##
## 10 | 2
## 12 | 223
## 14 |
## 16 | 7
stem(df_CR$"distance", scale = 2)
##
## The decimal point is at the |
##
## 10 | 2
## 11 |
## 12 | 223
## 13 |
## 14 |
## 15 |
## 16 |
## 17 | 7
Tablas de frecuencia
library(questionr)
table <- questionr::freq(distance, cum = TRUE, sort = "dec", total = TRUE)
knitr::kable(table)
| 10.21631 |
1 |
20 |
20 |
20 |
20 |
| 12.18115 |
1 |
20 |
20 |
40 |
40 |
| 12.21952 |
1 |
20 |
20 |
60 |
60 |
| 12.33807 |
1 |
20 |
20 |
80 |
80 |
| 17.65521 |
1 |
20 |
20 |
100 |
100 |
| Total |
5 |
100 |
100 |
100 |
100 |
str(table)
## Classes 'freqtab' and 'data.frame': 6 obs. of 5 variables:
## $ n : num 1 1 1 1 1 5
## $ % : num 20 20 20 20 20 100
## $ val% : num 20 20 20 20 20 100
## $ %cum : num 20 40 60 80 100 100
## $ val%cum: num 20 40 60 80 100 100
x <- row.names(table)
y <- table$n
names <- x[1:(length(x)-1)]
freqs <- y[1:(length(y)-1)]
df <- data.frame(x = names, y = freqs)
knitr::kable(df)
| 10.21631 |
1 |
| 12.18115 |
1 |
| 12.21952 |
1 |
| 12.33807 |
1 |
| 17.65521 |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Distancia") +
ylab("Frecuencia de deslizamientos") + theme(axis.text.x = element_text(angle = 90))

Tabla de frecuencias agrupada
n_sturges = 1 + log(length(distance))/log(2)
n_sturgesc = ceiling(n_sturges)
n_sturgesf = floor(n_sturges)
n_clases = 0
if (n_sturgesc%%2 == 0) {
n_clases = n_sturgesf
} else {
n_clases = n_sturgesc
}
R = max(distance) - min(distance)
w = ceiling(R/n_clases)
bins <- seq(min(distance), max(distance)+w, by = w)
bins
## [1] 10.21631 13.21631 16.21631 19.21631
distance <- cut(distance, bins)
Freq_table <- transform(table(distance), Rel_Freq=prop.table(Freq), Cum_Freq=cumsum(Freq))
knitr::kable(Freq_table)
| (10.2,13.2] |
3 |
0.75 |
3 |
| (13.2,16.2] |
0 |
0.00 |
3 |
| (16.2,19.2] |
1 |
0.25 |
4 |
str(Freq_table)
## 'data.frame': 3 obs. of 4 variables:
## $ distance: Factor w/ 3 levels "(10.2,13.2]",..: 1 2 3
## $ Freq : int 3 0 1
## $ Rel_Freq: num 0.75 0 0.25
## $ Cum_Freq: int 3 3 4
df <- data.frame(x = Freq_table$distance, y = Freq_table$Freq)
knitr::kable(df)
| (10.2,13.2] |
3 |
| (13.2,16.2] |
0 |
| (16.2,19.2] |
1 |
library(ggplot2)
ggplot(data=df, aes(x=x, y=y)) +
geom_bar(stat="identity", color="green", fill="aquamarine") +
xlab("Rango de deslizamiento") +
ylab("Frecuencia")

library(pastecs)
stat.desc(df_CR)
## Warning in min(x): ningún argumento finito para min; retornando Inf
## Warning in max(x): ningun argumento finito para max; retornando -Inf
## Warning in qt((0.5 + p/2), (Nbrval - 1)): NaNs produced
## id date time continent_code country_name country_code
## nbr.val 5.000000e+00 NA NA NA NA NA
## nbr.null 0.000000e+00 NA NA NA NA NA
## nbr.na 0.000000e+00 NA NA NA NA NA
## min 5.560000e+02 NA NA NA NA NA
## max 5.591000e+03 NA NA NA NA NA
## range 5.035000e+03 NA NA NA NA NA
## sum 1.877600e+04 NA NA NA NA NA
## median 4.375000e+03 NA NA NA NA NA
## mean 3.755200e+03 NA NA NA NA NA
## SE.mean 9.601025e+02 NA NA NA NA NA
## CI.mean.0.95 2.665672e+03 NA NA NA NA NA
## var 4.608984e+06 NA NA NA NA NA
## std.dev 2.146854e+03 NA NA NA NA NA
## coef.var 5.717018e-01 NA NA NA NA NA
## state population city distance location_description
## nbr.val NA 5.000000e+00 NA 5.0000000 NA
## nbr.null NA 0.000000e+00 NA 0.0000000 NA
## nbr.na NA 0.000000e+00 NA 0.0000000 NA
## min NA 4.108000e+03 NA 10.2163100 NA
## max NA 7.301000e+03 NA 17.6552100 NA
## range NA 3.193000e+03 NA 7.4389000 NA
## sum NA 3.331200e+04 NA 64.6102600 NA
## median NA 7.301000e+03 NA 12.2195200 NA
## mean NA 6.662400e+03 NA 12.9220520 NA
## SE.mean NA 6.386000e+02 NA 1.2471437 NA
## CI.mean.0.95 NA 1.773038e+03 NA 3.4626259 NA
## var NA 2.039050e+06 NA 7.7768366 NA
## std.dev NA 1.427953e+03 NA 2.7886980 NA
## coef.var NA 2.143301e-01 NA 0.2158092 NA
## latitude longitude geolocation hazard_type landslide_type
## nbr.val 5.000000000 5.000000e+00 NA NA NA
## nbr.null 0.000000000 0.000000e+00 NA NA NA
## nbr.na 0.000000000 0.000000e+00 NA NA NA
## min 10.402400000 -8.535550e+01 NA NA NA
## max 10.556200000 -8.487510e+01 NA NA NA
## range 0.153800000 4.804000e-01 NA NA NA
## sum 52.522300000 -4.249159e+02 NA NA NA
## median 10.554300000 -8.489520e+01 NA NA NA
## mean 10.504460000 -8.498318e+01 NA NA NA
## SE.mean 0.032060437 9.316065e-02 NA NA NA
## CI.mean.0.95 0.089014042 2.586554e-01 NA NA NA
## var 0.005139358 4.339454e-02 NA NA NA
## std.dev 0.071689316 2.083136e-01 NA NA NA
## coef.var 0.006824655 -2.451233e-03 NA NA NA
## landslide_size trigger storm_name injuries fatalities source_name
## nbr.val NA NA NA 0 2.000000 NA
## nbr.null NA NA NA 0 1.000000 NA
## nbr.na NA NA NA 5 3.000000 NA
## min NA NA NA Inf 0.000000 NA
## max NA NA NA -Inf 2.000000 NA
## range NA NA NA -Inf 2.000000 NA
## sum NA NA NA 0 2.000000 NA
## median NA NA NA NA 1.000000 NA
## mean NA NA NA NaN 1.000000 NA
## SE.mean NA NA NA NA 1.000000 NA
## CI.mean.0.95 NA NA NA NaN 12.706205 NA
## var NA NA NA NA 2.000000 NA
## std.dev NA NA NA NA 1.414214 NA
## coef.var NA NA NA NA 1.414214 NA
## source_link prop ypos
## nbr.val NA 5.0000000 5.000000
## nbr.null NA 0.0000000 0.000000
## nbr.na NA 0.0000000 0.000000
## min NA 15.8122100 7.906105
## max NA 27.3257065 86.337147
## range NA 11.5134965 78.431042
## sum NA 100.0000000 227.215879
## median NA 18.9126619 44.364688
## mean NA 20.0000000 45.443176
## SE.mean NA 1.9302564 13.789187
## CI.mean.0.95 NA 5.3592509 38.284922
## var NA 18.6294484 950.708442
## std.dev NA 4.3161845 30.833560
## coef.var NA 0.2158092 0.678508
boxplot(data, horizontal=TRUE, col='green')
