Curva cumulativa
Carregando os dados
# Read the raw spreadsheet export and print its structure (column names and
# types) as a quick sanity check.
dados <- read.csv(file = "planilha.csv")
str(object = dados)
'data.frame': 779 obs. of 70 variables:
$ BINOMIO_AUTOR : chr "Abrawayaomys_ruschii" "Akodon_azarae" "Akodon_cursor" "Akodon_diauarum_Brandao_et_al_2022" ...
$ genus : chr "Abrawayaomys" "Akodon" "Akodon" "Akodon" ...
$ epithet : chr "ruschii" "azarae" "cursor" "diauarum" ...
$ EPONYM : num 1 1 0 0 0 1 0 0 0 1 ...
$ Nationality : chr "Brazil" "Spain" "" "" ...
$ Notes : chr "Augusto Ruschi (1915–1986)" "Feliz de Azara (1746–1811)" "" "" ...
$ Gender : chr "M" "M" "" "" ...
$ Nobility : int 0 0 NA NA NA 0 NA NA NA 0 ...
$ Army : int 0 1 NA NA NA 0 NA NA NA 0 ...
$ Academic : int 1 1 NA NA NA 1 NA NA NA 1 ...
$ Collector : int 0 1 NA NA NA 0 NA NA NA 0 ...
$ Family : int 0 0 NA NA NA 0 NA NA NA 0 ...
$ Bankers : int 0 0 NA NA NA 0 NA NA NA 0 ...
$ Colonial.administration : int 0 1 NA NA NA 0 NA NA NA 0 ...
$ Others : int 0 0 NA NA NA 0 NA NA NA 0 ...
$ synonym_number : int 0 7 0 0 0 1 0 0 0 0 ...
$ Order : chr "Rodentia" "Rodentia" "Rodentia" "Rodentia" ...
$ Ano_de_Descricao_da_Especie: int 1979 1829 1887 2022 2021 1990 1913 1998 2000 1998 ...
$ BOLD_STATUS : int 1 1 1 0 0 1 1 0 1 1 ...
$ MA : int 1 0 1 0 0 1 1 1 1 0 ...
$ AM : int 0 0 0 0 0 0 0 0 0 0 ...
$ CE : int 0 0 1 1 1 1 1 0 1 0 ...
$ CA : int 0 0 1 0 0 0 0 0 0 0 ...
$ PT : int 0 0 0 0 0 0 0 0 0 0 ...
$ PP : int 0 1 0 0 0 0 1 0 0 1 ...
$ MAR : int 0 0 0 0 0 0 0 0 0 0 ...
$ Carnivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Frugivoro : int 1 0 0 0 0 0 0 0 0 0 ...
$ Folivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Gomivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Granivoro : int 1 0 0 0 0 0 0 0 0 0 ...
$ Herbivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Hematofago : int 0 0 0 0 0 0 0 0 0 0 ...
$ Insetivoro : int 0 1 1 1 1 1 1 1 1 1 ...
$ Mirmecofago : int 0 0 0 0 0 0 0 0 0 0 ...
$ Nectarivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Onivoro : int 0 1 1 1 1 1 1 1 1 1 ...
$ Planctofago : int 0 0 0 0 0 0 0 0 0 0 ...
$ Piscivoro : int 0 0 0 0 0 0 0 0 0 0 ...
$ Predador_de_Sementes : int 0 0 0 0 0 0 0 0 0 0 ...
$ Teutofago : int 0 0 0 0 0 0 0 0 0 0 ...
$ Arboricola : int 0 0 0 0 0 0 0 0 0 0 ...
$ Voador : int 0 0 0 0 0 0 0 0 0 0 ...
$ Terrestre : int 0 1 1 1 1 1 1 1 1 1 ...
$ Aquatico : int 0 0 0 0 0 0 0 0 0 0 ...
$ Escansorial : int 0 0 0 0 0 0 0 0 0 0 ...
$ Semi_fossorial : int 1 0 0 0 0 0 0 0 0 0 ...
$ Semi_aquatico : int 0 0 0 0 0 0 0 0 0 0 ...
$ Fossorial : int 0 0 0 0 0 0 0 0 0 0 ...
$ Endemic_0_1 : int 0 0 1 1 1 1 0 1 0 0 ...
$ Distr_0_1 : int 0 0 0 1 1 0 0 1 0 0 ...
$ Weight_Atual : num 59 24 50 25 24.5 18 43 20.5 35 40 ...
$ Not_evaluated_IUCN : int 0 0 0 1 1 0 0 0 0 0 ...
$ Data_Defficient : int 0 0 0 0 0 1 0 1 0 0 ...
$ Least_Concern : int 1 1 1 0 0 0 1 0 1 1 ...
$ Near_Threatened : int 0 0 0 0 0 0 0 0 0 0 ...
$ Vulnerable : int 0 0 0 0 0 0 0 0 0 0 ...
$ Endangered : int 0 0 0 0 0 0 0 0 0 0 ...
$ Critically_Endangered : int 0 0 0 0 0 0 0 0 0 0 ...
$ Extinct_in_the_Wild : int 0 0 0 0 0 0 0 0 0 0 ...
$ Extinct : int 0 0 0 0 0 0 0 0 0 0 ...
$ Not_evaluated_trend : int 0 0 0 1 1 0 0 0 0 0 ...
$ Increasing : int 0 0 0 0 0 0 0 0 0 0 ...
$ Decreasing : int 0 0 0 0 0 0 1 0 0 0 ...
$ Unknown : int 1 0 1 0 0 1 0 1 1 0 ...
$ Stable : int 0 1 0 0 0 0 0 0 0 1 ...
$ Roadkill : int 0 0 0 NA NA 0 0 0 0 0 ...
$ Caca : int 0 0 0 NA NA 0 0 0 0 0 ...
$ CITES : chr "0" "0" "0" "0" ...
$ Coletado_no_Brasil : int 0 1 1 0 0 0 1 0 1 0 ...
Tirando as linhas finais
Warning: package 'tidyverse' was built under R version 4.0.5
-- Attaching packages --------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.4.2 v purrr 1.0.1
v tibble 3.2.1 v dplyr 1.1.2
v tidyr 1.3.0 v stringr 1.5.0
v readr 2.1.4 v forcats 1.0.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
# Keep rows 1 to 775 only; the trailing rows of the sheet are discarded.
dados1 <- slice(dados, 1:775)
Transformando em contagem
# Number of species per description year: split by the EPONYM flag
# (dados_cum) and for all species together (dados_all).
dados_cum <- count(dados1, EPONYM, Ano_de_Descricao_da_Especie)
dados_all <- count(dados1, Ano_de_Descricao_da_Especie)
Plotando o total de espécies e os epônimos ao longo do tempo
# Cumulative species counts over description year. Eponyms are drawn as
# points joined by a line; the total for all species is drawn as points only.
# The x/y mapping is declared once and inherited by every layer.
ggplot(mapping = aes(x = Ano_de_Descricao_da_Especie, y = cumsum(n))) +
  geom_point(data = dados_cum %>% filter(EPONYM == 1)) +
  geom_line(data = dados_cum %>% filter(EPONYM == 1)) +
  geom_point(data = dados_all)
Plotando só eponimos
# Eponyms only, version 1: cumulative count as bars with a line on top.
ggplot(
  data = filter(dados_cum, EPONYM == 1),
  mapping = aes(x = Ano_de_Descricao_da_Especie, y = cumsum(n))
) +
  geom_col() +
  geom_line() +
  theme_classic()

# Eponyms only, version 2: the same cumulative count as points plus a line.
dados_cum %>%
  filter(EPONYM == 1) %>%
  ggplot(mapping = aes(x = Ano_de_Descricao_da_Especie, y = cumsum(n))) +
  geom_point() +
  geom_line() +
  theme_classic()
Comparação de gêneros
Manipulando os dados (colocar todos os valores inexistentes ou 0 para indeterminado)
# Code blank genders as "0" while the column is still character. Assigning 0
# to a factor that has no "0" level does not add the level: R stores NA and
# warns "invalid factor level, NA generated", so the recode below would never
# see those rows. `%in%` is used instead of `==` so NA entries are skipped
# rather than propagated into the index.
dados1$Gender[dados1$Gender %in% ""] <- "0"
dados1$Gender <- as.factor(dados1$Gender)

# Eponym-only subset with readable gender labels ("0" means undetermined).
so.epon <- dados1 %>%
  filter(EPONYM == 1) %>%
  mutate(
    Gender = recode(
      Gender,
      `0` = "Indeterminado",
      `F` = "Female",
      `M` = "Male"
    )
  )
Opções de gráficos
# Option 1: bar chart. Every row of so.epon has EPONYM == 1, so summing
# EPONYM per gender gives the number of eponyms for that gender.
ggplot(so.epon, mapping = aes(x = Gender, y = EPONYM, fill = Gender)) +
  geom_col()

# Option 2: the same totals as a pie chart (one stacked bar in polar
# coordinates).
ggplot(data = so.epon, mapping = aes(x = "", y = EPONYM, fill = Gender)) +
  geom_col() +
  coord_polar(theta = "y") +
  theme_void()
The `echo: false` option disables the printing of code (only output is displayed).
Eponimos por status
# Reshape the honoree-category indicator columns to long format: one row per
# species x category, with the 0/1 flag in `valores`.
# The columns are selected by name (Nobility through Others, i.e. positions
# 8 to 15 of the sheet) so the reshape keeps working if columns are added or
# reordered upstream.
dados2 <- so.epon %>%
  pivot_longer(
    cols = Nobility:Others,
    names_to = "Status",
    values_to = "valores"
  ) %>%
  mutate(Status = as.factor(Status))

# Horizontal bars: sum of the 0/1 flags per category.
ggplot(data = dados2, aes(x = valores, y = Status, fill = Status)) +
  geom_bar(stat = "identity")
Warning: Removed 211 rows containing missing values (`position_stack()`).
Eponimos por ordem
# Number of species per taxonomic order, split by eponym flag.
# The default count stat is used on purpose: mapping as.factor(EPONYM) to y
# with stat = "identity" stacks the factor level codes instead of counting
# rows, so bar heights did not represent species counts.
ggplot(
  data = dados1,
  aes(x = as.factor(Order), fill = as.factor(EPONYM))
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Eponimos por status por Ordem
# Eponyms per honoree category, stacked by taxonomic order.
ggplot(
  dados2,
  mapping = aes(x = Status, y = valores, fill = as.factor(Order))
) +
  geom_col()
Warning: Removed 211 rows containing missing values (`position_stack()`).
Proporção de sinônimos por categoria (ainda ficou ruim)
# Treat the eponym flag as categorical from here on.
dados1[["EPONYM"]] <- as.factor(dados1[["EPONYM"]])

# Within each EPONYM group, percentage of species having each number of
# synonyms.
freq <- dados1 %>%
  count(EPONYM, synonym_number) %>%
  group_by(EPONYM) %>%
  mutate(freq = n / sum(n) * 100) %>%
  select(-n)

# Side-by-side bars; rows with any NA are dropped before plotting.
ggplot(
  na.omit(freq),
  mapping = aes(x = synonym_number, y = freq, fill = EPONYM)
) +
  geom_col(position = position_dodge())
Talvez agregando por categorias?
dados1$synonym_number <- as.numeric(dados1$synonym_number)

# Bin the number of synonyms into categories. The last branch must start
# right after 20: testing `> 30` left species with 21 to 30 synonyms
# unmatched, so they were silently turned into NA. Rows whose
# synonym_number is NA still fall through to NA.
new <- dados1 %>%
  mutate(
    synonym_cat = case_when(
      synonym_number == 0 ~ "0",
      synonym_number <= 10 ~ "1-10",
      synonym_number <= 20 ~ "11-20",
      synonym_number > 20 ~ ">20"
    )
  )
# Within each EPONYM group, percentage of species in each synonym category.
freq2 <- new %>%
  count(EPONYM, synonym_cat) %>%
  group_by(EPONYM) %>%
  mutate(freq = n / sum(n) * 100) %>%
  select(-n)

# Print the table for inspection.
freq2
# A tibble: 13 x 3
# Groups: EPONYM [3]
EPONYM synonym_cat freq
<fct> <chr> <dbl>
1 0 0 36.1
2 0 1-10 47.9
3 0 11-20 6.87
4 0 >21 0.859
5 0 <NA> 8.25
6 1 0 53.8
7 1 1-10 39.0
8 1 11-20 2.20
9 1 >21 0.549
10 1 <NA> 4.40
11 <NA> 0 45.5
12 <NA> 1-10 45.5
13 <NA> 11-20 9.09
# Side-by-side bars per synonym category, leaving out rows with no EPONYM
# flag.
freq2 %>%
  filter(!is.na(EPONYM)) %>%
  ggplot(mapping = aes(x = synonym_cat, y = freq, fill = EPONYM)) +
  geom_col(position = position_dodge())
Mapa
Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
Please cite ggmap if you use it! See citation("ggmap") for details.
# World polygons: used as the grey base layer of the map.
mundo <- map_data("world")
country <- unique(as.factor(mundo$region))

# Fix the nationality typed with a trailing space. The data frame comes in
# through the pipe, so it must not be passed to mutate() a second time.
dados3 <- dados1 %>%
  mutate(Nationality = recode(Nationality, `USA ` = "USA"))

# Rows per nationality. Blank nationalities are dropped by value rather than
# by position (previously slice(-1)), so this no longer depends on where the
# blank happens to sort.
count_contry <- dados3 %>%
  filter(!is.na(Nationality), Nationality != "") %>%
  count(Nationality) %>%
  rename(region = Nationality)

# Countries to highlight, taken from the cleaned counts instead of from
# dados2 (which predates the "USA " fix) minus a hard-coded index.
paises <- count_contry$region
paises.reg <- map_data("world", region = paises)

# inner_join() keeps the rows of paises.reg in their original order.
# merge() may reorder rows, and geom_map() draws polygon vertices in row
# order, so a reordered table produces garbled country shapes.
paises.final <- inner_join(paises.reg, count_contry, by = "region")

ggplot() +
  geom_map(
    data = mundo, map = mundo,
    aes(x = long, y = lat, map_id = region),
    fill = "gray95", color = "darkgray"
  ) +
  geom_map(
    data = paises.final, map = paises.final,
    aes(map_id = region, fill = n),
    color = "darkgray"
  ) +
  # NOTE(review): the upper limit 42 is hard-coded, presumably the largest
  # count in the current data; counts above it would be drawn as NA.
  scale_fill_gradient(
    low = "#fff7bc", high = "#cc4c02",
    limits = c(1, 42), breaks = c(1, 10, 20, 30, 40),
    name = "Number of eponyms"
  ) +
  # geom_map() does not set the axis ranges itself, so take them from the
  # world polygons.
  expand_limits(x = mundo$long, y = mundo$lat) +
  theme_minimal() +
  guides(fill = guide_legend(title.position = "top", title.hjust = 0.5)) +
  coord_map(xlim = c(-180, 180), ylim = c(-55, 100)) +
  theme(legend.position = "top")
Warning in geom_map(data = mundo, map = mundo, aes(x = long, y = lat, map_id =
region), : Ignoring unknown aesthetics: x and y
Ficou ruim no Quarto, mas no R deu certo, sei lá por quê…
