Eponimos

Curva cumulativa

Carregando os dados

dados <- read.csv("planilha.csv") 
str(dados)

'data.frame':   779 obs. of  70 variables:
 $ BINOMIO_AUTOR              : chr  "Abrawayaomys_ruschii" "Akodon_azarae" "Akodon_cursor" "Akodon_diauarum_Brandao_et_al_2022" ...
 $ genus                      : chr  "Abrawayaomys" "Akodon" "Akodon" "Akodon" ...
 $ epithet                    : chr  "ruschii" "azarae" "cursor" "diauarum" ...
 $ EPONYM                     : num  1 1 0 0 0 1 0 0 0 1 ...
 $ Nationality                : chr  "Brazil" "Spain" "" "" ...
 $ Notes                      : chr  "Augusto Ruschi (1915–1986)" "Feliz de Azara (1746–1811)" "" "" ...
 $ Gender                     : chr  "M" "M" "" "" ...
 $ Nobility                   : int  0 0 NA NA NA 0 NA NA NA 0 ...
 $ Army                       : int  0 1 NA NA NA 0 NA NA NA 0 ...
 $ Academic                   : int  1 1 NA NA NA 1 NA NA NA 1 ...
 $ Collector                  : int  0 1 NA NA NA 0 NA NA NA 0 ...
 $ Family                     : int  0 0 NA NA NA 0 NA NA NA 0 ...
 $ Bankers                    : int  0 0 NA NA NA 0 NA NA NA 0 ...
 $ Colonial.administration    : int  0 1 NA NA NA 0 NA NA NA 0 ...
 $ Others                     : int  0 0 NA NA NA 0 NA NA NA 0 ...
 $ synonym_number             : int  0 7 0 0 0 1 0 0 0 0 ...
 $ Order                      : chr  "Rodentia" "Rodentia" "Rodentia" "Rodentia" ...
 $ Ano_de_Descricao_da_Especie: int  1979 1829 1887 2022 2021 1990 1913 1998 2000 1998 ...
 $ BOLD_STATUS                : int  1 1 1 0 0 1 1 0 1 1 ...
 $ MA                         : int  1 0 1 0 0 1 1 1 1 0 ...
 $ AM                         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ CE                         : int  0 0 1 1 1 1 1 0 1 0 ...
 $ CA                         : int  0 0 1 0 0 0 0 0 0 0 ...
 $ PT                         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ PP                         : int  0 1 0 0 0 0 1 0 0 1 ...
 $ MAR                        : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Carnivoro                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Frugivoro                  : int  1 0 0 0 0 0 0 0 0 0 ...
 $ Folivoro                   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Gomivoro                   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Granivoro                  : int  1 0 0 0 0 0 0 0 0 0 ...
 $ Herbivoro                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Hematofago                 : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Insetivoro                 : int  0 1 1 1 1 1 1 1 1 1 ...
 $ Mirmecofago                : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Nectarivoro                : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Onivoro                    : int  0 1 1 1 1 1 1 1 1 1 ...
 $ Planctofago                : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Piscivoro                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Predador_de_Sementes       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Teutofago                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Arboricola                 : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Voador                     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Terrestre                  : int  0 1 1 1 1 1 1 1 1 1 ...
 $ Aquatico                   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Escansorial                : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Semi_fossorial             : int  1 0 0 0 0 0 0 0 0 0 ...
 $ Semi_aquatico              : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Fossorial                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Endemic_0_1                : int  0 0 1 1 1 1 0 1 0 0 ...
 $ Distr_0_1                  : int  0 0 0 1 1 0 0 1 0 0 ...
 $ Weight_Atual               : num  59 24 50 25 24.5 18 43 20.5 35 40 ...
 $ Not_evaluated_IUCN         : int  0 0 0 1 1 0 0 0 0 0 ...
 $ Data_Defficient            : int  0 0 0 0 0 1 0 1 0 0 ...
 $ Least_Concern              : int  1 1 1 0 0 0 1 0 1 1 ...
 $ Near_Threatened            : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Vulnerable                 : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Endangered                 : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Critically_Endangered      : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Extinct_in_the_Wild        : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Extinct                    : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Not_evaluated_trend        : int  0 0 0 1 1 0 0 0 0 0 ...
 $ Increasing                 : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Decreasing                 : int  0 0 0 0 0 0 1 0 0 0 ...
 $ Unknown                    : int  1 0 1 0 0 1 0 1 1 0 ...
 $ Stable                     : int  0 1 0 0 0 0 0 0 0 1 ...
 $ Roadkill                   : int  0 0 0 NA NA 0 0 0 0 0 ...
 $ Caca                       : int  0 0 0 NA NA 0 0 0 0 0 ...
 $ CITES                      : chr  "0" "0" "0" "0" ...
 $ Coletado_no_Brasil         : int  0 1 1 0 0 0 1 0 1 0 ...

Tirando as linhas finais

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.0.5

-- Attaching packages --------------------------------------- tidyverse 1.3.1 --

v ggplot2 3.4.2     v purrr   1.0.1
v tibble  3.2.1     v dplyr   1.1.2
v tidyr   1.3.0     v stringr 1.5.0
v readr   2.1.4     v forcats 1.0.0

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

dados1 <- dados %>% slice(1:775)

Transformando em contagem

dados_cum <- dados1 %>% count(EPONYM,Ano_de_Descricao_da_Especie)
dados_all <- dados1 %>% count(Ano_de_Descricao_da_Especie)

Plotanto espécies total e eponimos ao longo do tempo

ggplot()+
  geom_point(data=filter(dados_cum, EPONYM==1), aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)))+
  geom_line(data=filter(dados_cum, EPONYM==1), aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)))+
  geom_point(data = dados_all, aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)))

Plotando só eponimos

ggplot()+
  geom_bar(data=filter(dados_cum, EPONYM==1), aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)), stat="identity")+
  geom_line(data=filter(dados_cum, EPONYM==1), aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)))+
  theme_classic()

ggplot(data=filter(dados_cum, EPONYM==1), aes(x=Ano_de_Descricao_da_Especie, y=cumsum(n)))+
  geom_point()+
  geom_line()+
  theme_classic()

Comparação de gêneros

Manipulando os dados (colocar todos os valores inexistentes ou 0 para indeterminado)

dados1$Gender <- as.factor(dados1$Gender)
dados1$Gender[dados1$Gender==""] <- 0
so.epon <- dados1 %>% filter(EPONYM==1) %>% mutate(Gender = recode(Gender, `0`= "Indeterminado", `F` = "Female", `M`= "Male"))

Opções de gráficos

ggplot(data=so.epon, aes(x=Gender, y=EPONYM, fill= Gender))+
  geom_bar(stat= "identity")

ggplot(so.epon)+
  geom_bar(aes(x="", y=EPONYM, fill= Gender),stat= "identity")+
  coord_polar("y") +
  theme_void()

The echo: false option disables the printing of code (only output is displayed).

Eponimos por status

dados2 <- so.epon %>% pivot_longer(8:15, values_to= "valores", names_to= "Status")
dados2$Status <- as.factor(dados2$Status)

ggplot(data=dados2, aes(x=valores, y=Status, fill= Status))+
  geom_bar(stat= "identity")

Warning: Removed 211 rows containing missing values (`position_stack()`).

Eponimos por ordem

ggplot(data=dados1, aes(x=as.factor(Order), y=as.factor(EPONYM), fill= as.factor(EPONYM)))+
  geom_bar(stat= "identity")+
  theme(axis.text.x = element_text(angle=45, hjust=1))

Eponimos por status por Ordem

ggplot(data=dados2, aes(x=Status, y=valores, fill= as.factor(Order)))+
  geom_bar(stat= "identity")

Warning: Removed 211 rows containing missing values (`position_stack()`).

Proporção de sinonimos por categoria (ainda ficou ruim)

dados1$EPONYM <- as.factor(dados1$EPONYM)
freq <- dados1 %>% 
  group_by(EPONYM) %>% 
  count(synonym_number) %>% 
  mutate(freq = n / sum(n) * 100) %>% 
  select(-n)

ggplot(data= na.omit(freq), aes(x=synonym_number, y=freq, fill= EPONYM))+
  geom_bar(stat= "identity", position=position_dodge())

Talvez agregando por categorias?

dados1$synonym_number <- as.numeric(dados1$synonym_number)
new <- dados1 %>% mutate(synonym_cat = case_when(synonym_number == 0 ~ "0",
                                                 synonym_number <= 10 ~ "1-10",
                                                 synonym_number <= 20 ~ "11-20",
                                                 synonym_number > 30 ~ ">21"))

freq2 <- new %>% 
  group_by(EPONYM) %>% 
  count(synonym_cat) %>% 
  mutate(freq = n / sum(n) * 100) %>% 
  select(-n)
freq2

# A tibble: 13 x 3
# Groups:   EPONYM [3]
   EPONYM synonym_cat   freq
   <fct>  <chr>        <dbl>
 1 0      0           36.1  
 2 0      1-10        47.9  
 3 0      11-20        6.87 
 4 0      >21          0.859
 5 0      <NA>         8.25 
 6 1      0           53.8  
 7 1      1-10        39.0  
 8 1      11-20        2.20 
 9 1      >21          0.549
10 1      <NA>         4.40 
11 <NA>   0           45.5  
12 <NA>   1-10        45.5  
13 <NA>   11-20        9.09

ggplot(data= filter(freq2, (!is.na(EPONYM))), aes(x=synonym_cat, y=freq, fill= EPONYM))+
  geom_bar(stat= "identity", position=position_dodge())

Mapa

library(ggmap)

Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.

Please cite ggmap if you use it! See citation("ggmap") for details.

mundo <- map_data("world")
country <- unique(as.factor(mundo$region))


dados3 <- dados1 %>% mutate(dados1, Nationality = recode(Nationality, `USA ` = "USA"))
paises <- unique(as.factor(dados2$Nationality))[-3]
paises.reg <- map_data("world", region=paises)

count_contry <- dados3 %>% count(Nationality) %>% slice(-1) %>% rename(region = Nationality)
paises.final <- merge(paises.reg, count_contry, by = c("region"))

ggplot()+
  geom_map(data=mundo, map=mundo, aes(x=long, y=lat, map_id=region), fill="gray95", color="darkgray")+
  geom_map(data=paises.final, map=paises.final, aes(map_id=region, fill=n), color="darkgray",)+
  scale_fill_gradient(low= "#fff7bc", high="#cc4c02", limits=c(1, 42), breaks = c(1, 10,20, 30, 40), name= "Number of eponyms")+
  expand_limits(x = mundo$long, y = mundo$lat)+
  theme_minimal()+
   guides(fill = guide_legend(title.position="top", title.hjust = 0.5))+
  coord_map(xlim=c(-180,180), ylim = c(-55,100))+
  theme(legend.position = "top")

Warning in geom_map(data = mundo, map = mundo, aes(x = long, y = lat, map_id =
region), : Ignoring unknown aesthetics: x and y

Ficou ruim no Quarto, mas no R deu certo, sei lá porque…