1.1. Revisar os conceitos de análise bivariada aprendidos na última aula.
1.2. Praticar a utilização de bases de dados para extrair informações e conhecer melhor os dados.
1.3.Gerar gráficos para variáveis (no ggplot2).
2.1.Usaremos o banco de dados do Museu de Arte Moderna (MoMA).
2.2.Disponível publicamente no GitHub.
2.3.Foi analisado por fivethirtyeight.com, Alison Hill e por outros.
Vamos usar quatro pacotes principais do R: readr, dplyr, ggplot2 e ggthemes. Também carregamos flextable, corrplot, janitor e RColorBrewer.
library(dplyr)
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.2.2
library(flextable)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.2
library(janitor)
library(RColorBrewer)
library(readr)
# Load the MoMA paintings table from a semicolon-delimited CSV.
#
# The locale is set explicitly because the default (English) locale appears to
# misread this file:
#   * decimal_mark = "," -- with the default, "," is treated as a thousands
#     separator, which is why the dimension columns were guessed as
#     col_number() and lost their decimal point (the same painting ended up
#     with height 508 next to width 54).
#     NOTE(review): the comma decimal mark is inferred from that parse result;
#     confirm against the raw file.
#   * encoding = "latin1" -- accented names were coming through as raw bytes
#     ("Joan Mir\xf3"). NOTE(review): latin1 is inferred from that byte; the
#     file may actually be windows-1252.
# Output recorded further down in this document predates this change and will
# differ once the script is re-run.
# NOTE(review): the absolute path below only resolves on the author's machine.
Banco_Moma <- read_delim(
  "C:/Users/wandr/OneDrive/Documentos/ENEM_UNIRIO/2022.2/R/Base_de_dados-master/arte_MOMA.csv",
  delim = ";",
  locale = locale(decimal_mark = ",", grouping_mark = ".", encoding = "latin1"),
  escape_double = FALSE,
  trim_ws = TRUE
)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Banco_Moma)
}
str(Banco_Moma)
## spec_tbl_df [2,253 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ...1 : num [1:2253] 1 2 3 4 5 6 7 8 9 10 ...
## $ title : chr [1:2253] "Rope and People, I" "Fire in the Evening" "Portrait of an Equilibrist" "Guitar" ...
## $ artist : chr [1:2253] "Joan Mir\xf3" "Paul Klee" "Paul Klee" "Pablo Picasso" ...
## $ artist_bio : chr [1:2253] "(Spanish, 1893-1983)" "(German, born Switzerland. 1879-1940)" "(German, born Switzerland. 1879-1940)" "(Spanish, 1881-1973)" ...
## $ artist_birth_year: num [1:2253] 1893 1879 1879 1881 1880 ...
## $ artist_death_year: num [1:2253] 1983 1940 1940 1973 1946 ...
## $ num_artists : num [1:2253] 1 1 1 1 1 1 1 1 1 1 ...
## $ n_female_artists : num [1:2253] 0 0 0 0 0 0 0 0 0 0 ...
## $ n_male_artists : num [1:2253] 1 1 1 1 1 1 1 1 1 1 ...
## $ artist_gender : chr [1:2253] "Male" "Male" "Male" "Male" ...
## $ year_acquired : num [1:2253] 1936 1970 1966 1955 1939 ...
## $ year_created : num [1:2253] 1935 1929 1927 1919 1925 ...
## $ circumference_cm : logi [1:2253] NA NA NA NA NA NA ...
## $ depth_cm : num [1:2253] NA NA NA NA NA NA NA NA NA NA ...
## $ diameter_cm : logi [1:2253] NA NA NA NA NA NA ...
## $ height_cm : num [1:2253] 1048 338 603 2159 508 ...
## $ length_cm : logi [1:2253] NA NA NA NA NA NA ...
## $ width_cm : num [1:2253] 746 333 368 787 54 ...
## $ seat_height_cm : logi [1:2253] NA NA NA NA NA NA ...
## $ purchase : logi [1:2253] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ gift : logi [1:2253] TRUE FALSE FALSE TRUE TRUE FALSE ...
## $ exchange : logi [1:2253] FALSE FALSE FALSE FALSE TRUE FALSE ...
## $ classification : chr [1:2253] "Painting" "Painting" "Painting" "Painting" ...
## $ department : chr [1:2253] "Painting & Sculpture" "Painting & Sculpture" "Painting & Sculpture" "Painting & Sculpture" ...
## - attr(*, "spec")=
## .. cols(
## .. ...1 = col_double(),
## .. title = col_character(),
## .. artist = col_character(),
## .. artist_bio = col_character(),
## .. artist_birth_year = col_double(),
## .. artist_death_year = col_double(),
## .. num_artists = col_double(),
## .. n_female_artists = col_double(),
## .. n_male_artists = col_double(),
## .. artist_gender = col_character(),
## .. year_acquired = col_double(),
## .. year_created = col_double(),
## .. circumference_cm = col_logical(),
## .. depth_cm = col_number(),
## .. diameter_cm = col_logical(),
## .. height_cm = col_number(),
## .. length_cm = col_logical(),
## .. width_cm = col_number(),
## .. seat_height_cm = col_logical(),
## .. purchase = col_logical(),
## .. gift = col_logical(),
## .. exchange = col_logical(),
## .. classification = col_character(),
## .. department = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Show the first rows of the table as a flextable with the "tron legacy" theme.
Banco_Moma %>%
  head() %>%
  data.frame() %>%
  flextable() %>%
  theme_tron_legacy()
...1 | title | artist | artist_bio | artist_birth_year | artist_death_year | num_artists | n_female_artists | n_male_artists | artist_gender | year_acquired | year_created | circumference_cm | depth_cm | diameter_cm | height_cm | length_cm | width_cm | seat_height_cm | purchase | gift | exchange | classification | department |
1 | Rope and People, I | Joan Mir | (Spanish, 1893-1983) | 1,893 | 1,983 | 1 | 0 | 1 | Male | 1,936 | 1,935 | 1,048 | 746 | FALSE | TRUE | FALSE | Painting | Painting & Sculpture | |||||
2 | Fire in the Evening | Paul Klee | (German, born Switzerland. 1879-1940) | 1,879 | 1,940 | 1 | 0 | 1 | Male | 1,970 | 1,929 | 338 | 333 | FALSE | FALSE | FALSE | Painting | Painting & Sculpture | |||||
3 | Portrait of an Equilibrist | Paul Klee | (German, born Switzerland. 1879-1940) | 1,879 | 1,940 | 1 | 0 | 1 | Male | 1,966 | 1,927 | 603 | 368 | FALSE | FALSE | FALSE | Painting | Painting & Sculpture | |||||
4 | Guitar | Pablo Picasso | (Spanish, 1881-1973) | 1,881 | 1,973 | 1 | 0 | 1 | Male | 1,955 | 1,919 | 2,159 | 787 | FALSE | TRUE | FALSE | Painting | Painting & Sculpture | |||||
5 | Grandmother | Arthur Dove | (American, 1880-1946) | 1,880 | 1,946 | 1 | 0 | 1 | Male | 1,939 | 1,925 | 508 | 54 | FALSE | TRUE | TRUE | Painting | Painting & Sculpture | |||||
6 | ""M'Amenez-y"" | Francis Picabia | (French, 1879-1953) | 1,879 | 1,953 | 1 | 0 | 1 | Male | 1,968 | 1,919 | 1,292 | 899 | FALSE | FALSE | FALSE | Painting | Painting & Sculpture |
# Report the number of rows (one row per painting) in the dataset.
noquote(
  paste("Existem", nrow(Banco_Moma), "pinturas no MoMA.")
)
## [1] Existem 2253 pinturas no MoMA.
# Report the number of columns (variables) in the dataset.
noquote(
  paste("E existem", ncol(Banco_Moma), "variáveis no banco de pinturas do MoMA.")
)
## [1] E existem 24 variáveis no banco de pinturas do MoMA.
# Count the works acquired in the earliest acquisition year and report that
# year. Rows with a missing year_acquired are ignored in both the minimum and
# the count.
paste(
  "Temos",
  with(Banco_Moma, sum(year_acquired == min(year_acquired, na.rm = TRUE), na.rm = TRUE)),
  "quadro(s) como o(s) mais antigo(s) do acervo. Ele(s) é(são) do ano",
  min(Banco_Moma$year_acquired, na.rm = TRUE),
  "."
)
## [1] "Temos 2 quadro(s) como o(s) mais antigo(s) do acervo. Ele(s) é(são) do ano 1930 ."
# Titles and artists of the works acquired in the earliest acquisition year.
# The filter is evaluated once and reused; the original repeated it five times
# and pasted the first title twice, so the sentence printed
# "House by the Railroad" two times in a row.
# first()/last() name exactly two works, matching the two earliest
# acquisitions counted just above; with more ties the middle ones are omitted.
mais_antigas <- Banco_Moma %>%
  filter(year_acquired == min(year_acquired, na.rm = TRUE))
paste("Seu(s) título(s) é(são):",
      first(mais_antigas$title),
      "e",
      last(mais_antigas$title),
      ", do(s) autor(es)",
      first(mais_antigas$artist),
      "e",
      last(mais_antigas$artist),
      "respectivamente.")
## [1] "Seu(s) título(s) é(são): House by the Railroad e Seated Nude , do(s) autor(es) Edward Hopper e Bernard Karfiol respectivamente."
# Describe the work(s) with the earliest creation year: year, artist and title.
# which() drops rows whose year_created is missing, as a filter would.
noquote(paste(
  "A obra mais antiga do acervo do MoMA é do ano",
  min(Banco_Moma$year_created, na.rm = TRUE),
  ", feita por",
  with(Banco_Moma, artist[which(year_created == min(year_created, na.rm = TRUE))]),
  "e tem o título",
  with(Banco_Moma, title[which(year_created == min(year_created, na.rm = TRUE))])
))
## [1] A obra mais antiga do acervo do MoMA é do ano 1872 , feita por Odilon Redon e tem o título Landscape at Daybreak
# Number of distinct values in the artist column (one row per unique artist).
noquote(paste(
  "Existem ",
  Banco_Moma %>% distinct(artist) %>% nrow(),
  "artistas distintos no MoMA."
))
## [1] Existem 989 artistas distintos no MoMA.
# Artist with the most rows in the dataset, and how many rows that is.
# count(sort = TRUE) orders the tally by descending n, so the first entry is
# the top artist.
noquote(paste(
  "O/A artista que tem mais obras na coleção do MoMA é:",
  first(count(Banco_Moma, artist, sort = TRUE)$artist),
  "com um total de",
  first(count(Banco_Moma, artist, sort = TRUE)$n),
  "obras."
))
## [1] O/A artista que tem mais obras na coleção do MoMA é: Pablo Picasso com um total de 55 obras.
# Number of works by the most represented artist (largest per-artist tally).
noquote(paste(
  "São",
  first(count(Banco_Moma, artist, sort = TRUE)$n),
  "obras deste artista."
))
## [1] São 55 obras deste artista.
# Tally works per artist gender and label each count with "obras".
# paste() already returns character, so no further conversion is needed.
Banco_Moma %>%
  count(artist_gender) %>%
  mutate(n = paste(n, "obras"))
## # A tibble: 3 × 2
## artist_gender n
## <chr> <chr>
## 1 Female 252 obras
## 2 Male 1991 obras
## 3 <NA> 10 obras
# Tally distinct artists per gender: reduce to one row per (gender, artist)
# pair, count the pairs within each gender, and label the counts "artistas".
Banco_Moma %>%
  distinct(artist_gender, artist) %>%
  count(artist_gender) %>%
  mutate(n = paste(n, "artistas"))
## # A tibble: 3 × 2
## artist_gender n
## <chr> <chr>
## 1 Female 143 artistas
## 2 Male 837 artistas
## 3 <NA> 9 artistas
# Year in which the largest number of works was acquired.
noquote(paste(
  "Foram adquiridas mais obras no ano",
  first(count(Banco_Moma, year_acquired, sort = TRUE)$year_acquired)
))
## [1] Foram adquiridas mais obras no ano 1985
# Creation year with the most works in the dataset, and the size of that tally.
noquote(paste(
  "No ano",
  first(count(Banco_Moma, year_created, sort = TRUE)$year_created),
  "foram criadas",
  first(count(Banco_Moma, year_created, sort = TRUE)$n),
  "obras."
))
## [1] No ano 1977 foram criadas 57 obras.
# Earliest acquisition(s) of a work by a female artist.
# The original grouped by year_acquired before comparing each row with
# min(year_acquired). Inside a group every row already equals its group's
# minimum, so that filter kept every row and the result listed all 252 works
# by women. Filtering by gender first and then taking the overall minimum
# keeps only the earliest acquisition year.
# select() replaces summarise(), which is not meant to return several rows per
# group.
Banco_Moma %>%
  filter(artist_gender == "Female") %>%
  filter(year_acquired == min(year_acquired, na.rm = TRUE)) %>%
  select(year_acquired, title, artist, year_created)
## # A tibble: 1 × 4
## year_acquired title artist year_created
## <dbl> <chr> <chr> <dbl>
## 1 1937 Landscape, 47 Natalia Goncharova 1912
A primeira pintura de uma artista mulher adquirida pelo MoMA foi "Landscape, 47", de Natalia Goncharova, criada em 1912 e adquirida em 1937.
# Lifespan of each work's artist: death year minus birth year (NA when either
# year is missing). Kept both as a standalone vector and as a new column.
idade <- with(Banco_Moma, artist_death_year - artist_birth_year)
Banco_Moma$idade <- idade
# List artists from the longest to the shortest lifespan (one row per work).
Banco_Moma %>%
  arrange(desc(idade)) %>%
  select(idade, artist)
## # A tibble: 2,253 × 2
## idade artist
## <dbl> <chr>
## 1 102 Dorothea Tanning
## 2 101 Will Barnet
## 3 101 Clara McDonald Williamson
## 4 101 Theodore Lux Feininger
## 5 99 Georgia O'Keeffe
## 6 99 Georgia O'Keeffe
## 7 99 Enrico Donati
## 8 99 Enrico Donati
## 9 99 Georgia O'Keeffe
## 10 99 Georgia O'Keeffe
## # … with 2,243 more rows
A artista que viveu mais tempo foi Dorothea Tanning, com 102 anos.
# Mean of the idade column over all rows, ignoring missing values.
summarise(
  Banco_Moma,
  media = mean(idade, na.rm = TRUE)
)
## # A tibble: 1 × 1
## media
## <dbl>
## 1 74.7
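A idade média dos artistas ao morrer é de 74,7 anos (média calculada por obra, ignorando os casos sem ano de nascimento ou de morte registrado).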
# Mean of the idade column within each artist gender, ignoring missing values.
summarise(
  group_by(Banco_Moma, artist_gender),
  media = mean(idade, na.rm = TRUE)
)
## # A tibble: 3 × 2
## artist_gender media
## <chr> <dbl>
## 1 Female 74.0
## 2 Male 74.7
## 3 <NA> 72
Sim, em média, os artistas homens viveram um pouco mais (74,7 anos) do que as artistas mulheres (74,0 anos).
# Keep works below the height/width cut-offs, then classify each one by its
# height-to-width ratio: taller than wide, wider than tall, or exactly square.
moma_dim <- Banco_Moma %>%
  filter(height_cm < 600 & width_cm < 760) %>%
  mutate(hw_ratio = height_cm / width_cm) %>%
  mutate(
    hw_cat = case_when(
      hw_ratio > 1 ~ "mais alto que largo",
      hw_ratio < 1 ~ "mais largo que alto",
      hw_ratio == 1 ~ "quadrado perfeito"
    )
  )
library(ggthemes)
# Scatter plot of width against height, coloured by shape category.
# theme_fivethirtyeight() is applied before theme(axis.title = ...) so the
# axis-title override added afterwards takes effect.
ggplot(
  data = moma_dim,
  mapping = aes(x = width_cm, y = height_cm, colour = hw_cat)
) +
  geom_point(alpha = 0.9) +
  scale_colour_manual(
    name = "",
    values = c("#e0cc16", "#c216e0", "#eb0918")
  ) +
  labs(
    title = "Pinturas do MoMA, altas e largas",
    x = "Largura",
    y = "Altura"
  ) +
  theme_fivethirtyeight() +
  theme(axis.title = element_text())