# Minicurso de Visualizacao de Dados
# Carregando os pacotes
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.89 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.5
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(LearnBayes)
## Warning: package 'LearnBayes' was built under R version 4.0.3
# Load the example dataset used throughout the script
data(list = "studentdata")
# Open the help page describing the dataset
help("studentdata")
## starting httpd help server ...
## done
# Inspect the structure of the dataset
str(object = studentdata)
## 'data.frame': 657 obs. of 11 variables:
## $ Student: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Height : num 67 64 61 61 70 63 61 64 66 65 ...
## $ Gender : Factor w/ 2 levels "female","male": 1 1 1 1 2 1 1 1 1 2 ...
## $ Shoes : num 10 20 12 3 4 NA 12 25 30 10 ...
## $ Number : int 5 7 2 6 5 3 3 4 3 7 ...
## $ Dvds : num 10 5 6 40 6 5 53 20 40 22 ...
## $ ToSleep: num -2.5 1.5 -1.5 2 0 1 1.5 0.5 -0.5 2.5 ...
## $ WakeUp : num 5.5 8 7.5 8.5 9 8.5 7.5 7.5 7 8.5 ...
## $ Haircut: num 60 0 48 10 15 25 35 25 30 12 ...
## $ Job : num 30 20 0 0 17.5 0 20 0 25 0 ...
## $ Drink : Factor w/ 3 levels "milk","pop","water": 3 2 1 3 2 3 3 2 3 1 ...
# Drop the Student identifier column; every positional column index used
# later in the script refers to the data frame *after* this removal
studentdata[["Student"]] <- NULL
### Plots for one variable ###
# Quantitative variable: Job
# Base plot object: only data and the x mapping, no layer yet
j <- ggplot(data = studentdata, mapping = aes(x = Job))
j

# Histogram with the default binning
j + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 32 rows containing non-finite values (stat_bin).

# Choosing the number of bars (bins)
j + geom_histogram(bins = 10)
## Warning: Removed 32 rows containing non-finite values (stat_bin).

# Choosing the width of each bar instead
j + geom_histogram(binwidth = 5)
## Warning: Removed 32 rows containing non-finite values (stat_bin).

# Aesthetic arguments:
#   alpha:    transparency, a value between 0 and 1
#   color:    outline colour
#   fill:     fill colour
#   linetype: line type of the outline
#   size:     thickness of the outline
j + geom_histogram(
  fill = "olivedrab4",
  color = "black",
  alpha = 1,
  linetype = 4,
  size = 1.2,
  binwidth = 5
)
## Warning: Removed 32 rows containing non-finite values (stat_bin).

# Density plot
j + geom_density()
## Warning: Removed 32 rows containing non-finite values (stat_density).

# Tuning the smoothing of the density estimate
j + geom_density(adjust = 2)
## Warning: Removed 32 rows containing non-finite values (stat_density).

# Same density, now with the aesthetic arguments set
j + geom_density(
  fill = "olivedrab4",
  color = "black",
  alpha = 1,
  linetype = 4,
  size = 1.2,
  adjust = 2
)
## Warning: Removed 32 rows containing non-finite values (stat_density).
# Boxplot of Job (mapped to the y axis)
j2 <- ggplot(data = studentdata, mapping = aes(y = Job))
j2 + geom_boxplot()
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

# Changing the width of the box
j2 + geom_boxplot(width = 2)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

# Adding outline, fill and transparency settings
j2 + geom_boxplot(
  fill = "olivedrab4",
  color = "black",
  alpha = 1,
  linetype = 1,
  size = 1,
  width = 2
)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

# Customising the outliers
#   outlier.color:  colour of the outlier markers
#   outlier.stroke: stroke width of the outlier markers
#   outlier.shape:  shape of the outlier markers
j2 + geom_boxplot(
  fill = "olivedrab4",
  color = "black",
  alpha = 1,
  linetype = 1,
  size = 1,
  width = 2,
  outlier.shape = 4,
  outlier.stroke = 2,
  outlier.color = "olivedrab4"
)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
# Qualitative variable: Drink
d <- ggplot(data = studentdata, mapping = aes(x = Drink))
# Bar chart (missing values show up as their own bar)
d + geom_bar()

# Removing the NAs from the chart
# Option 1: filter inside a pipeline and plot straight away
studentdata %>%
  filter(!is.na(Drink)) %>%
  ggplot(aes(x = Drink)) +
  geom_bar()

# Option 2: store the filtered data frame and reuse it
# (studentdata2 is used by several later plots)
studentdata2 <- filter(studentdata, !is.na(Drink))
d2 <- ggplot(data = studentdata2, mapping = aes(x = Drink))
d2 + geom_bar()

# Bar chart with outline, fill and transparency settings
d2 + geom_bar(
  fill = "olivedrab4",
  color = "black",
  alpha = 1,
  linetype = 1,
  size = 1
)
### Plots for two variables ###
# Quantitative x quantitative
# Correlation matrix
# Columns 2 and 10 are dropped because they are qualitative (factors);
# only rows without any missing value are used
correl <- cor(
  x = studentdata[, -c(2, 10)],
  use = "complete.obs"
)
corrplot(corr = correl)

# method: circle, square, ellipse, number, shade, color or pie
# type:   full, lower or upper
# tl.col: colour of the text labels
corrplot(
  corr = correl,
  type = "upper",
  method = "pie",
  tl.col = "black"
)
# Scatter plot
# Height against Shoes
hs <- ggplot(
  data = studentdata,
  mapping = aes(x = Shoes, y = Height)
)
hs + geom_point()
## Warning: Removed 32 rows containing missing values (geom_point).

# Adding a fitted smoothing curve
hs +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).

# Same plot with customised points
hs +
  geom_point(
    color = "olivedrab4",
    alpha = 0.5,
    size = 3,
    shape = 15
  ) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).
# Qualitative x qualitative (Gender and Drink)
gd <- ggplot(
  data = studentdata2,
  mapping = aes(x = Gender, fill = Drink)
)
# Bar charts
# Bars stacked to 100%
gd + geom_bar(position = "fill")

# Side-by-side bars
gd + geom_bar(position = "dodge")

# 100% stacked bars with customised outline and transparency
gd + geom_bar(
  position = "fill",
  color = "black",
  alpha = 0.3,
  linetype = 2,
  size = 0.8
)
# Qualitative x quantitative (Gender and Job)
gj <- ggplot(
  data = studentdata,
  mapping = aes(x = Gender, y = Job)
)
# Boxplot of Job for each Gender
gj + geom_boxplot(outlier.shape = 3)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

# Histogram of Job, filled by Gender
gj2 <- ggplot(
  data = studentdata,
  mapping = aes(x = Job, fill = Gender)
)
# The outline colour is given by name. The previous numeric value 65 was a
# palette index that only resolved to black through recycling on the default
# 8-colour palette, and would silently change if palette() were modified.
gj2 + geom_histogram(color = "black")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 32 rows containing non-finite values (stat_bin).

# Overlapping densities, semi-transparent so both groups stay visible
gj2 + geom_density(alpha = 0.5)
## Warning: Removed 32 rows containing non-finite values (stat_density).

# Adding jittered points on top of the boxplot
gj +
  geom_boxplot(outlier.color = "red") +
  geom_jitter(
    color = "black",
    alpha = 0.3,
    size = 0.7,
    shape = 4
  )
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 32 rows containing missing values (geom_point).

# Adding a violin plot behind a narrow boxplot
gj +
  geom_violin(color = "grey", linetype = 2) +
  geom_boxplot(width = 0.3)
## Warning: Removed 32 rows containing non-finite values (stat_ydensity).
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
### Three variables ###
# Height, Shoes, Gender
# Categories identified by colour
hsg <- ggplot(
  data = studentdata,
  mapping = aes(x = Shoes, y = Height, color = Gender)
)
hsg +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).

# Categories identified by point shape
hsg2 <- ggplot(
  data = studentdata,
  mapping = aes(x = Shoes, y = Height, shape = Gender)
)
hsg2 +
  geom_point(size = 2) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).

# Colour by drink and point shape by gender
hsgd <- ggplot(
  data = studentdata2,
  mapping = aes(
    x = Shoes,
    y = Height,
    shape = Gender,
    color = Drink
  )
)
hsgd + geom_point(size = 2)
## Warning: Removed 29 rows containing missing values (geom_point).

# Small multiples (separate panels)
# split by the Gender variable
hsg2 +
  geom_point() +
  geom_smooth() +
  facet_wrap(~Gender)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).

# Colour by drink and one panel per gender
hsd <- ggplot(
  data = studentdata2,
  mapping = aes(x = Shoes, y = Height, color = Drink)
)
hsd +
  geom_point() +
  facet_wrap(~Gender)
## Warning: Removed 29 rows containing missing values (geom_point).
# Pairwise plots
# Columns are selected by name; these are positions 1:3 and 10 of the data
# frame once the Student column has been removed
ggpairs(data = studentdata2[, c("Height", "Gender", "Shoes", "Drink")])
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 29 rows containing missing values
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## Warning: Removed 29 rows containing missing values (geom_point).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_density).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).

# Colouring every panel by the Gender variable
ggpairs(
  data = studentdata2[, c("Height", "Gender", "Shoes", "Drink")],
  mapping = aes(color = Gender)
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 29 rows containing missing values
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## Warning: Removed 29 rows containing missing values (geom_point).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_density).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
# Heatmap
# Quantitative variables only
library(superheat)
## Warning: package 'superheat' was built under R version 4.0.5
# First 25 rows; columns 1 and 3:5 (Height, Shoes, Number, Dvds once the
# Student column has been removed). `scale` is spelled TRUE rather than T
# because T is an ordinary variable that can be reassigned.
superheat(studentdata[1:25, c(1, 3:5)], scale = TRUE)

# Plain Height x Shoes scatter plot again
hs + geom_point()
## Warning: Removed 32 rows containing missing values (geom_point).
### Customising the plots ###
# The `labs` function
str(studentdata)
## 'data.frame': 657 obs. of 10 variables:
## $ Height : num 67 64 61 61 70 63 61 64 66 65 ...
## $ Gender : Factor w/ 2 levels "female","male": 1 1 1 1 2 1 1 1 1 2 ...
## $ Shoes : num 10 20 12 3 4 NA 12 25 30 10 ...
## $ Number : int 5 7 2 6 5 3 3 4 3 7 ...
## $ Dvds : num 10 5 6 40 6 5 53 20 40 22 ...
## $ ToSleep: num -2.5 1.5 -1.5 2 0 1 1.5 0.5 -0.5 2.5 ...
## $ WakeUp : num 5.5 8 7.5 8.5 9 8.5 7.5 7.5 7 8.5 ...
## $ Haircut: num 60 0 48 10 15 25 35 25 30 12 ...
## $ Job : num 30 20 0 0 17.5 0 20 0 25 0 ...
## $ Drink : Factor w/ 3 levels "milk","pop","water": 3 2 1 3 2 3 3 2 3 1 ...
# Translate the factor levels so the legends read in Portuguese.
# The replacement vectors follow the order printed by levels().
levels(studentdata2$Gender)
## [1] "female" "male"
levels(studentdata2$Gender) <- c("Feminino", "Masculino")
levels(studentdata2$Drink)
## [1] "milk" "pop" "water"
levels(studentdata2$Drink) <- c("Leite", "Refrigerante", "Água")
# Rebuild the plot so it picks up the relabelled factors
hsgd <- ggplot(
  data = studentdata2,
  mapping = aes(
    x = Shoes,
    y = Height,
    color = Drink,
    shape = Gender
  )
)
# The Height values (61-70 in the str() output above) are in inches, so the
# axis label says "polegadas" rather than the former, incorrect "cm".
hsgd +
  geom_point(size = 2) +
  labs(
    title = "Relação entre Altura e Pares de Sapato",
    subtitle = "Avaliação por gênero e bebida",
    caption = "Fonte: pacote LearnBayes",
    x = "Número de Pares de Sapatos",
    y = "Altura (polegadas)",
    shape = "Gênero",
    color = "Bebida"
  )
## Warning: Removed 29 rows containing missing values (geom_point).

# Five variables in one plot: Shoes, Height, Drink, Gender, Job
hsgdj <- ggplot(
  data = studentdata2,
  mapping = aes(
    x = Shoes,
    y = Height,
    color = Drink,
    shape = Gender,
    size = Job
  )
)
# Label fixes: Height is measured in inches ("polegadas", not "cm"), and the
# size legend describes weekly working hours ("Jornada"), not a newspaper
# ("Jornal").
hsgdj +
  geom_point() +
  labs(
    title = "Relação entre Altura e Pares de Sapato",
    subtitle = "Avaliação por gênero e bebida",
    caption = "Fonte: pacote LearnBayes",
    x = "Número de Pares de Sapatos",
    y = "Altura (polegadas)",
    shape = "Gênero",
    color = "Bebida",
    size = "Jornada de trabalho semanal"
  )
## Warning: Removed 59 rows containing missing values (geom_point).

# Extra: a title with one word in italics, built as a plotmath expression
titulo <- expression(
  paste("Relação entre ", italic("Altura"), " e Pares de Sapato")
)
hsgdj +
  geom_point() +
  labs(
    title = titulo,
    subtitle = "Avaliação por gênero e bebida",
    caption = "Fonte: pacote LearnBayes",
    x = "Número de Pares de Sapatos",
    y = "Altura (polegadas)",
    shape = "Gênero",
    color = "Bebida",
    size = "Jornada de trabalho semanal"
  )
## Warning: Removed 59 rows containing missing values (geom_point).

# Colour palettes
# Base plot reused by the palette and theme examples that follow.
# Label fixes: Height is in inches ("polegadas", not "cm") and the size
# legend describes weekly working hours ("Jornada", not "Jornal").
p <- hsgdj +
  geom_point() +
  labs(
    title = titulo,
    subtitle = "Avaliação por gênero e bebida",
    caption = "Fonte: pacote LearnBayes",
    x = "Número de Pares de Sapatos",
    y = "Altura (polegadas)",
    shape = "Gênero",
    color = "Bebida",
    size = "Jornada de trabalho semanal"
  )
# ColorBrewer palette
p + scale_color_brewer(palette = "Set2")
## Warning: Removed 59 rows containing missing values (geom_point).

# Grey scale
p + scale_color_grey()
## Warning: Removed 59 rows containing missing values (geom_point).

# Discrete viridis scale
p + scale_color_viridis_d()
## Warning: Removed 59 rows containing missing values (geom_point).

# Palettes from the tvthemes package
library(tvthemes)
## Warning: package 'tvthemes' was built under R version 4.0.5
p + scale_color_spongeBob()
## Warning: Removed 59 rows containing missing values (geom_point).

p + scale_color_simpsons()
## Warning: Removed 59 rows containing missing values (geom_point).

# Palettes from the harrypotter package
library(harrypotter)
## Warning: package 'harrypotter' was built under R version 4.0.5
# https://github.com/aljrico/harrypotter
# TRUE is spelled out because T is an ordinary, reassignable variable
p + scale_color_hp(
  discrete = TRUE,
  option = "HarryPotter"
)
## Warning: Removed 59 rows containing missing values (geom_point).

# NOTE: for bar charts, histograms, densities
# or boxplots, use the scale_fill_ functions instead
# Font
# family: serif, mono, sans, symbol
# face:   plain, italic, bold, bold.italic
p +
  scale_color_brewer(palette = "Set2") +
  theme(
    text = element_text(
      face = "bold",
      family = "serif",
      size = 12
    )
  )
## Warning: Removed 59 rows containing missing values (geom_point).

# Legend position
p +
  scale_color_brewer(palette = "Set2") +
  theme(
    legend.position = "right",
    text = element_text(
      face = "bold",
      family = "serif",
      size = 12
    )
  )
## Warning: Removed 59 rows containing missing values (geom_point).

# Ready-made themes
# Themes shipped with ggplot2
p + theme_bw()
## Warning: Removed 59 rows containing missing values (geom_point).

p + theme_classic()
## Warning: Removed 59 rows containing missing values (geom_point).

p + theme_dark()
## Warning: Removed 59 rows containing missing values (geom_point).

# tvthemes themes, each paired with its matching colour scale
p +
  theme_avatar() +
  scale_color_avatar()
## Warning: Removed 59 rows containing missing values (geom_point).

p +
  theme_simpsons() +
  scale_color_simpsons()
## Warning: Removed 59 rows containing missing values (geom_point).

# Themes from the hrbrthemes package
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.0.5
##
## Attaching package: 'hrbrthemes'
## The following objects are masked from 'package:tvthemes':
##
## import_roboto_condensed, import_titillium_web
p + theme_ft_rc()
## Warning: Removed 59 rows containing missing values (geom_point).

p + theme_ipsum()
## Warning: Removed 59 rows containing missing values (geom_point).

p + theme_ipsum_tw()
## Warning: Removed 59 rows containing missing values (geom_point).

# Interactive versions of the plots with plotly
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Plain-text title here (the other plots use the `titulo` expression).
# Label fixes: Height is in inches ("polegadas", not "cm") and the size
# legend describes weekly working hours ("Jornada", not "Jornal").
p2 <- hsgdj +
  geom_point() +
  labs(
    title = "Relação entre Altura e Pares de Sapato",
    subtitle = "Avaliação por gênero e bebida",
    caption = "Fonte: pacote LearnBayes",
    x = "Número de Pares de Sapatos",
    y = "Altura (polegadas)",
    shape = "Gênero",
    color = "Bebida",
    size = "Jornada de trabalho semanal"
  )
ggplotly(p2)
# Boxplot of Job by Gender, also converted to an interactive widget
p3 <- gj + geom_boxplot(outlier.shape = 3)
ggplotly(p3)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
# Exporting plots to Word and PowerPoint
library(eoffice)
## Warning: package 'eoffice' was built under R version 4.0.5
# PowerPoint file, written with a relative file name
topptx(
  p3,
  filename = "p3.pptx",
  height = 4,
  width = 6
)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
# Show the working directory (where the exported file was saved)
getwd()
## [1] "C:/Users/Leticia/Google Drive/Cursos Ministrados/(2021)CursoVisualizacaoDados"
# Word document with the same plot and dimensions
todocx(
  p3,
  filename = "p3.docx",
  height = 4,
  width = 6
)
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
# Setting the axis ranges and tick marks
# breaks: tick positions, built here with seq(from, to, by)
# limits: c(lower, upper)
hs +
  geom_point() +
  scale_y_continuous(
    limits = c(50, 85),
    breaks = seq(from = 50, to = 85, by = 2)
  ) +
  scale_x_continuous(
    limits = c(0, 170),
    breaks = seq(from = 0, to = 170, by = 20)
  )
## Warning: Removed 32 rows containing missing values (geom_point).
