# Minicurso de Visualizacao de Dados

# Carregando os pacotes
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.89 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.5
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(LearnBayes)
## Warning: package 'LearnBayes' was built under R version 4.0.3
# Load the example data set (`studentdata` is found through the LearnBayes
# package attached above)
data(studentdata)

# Open the help page for the data set
help("studentdata")

# Inspect the structure of the data set
str(studentdata)
# Recorded output of the call above:
## 'data.frame':    657 obs. of  11 variables:
##  $ Student: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Height : num  67 64 61 61 70 63 61 64 66 65 ...
##  $ Gender : Factor w/ 2 levels "female","male": 1 1 1 1 2 1 1 1 1 2 ...
##  $ Shoes  : num  10 20 12 3 4 NA 12 25 30 10 ...
##  $ Number : int  5 7 2 6 5 3 3 4 3 7 ...
##  $ Dvds   : num  10 5 6 40 6 5 53 20 40 22 ...
##  $ ToSleep: num  -2.5 1.5 -1.5 2 0 1 1.5 0.5 -0.5 2.5 ...
##  $ WakeUp : num  5.5 8 7.5 8.5 9 8.5 7.5 7.5 7 8.5 ...
##  $ Haircut: num  60 0 48 10 15 25 35 25 30 12 ...
##  $ Job    : num  30 20 0 0 17.5 0 20 0 25 0 ...
##  $ Drink  : Factor w/ 3 levels "milk","pop","water": 3 2 1 3 2 3 3 2 3 1 ...

# Drop the `Student` column; every later positional column index in this
# script refers to the data frame WITHOUT this column.
studentdata[["Student"]] <- NULL

### Plots for one variable ###

# Quantitative variable: Job
# Base plot object reused by every histogram and density example below.
j <- ggplot(data = studentdata, mapping = aes(x = Job))
j # printed on its own, before any geom is added

# Histogram with the default binning
# (recorded run: stat_bin() reported using `bins = 30`)
j + geom_histogram()

# Setting the number of bars (bins)
j + geom_histogram(bins = 10)

# Setting the width of the bars
j + geom_histogram(binwidth = 5)

# Aesthetic arguments:
#   alpha:    transparency, a value between 0 and 1
#   color:    colour of the outline
#   fill:     fill colour
#   linetype: line type
#   size:     line size
j + geom_histogram(
  fill = "olivedrab4",
  colour = "black",
  alpha = 1,
  linetype = 4,
  size = 1.2,
  binwidth = 5
)

# Density plot
j + geom_density()

# Adjusting the smoothing of the density curve
j + geom_density(adjust = 2)

j + geom_density(
  fill = "olivedrab4",
  colour = "black",
  alpha = 1,
  linetype = 4,
  size = 1.2,
  adjust = 2
)
# Recorded run: each plot above warned that 32 rows containing non-finite
# values were removed (stat_bin / stat_density).

# Boxplot of Job (mapped to the y axis)
j2 <- ggplot(data = studentdata, mapping = aes(y = Job))
j2 + geom_boxplot()

# Changing the width of the box
j2 + geom_boxplot(width = 2)

j2 + geom_boxplot(
  fill = "olivedrab4",
  colour = "black",
  alpha = 1,
  linetype = 1,
  size = 1,
  width = 2
)

# Changing how the outliers are drawn
#   outlier.color:  colour of the outlier markers
#   outlier.stroke: stroke width of the outlier markers
#   outlier.shape:  shape of the outlier markers
j2 + geom_boxplot(
  fill = "olivedrab4",
  colour = "black",
  alpha = 1,
  linetype = 1,
  size = 1,
  width = 2,
  outlier.shape = 4,
  outlier.stroke = 2,
  outlier.colour = "olivedrab4"
)
# Recorded run: each boxplot above warned that 32 rows containing non-finite
# values were removed (stat_boxplot).

# Qualitative variable: Drink
d <- ggplot(data = studentdata, mapping = aes(x = Drink))

# Bar chart
d + geom_bar()

# Removing the NAs from the chart

# Option 1: filter the rows and plot in a single expression
ggplot(
  data = filter(studentdata, !is.na(Drink)),
  mapping = aes(x = Drink)
) +
  geom_bar()

# Option 2: keep the filtered data in its own object. `studentdata2` is
# reused by later sections of this script.
studentdata2 <- filter(studentdata, !is.na(Drink))

d2 <- ggplot(data = studentdata2, mapping = aes(x = Drink))
d2 + geom_bar()

d2 + geom_bar(
  fill = "olivedrab4",
  colour = "black",
  alpha = 1,
  size = 1,
  linetype = 1
)

### Plots for two variables ###

# Quantitative x quantitative

# Correlation matrix of the numeric columns.
# The columns are selected by type instead of by position, so the selection
# no longer depends on the column order or on which columns were dropped
# earlier in the script. The factor columns (Gender and Drink) are excluded,
# exactly as the former positional index -c(2, 10) did.
# use = "complete.obs" computes the correlations from complete rows only.
is_num <- vapply(studentdata, is.numeric, logical(1))
correl <- cor(studentdata[, is_num, drop = FALSE],
              use = "complete.obs")

corrplot(correl)

# method: circle, square, ellipse, number, shade, color and pie
# type:   full, lower and upper
# tl.col: colour of the text labels
corrplot(correl,
         method = "pie",
         type = "upper",
         tl.col = "black")

# Scatter plot: Height against Shoes
# `hs` is reused by later sections of this script.
hs <- ggplot(data = studentdata, mapping = aes(x = Shoes, y = Height))

hs + geom_point()

# Adding a fitted curve
# (recorded run: geom_smooth() used method = 'loess' and formula 'y ~ x')
hs +
  geom_point() +
  geom_smooth()

hs +
  geom_point(
    size = 3,
    shape = 15,
    colour = "olivedrab4",
    alpha = 0.5
  ) +
  geom_smooth()
# Recorded run: each plot above warned that 32 rows with missing or
# non-finite values were removed (geom_point / stat_smooth).

# Qualitative x qualitative (Gender and Drink)
# Uses `studentdata2`, i.e. the rows where Drink is not missing.
gd <- ggplot(data = studentdata2, mapping = aes(x = Gender, fill = Drink))

# Bar charts

# Bars stacked to 100%
gd + geom_bar(position = "fill")

# Side-by-side bars
gd + geom_bar(position = "dodge")

# Stacked to 100% again, now with custom outline and transparency
gd + geom_bar(
  colour = "black",
  linetype = 2,
  size = 0.8,
  alpha = 0.3,
  position = "fill"
)

# Qualitative x quantitative (Gender and Job)
# `gj` is reused by later sections of this script.
gj <- ggplot(data = studentdata, mapping = aes(x = Gender, y = Job))

# Boxplot per Gender
gj + geom_boxplot(outlier.shape = 3)

# Histogram of Job, filled by Gender
gj2 <- ggplot(data = studentdata, mapping = aes(x = Job, fill = Gender))
# NOTE(review): the outline colour is given as the number 65, which R resolves
# through the active colour palette; presumably a named colour was intended.
# Confirm before replacing it.
gj2 + geom_histogram(colour = 65)

# Densities
gj2 + geom_density(alpha = 0.5)

# Adding jittered points on top of the boxplot
gj +
  geom_boxplot(outlier.colour = "red") +
  geom_jitter(
    shape = 4,
    colour = "black",
    size = 0.7,
    alpha = 0.3
  )

# Adding a violin plot behind the boxplot
gj +
  geom_violin(linetype = 2, colour = "grey") +
  geom_boxplot(width = 0.3)
# Recorded run: each plot above warned that 32 rows with missing or
# non-finite values were removed.

### Three variables ###

# Height, Shoes and Gender

# Categories encoded by colour
hsg <- ggplot(
  data = studentdata,
  mapping = aes(x = Shoes, y = Height, colour = Gender)
)
hsg +
  geom_point() +
  geom_smooth()

# Categories encoded by point shape
hsg2 <- ggplot(
  data = studentdata,
  mapping = aes(x = Shoes, y = Height, shape = Gender)
)
hsg2 +
  geom_point(size = 2) +
  geom_smooth()

# Colour by Drink and point shape by Gender
hsgd <- ggplot(
  data = studentdata2,
  mapping = aes(x = Shoes, y = Height, colour = Drink, shape = Gender)
)
hsgd + geom_point(size = 2)

# Small multiples (separate panels) split by Gender
hsg2 +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(Gender))

# Colour by Drink, with one panel per Gender
hsd <- ggplot(
  data = studentdata2,
  mapping = aes(x = Shoes, y = Height, colour = Drink)
)
hsd +
  geom_point() +
  facet_wrap(vars(Gender))
# Recorded run: the plots built on `studentdata` warned about 32 removed rows,
# the ones built on `studentdata2` about 29 removed rows.

# Pairwise plots
# Columns 1, 2, 3 and 10 of `studentdata2`, selected by name.
pair_cols <- c("Height", "Gender", "Shoes", "Drink")
ggpairs(studentdata2[, pair_cols])

# Colouring by the Gender variable
ggpairs(studentdata2[, pair_cols],
        mapping = aes(colour = Gender))
# Recorded run: both calls emitted warnings about rows removed because of
# missing or non-finite values (8, 21 or 29 rows depending on the panel) and
# messages that stat_bin() used `bins = 30`.

# Heatmap
# Quantitative variables

library(superheat)
# First 25 rows of columns 1 and 3 to 5 (Height, Shoes, Number and Dvds once
# `Student` has been dropped). `scale` is spelled TRUE rather than T because
# T is an ordinary variable that can be reassigned.
superheat(studentdata[1:25, c(1, 3:5)], scale = TRUE)

# Plain scatter plot of Height against Shoes, drawn again
hs + geom_point()

### Customising the plots ###

# The `labs()` function
str(studentdata)
# Recorded output: 'data.frame' with 657 obs. of 10 variables.

# Translate the factor levels that show up in the legends. Each replacement
# vector follows the level order printed by levels() just before it.
levels(studentdata2$Gender)
## [1] "female" "male"
levels(studentdata2$Gender) <- c("Feminino", "Masculino")

levels(studentdata2$Drink)
## [1] "milk"  "pop"   "water"
levels(studentdata2$Drink) <- c("Leite", "Refrigerante", "Água")

# Rebuild the plot object so that it picks up the translated levels.
hsgd <- ggplot(studentdata2, aes(x = Shoes,
                                 y = Height,
                                 color = Drink,
                                 shape = Gender))

# The y label says inches ("polegadas"): the Height values are in the 60-70
# range and LearnBayes documents the variable in inches, so the former "cm"
# label was wrong.
hsgd + geom_point(size = 2) +
  labs(title = "Relação entre Altura e Pares de Sapato",
       subtitle = "Avaliação por gênero e bebida",
       caption = "Fonte: pacote LearnBayes",
       x = "Número de Pares de Sapatos",
       y = "Altura (polegadas)",
       shape = "Gênero",
       color = "Bebida")

# Shoes, Height, Drink, Gender and Job (Job mapped to point size).
# `hsgdj` is reused by later sections of this script.
hsgdj <- ggplot(studentdata2, aes(x = Shoes,
                                  y = Height,
                                  color = Drink,
                                  shape = Gender,
                                  size = Job))

# The size legend reads "Jornada" (working hours); the former "Jornal"
# means "newspaper".
hsgdj + geom_point() +
  labs(title = "Relação entre Altura e Pares de Sapato",
       subtitle = "Avaliação por gênero e bebida",
       caption = "Fonte: pacote LearnBayes",
       x = "Número de Pares de Sapatos",
       y = "Altura (polegadas)",
       shape = "Gênero",
       color = "Bebida",
       size = "Jornada de trabalho semanal")
# Recorded run: the first plot warned about 29 removed rows, the second
# (which also needs Job) about 59.

# Extra #

# Plot title as a plotmath expression so that "Altura" is set in italics.
titulo <- expression(
  paste("Relação entre ", italic("Altura"), " e Pares de Sapato")
)

# Label fixes: Height is recorded in inches (values around 60-70), not cm,
# and the size legend is "Jornada" (working hours), not "Jornal" (newspaper).
hsgdj + geom_point() +
  labs(title = titulo,
       subtitle = "Avaliação por gênero e bebida",
       caption = "Fonte: pacote LearnBayes",
       x = "Número de Pares de Sapatos",
       y = "Altura (polegadas)",
       shape = "Gênero",
       color = "Bebida",
       size = "Jornada de trabalho semanal")
# Recorded run: warned that 59 rows containing missing values were removed.

# Colour palettes

# Base plot reused by the palette, font and theme examples in the rest of the
# script. Label fixes: Height is recorded in inches (values around 60-70), not
# cm, and the size legend is "Jornada" (working hours), not "Jornal"
# (newspaper).
p <- hsgdj + geom_point() +
  labs(title = titulo,
       subtitle = "Avaliação por gênero e bebida",
       caption = "Fonte: pacote LearnBayes",
       x = "Número de Pares de Sapatos",
       y = "Altura (polegadas)",
       shape = "Gênero",
       color = "Bebida",
       size = "Jornada de trabalho semanal")

p + scale_color_brewer(palette = "Set2")

p + scale_color_grey()

p + scale_color_viridis_d()

library(tvthemes)
p + scale_color_spongeBob()

p + scale_color_simpsons()

library(harrypotter)
# https://github.com/aljrico/harrypotter

# `discrete` is spelled TRUE rather than T because T is an ordinary variable
# that can be reassigned.
p + scale_color_hp(discrete = TRUE,
                   option = "HarryPotter")
# Recorded run: each plot above warned that 59 rows containing missing values
# were removed (geom_point).

# NOTE: for bar charts, histograms, density plots or boxplots, use the
# scale_fill_* functions instead.

# Font

# family: serif, mono, sans, symbol
# face:   plain, italic, bold, bold.italic

p +
  scale_color_brewer(palette = "Set2") +
  theme(
    text = element_text(face = "bold", family = "serif", size = 12)
  )

# Legend

p +
  scale_color_brewer(palette = "Set2") +
  theme(
    legend.position = "right",
    text = element_text(face = "bold", family = "serif", size = 12)
  )
# Recorded run: both plots warned that 59 rows containing missing values were
# removed (geom_point).

# Ready-made themes shipped with ggplot2
p + theme_bw()

p + theme_classic()

p + theme_dark()

# Theme and matching colour scale combined (both functions were already used
# or attached with tvthemes earlier in the script)
p + scale_color_avatar() + theme_avatar()

p + scale_color_simpsons() + theme_simpsons()

# Themes from hrbrthemes
# Recorded run: attaching hrbrthemes masked import_roboto_condensed and
# import_titillium_web from tvthemes.
library(hrbrthemes)
p + theme_ft_rc()

p + theme_ipsum()

p + theme_ipsum_tw()
# Recorded run: each plot above warned that 59 rows containing missing values
# were removed (geom_point).

# Interactive versions of the plots with plotly
# Recorded run: attaching plotly masked ggplot2::last_plot, stats::filter and
# graphics::layout.
library(plotly)

# Same plot as before, but with a plain character title instead of the
# plotmath expression. Label fixes: Height is recorded in inches (values
# around 60-70), not cm, and the size legend is "Jornada" (working hours),
# not "Jornal" (newspaper).
p2 <- hsgdj + geom_point() +
  labs(title = "Relação entre Altura e Pares de Sapato",
       subtitle = "Avaliação por gênero e bebida",
       caption = "Fonte: pacote LearnBayes",
       x = "Número de Pares de Sapatos",
       y = "Altura (polegadas)",
       shape = "Gênero",
       color = "Bebida",
       size = "Jornada de trabalho semanal")

ggplotly(p2)

# Boxplot of Job by Gender; `p3` is also exported to Office files below.
p3 <- gj + geom_boxplot(outlier.shape = 3)

ggplotly(p3)
# Recorded run: warned that 32 rows containing non-finite values were removed
# (stat_boxplot).
# Exporting plots to PowerPoint and Word
library(eoffice)

# PowerPoint file; the relative file name is resolved against the working
# directory
topptx(p3, filename = "p3.pptx", height = 4, width = 6)

# Show the working directory (where the files were saved)
getwd()

# Word file with the same dimensions
todocx(p3, filename = "p3.docx", height = 4, width = 6)
# Recorded run: both exports warned that 32 rows containing non-finite values
# were removed (stat_boxplot).
# Setting the axis ranges
# breaks: seq(start, end, step) gives the tick positions
# limits: c(start, end) gives the range of the axis

hs +
  geom_point() +
  scale_x_continuous(
    limits = c(0, 170),
    breaks = seq(from = 0, to = 170, by = 20)
  ) +
  scale_y_continuous(
    limits = c(50, 85),
    breaks = seq(from = 50, to = 85, by = 2)
  )
# Recorded run: warned that 32 rows containing missing values were removed
# (geom_point).