# Exercício 12 - Visualização de Dados

# Questão 1 ----

# Response-time series used by the line chart and the bar charts below.
# Each vector holds seven values, one per entry of `relogio`, and is labelled
# in the legends as "1 Fog", "3 Fogs", "5 Fogs", "10 Fogs" and "15 Fogs".
# NOTE(review): "MRT" presumably stands for "mean response time" -- confirm
# against the exercise statement.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)
# Series labelled "w/o Fog" in the legends; it is the baseline that every bar
# chart compares against.
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009,
               0.6060645101029295, 0.612167181646899, 0.6146761002685637,
               0.6096747087200697, 0.6125810476877268)

# X-axis positions shared by all series: the time between Things requests,
# in seconds (per the axis label of the line chart).
relogio <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Panel grid: panel 1 (line chart) spans the whole first row, panels 2-5 fill
# the second row and panel 6 takes the first cell of the third row; cells
# marked 0 stay empty. The call is namespaced so it cannot be confused with
# plotly's layout(), which is used further down in this file.
graphics::layout(matrix(c(rep(1, 4), 2:5, 6, rep(0, 3)),
                        nrow = 3, byrow = TRUE))

# Line chart: every series is drawn against `relogio`. The per-series settings
# live in parallel vectors so the curves and the legend cannot drift apart.
# local() keeps these helper vectors out of the global environment.
invisible(local({
  series      <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
  rotulos     <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
  cores_linha <- c("black", "orange", "red", "purple", "blue", "green")
  simbolos    <- c(4, 17, 8, 2, 5, 8)
  espessura   <- 1.5

  # The first series opens the plot; its maximum fixes the y-axis range.
  plot(relogio, series[[1]],
       type = "b", pch = simbolos[1], col = cores_linha[1], lwd = espessura,
       ylim = c(0, max(series[[1]])),
       xlab = "Time between Things requests (seconds)",
       ylab = "Response Time (sec.)")

  # Remaining series are overlaid on the existing axes.
  for (i in seq_along(series)[-1]) {
    lines(relogio, series[[i]],
          type = "b", pch = simbolos[i], col = cores_linha[i], lwd = espessura)
  }

  legend("topright",
         legend = rotulos,
         col    = cores_linha,
         pch    = simbolos,
         lwd    = espessura, cex = 0.75)
}))

# Bar charts
#
# Draw one grouped bar chart that compares the "w/o Fog" series with a single
# fog series, on a logarithmic y axis.
#
# Reads the global vectors `MRT_sem_F` (baseline bars) and `relogio` (bar
# group labels).
#
# Args:
#   dados_fog:  numeric vector with the fog series, same length as `relogio`.
#   rotulo_fog: legend label for that series (e.g. "3 Fogs").
# Returns:
#   The value of the final legend() call.
grafico_barras <- function(dados_fog, rotulo_fog) {
  # Light grey = baseline, dark grey = fog series; shared by bars and legend.
  tons_cinza      <- c("#E6E6E6", "#666666")
  rotulos_legenda <- c("w/o Fog", rotulo_fog)

  # One matrix row per series, so `beside = TRUE` pairs the bars per column.
  barplot(rbind(MRT_sem_F, dados_fog),
          names.arg = relogio,
          beside    = TRUE,
          log       = "y",
          col       = tons_cinza,
          ylab      = "Response time (s)",
          xlab      = "Time between Things requests")

  legend("topright",
         fill   = tons_cinza,
         legend = rotulos_legenda,
         cex    = 0.75)
}

# One bar chart per fog configuration; each call fills the next free panel of
# the active layout, so the order below is the order of the panels.
invisible(Map(grafico_barras,
              list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F),
              c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs")))

# Questão 2 ----

# Percentage of meals per quality rating (rows) within each price range
# (columns); every column adds up to roughly 100.
dados_refeicao <- rbind(
  "Good"      = c(53.8, 33.9,  2.6,  0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c( 2.6, 11.9, 36.8, 78.6)
)
colnames(dados_refeicao) <- c("$10-19", "$20-29", "$30-39", "$40-49")

# One fill colour per quality rating, in row order.
cores <- c("#4e9af1", "#f1c94e", "#f1654e")

# Stacked bars (beside = FALSE): one bar per price range. The y axis runs to
# 120 although the stacks stop near 100, which leaves headroom for the legend.
barplot(height = dados_refeicao,
        beside = FALSE,
        ylim   = c(0, 120),
        col    = cores,
        xlab   = "Faixa de Preço",
        ylab   = "Percentual (%)",
        main   = "Qualidade de Refeição por Faixa de Preço")

legend("topright",
       title  = "Qualidade",
       legend = rownames(dados_refeicao),
       fill   = cores)

# Questão 3 ----

# Keep the May rows of `airquality` and convert their temperatures with the
# Fahrenheit-to-Celsius formula (F - 32) / 1.8.
dados_maio    <- airquality[airquality$Month == 5, ]
temperatura_c <- (dados_maio[["Temp"]] - 32) / 1.8

# freq = FALSE puts the histogram on the density scale, so the kernel density
# curve added afterwards shares the same y axis.
hist(x      = temperatura_c,
     freq   = FALSE,
     col    = "steelblue",
     border = "white",
     main   = "Temperaturas em Maio (°C)",
     xlab   = "Temperatura (°C)",
     ylab   = "Densidade")

lines(density(temperatura_c), lwd = 2, col = "red")

# Questão 4 ----

# Tab-separated sales table, downloaded on every run (needs network access).
vendas <- read.table(
  file             = "https://training-course-material.com/images/8/8f/Sales.txt",
  sep              = "\t",
  header           = TRUE,
  stringsAsFactors = FALSE
)

# Total SALES per COUNTRY, each total's share of the grand total (in percent,
# one decimal place) and one rainbow colour per country.
total_por_pais <- with(vendas, tapply(SALES, COUNTRY, sum))
percentual     <- round((100 * total_por_pais) / sum(total_por_pais),
                        digits = 1)
cores          <- rainbow(n = length(total_por_pais))

# Slices are labelled with their percentage; country names go in the legend.
pie(x      = total_por_pais,
    col    = cores,
    labels = paste0(percentual, "%"),
    main   = "Total de Vendas por País")

legend("bottomright",
       fill   = cores,
       legend = names(total_por_pais),
       cex    = 0.8)

# Questão 5 ----

# One box per spray type, built from the insect counts grouped by spray.
# outline = FALSE suppresses the drawing of outlier points.
boxplot(split(InsectSprays$count, InsectSprays$spray),
        outline = FALSE,
        col     = "yellow",
        xlab    = "Tipo de Inseticida",
        ylab    = "Contagem de Insetos",
        main    = "Contagem de Insetos por Inseticida")

# Questão 6 ----

# Directory holding the CSV files that are read with relative paths further
# down (monitoringCloudData_*.csv and netflix_titles.csv).
# The path is specific to one machine, so it is only entered when it exists;
# anywhere else the script keeps the current working directory and warns,
# instead of aborting on a failed setwd().
dir_dados <- "C:/Users/nando/Downloads"
if (dir.exists(dir_dados)) {
  setwd(dir_dados)
} else {
  warning("Diretório de dados não encontrado: ", dir_dados,
          "; os arquivos serão lidos a partir de ", getwd(),
          call. = FALSE)
}

# Convert memory-usage strings to megabytes.
#
# The numeric part is whatever remains after removing letters; the unit is
# whatever remains after removing digits, dots and spaces. Units are matched
# case-sensitively and all use binary multiples (1 GB = 1024 MB,
# 1 TB = 1024 * 1024 MB, 1 KB = 1 / 1024 MB).
#
# Args:
#   x: character vector such as "512MB", "1.5 GB" or "2048KB".
# Returns:
#   Numeric vector with the same length as `x`, in MB. Values whose unit is
#   not TB, GB, MB or KB are returned unchanged; values whose numeric part
#   cannot be parsed become NA.
converter_para_mb <- function(x) {
  valor   <- as.numeric(gsub("[A-Za-z]+", "", x))
  unidade <- gsub("[0-9. ]+", "", x)

  # Multiplier per element; the default of 1 covers MB and unknown units.
  # Assignments run from the lowest-priority unit to the highest, so a
  # string matching several patterns resolves TB > GB > MB > KB.
  fator <- rep(1, length(unidade))
  fator[grepl("KB", unidade)] <- 1 / 1024
  fator[grepl("MB", unidade)] <- 1
  fator[grepl("GB", unidade)] <- 1024
  fator[grepl("TB", unidade)] <- 1024 * 1024

  valor * fator
}

# Load one monitoring CSV and derive the columns used by the memory plots.
#
# Args:
#   arquivo: path of a CSV file with `currentTime` and `usedMemory` columns.
# Returns:
#   The data frame read from `arquivo`, with `currentTime` parsed as POSIXct
#   (UTC) plus two new columns: `memoria_mb` (used memory converted by
#   converter_para_mb()) and `tempo_h` (hours elapsed since the earliest
#   timestamp in the file).
carregar_dados <- function(arquivo) {
  tabela <- read.csv(arquivo, stringsAsFactors = FALSE)

  # %OS accepts fractional seconds in the timestamp.
  instantes <- as.POSIXct(tabela$currentTime,
                          tz     = "UTC",
                          format = "%Y-%m-%d %H:%M:%OS")
  # Earliest parsed timestamp; unparsed (NA) entries are ignored.
  inicio <- min(instantes, na.rm = TRUE)

  tabela$currentTime <- instantes
  tabela$memoria_mb  <- converter_para_mb(tabela$usedMemory)
  tabela$tempo_h     <- as.numeric(difftime(instantes, inicio,
                                            units = "hours"))
  tabela
}

# One data set per workload level, read relative to the working directory.
dados_none <- carregar_dados("monitoringCloudData_NONE.csv")
dados_01   <- carregar_dados("monitoringCloudData_0.1.csv")
dados_05   <- carregar_dados("monitoringCloudData_0.5.csv")
dados_1    <- carregar_dados("monitoringCloudData_1.csv")

# 2 x 2 grid of panels, filled row by row. The call is namespaced so it cannot
# be confused with plotly's layout(), which is used further down in this file.
graphics::layout(matrix(1:4, nrow = 2, byrow = TRUE))

# Same line plot for every workload; only the data and the text inside the
# parentheses of the title change.
invisible(Map(
  function(dados, carga) {
    plot(dados$tempo_h, dados$memoria_mb,
         type = "l",
         main = paste0("Memory Analysis (", carga, ")"),
         xlab = "Time (hour)",
         ylab = "Used Memory (MB)")
  },
  list(dados_none, dados_01, dados_05, dados_1),
  c("None Workload", "Workload of 0.1", "Workload of 0.5", "Workload of 1.0")
))

# Questão 7 ----

library(plotly)
library(dplyr)

# Netflix catalogue, read relative to the working directory.
netflix <- read.csv(file = "netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only titles with exactly one country: the field must be present,
# non-empty and free of commas (a comma separates multiple countries).
netflix_unico_pais <- filter(netflix,
                             !is.na(country),
                             country != "",
                             !grepl(",", country))

# Titles per country, largest first, limited to the ten biggest.
top10_paises <- netflix_unico_pais %>%
  count(country, name = "total", sort = TRUE) %>%
  slice_head(n = 10)

# Pie chart whose slices show the country name and its share.
layout(
  plot_ly(data     = top10_paises,
          type     = "pie",
          labels   = ~country,
          values   = ~total,
          textinfo = "label+percent"),
  title = "Top 10 Países com Mais Conteúdo na Netflix"
)

# Questão 8 ----

# Table view of `top10_paises`: a grey header with white text, then one column
# of country names and one column of title counts, all centred.
plot_ly(
  type   = "table",
  header = list(
    values = c("País", "Total de Conteúdos"),
    fill   = list(color = "gray"),
    font   = list(color = "white", size = 13),
    align  = "center"
  ),
  cells  = list(
    values = list(top10_paises[["country"]], top10_paises[["total"]]),
    align  = "center"
  )
)

# Questão 9 ----

# Number of titles per decade and content type. The decade is the release
# year rounded down to a multiple of 10 (e.g. 1997 -> 1990).
netflix_decada <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = 10 * floor(release_year / 10)) %>%
  group_by(decada, type) %>%
  summarise(total = n(), .groups = "drop")

# One table per content type, each feeding its own trace.
series <- filter(netflix_decada, type == "TV Show")
filmes <- filter(netflix_decada, type == "Movie")

# Two line-and-marker traces on shared axes: TV shows in blue, movies in
# yellow.
plot_ly() %>%
  add_trace(data   = series,
            x      = ~decada,
            y      = ~total,
            name   = "TV Series",
            type   = "scatter",
            mode   = "lines+markers",
            marker = list(color = "blue"),
            line   = list(color = "blue")) %>%
  add_trace(data   = filmes,
            x      = ~decada,
            y      = ~total,
            name   = "Movies",
            type   = "scatter",
            mode   = "lines+markers",
            marker = list(color = "yellow"),
            line   = list(color = "yellow")) %>%
  layout(title = "Quantidade de Conteúdo por Década na Netflix",
         yaxis = list(title = "Qnd. Conteúdo"),
         xaxis = list(title = "Década"))

# Questão 10 ----

# Genres compared in the chart.
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010 (both inclusive), classified by the first
# genre listed in `listed_in` (the text before the first comma, trimmed), then
# counted per release year and genre for the target genres only.
filmes_genero <- netflix %>%
  filter(type == "Movie", between(release_year, 2000, 2010)) %>%
  mutate(primeiro_genero = trimws(sub(",.*", "", listed_in))) %>%
  filter(primeiro_genero %in% generos_alvo) %>%
  count(release_year, primeiro_genero, name = "quantidade")

# Grouped bars: one group per year (years are converted to text so the axis is
# categorical), one bar per genre, with a fixed colour for each genre.
layout(
  plot_ly(data   = filmes_genero,
          type   = "bar",
          x      = ~as.character(release_year),
          y      = ~quantidade,
          color  = ~primeiro_genero,
          colors = setNames(c("blue", "orange", "green"), generos_alvo)),
  barmode = "group",
  title   = "Filmes por Gênero (2000-2010)",
  xaxis   = list(title = "Ano de Lançamento"),
  yaxis   = list(title = "Qnt. de Lançamentos")
)