Exercício 12 - Visualização de Dados

Questão 1

# Response-time series for Questão 1, one numeric vector per scenario. Every
# vector holds seven values, positionally aligned with `clock` below. The
# plotting code in this file labels them "1 Fog", "3 Fogs", "5 Fogs",
# "10 Fogs", "15 Fogs" and "w/o Fog" and draws them on an axis titled
# "Response Time (sec.)".
# NOTE(review): "MRT" presumably stands for mean response time -- confirm.
MRT_1F   <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
              12.694776264558937, 3.3041601673945418, 1.1823111717498882,
              1.1892293502386786)
MRT_3F   <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
              0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
              0.4543157082191288)
MRT_5F   <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
              0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
              0.3053297166713006)
MRT_10F  <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
              0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
              0.19617420889447737)
MRT_15F  <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
              0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
              0.16216563797118075)
# Series that the charts label "w/o Fog"; plot_bar() uses it as the reference
# row in every bar chart.
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009,
               0.6060645101029295, 0.612167181646899, 0.6146761002685637,
               0.6096747087200697, 0.6125810476877268)
# X-axis values shared by all series; the line chart titles this axis
# "Time between Things requests (seconds)".
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Panel layout: the line chart (plot 1) spans the whole first row, plots 2-5
# fill a 2x2 grid below it, and plot 6 spans the whole last row.
layout(rbind(c(1, 1),
             c(2, 3),
             c(4, 5),
             c(6, 6)))

# --- Line chart ---
par(mar = c(5, 5, 2, 2))

# The first series sets up the axes; the y range is fixed by MRT_1F, the
# series whose maximum is used as the upper limit.
plot(x = clock, y = MRT_1F,
     type = "b", pch = 4, col = "black", lwd = 1.5,
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     ylim = c(0, max(MRT_1F)))

# Overlay the remaining series, each with its own symbol and colour, in the
# same order as the legend entries.
invisible(Map(
  function(valores, simbolo, cor) {
    lines(clock, valores, type = "b", pch = simbolo, col = cor, lwd = 1.5)
  },
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c(17, 16, 24, 25, 23),
  c("orange", "red", "purple", "green", "cyan")
))

legend(x = "topright",
       legend = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog"),
       pch    = c(4, 17, 16, 24, 25, 23),
       col    = c("black", "orange", "red", "purple", "green", "cyan"),
       lty    = 1, lwd = 1.5, cex = 0.8)

# Helper: draw one grouped bar chart that compares a fog scenario with the
# no-fog reference on a log-scaled y axis.
#
# Args:
#   fog_data:  numeric vector with the values of the fog scenario.
#   fog_label: legend text for that scenario (e.g. "3 Fogs").
#   baseline:  numeric vector with the reference ("w/o Fog") values. Defaults
#              to the script-level `MRT_sem_F`, so existing two-argument
#              calls behave as before.
#   intervals: labels written under each group of bars. Defaults to the
#              script-level `clock`.
#
# Returns:
#   Invisibly, the value of barplot() (the bar midpoints).
plot_bar <- function(fog_data, fog_label,
                     baseline = MRT_sem_F, intervals = clock) {
  # rbind() recycles the shorter vector when the lengths differ, which would
  # draw bars for values that do not exist; fail early instead.
  stopifnot(
    is.numeric(fog_data),
    is.numeric(baseline),
    length(fog_data) == length(baseline),
    length(intervals) == length(baseline)
  )

  # Row 1 = reference, row 2 = fog scenario; with beside = TRUE each column
  # becomes one pair of adjacent bars.
  mat <- rbind(baseline, fog_data)
  barplot(mat,
          beside   = TRUE,
          names.arg = intervals,
          log      = "y",
          col      = c("#E6E6E6", "#666666"),
          xlab     = "Time between Things requests",
          ylab     = "Response time (s)",
          legend.text = c("w/o Fog", fog_label),
          args.legend = list(x = "topright", cex = 0.7),
          cex.names = 0.7)
}

# One bar chart per fog configuration, drawn in this order so that they land
# in layout cells 2 to 6.
invisible(Map(
  plot_bar,
  list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F),
  c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs")
))

Questão 2

# Meal-quality percentages: one column per price range, one row per rating.
meal_data <- cbind(
  "$10-19" = c(53.8, 43.6, 2.6),
  "$20-29" = c(33.9, 54.2, 11.9),
  "$30-39" = c(2.6, 60.5, 36.8),
  "$40-49" = c(0.0, 21.4, 78.6)
)
rownames(meal_data) <- c("Good", "Very Good", "Excellent")

# Fill colours, one per rating (row of meal_data).
cores <- c("#4E79A7", "#F28E2B", "#59A14F")

# Stacked bars: each price range is one bar split by rating. The y limit goes
# a little past 100 so there is space above the bars.
barplot(height = meal_data,
        beside = FALSE,
        col    = cores,
        ylim   = c(0, 110),
        main   = "Qualidade de Refeição por Faixa de Preço",
        xlab   = "Faixa de Preço",
        ylab   = "Percentual (%)",
        legend.text = rownames(meal_data),
        args.legend = list(x = "topright", bty = "n"))

Questão 3

# May temperatures from the built-in `airquality` data set, converted from
# Fahrenheit to Celsius.
may_temps_f <- airquality$Temp[airquality$Month == 5]
may_temps_c <- (may_temps_f - 32) / 1.8

# freq = FALSE puts the histogram on the density scale so that the kernel
# density curve added below shares its y axis. The axis is therefore labelled
# as a density, not as a frequency (count).
hist(may_temps_c,
     main  = "Histograma das Temperaturas de Maio",
     xlab  = "Temperatura (°C)",
     ylab  = "Densidade",
     col   = "steelblue",
     border = "white",
     freq  = FALSE)

# Kernel density estimate drawn over the histogram.
lines(density(may_temps_c), col = "red", lwd = 2)

Questão 4

# Sales records downloaded from the course web site (first line is a header).
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Detect the grouping column (first character/factor column) and the value
# column (first numeric column). vapply() guarantees a logical vector
# whatever the shape of `sales`.
eh_texto <- vapply(
  sales,
  function(coluna) is.character(coluna) || is.factor(coluna),
  logical(1)
)
eh_numero <- vapply(sales, is.numeric, logical(1))
col_char <- names(sales)[eh_texto][1]
col_num  <- names(sales)[eh_numero][1]

# Without this check a missing column would become the literal name "NA" in
# the formula below and fail with an unrelated error message.
if (is.na(col_char) || is.na(col_num)) {
  stop("Sales.txt precisa ter uma coluna de texto e uma coluna numérica.",
       call. = FALSE)
}

# Total of the numeric column per group, using the detected column names.
formula_agg <- as.formula(paste(col_num, "~", col_char))
total_vendas <- aggregate(formula_agg, data = sales, FUN = sum)
names(total_vendas) <- c("Country", "Sales")

# Slice labels are each group's share of the grand total, to one decimal.
pct        <- round(total_vendas$Sales / sum(total_vendas$Sales) * 100, 1)
labels_pct <- paste0(pct, "%")
cores_pizza <- rainbow(nrow(total_vendas))

pie(total_vendas$Sales,
    labels = labels_pct,
    col    = cores_pizza,
    main   = "Total de Vendas por País")

# The slices only show percentages, so the legend maps colours to groups.
legend("bottomleft",
       legend = total_vendas$Country,
       fill   = cores_pizza,
       cex    = 0.75,
       bty    = "n")

Questão 5

# Insect counts grouped by spray, from the built-in InsectSprays data set.
# outline = FALSE suppresses the outlier points.
boxplot(count ~ spray,
        data    = InsectSprays,
        main    = "Contagem de Insetos por Inseticida",
        xlab    = "Tipo de Inseticida",
        ylab    = "Contagem de Insetos",
        col     = "yellow",
        outline = FALSE)

Questão 6

library(lubridate)

# Load one monitoring CSV and derive the columns used for plotting.
#
# Args:
#   caminho: path of the CSV file. The file must provide the columns
#            `currentTime` (text in "%Y-%m-%d %H:%M:%OS" format) and
#            `usedMemory` (a number followed by a unit such as "512 MB").
#
# Returns:
#   The data frame read from `caminho` with `currentTime` parsed to POSIXct
#   and two added columns: `time_h` (hours elapsed since the first row) and
#   `usedMemory_MB` (used memory in megabytes).
carregar_dados <- function(caminho) {
  df <- read.csv(caminho, stringsAsFactors = FALSE)
  colnames(df) <- trimws(colnames(df))

  # Parse currentTime into POSIXct.
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")

  # Continuous time axis: hours elapsed since the first observation.
  df$time_h <- as.numeric(
    difftime(df$currentTime, df$currentTime[1], units = "hours")
  )

  # Convert strings such as "1.5 GB" to a number of megabytes. All factors
  # are binary (1024-based), including TB. Values without a recognised unit
  # are assumed to be in MB already.
  converter_mb <- function(x) {
    x <- trimws(x)
    valor <- as.numeric(gsub("[^0-9.]", "", x))
    unidade <- gsub("[0-9. ]", "", x)

    # Later assignments override earlier ones, so the units are applied from
    # lowest to highest priority (TB wins over GB, GB over MB, MB over KB).
    fator <- rep(1, length(x))
    fator[grepl("KB", unidade, ignore.case = TRUE)] <- 1 / 1024
    fator[grepl("MB", unidade, ignore.case = TRUE)] <- 1
    fator[grepl("GB", unidade, ignore.case = TRUE)] <- 1024
    fator[grepl("TB", unidade, ignore.case = TRUE)] <- 1024^2
    valor * fator
  }

  df$usedMemory_MB <- converter_mb(df$usedMemory)
  df
}

# Read the four monitoring logs: the run without workload first, then the
# 0.1, 0.5 and 1.0 workloads.
dnone <- carregar_dados("monitoringCloudData_NONE.csv")
d01   <- carregar_dados("monitoringCloudData_0.1.csv")
d05   <- carregar_dados("monitoringCloudData_0.5.csv")
d1    <- carregar_dados("monitoringCloudData_1.csv")

# 2x2 grid of panels, filled row by row.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

# Draw used memory (MB) against elapsed time (hours) as a thin line.
#
# Args:
#   df:     data frame with the columns `time_h` and `usedMemory_MB`.
#   titulo: main title of the panel.
plot_mem <- function(df, titulo) {
  horas <- df$time_h
  memoria_mb <- df$usedMemory_MB

  plot(x = horas, y = memoria_mb,
       main = titulo,
       xlab = "Time (hour)",
       ylab = "Used Memory (MB)",
       type = "l", lwd = 0.8)
}

# One panel per workload, in the order that fills the 2x2 grid row by row.
invisible(Map(
  plot_mem,
  list(dnone, d01, d05, d1),
  c("Memory Analysis (None Workload)",
    "Memory Analysis (Workload of 0.1)",
    "Memory Analysis (Workload of 0.5)",
    "Memory Analysis (Workload of 1.0)")
))

Questão 7

library(plotly)
library(dplyr)

netflix <- read.csv(file = "netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only titles attributed to exactly one country: the field must be
# present, non-empty and free of commas (commas separate multiple countries).
netflix_1pais <- filter(
  netflix,
  !is.na(country),
  country != "",
  !grepl(",", country)
)

# Number of titles per country, largest first, limited to the first ten rows.
top10 <- netflix_1pais %>%
  count(country, name = "total") %>%
  arrange(desc(total)) %>%
  head(10)

# Pie chart: one slice per country, labelled with its name and share.
top10 %>%
  plot_ly(labels   = ~country,
          values   = ~total,
          type     = "pie",
          textinfo = "label+percent") %>%
  layout(title = "Top 10 Países com Mais Conteúdo na Netflix")

Questão 8

# Same ranking as `top10`, with display-ready column names.
top10_tabela <- select(top10,
                       País = country,
                       `Total de Conteúdos` = total)

# Render the ranking as a plotly table: grey header with white bold text,
# centred cells, one table column per data column.
plot_ly(
  type = "table",
  header = list(
    values = c("<b>País</b>", "<b>Total de Conteúdos</b>"),
    align  = "center",
    fill   = list(color = "gray"),
    font   = list(color = "white", size = 13)
  ),
  cells = list(
    values = list(top10_tabela[["País"]],
                  top10_tabela[["Total de Conteúdos"]]),
    align  = "center"
  )
) %>%
  layout(title = "Top 10 Países - Tabela de Conteúdos")

Questão 9

# Tag every title that has a release year with the first year of its decade
# (e.g. 1994 -> 1990).
netflix_decadas <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = 10 * (release_year %/% 10))

# Titles per decade, separately for TV shows and for movies.
series <- count(filter(netflix_decadas, type == "TV Show"),
                decada, name = "total")
filmes <- count(filter(netflix_decadas, type == "Movie"),
                decada, name = "total")

# Two line-and-marker traces on shared axes: TV shows in blue, movies in
# orange.
plot_ly() %>%
  add_trace(data = series,
            x = ~decada, y = ~total,
            name = "TV Series",
            type = "scatter", mode = "lines+markers",
            marker = list(color = "blue"),
            line   = list(color = "blue")) %>%
  add_trace(data = filmes,
            x = ~decada, y = ~total,
            name = "Movies",
            type = "scatter", mode = "lines+markers",
            marker = list(color = "orange"),
            line   = list(color = "orange")) %>%
  layout(title = "Quantidade de Conteúdo por Década na Netflix",
         xaxis = list(title = "Década"),
         yaxis = list(title = "Qnd. Conteúdo"))

Questão 10

# Genres compared in the chart.
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010, classified by the first genre listed in
# `listed_in` (everything from the first comma onwards is dropped), then
# counted per year and genre for the three target genres.
filmes_genero <- netflix %>%
  filter(type == "Movie" & release_year >= 2000 & release_year <= 2010) %>%
  mutate(genero_principal = sub(",.*", "", listed_in)) %>%
  mutate(genero_principal = trimws(genero_principal)) %>%
  filter(genero_principal %in% generos_alvo) %>%
  count(release_year, genero_principal, name = "total")

# Bar colour of each genre.
cores_genero <- c("Dramas" = "blue",
                  "Action & Adventure" = "orange",
                  "Comedies" = "green")

# Grouped bars: one trace per genre, coloured from `cores_genero`, with one
# tick per release year.
plot_ly() %>%
  add_trace(
    data = filter(filmes_genero, genero_principal == "Dramas"),
    x = ~release_year, y = ~total,
    name = "Drama", type = "bar",
    marker = list(color = cores_genero[["Dramas"]])
  ) %>%
  add_trace(
    data = filter(filmes_genero, genero_principal == "Action & Adventure"),
    x = ~release_year, y = ~total,
    name = "Ação e Aventura", type = "bar",
    marker = list(color = cores_genero[["Action & Adventure"]])
  ) %>%
  add_trace(
    data = filter(filmes_genero, genero_principal == "Comedies"),
    x = ~release_year, y = ~total,
    name = "Comédia", type = "bar",
    marker = list(color = cores_genero[["Comedies"]])
  ) %>%
  layout(title   = "Filmes por Gênero (2000–2010)",
         barmode = "group",
         xaxis   = list(title = "Ano de Lançamento", dtick = 1),
         yaxis   = list(title = "Qnt. de Lançamentos"))

Exercício 12 - Visualização de Dados

Lucas Matias da Silva

19 de maio de 2026

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10