Exercício - 12

Questão 1

Gráficos de linha e barras com layout()

# Mean response time (MRT) measurements, one vector per configuration.
# Each vector has 7 values, matched by position to the 7 entries of `clock`;
# the plots further down label MRT in ms and clock in GHz.
MRT_1F  <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
             12.694776264558937, 3.3041601673945418, 1.1823111717498882,
             1.1892293502386786)
MRT_3F  <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
             0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
             0.4543157082191288)
MRT_5F  <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
             0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
             0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)
# "sem F" series; the line-chart legend shows it as "No Function"
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295,
               0.612167181646899, 0.6146761002685637, 0.6096747087200697,
               0.6125810476877268)
# Clock values used as the x-axis of the line chart
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Arrange the device as two stacked full-width panels: line chart on top,
# bar chart below. The call is namespace-qualified because plotly (attached
# later in this document) exports its own layout(), which masks the graphics
# one when this section is re-run in the same session.
graphics::layout(matrix(c(1, 1,
                          2, 2), nrow = 2, byrow = TRUE))

# ---- Plot 1: line chart ----
# Common y-axis upper bound so every series fits in the same panel
y_max <- max(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)

plot(clock, MRT_1F, type = "b", pch = 15, col = "red",
     ylim = c(0, y_max),
     xlab = "Clock (GHz)", ylab = "MRT (ms)",
     main = "Mean Response Time vs Clock Speed",
     lwd = 2)
lines(clock, MRT_3F,    type = "b", pch = 16, col = "blue",   lwd = 2)
lines(clock, MRT_5F,    type = "b", pch = 17, col = "green",  lwd = 2)
lines(clock, MRT_10F,   type = "b", pch = 18, col = "purple", lwd = 2)
lines(clock, MRT_15F,   type = "b", pch = 19, col = "orange", lwd = 2)
lines(clock, MRT_sem_F, type = "b", pch = 20, col = "black",  lwd = 2)

# Legend entries follow the same order (and pch 15:20) as the series above
legend("topright",
       legend = c("1 Function","3 Functions","5 Functions",
                  "10 Functions","15 Functions","No Function"),
       col    = c("red","blue","green","purple","orange","black"),
       pch    = 15:20, lwd = 2, cex = 0.8)

# ---- Plot 2: bar chart on a logarithmic y scale ----
# One bar per configuration: the mean of its MRT vector
mrt_means <- c(
  mean(MRT_1F), mean(MRT_3F), mean(MRT_5F),
  mean(MRT_10F), mean(MRT_15F), mean(MRT_sem_F)
)
labels <- c("1F", "3F", "5F", "10F", "15F", "Sem F")
# Alternate light and dark grey across the bars
cores  <- rep(c("#E6E6E6", "#666666"), length.out = length(labels))

barplot(mrt_means,
        names.arg = labels,
        log       = "y",
        col       = cores,
        xlab      = "Configuração",
        ylab      = "MRT médio (ms) — escala log",
        main      = "MRT médio por Configuração (escala logarítmica)",
        border    = "black")

# Restore the single-panel layout so later base plots are not squeezed into
# the two-row arrangement set above
graphics::layout(1)

Questão 2

Gráfico de barras empilhadas – Qualidade da Refeição × Preço

# Table data: one row per quality rating, one column per price band
dados_q2 <- rbind(
  "Good"      = c(53.8, 33.9,  2.6,  0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c( 2.6, 11.9, 36.8, 78.6)
)
colnames(dados_q2) <- c("$10-19", "$20-29", "$30-39", "$40-49")

# One fill colour per quality rating, in row order
cores_q2 <- c("#4E79A7", "#F28E2B", "#E15759")

# Stacked bars (beside = FALSE): each column of the matrix becomes one bar
barplot(
  dados_q2,
  main   = "Qualidade da Refeição por Faixa de Preço",
  xlab   = "Preço da Refeição",
  ylab   = "Percentual (%)",
  ylim   = c(0, 110),
  col    = cores_q2,
  border = "white",
  beside = FALSE
)

legend(
  "topright",
  title  = "Qualidade",
  legend = rownames(dados_q2),
  fill   = cores_q2,
  bty    = "n"
)

Questão 3

Histograma das temperaturas de maio (°C) – dataset airquality

# Keep only the May observations (Month == 5)
maio <- subset(airquality, Month == 5)

# Convert Fahrenheit to Celsius
temp_c <- (maio$Temp - 32) / 1.8

# Compute the density estimate and the histogram bins up front so the y-axis
# can be sized to hold both. hist() on its own scales the axis to the bars
# only, which would clip a density curve that peaks above them.
dens_temp <- density(temp_c)
hist_temp <- hist(temp_c, breaks = 10, plot = FALSE)
ylim_hist <- c(0, max(hist_temp$density, dens_temp$y))

# Histogram on the density scale (freq = FALSE), hence the "Densidade" label
hist(temp_c,
     main   = "Temperatura em Maio (°C)",
     xlab   = "Temperatura (°C)",
     ylab   = "Densidade",
     col    = "#69B3D6",
     border = "white",
     freq   = FALSE,
     breaks = 10,
     ylim   = ylim_hist)

# Density curve drawn over the bars
lines(dens_temp, col = "red", lwd = 2)

legend("topright",
       legend = "Densidade",
       col    = "red",
       lwd    = 2,
       bty    = "n")

Questão 4

Gráfico de pizza – Vendas por país

# Try to download the sales file; if that fails, fall back to the embedded
# copy below and say so, instead of switching data sources silently.
sales <- tryCatch(
  read.table("https://training-course-material.com/images/8/8f/Sales.txt",
             header = TRUE),
  error = function(e) {
    message("Usando dados embutidos para Q4: ", conditionMessage(e))
    # Embedded data; the original comment describes it as the content of
    # Sales.txt (not verifiable from this file alone)
    data.frame(
      Month   = rep(c("Jan","Feb","Mar","Apr","May","Jun",
                      "Jul","Aug","Sep","Oct","Nov","Dec"), 4),
      Country = rep(c("Germany","France","UK","USA"), each = 12),
      Sales   = c(
        # Germany
        14969, 13612, 8295, 8024, 5776, 11585,
        9732,  12192, 14769, 16704, 19819, 22056,
        # France
        3973,  4024,  3476,  2625,  3532,  4011,
        3746,  3988,  4220,  4624,  5008,  5241,
        # UK
        5765,  5896,  4948,  3854,  4641,  5454,
        5227,  5789,  6115,  6498,  7049,  7613,
        # USA
        30745, 28755, 23684, 20855, 19103, 24511,
        23555, 28479, 31224, 34497, 40056, 46004
      ),
      stringsAsFactors = FALSE
    )
  }
)

# Normalise column names to Title Case so the code below can rely on
# `Country` and `Sales` regardless of the capitalisation in the file
names(sales) <- tools::toTitleCase(tolower(names(sales)))

# Total sales per country, largest first
total_por_pais <- aggregate(Sales ~ Country, data = sales, FUN = sum)
total_por_pais <- total_por_pais[order(-total_por_pais$Sales), ]

# Share of each country, as a percentage with one decimal place
pct   <- round(total_por_pais$Sales / sum(total_por_pais$Sales) * 100, 1)
label <- paste0(pct, "%")

cores_pie <- rainbow(nrow(total_por_pais))

# Slices carry the percentages; the legend maps colours to countries
pie(total_por_pais$Sales,
    labels = label,
    col    = cores_pie,
    main   = "Percentual de Vendas Totais por País")

legend("bottomleft",
       legend = total_por_pais$Country,
       fill   = cores_pie,
       cex    = 0.8,
       bty    = "n")

Questão 5

Boxplot – InsectSprays (sem outliers)

# One box per spray type: the counts are grouped by the `spray` factor first
# and the resulting list is handed to boxplot(). `outline = FALSE` keeps the
# outlier points off the plot.
boxplot(
  split(InsectSprays$count, InsectSprays$spray),
  main    = "Contagem de Insetos por Inseticida",
  xlab    = "Tipo de Inseticida",
  ylab    = "Contagem de Insetos",
  col     = "yellow",
  border  = "black",
  outline = FALSE
)

Questão 6

Gráficos de linha – Monitoramento de Memória (Cloud)

library(stringr)
library(lubridate)

# Monitoring CSV files to load, keyed by the label of each run. The values
# are passed as-is to load_cloud(), so they resolve like any read.csv() path.
urls <- setNames(
  c(
    "monitoringCloudData_0.1.csv",
    "monitoringCloudData_0.5.csv",
    "monitoringCloudData_1.csv",
    "monitoringCloudData_NONE.csv"
  ),
  c("0.1", "0.5", "1", "NONE")
)

# Convert memory-size strings (e.g. "1.5GB", "512MB") to a number of MB.
#
# Args:
#   x: vector of size strings; coerced with as.character().
#
# Returns:
#   A numeric vector with one element per input, named by the input strings.
#   Always numeric, including for zero-length input. Values whose numeric
#   part cannot be parsed come back as NA.
convert_to_mb <- function(x) {
  x <- as.character(x)
  val <- trimws(x)

  # Keep only digits and dots, then parse what is left as the magnitude
  num <- as.numeric(gsub("[^0-9.]", "", val))

  # Unit detection is case-insensitive. grepl() returns FALSE for NA, so
  # missing inputs get factor 1 and remain NA through the multiplication.
  is_tb <- grepl("TB|TiB", val, ignore.case = TRUE)
  is_gb <- grepl("GB|GiB", val, ignore.case = TRUE)

  # NOTE(review): TB uses a decimal factor (1e6) while GB uses a binary one
  # (1024); both are kept exactly as before -- confirm the mix is intended.
  fator <- rep(1, length(val))
  fator[is_gb] <- 1024
  fator[is_tb] <- 1e6  # assigned last so TB takes precedence over GB

  stats::setNames(num * fator, x)
}

# Load one monitoring CSV and derive the columns used by the plots.
#
# Args:
#   url: path or URL handed to read.csv().
#
# Returns:
#   The data frame read from `url`, with `currentTime` parsed by ymd_hms()
#   and two added columns: `usedMemoryMB` (from `usedMemory`, via
#   convert_to_mb()) and `horasDecorridas` (hours since the earliest
#   timestamp). Returns NULL when the file cannot be read; a warning then
#   reports the cause so the failure is not silent.
load_cloud <- function(url) {
  df <- tryCatch(
    read.csv(url, stringsAsFactors = FALSE),
    error = function(e) {
      warning("Falha ao carregar '", url, "': ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(df)) {
    return(NULL)
  }

  df$currentTime  <- ymd_hms(df$currentTime)
  # as.numeric() drops the names that convert_to_mb() attaches
  df$usedMemoryMB <- as.numeric(convert_to_mb(df$usedMemory))

  # Continuous time axis: hours elapsed since the first timestamp
  t0 <- min(df$currentTime, na.rm = TRUE)
  df$horasDecorridas <- as.numeric(
    difftime(df$currentTime, t0, units = "hours")
  )

  df
}

# Load every file; entries are NULL for files that could not be read
dados_lista <- lapply(urls, load_cloud)

# 2 x 2 grid filled row by row. Namespace-qualified because plotly (attached
# later in this document) exports its own layout(), which masks the graphics
# one when this section is re-run in the same session.
graphics::layout(matrix(1:4, nrow = 2, byrow = TRUE))

# Panel titles, keyed by the same labels as `urls`
titulos <- c("0.1" = "Monitoramento – 0.1",
             "0.5" = "Monitoramento – 0.5",
             "1"   = "Monitoramento – 1",
             "NONE"= "Monitoramento – NONE")

for (nm in names(dados_lista)) {
  df <- dados_lista[[nm]]
  if (is.null(df)) {
    # Still consume a panel so the remaining plots keep their grid position
    plot.new()
    title(paste("Dados indisponíveis:", nm))
    next
  }
  plot(df$horasDecorridas, df$usedMemoryMB,
       type = "l", col = "steelblue", lwd = 1.5,
       main = titulos[nm],
       xlab = "Tempo (horas)",
       ylab = "Memória Usada (MB)")
}

# Back to a single panel so the grid does not leak into later base plots
graphics::layout(1)

Questão 7

Plotly – Pizza dos 10 países com mais conteúdo na Netflix

library(plotly)
library(dplyr)
library(readr)

# Dataset publicly available on Kaggle / GitHub mirrors
netflix_url <- "https://raw.githubusercontent.com/dsrscientist/dataset1/master/netflix_titles.csv"
netflix <- tryCatch(
  read_csv(netflix_url, show_col_types = FALSE),
  error = function(e) NULL
)

# Fallback: a small synthetic dataset, used only when the download fails
if (is.null(netflix)) {
  message("Usando dados de exemplo para Q7-Q10")
  netflix <- local({
    n_total <- 300
    paises  <- c("United States","India","United Kingdom","Canada",
                 "France","Japan","South Korea","Spain","Mexico","Turkey")
    generos <- c("Dramas","Action & Adventure","Comedies",
                 "Documentaries","Thrillers")

    # The three random draws run in this order: country, year, genre
    pais_sorteado   <- sample(paises, n_total, replace = TRUE)
    ano_sorteado    <- sample(1990:2019, n_total, replace = TRUE)
    genero_sorteado <- sample(generos, n_total, replace = TRUE)

    data.frame(
      type         = rep(c("Movie", "TV Show"), times = c(200, 100)),
      country      = pais_sorteado,
      release_year = ano_sorteado,
      listed_in    = genero_sorteado,
      stringsAsFactors = FALSE
    )
  })
}

# Keep only titles with a single country: drop rows where `country` is
# missing or contains a comma
netflix_single <- netflix %>%
  filter(!(is.na(country) | grepl(",", country)))

# Ten countries with the most titles, largest count first
top10 <- netflix_single %>%
  count(country) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10)

# Pie chart: one slice per country, labelled with its name and percentage
pizza_top10 <- plot_ly(
  top10,
  type     = "pie",
  labels   = ~country,
  values   = ~n,
  textinfo = "label+percent"
)
layout(pizza_top10, title = "Top 10 Países com Mais Conteúdo na Netflix")

Questão 8

Plotly – Tabela com os 10 países

# Reutiliza top10 da questão anterior (carregado no chunk q7)
library(plotly)

# Header row: bold column titles, white text on a grey background
cabecalho <- list(
  values = c("<b>País</b>", "<b>Total de Conteúdos</b>"),
  align  = "center",
  fill   = list(color = "grey"),
  font   = list(color = "white", size = 13)
)

# Body: one column of country names and one of counts, taken from `top10`
celulas <- list(
  values = list(top10$country, top10$n),
  align  = "center",
  font   = list(size = 12)
)

tabela_top10 <- plot_ly(type = "table", header = cabecalho, cells = celulas)
layout(tabela_top10, title = "Top 10 Países – Total de Conteúdo na Netflix")

Questão 9

Plotly – Conteúdo por Década (Séries vs Filmes)

library(plotly)
library(dplyr)

# Add a decade column: the release year rounded down to a multiple of 10
netflix_dec <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = 10 * (release_year %/% 10))

# Number of titles per decade and content type
por_decada <- netflix_dec %>%
  group_by(decada, type) %>%
  summarise(n = n(), .groups = "drop")

filmes <- filter(por_decada, type == "Movie")
series <- filter(por_decada, type == "TV Show")

# Build the chart step by step: movies in yellow, TV shows in blue
grafico_decada <- plot_ly()
grafico_decada <- add_trace(
  grafico_decada,
  data   = filmes, x = ~decada, y = ~n,
  type   = "scatter", mode = "lines+markers",
  name   = "Filmes",
  line   = list(color = "yellow", width = 2),
  marker = list(color = "yellow")
)
grafico_decada <- add_trace(
  grafico_decada,
  data   = series, x = ~decada, y = ~n,
  type   = "scatter", mode = "lines+markers",
  name   = "Séries",
  line   = list(color = "blue", width = 2),
  marker = list(color = "blue")
)
layout(
  grafico_decada,
  title  = "Quantidade de Conteúdo por Década na Netflix",
  xaxis  = list(title = "Década"),
  yaxis  = list(title = "Quantidade de Conteúdo"),
  legend = list(title = list(text = "Tipo"))
)

Questão 10

Plotly – Filmes por Gênero (2000–2010)

library(plotly)
library(dplyr)

# Genres to compare
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010, classified by the first genre listed in
# `listed_in` (the text before the first comma), then counted per year and
# genre for the three target genres
netflix_genero <- netflix %>%
  filter(type == "Movie",
         release_year >= 2000,
         release_year <= 2010,
         !is.na(listed_in)) %>%
  mutate(genero_principal = trimws(sub(",.*", "", listed_in))) %>%
  filter(genero_principal %in% generos_alvo) %>%
  count(release_year, genero_principal)

# The year is converted to character so the x-axis is categorical.
# `barmode` is a layout attribute, not a bar-trace attribute, so it is set in
# layout(); passing it to plot_ly() only triggers an unknown-attribute warning.
plot_ly(netflix_genero,
        x      = ~as.character(release_year),
        y      = ~n,
        color  = ~genero_principal,
        type   = "bar") %>%
  layout(
    barmode = "group",
    title  = "Filmes por Gênero entre 2000 e 2010",
    xaxis  = list(title = "Ano de Lançamento"),
    yaxis  = list(title = "Quantidade de Filmes"),
    legend = list(title = list(text = "Gênero"))
  )

Exercício - 12 - CPAD

Flávia Borges

2026-05-19