Questões

Questão 1

# MRT measurements (plotted in ms) for each series, one value per clock
# frequency listed in `clock` (GHz). The series labels ("1F", "3F", ...,
# "Sem F") are the ones used in the plot legends below.
MRT_1F    <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937,
               3.3041601673945418, 1.1823111717498882, 1.1892293502386786)
MRT_3F    <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453,
               0.4502068233039181, 0.4496185276300172, 0.4543157082191288)
MRT_5F    <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928,
               0.30655265997285697, 0.3055812715727718, 0.3053297166713006)
MRT_10F   <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516,
               0.19615594370806338, 0.2034569237883263, 0.19617420889447737)
MRT_15F   <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515,
               0.1598052657153692, 0.1645934043532696, 0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899,
               0.6146761002685637, 0.6096747087200697, 0.6125810476877268)
clock     <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Two stacked panels: line chart on top, log-scale bar chart below.
# The call is namespace-qualified because plotly (attached further down in this
# file) masks layout(); a bare call fails when the script is re-run in a
# session where plotly is already attached.
graphics::layout(matrix(c(1, 2), nrow = 2))

# Common upper limit so every series fits inside the first panel.
y_max <- max(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)

# Panel 1: one line per series, distinguished by colour and point symbol.
plot(clock, MRT_1F, type = "b", col = 1, pch = 1, ylim = c(0, y_max),
     xlab = "Clock (GHz)", ylab = "MRT (ms)",
     main = "MRT por frequência de clock")
lines(clock, MRT_3F,    type = "b", col = 2, pch = 2)
lines(clock, MRT_5F,    type = "b", col = 3, pch = 3)
lines(clock, MRT_10F,   type = "b", col = 4, pch = 4)
lines(clock, MRT_15F,   type = "b", col = 5, pch = 5)
lines(clock, MRT_sem_F, type = "b", col = 6, pch = 6)
legend("topright",
       legend = c("1F", "3F", "5F", "10F", "15F", "Sem F"),
       col    = 1:6, pch = 1:6, lty = 1, cex = 0.8)

# Panel 2: series in rows, clock frequencies in columns, so beside = TRUE
# yields one group of bars per clock frequency.
mrt_matrix <- rbind(MRT_sem_F, MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F)
rownames(mrt_matrix) <- c("Sem F", "1F", "3F", "5F", "10F", "15F")
colnames(mrt_matrix) <- clock

# Two grey tones recycled across the six series.
cores_barras <- rep(c("#E6E6E6", "#666666"), length.out = nrow(mrt_matrix))

barplot(mrt_matrix,
        beside = TRUE,
        log    = "y",
        col    = cores_barras,
        xlab   = "Clock (GHz)",
        ylab   = "MRT (ms) - escala log",
        main   = "MRT por frequência de clock (escala logarítmica)")

# The legend is drawn separately instead of through barplot(legend.text = ...):
# barplot() computes a default legend position with yinch(0.1), which emits
# "y log scale: yinch() is nonsense" on a logarithmic axis even when
# args.legend overrides the position.
legend("topright",
       legend = rownames(mrt_matrix),
       fill   = cores_barras,
       cex    = 0.7)

## Warning in yinch(0.1): y log scale: yinch() is nonsense

Questão 2

# Counts of meal-quality ratings, one row per price category and one column
# per rating.
qualidade <- rbind(
  "Barato" = c(10, 25, 35, 20, 10),
  "Médio"  = c( 5, 15, 30, 35, 15),
  "Caro"   = c( 2,  8, 20, 40, 30)
)
colnames(qualidade) <- c("Péssimo", "Ruim", "Regular", "Bom", "Excelente")

# One colour per rating, running from red to green.
cores_qualidade <- c("#d73027", "#fc8d59", "#fee08b", "#91cf60", "#1a9850")

# Transposing puts the ratings in rows, so each price category becomes a single
# stacked bar whose segments are the ratings.
barplot(
  height      = t(qualidade),
  beside      = FALSE,
  col         = cores_qualidade,
  legend.text = colnames(qualidade),
  args.legend = list(x = "topright", cex = 0.85),
  xlab        = "Categoria de Preço",
  ylab        = "Quantidade",
  main        = "Qualidade de Refeição por Categoria de Preço"
)

Questão 3

# Keep only the May observations of the built-in airquality data set and add
# the temperature converted from Fahrenheit to Celsius.
maio <- airquality[airquality$Month == 5, ]
maio$TempC <- (maio$Temp - 32) / 1.8

# Density-scaled histogram, so the kernel density curve added afterwards
# shares the same y axis.
hist(
  maio$TempC,
  probability = TRUE,
  main = "Histograma das Temperaturas de Maio",
  xlab = "Temperatura (°C)",
  ylab = "Densidade",
  col  = "steelblue"
)

lines(density(maio$TempC), lwd = 2, col = "red")

Questão 4

# Sales figures keyed by country.
vendas_brutas <- c(
  Australia = 54288, Canada = 89420, France = 73640, Germany = 92110,
  Japan = 61870, Norway = 34520, Spain = 48390, UK = 110430, USA = 195600
)
sales <- data.frame(
  Country = names(vendas_brutas),
  Sales   = unname(vendas_brutas)
)

# Total per country, then each country's share of the grand total as a
# percentage rounded to one decimal place.
vendas_pais <- aggregate(Sales ~ Country, data = sales, FUN = sum)
pct         <- round(100 * prop.table(vendas_pais$Sales), 1)
rotulos     <- paste0(pct, "%")
cores_pizza <- rainbow(length(vendas_pais$Sales))

# Slices are labelled with the percentages; the legend maps colours to
# country names.
pie(
  x      = vendas_pais$Sales,
  labels = rotulos,
  col    = cores_pizza,
  main   = "Total de Vendas por País (%)"
)

legend(
  "bottomleft",
  legend = vendas_pais$Country,
  fill   = cores_pizza,
  cex    = 0.8
)

Questão 5

# Load the built-in InsectSprays data set into the workspace.
data(InsectSprays)

# One box per spray type; outline = FALSE hides the outlier points.
boxplot(
  split(InsectSprays$count, InsectSprays$spray),
  outline = FALSE,
  col     = "yellow",
  xlab    = "Tipo de Inseticida",
  ylab    = "Contagem de Insetos",
  main    = "Contagem de Insetos por Tipo de Inseticida"
)

Questão 6

library(stringr)
library(dplyr)

## 
## Anexando pacote: 'dplyr'

## Os seguintes objetos são mascarados por 'package:stats':
## 
##     filter, lag

## Os seguintes objetos são mascarados por 'package:base':
## 
##     intersect, setdiff, setequal, union

# Convert human-readable size strings (e.g. "1.5 GB", "512MB", "2 tb") to
# megabytes.
#
# valor_str: character vector; each element holds a number followed by a unit
#            (KB, MB, GB or TB, case-insensitive). Surrounding whitespace is
#            ignored.
# Returns:   numeric vector of the same length, in MB. Elements with no number
#            or with an unrecognised unit yield NA.
#
# All units use binary (1024-based) factors. TB was previously multiplied by
# 1,000,000 while GB used 1024, which put the two units on different scales.
converter_para_mb <- function(valor_str) {
  # First match of `padrao` in each element, NA where there is no match.
  extrair <- function(texto, padrao) {
    pos   <- regexpr(padrao, texto)
    achou <- !is.na(pos) & pos > 0
    saida <- rep(NA_character_, length(texto))
    saida[achou] <- regmatches(texto, pos)
    saida
  }

  valor_str <- trimws(as.character(valor_str))
  numero    <- as.numeric(extrair(valor_str, "[0-9]+\\.?[0-9]*"))
  unidade   <- toupper(extrair(valor_str, "[A-Za-z]+"))

  # Multiplier from each supported unit to MB; a lookup with an unknown or
  # missing unit returns NA, which propagates to the result.
  fatores <- c(KB = 1 / 1024, MB = 1, GB = 1024, TB = 1024 * 1024)
  unname(numero * fatores[unidade])
}

# Read one monitoring CSV and prepare it for plotting.
#
# caminho: path to a CSV file with at least the columns `currentTime`
#          (timestamps formatted as "%Y-%m-%d %H:%M:%S") and `usedMemory`
#          (size strings understood by converter_para_mb()).
# Returns: the data frame sorted by `currentTime`, with `currentTime` parsed
#          to POSIXct and two added columns: `tempo_horas` (hours elapsed since
#          the earliest row) and `usedMemory_MB`.
preparar_dados <- function(caminho) {
  dados <- read.csv(caminho, stringsAsFactors = FALSE)

  # Parse the timestamps and put the rows in chronological order.
  instantes <- as.POSIXct(dados$currentTime, format = "%Y-%m-%d %H:%M:%S")
  dados$currentTime <- instantes
  dados <- dados[order(dados$currentTime), ]

  # Elapsed time is measured from the first (earliest) row after sorting.
  inicio <- dados$currentTime[1]
  decorrido <- difftime(dados$currentTime, inicio, units = "hours")
  dados$tempo_horas <- as.numeric(decorrido)

  dados$usedMemory_MB <- converter_para_mb(dados$usedMemory)
  dados
}

# Directory holding the CSV files used from here on (machine-specific path).
base_path <- "C:/Users/Jeane/Downloads/dataset/"

# One prepared data frame per monitoring file.
d01   <- preparar_dados(file.path(base_path, "monitoringCloudData_0.1.csv"))
d05   <- preparar_dados(file.path(base_path, "monitoringCloudData_0.5.csv"))
d1    <- preparar_dados(file.path(base_path, "monitoringCloudData_1.csv"))
dnone <- preparar_dados(file.path(base_path, "monitoringCloudData_NONE.csv"))

# 2 x 2 grid of panels, filled row by row.
# The call is namespace-qualified because plotly (attached further down in this
# file) masks layout(); a bare call fails when the script is re-run in a
# session where plotly is already attached.
graphics::layout(matrix(1:4, nrow = 2, byrow = TRUE))

# Panel titles, in the same order as `datasets`.
titulos  <- c("monitoringCloudData_0.1", "monitoringCloudData_0.5",
              "monitoringCloudData_1",   "monitoringCloudData_NONE")
datasets <- list(d01, d05, d1, dnone)

# One line chart per file: memory used (MB) against elapsed time (hours).
for (i in seq_along(datasets)) {
  df <- datasets[[i]]
  plot(df$tempo_horas, df$usedMemory_MB,
       type = "l", col  = "steelblue",
       xlab = "Tempo (horas)", ylab = "Memória Usada (MB)",
       main = titulos[i])
}

Questão 7

library(plotly)

## Carregando pacotes exigidos: ggplot2

## 
## Anexando pacote: 'plotly'

## O seguinte objeto é mascarado por 'package:ggplot2':
## 
##     last_plot

## O seguinte objeto é mascarado por 'package:stats':
## 
##     filter

## O seguinte objeto é mascarado por 'package:graphics':
## 
##     layout

library(dplyr)

# Load the Netflix catalogue from the shared data directory.
netflix <- read.csv(
  file.path(base_path, "netflix_titles.csv"),
  stringsAsFactors = FALSE
)

# Ten countries with the most titles. Rows with a missing or empty country,
# or with more than one country (comma-separated), are left out.
top10 <- netflix %>%
  filter(!is.na(country) & country != "" & !grepl(",", country)) %>%
  count(country, name = "total", sort = TRUE) %>%
  head(10)

# Interactive pie chart; each slice shows the country name and its share.
layout(
  plot_ly(
    data     = top10,
    type     = "pie",
    labels   = ~country,
    values   = ~total,
    textinfo = "label+percent"
  ),
  title = "Top 10 Países com Mais Conteúdo na Netflix"
)

Questão 8

library(plotly)
library(dplyr)

# Load the Netflix catalogue from the shared data directory.
netflix <- read.csv(
  file.path(base_path, "netflix_titles.csv"),
  stringsAsFactors = FALSE
)

# Ten countries with the most titles (single-country rows only), with the
# columns renamed to the labels shown in the table.
top10 <- netflix %>%
  filter(!is.na(country) & country != "" & !grepl(",", country)) %>%
  count(country, name = "total", sort = TRUE) %>%
  head(10) %>%
  rename(País = country, `Total de conteúdos` = total)

# Styling of the header row and of the body cells of the plotly table.
cabecalho <- list(
  values = list("<b>País</b>", "<b>Total de conteúdos</b>"),
  align  = "center",
  fill   = list(color = "gray"),
  font   = list(color = "white", size = 13)
)
celulas <- list(
  values = list(top10[["País"]], top10[["Total de conteúdos"]]),
  align  = "center",
  font   = list(size = 12)
)

plot_ly(type = "table", header = cabecalho, cells = celulas)

Questão 9

library(plotly)
library(dplyr)

# Load the Netflix catalogue from the shared data directory.
netflix <- read.csv(
  file.path(base_path, "netflix_titles.csv"),
  stringsAsFactors = FALSE
)

# Number of titles per decade and content type. The decade is the release
# year rounded down to a multiple of ten.
com_decada <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = floor(release_year / 10) * 10)

por_decada <- com_decada %>%
  group_by(decada, type) %>%
  summarise(total = n(), .groups = "drop")

# One subset per content type, each drawn as its own trace below.
filmes <- filter(por_decada, type == "Movie")
series <- filter(por_decada, type == "TV Show")

# Line chart with markers: yellow for movies, blue for TV shows.
plot_ly() %>%
  add_trace(
    data = filmes, x = ~decada, y = ~total,
    type = "scatter", mode = "lines+markers",
    name = "Filmes", line = list(color = "yellow")
  ) %>%
  add_trace(
    data = series, x = ~decada, y = ~total,
    type = "scatter", mode = "lines+markers",
    name = "Séries", line = list(color = "blue")
  ) %>%
  layout(
    title = "Quantidade de Conteúdo por Década na Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de Conteúdo")
  )

Questão 10

library(plotly)
library(dplyr)
library(stringr)

# Load the Netflix catalogue from the shared data directory.
netflix <- read.csv(
  file.path(base_path, "netflix_titles.csv"),
  stringsAsFactors = FALSE
)

# Genres shown in the chart; a movie is assigned to the first genre listed in
# `listed_in` (the text before the first comma).
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010 (inclusive), counted per year and genre.
contagem <- netflix %>%
  filter(
    type == "Movie",
    between(release_year, 2000, 2010),
    !is.na(listed_in)
  ) %>%
  mutate(genero_principal = str_trim(str_extract(listed_in, "^[^,]+"))) %>%
  filter(genero_principal %in% generos_alvo) %>%
  group_by(release_year, genero_principal) %>%
  summarise(total = n(), .groups = "drop")

# Grouped bars: one group per year, one bar per genre. The year is passed as
# a factor so the x axis is categorical.
plot_ly(
  data  = contagem,
  type  = "bar",
  x     = ~as.factor(release_year),
  y     = ~total,
  color = ~genero_principal
) %>%
  layout(
    barmode = "group",
    title   = "Filmes por Gênero (2000–2010)",
    xaxis   = list(title = "Ano"),
    yaxis   = list(title = "Quantidade de Filmes")
  )

Exercício 12 - Visualização de Dados

Questões

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10