# Questão 1

# Response-time series used by Questão 1, one vector per fog configuration.
# Every vector holds seven values, matched by position with `clock`.
# NOTE(review): units are presumably seconds (the plots label the axis
# "Response Time (sec.)") — confirm against the data source.

# One fog node.
MRT_1F <- c(
  517.1468515630205,
  85.13094142168089,
  30.333207896694553,
  12.694776264558937,
  3.3041601673945418,
  1.1823111717498882,
  1.1892293502386786
)

# Three fog nodes.
MRT_3F <- c(
  156.68929936163462,
  11.540837783562276,
  0.4512835621696538,
  0.4509797929766453,
  0.4502068233039181,
  0.4496185276300172,
  0.4543157082191288
)

# Five fog nodes.
MRT_5F <- c(
  83.90319666471157,
  0.3068151086494968,
  0.30522314133037304,
  0.3072588968084928,
  0.30655265997285697,
  0.3055812715727718,
  0.3053297166713006
)

# Ten fog nodes.
MRT_10F <- c(
  29.55430642951759,
  0.19832832665772515,
  0.1971923924717474,
  0.19796648905716516,
  0.19615594370806338,
  0.2034569237883263,
  0.19617420889447737
)

# Fifteen fog nodes.
MRT_15F <- c(
  11.317736530583566,
  0.167364215666193,
  0.16172168266811013,
  0.16701085329580515,
  0.1598052657153692,
  0.1645934043532696,
  0.16216563797118075
)

# No fog layer.
MRT_sem_F <- c(
  11.93430909937736,
  0.6095414637034009,
  0.6060645101029295,
  0.612167181646899,
  0.6146761002685637,
  0.6096747087200697,
  0.6125810476877268
)

# Time between requests used as the x axis: 0.1, then 0.5 to 3 in steps of 0.5.
clock <- c(0.1, seq(0.5, 3, by = 0.5))

# Questão 1, top panel: response time of every fog configuration as a
# function of the time between requests.
#
# The page is split into 7 figures: figure 1 spans the whole first row (and is
# taller), figures 2-7 fill a 3 x 2 grid below it.
# `graphics::` is spelled out because library(plotly), attached further down
# in this file, masks layout() with plotly::layout(); without the prefix this
# section fails when re-run in a session where plotly is already loaded.
graphics::layout(
  matrix(c(1, 1,
           2, 3,
           4, 5,
           6, 7), nrow = 4, byrow = TRUE),
  heights = c(1.35, 1, 1, 1)
)

par(mar = c(4.8, 5, 3.5, 2), oma = c(0, 0, 2, 0))

# Series in drawing order; the names double as the legend labels.
series_mrt <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F,
  "no Fog" = MRT_sem_F
)
cores_series <- c("black", "red", "orange", "blue", "purple", "green4")

# The first series opens the plot. The y range is taken from all series (plus
# head-room of 40 for the legend) so no curve can be clipped if the data change.
plot(clock, series_mrt[[1]], type = "b", pch = 16, col = cores_series[1],
     lwd = 2, ylim = c(0, max(unlist(series_mrt)) + 40),
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     main = "Response Time by Number of Fogs")

# Remaining series are overlaid on the same axes.
for (k in seq_along(series_mrt)[-1]) {
  lines(clock, series_mrt[[k]], type = "b", pch = 16,
        col = cores_series[k], lwd = 2)
}

legend("topright",
       legend = names(series_mrt),
       col = cores_series,
       pch = 16, lwd = 2, cex = 0.85, bg = "white")

# Draw one grouped bar chart (log-scaled y axis) comparing the "no Fog"
# response times with those of a given fog configuration.
#
# Arguments:
#   com_fog    numeric vector with the response times of the fog configuration.
#   titulo     label of the configuration; used in the panel title and legend.
#   letra      panel letter, shown as "(letra) " before the title.
#   sem_fog    numeric vector with the "no Fog" baseline; defaults to the
#              script-level MRT_sem_F, which is what the original body used.
#   intervalos x-axis labels, one per bar group; defaults to the script-level
#              `clock`, which is what the original body used.
#
# Returns (invisibly) whatever legend() returns; called for its plot.
grafico_barras <- function(com_fog, titulo, letra,
                           sem_fog = MRT_sem_F, intervalos = clock) {
  # rbind() would silently recycle a shorter vector, so check lengths first.
  stopifnot(
    is.numeric(com_fog),
    is.numeric(sem_fog),
    length(com_fog) == length(sem_fog),
    length(com_fog) == length(intervalos)
  )

  # Row 1 = baseline, row 2 = fog configuration; with beside = TRUE each
  # column becomes one pair of bars.
  dados <- rbind(sem_fog, com_fog)
  barplot(dados,
          beside = TRUE,
          log = "y",
          col = c("#E6E6E6", "#666666"),
          names.arg = intervalos,
          xlab = "Time between Things requests",
          ylab = "Response time (s)",
          main = paste0("(", letra, ") ", titulo),
          cex.names = 0.85,
          ylim = c(0.01, 1000))
  legend("topright",
         legend = c("no Fog", titulo),
         fill = c("#E6E6E6", "#666666"),
         cex = 0.75,
         bg = "white")
}

# Panels (a)-(f): fill the six remaining figures of the Questão 1 layout,
# one bar chart per fog configuration.
grafico_barras(MRT_1F, "1 Fog", "a")
grafico_barras(MRT_3F, "3 Fogs", "b")
grafico_barras(MRT_5F, "5 Fogs", "c")
grafico_barras(MRT_10F, "10 Fogs", "d")
grafico_barras(MRT_15F, "15 Fogs", "e")
# NOTE(review): panel (f) plots the "no Fog" series against itself;
# presumably it only fills the sixth slot of the layout — confirm.
grafico_barras(MRT_sem_F, "no Fog", "f")

# The multi-figure layout and the custom margins set for Questão 1 persist on
# the graphics device. Without this reset, the plots of the following
# questions are drawn inside the small panels of a new 7-figure page when the
# file is run as a plain script. Values below are the documented par()
# defaults. `graphics::` avoids the plotly::layout() mask.
graphics::layout(1)
par(mar = c(5.1, 4.1, 4.1, 2.1), oma = c(0, 0, 0, 0))

# Questão 2

# Questão 2: stacked bar chart of meal quality within each price category.
#
# The counts are filled column by column (byrow = FALSE), so each line of
# four numbers below is one price category and the positions inside it follow
# the quality order Excelente, Boa, Regular, Ruim.
qualidade_refeicao <- matrix(
  c(40, 30, 20, 10,   # Baixo
    35, 25, 25, 15,   # Médio
    30, 25, 25, 20,   # Alto
    25, 25, 20, 30),  # Muito Alto
  nrow = 4,
  byrow = FALSE,
  dimnames = list(
    c("Excelente", "Boa", "Regular", "Ruim"),
    c("Baixo", "Médio", "Alto", "Muito Alto")
  )
)

# Legend settings: horizontal, frameless, pushed below the bars by a
# negative inset.
opcoes_legenda <- list(
  x = "bottom",
  horiz = TRUE,
  inset = -0.18,
  cex = 0.85,
  bty = "n"
)

barplot(
  qualidade_refeicao,
  main = "Qualidade da Refeição por Categoria de Preço",
  xlab = "Categoria de Preço",
  ylab = "Número de Refeições",
  ylim = c(0, 110),
  col = c("green3", "royalblue", "gold", "red"),
  border = "white",
  legend.text = rownames(qualidade_refeicao),
  args.legend = opcoes_legenda
)

# Questão 3

# Questão 3: density-scaled histogram of the May temperatures from the
# built-in `airquality` data set, converted from Fahrenheit to Celsius, with
# a kernel density curve drawn on top.

# Keep only the rows recorded in month 5.
dados_maio <- airquality[airquality$Month == 5, ]

# Fahrenheit -> Celsius.
temp_celsius <- (dados_maio$Temp - 32) / 1.8

# freq = FALSE puts the bars on the density scale, the same scale as the
# curve added below.
hist(
  temp_celsius,
  freq = FALSE,
  main = "Histograma das Temperaturas de Maio",
  xlab = "Temperatura em Graus Celsius",
  ylab = "Densidade",
  col = "lightblue",
  border = "white"
)

curva_densidade <- density(temp_celsius, na.rm = TRUE)
lines(curva_densidade, lwd = 3, col = "red")

# Questão 4

# Questão 4: pie chart with each country's share of the total sales.

url_vendas <- "https://training-course-material.com/images/8/8f/Sales.txt"
sales <- read.table(url_vendas, header = TRUE)

# Make the column names syntactically valid.
names(sales) <- make.names(names(sales))

# Columns are picked by type, not by name: the first text (or factor) column
# is used as the country, the first numeric column as the amount.
# NOTE(review): this assumes the file layout matches that heuristic — confirm.
eh_texto <- vapply(
  sales,
  function(coluna) is.character(coluna) || is.factor(coluna),
  logical(1)
)
eh_numerica <- vapply(sales, is.numeric, logical(1))
col_pais <- names(sales)[eh_texto][1]
col_num <- names(sales)[eh_numerica][1]

# One row per country with the summed amount.
total_pais <- setNames(
  aggregate(sales[[col_num]],
            by = list(Pais = sales[[col_pais]]),
            FUN = sum),
  c("Pais", "Total")
)

# Percentage labels, rounded to one decimal place.
porcentagem <- round(100 * total_pais$Total / sum(total_pais$Total), 1)
rotulos <- paste0(porcentagem, "%")
cores <- rainbow(nrow(total_pais))

pie(
  total_pais$Total,
  main = "Percentual de Vendas por País",
  labels = rotulos,
  col = cores
)

# Slices are labelled with percentages only, so the legend carries the
# country names.
legend(
  "topright",
  legend = total_pais$Pais,
  fill = cores,
  bty = "n",
  cex = 0.85
)

# Questão 5

# Questão 5: insect counts per spray type from the built-in `InsectSprays`
# data set, drawn as yellow boxes with the outlier points suppressed
# (outline = FALSE).
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Inseticida",
  ylab = "Contagem de Insetos",
  col = "yellow",
  outline = FALSE
)

# Questão 6

# Convert memory readings written as text with a unit suffix (for example
# "1,5 GB" or "512MB") into numeric megabytes.
#
# Arguments:
#   x  vector of readings; coerced to character. A comma is accepted as the
#      decimal separator.
#
# Returns a numeric vector the same length as `x`. The unit is detected
# case-insensitively from the letters left after removing digits, separators
# and spaces:
#   contains "T" -> value * 1000000
#   contains "G" -> value * 1024
#   contains "K" -> value / 1024
#   otherwise    -> value returned unchanged (treated as MB)
# NA or unparseable numbers yield NA.
converter_memoria_mb <- function(x) {
  x <- trimws(as.character(x))

  # Numeric part: keep digits and separators only, then use "." as the
  # decimal mark.
  valor <- as.numeric(gsub(",", ".", gsub("[^0-9,\\.]", "", x)))
  # Unit part: whatever is left once digits, separators and spaces are gone.
  unidade <- toupper(gsub("[0-9,\\. ]", "", x))

  # Assigned from lowest to highest precedence, so "T" wins over "G", which
  # wins over "K" (same precedence as the nested test it replaces).
  fator <- rep(1, length(unidade))
  # Kilobyte readings were previously passed through as if they were MB.
  fator[grepl("K", unidade, fixed = TRUE)] <- 1 / 1024
  fator[grepl("G", unidade, fixed = TRUE)] <- 1024
  # NOTE(review): TB uses a decimal factor while GB uses 1024; kept as in the
  # original — presumably required by the exercise statement, confirm.
  fator[grepl("T", unidade, fixed = TRUE)] <- 1000000

  valor * fator
}

# Read one monitoring CSV and add the columns used by the memory plots.
#
# Arguments:
#   arquivo  file passed straight to read.csv(); it must contain the columns
#            `currentTime` and `usedMemory`.
#
# Returns the data frame from read.csv() with:
#   currentTime   converted to POSIXct in UTC;
#   tempo_horas   hours elapsed since the earliest timestamp in the file;
#   usedMemoryMB  `usedMemory` converted by converter_memoria_mb().
#
# Stops with an error when a required column is missing or when the
# timestamps match none of the formats as.POSIXct() tries by default.
ler_monitoramento <- function(arquivo) {
  dados <- read.csv(arquivo, stringsAsFactors = FALSE)

  faltando <- setdiff(c("currentTime", "usedMemory"), names(dados))
  if (length(faltando) > 0) {
    stop("Missing required column(s): ", paste(faltando, collapse = ", "),
         call. = FALSE)
  }

  # Keep the raw text so the fallback parses the original strings. The
  # previous code overwrote the column first, so its fallback only ever saw
  # NA values and could never recover.
  carimbos <- as.character(dados$currentTime)
  instantes <- as.POSIXct(carimbos,
                          format = "%Y-%m-%d %H:%M:%S",
                          tz = "UTC")

  if (all(is.na(instantes))) {
    # Let as.POSIXct() try its default formats (e.g. "2020/01/31 10:00:00").
    instantes <- as.POSIXct(carimbos, tz = "UTC")
  }

  dados$currentTime <- instantes
  dados$tempo_horas <- as.numeric(difftime(instantes,
                                           min(instantes, na.rm = TRUE),
                                           units = "hours"))

  dados$usedMemoryMB <- converter_memoria_mb(dados$usedMemory)
  dados
}

# Questão 6: one used-memory-over-time line chart per monitoring file,
# arranged in a 2 x 2 grid.

# Input files and the matching panel titles (same order).
arquivos <- c("monitoringCloudData_NONE.csv",
              "monitoringCloudData_0.1.csv",
              "monitoringCloudData_0.5.csv",
              "monitoringCloudData_1.csv")

titulos <- c("Memory Analysis (None Workload)",
             "Memory Analysis (Workload of 0.1)",
             "Memory Analysis (Workload of 0.5)",
             "Memory Analysis (Workload of 1.0)")

stopifnot(length(arquivos) == length(titulos))

# `graphics::` is spelled out because library(plotly), attached later in this
# file, masks layout() with plotly::layout(); without the prefix this section
# fails when re-run in a session where plotly is already loaded.
graphics::layout(matrix(1:4, nrow = 2, byrow = TRUE))
# par() returns the previous values, kept so they can be restored below.
par_anterior <- par(mar = c(4.8, 5, 3.5, 1.5))

for (i in seq_along(arquivos)) {
  dados <- ler_monitoramento(arquivos[i])

  plot(dados$tempo_horas,
       dados$usedMemoryMB,
       type = "l",
       lwd = 1,
       col = "black",
       main = titulos[i],
       xlab = "Time (hour)",
       ylab = "Used Memory (MB)")
}

# Leave the device as it was found: single figure, previous margins.
graphics::layout(1)
par(par_anterior)

# Questão 7

library(plotly)
library(dplyr)

# Questão 7: interactive pie chart of the ten countries with the most titles.
netflix <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep titles whose `country` field is filled in and has no comma, i.e. rows
# that list exactly one country.
titulos_pais_unico <- filter(
  netflix,
  !is.na(country),
  country != "",
  !grepl(",", country)
)

# Titles per country, most frequent first, cut to the first ten rows.
contagem_paises <- count(titulos_pais_unico, country, sort = TRUE)
paises_top10 <- slice_head(contagem_paises, n = 10)

# Slices show country name and percentage; hovering also shows the count.
grafico_paises <- plot_ly(
  paises_top10,
  type = "pie",
  labels = ~country,
  values = ~n,
  textinfo = "label+percent",
  hoverinfo = "label+value+percent"
)

grafico_paises %>%
  layout(title = "Top 10 Países com Mais Conteúdos na Netflix")

# Questão 8

# Questão 8: the top-10 countries computed in Questão 7 (`paises_top10`),
# shown as a plotly table.

# Header row: white text on a gray background.
cabecalho_tabela <- list(
  values = c("País", "Total de conteúdos"),
  align = "center",
  fill = list(color = "gray"),
  font = list(color = "white", size = 14)
)

# Body: one list element per table column.
celulas_tabela <- list(
  values = list(paises_top10$country, paises_top10$n),
  align = "center",
  height = 28,
  font = list(size = 13)
)

plot_ly(
  type = "table",
  header = cabecalho_tabela,
  cells = celulas_tabela
)

# Questão 9

# Questão 9: number of titles per release decade, one line for TV shows and
# one for movies.

# Rows with a release year and one of the two content types.
titulos_validos <- filter(
  netflix,
  !is.na(release_year),
  type %in% c("Movie", "TV Show")
)

# Decade = release year rounded down to a multiple of ten (2017 -> 2010).
titulos_validos <- mutate(
  titulos_validos,
  decada = (release_year %/% 10) * 10
)

# One row per (decade, type) pair with its count in column `n`.
conteudo_decada <- count(titulos_validos, decada, type)

series_tv <- conteudo_decada[conteudo_decada$type == "TV Show", ]
series_filmes <- conteudo_decada[conteudo_decada$type == "Movie", ]

plot_ly() %>%
  add_trace(
    data = series_tv,
    x = ~decada,
    y = ~n,
    type = "scatter",
    mode = "lines+markers",
    name = "TV Series",
    line = list(color = "blue", width = 3),
    marker = list(size = 7)
  ) %>%
  add_trace(
    data = series_filmes,
    x = ~decada,
    y = ~n,
    type = "scatter",
    mode = "lines+markers",
    name = "Movies",
    line = list(color = "yellow", width = 3),
    marker = list(size = 7)
  ) %>%
  layout(
    title = "Quantidade de Conteúdo por Década",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Qtd. Conteúdo")
  )

# Questão 10

# Questão 10: grouped bars with the number of movies released per year from
# 2000 to 2010 for three genres.

# Genre values as they appear in `listed_in`, in drawing order.
generos <- c("Dramas", "Action & Adventure", "Comedies")
# Legend label and bar colour for each genre, same order as `generos`.
rotulos_generos <- c("Drama", "Ação & Aventura", "Comédia")
cores_generos <- c("blue", "orange", "green")

# Movies released in the 2000-2010 window (both ends included).
filmes_periodo <- filter(
  netflix,
  type == "Movie",
  release_year >= 2000 & release_year <= 2010
)

# A title may list several comma-separated genres; only the first one
# (everything before the first comma) is used.
filmes_periodo <- mutate(
  filmes_periodo,
  genero_principal = trimws(sub(",.*", "", listed_in))
)

filmes_genero <- count(
  filter(filmes_periodo, genero_principal %in% generos),
  release_year,
  genero_principal
)

# One bar trace per genre, added in the order of `generos`.
grafico_generos <- plot_ly()
for (k in seq_along(generos)) {
  grafico_generos <- add_trace(
    grafico_generos,
    data = filmes_genero[filmes_genero$genero_principal == generos[k], ],
    x = ~release_year,
    y = ~n,
    type = "bar",
    name = rotulos_generos[k],
    marker = list(color = cores_generos[k])
  )
}

grafico_generos %>%
  layout(
    title = "Quantidade de Filmes por Gênero entre 2000 e 2010",
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Quantidade de Filmes"),
    barmode = "group"
  )

# Exercício 12 - Visualização de Dados

# Jamerson Cavalcanti Lira

# Questão 1

# Questão 2

# Questão 3

# Questão 4

# Questão 5

# Questão 6

# Questão 7

# Questão 8

# Questão 9

# Questão 10