Visualização de dados

Questão 1

# Response-time series, one value per entry of `clock`. The suffix names the
# deployment (1/3/5/10/15 fog nodes, or "sem_F" = without fog), matching the
# legend labels used below.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)

MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)

MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)

MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)

MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)

MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295,
               0.612167181646899, 0.6146761002685637, 0.6096747087200697,
               0.6125810476877268)

# Time between requests (x axis of every Questão 1 plot).
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Explicit namespace: plotly is attached further down this script and masks
# layout(), so a bare call fails when the script is re-run in the same session.
graphics::layout(1)

par(mar = c(4, 4, 2, 2))

max_x <- max(clock)
max_y <- max(c(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F))

# Single source of truth for what is drawn and how, so the curves and the
# legend cannot drift apart. Every series has its own marker ("w/o Fog"
# previously reused the "1 Fog" marker, pch 4).
series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_style <- data.frame(
  label = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog"),
  col = c("black", "yellow", "red", "blue", "purple", "green"),
  pch = c(4, 9, 1, 2, 5, 8),
  stringsAsFactors = FALSE
)

# The first series opens the plot and fixes the axes for all the others.
plot(clock, series[[1]],
     type = "b",
     pch = series_style$pch[1],
     col = series_style$col[1],
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     ylim = c(0, max_y),
     xlim = c(0, max_x))

for (k in seq_along(series)[-1]) {
  lines(clock, series[[k]],
        type = "b",
        pch = series_style$pch[k],
        col = series_style$col[k])
}

legend("topright",
       legend = series_style$label,
       col = series_style$col,
       pch = series_style$pch,
       lty = 1,
       bty = "n")

# Fill colours: first entry for the no-fog baseline, second for the fog series.
cores_barra <- c("#E6E6E6", "#666666")

# 3 x 2 grid of panels; the sixth cell (0) is left empty.
# Explicit namespace: plotly is attached further down this script and masks
# layout(), so a bare call fails when the script is re-run in the same session.
graphics::layout(matrix(c(1, 2,
                          3, 4,
                          5, 0),
                        nrow = 3,
                        ncol = 2,
                        byrow = TRUE))
par(mar = c(4.5, 4.5, 2.5, 1))

# Draw one side-by-side bar panel (log-scaled y axis) comparing the no-fog
# baseline with a single fog configuration.
#
# fog_times  numeric vector, one value per entry of `intervals`
# fog_label  legend text for the fog series
# baseline   series drawn first in every group (defaults to MRT_sem_F)
# intervals  labels placed under each group of bars (defaults to clock)
# fill       two colours: baseline, fog
#
# Returns (invisibly) the 2-row matrix handed to barplot().
plot_fog_comparison <- function(fog_times, fog_label,
                                baseline = MRT_sem_F,
                                intervals = clock,
                                fill = cores_barra) {
  dados <- rbind(baseline, fog_times)
  rownames(dados) <- c("w/o Fog", fog_label)

  barplot(dados,
          beside = TRUE,
          log = "y",
          col = fill,
          names.arg = intervals,
          xlab = "Time between Things requests",
          ylab = "Response time (s)")
  legend("topright", legend = rownames(dados), fill = fill, bty = "n")

  invisible(dados)
}

dados_plot1 <- plot_fog_comparison(MRT_1F, "1 Fog")
dados_plot2 <- plot_fog_comparison(MRT_3F, "3 Fogs")
dados_plot3 <- plot_fog_comparison(MRT_5F, "5 Fogs")
dados_plot4 <- plot_fog_comparison(MRT_10F, "10 Fogs")
dados_plot5 <- plot_fog_comparison(MRT_15F, "15 Fogs")

# Back to a single-panel device for the plots that follow.
graphics::layout(1)

Questão 2

# Percentages per price bracket, ordered as in `qualidade_labels`
# (Good, Very Good, Excellent).
price_10_19 <- c(53.8, 43.6, 2.6)
price_20_29 <- c(33.9, 54.2, 11.9)
price_30_39 <- c(2.6, 60.5, 36.8)
price_40_49 <- c(0, 21.4, 78.6)

# One column per price bracket, one row per quality level.
dados_matriz <- cbind(price_10_19, price_20_29, price_30_39, price_40_49)

cores <- c("#6D926D", "#4F794F", "#3B603B")
qualidade_labels <- c("Good", "Very Good", "Excellent")
precos_labels <- c("$10–19", "$20–29", "$30–39", "$40–49")

# The cumulative sum gives the top edge of every stacked segment; stepping
# back half a segment lands on its vertical centre.
somas_cumulativas <- apply(dados_matriz, 2, cumsum)
posicoes_y <- somas_cumulativas - 0.5 * dados_matriz

# Same shape as the data, holding "<value>%" text; empty segments get no label.
labels_sem_zero <- dados_matriz
labels_sem_zero[] <- paste0(dados_matriz, "%")
labels_sem_zero[dados_matriz == 0] <- NA

# barplot() returns the x midpoint of each bar, needed to place the labels.
bar_p <- barplot(
  height = dados_matriz,
  col = cores,
  names.arg = precos_labels,
  ylim = c(0, 100),
  main = "Qualidade da Refeição por Categoria de Preço",
  xlab = "Categoria de Preço",
  ylab = "Percentual (%)",
  legend.text = qualidade_labels,
  args.legend = list(x = "topright", title = "Qualidade")
)

# Column-major flattening: each bar's x is repeated once per segment.
text(
  rep(bar_p, each = nrow(dados_matriz)),
  as.vector(posicoes_y),
  labels = as.vector(labels_sem_zero),
  cex = 0.8,
  col = "white"
)

Questão 3

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("airquality")

# Keep only the rows with Month == 5 and replace Temp by its Celsius
# equivalent; `.keep = "unused"` drops Temp once it has been consumed.
result <- airquality %>%
  filter(Month == 5) %>%
  select(Day, Temp) %>%
  mutate(temp_celsius = (Temp - 32) / 1.8, .keep = "unused")

# Distribution of the converted temperatures.
with(
  result,
  hist(
    temp_celsius,
    col = "lightblue",
    main = "Histograma da Temperatura em Maio (°C)",
    xlab = "Temperatura (°C)",
    ylab = "Dias"
  )
)

Questão 4

# Whitespace-separated table with a header row; the code below uses its
# SALES and COUNTRY columns.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of the total for every row, rounded to one decimal place.
percentagens <- with(sales, round(100 * SALES / sum(SALES), digits = 1))
labels <- paste0(percentagens, "%")

# One colour per row, shared by the slices and the legend.
cores <- rainbow(nrow(sales))

pie(
  sales$SALES,
  col = cores,
  labels = labels,
  main = "Percentual de Vendas por País"
)

legend(
  "topright",
  title = "Países",
  legend = sales$COUNTRY,
  fill = cores
)

Questão 5

data("InsectSprays")

# One box per level of `spray`; outline = FALSE keeps outlier points from
# being drawn.
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE,
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Número de Insetos"
)

Questão 6

#' Convert a human-readable memory size to megabytes
#'
#' Splits each string into its numeric part and its unit, then scales the
#' number with binary (1024-based) factors. The TB factor used to be the
#' decimal 1000000, which disagreed with the 1024 used for GB.
#'
#' @param valor_str Character vector of sizes such as "512MB", "1.5 GB" or
#'   "2tb" (case-insensitive unit, optional space). Scalars work as before;
#'   longer vectors are converted element-wise.
#' @return Numeric vector of the same length, in MB. `NA` where the unit is
#'   not one of KB, MB, GB, TB or the numeric part cannot be parsed.
convert_to_MB <- function(valor_str) {
  valor_str <- as.character(valor_str)

  # Strip letters/spaces to get the number; strip digits/punctuation to get
  # the unit.
  valor_num <- as.numeric(gsub("[A-Za-z ]", "", valor_str))
  unidade <- toupper(gsub("[0-9., ]", "", valor_str))

  # Lookup by name yields NA for any unit that is not listed.
  fatores_mb <- c(KB = 1 / 1024, MB = 1, GB = 1024, TB = 1024 * 1024)

  unname(valor_num * fatores_mb[unidade])
}

# 2 x 2 grid, one panel per workload.
# Explicit namespace: plotly is attached further down this script and masks
# layout(), so a bare call fails when the script is re-run in the same session.
graphics::layout(matrix(c(1, 2,
                          3, 4),
                        nrow = 2,
                        ncol = 2,
                        byrow = TRUE))

# Plot used memory against elapsed time for one monitoring log.
#
# mem_log  data frame with `usedMemory` (size strings understood by
#          convert_to_MB(), defined earlier in this script) and `currentTime`
#          columns
# titulo   main title of the panel
#
# Returns (invisibly) the two-column data frame that was plotted.
plot_memory_usage <- function(mem_log, titulo) {
  plot_data <- mem_log %>%
    mutate(
      # vapply pins the result to one number per row.
      used_memory_mb = vapply(usedMemory, convert_to_MB, numeric(1),
                              USE.NAMES = FALSE),
      # Hours elapsed since the first row.
      # NOTE(review): assumes currentTime is in a format difftime() can
      # parse without an explicit format string - confirm against the CSVs.
      time_diff_in_hour = as.numeric(
        difftime(currentTime, currentTime[1], units = "hours")
      )
    ) %>%
    select(time_diff_in_hour, used_memory_mb)

  plot(
    plot_data$time_diff_in_hour,
    plot_data$used_memory_mb,
    type = "l",
    main = titulo,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )

  invisible(plot_data)
}

mem_none_w <- read.csv("monitoringCloudData_NONE.csv")
plot_data_none_w <- plot_memory_usage(
  mem_none_w, "Memory Analysis (None Workload)"
)

mem_01_w <- read.csv("monitoringCloudData_0.1.csv")
plot_data_01_w <- plot_memory_usage(
  mem_01_w, "Memory Analysis (Workload of 0.1)"
)

mem_05_w <- read.csv("monitoringCloudData_0.5.csv")
plot_data_05_w <- plot_memory_usage(
  mem_05_w, "Memory Analysis (Workload of 0.5)"
)

mem_1_w <- read.csv("monitoringCloudData_1.csv")
plot_data_1_w <- plot_memory_usage(
  mem_1_w, "Memory Analysis (Workload of 1.0)"
)

# Back to a single-panel device, as done after the Questão 1 grid.
graphics::layout(1)

Questão 7

library(tidyverse)
library(plotly)
## 
## Anexando pacote: 'plotly'
## O seguinte objeto é mascarado por 'package:ggplot2':
## 
##     last_plot
## O seguinte objeto é mascarado por 'package:stats':
## 
##     filter
## O seguinte objeto é mascarado por 'package:graphics':
## 
##     layout
netflix_titles <- read.csv("netflix_titles.csv")

# Keep titles whose `country` field is non-empty and has no comma.
netflix_one_country <- netflix_titles %>%
  filter(country != "" & !grepl(",", country))

# Ten most frequent countries, most frequent first.
top_10_countries <- netflix_one_country %>%
  count(country, name = "contagem", sort = TRUE) %>%
  slice_head(n = 10)

# Pie chart: slices show percentages, the hover box shows country, count and
# share; the empty <extra> tag suppresses the secondary hover box.
fig <- plot_ly(
  data = top_10_countries,
  type = "pie",
  labels = ~country,
  values = ~contagem,
  textinfo = "percent",
  hovertemplate = paste(
    "<b>País:</b> %{label}<br>",
    "<b>Total:</b> %{value}<br>",
    "<b>Porcentagem:</b> %{percent}",
    "<extra></extra>"
  )
) %>%
  layout(title = "Top 10 Países com mais conteúdo na Netflix (2019)")

fig

Questão 8

# Same data as the pie chart, with display-ready column headers.
top_10_renamed <- top_10_countries %>%
  rename("País" = country, "Total de conteúdos" = contagem)

# plotly table: header cells come from the column names, body cells from the
# columns themselves (passed as an unnamed list, one element per column).
fig <- plot_ly(
  type = "table",
  header = list(
    values = names(top_10_renamed),
    align = "center",
    fill = list(color = "purple"),
    font = list(color = "white", size = 14)
  ),
  cells = list(
    values = unname(as.list(top_10_renamed)),
    align = "center",
    font = list(size = 12)
  )
) %>%
  layout(title = "Top 10 Países por Total de Conteúdo")

fig

Questão 9

# Number of titles per (decade, type); the decade is the release year rounded
# down to a multiple of 10.
content_by_decade <- netflix_titles %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = (release_year %/% 10) * 10) %>%
  count(decade, type, name = "contagem") %>%
  arrange(decade)

# One data frame per content type, each drawn as its own trace.
movies_data <- filter(content_by_decade, type == "Movie")
tv_data <- filter(content_by_decade, type == "TV Show")

fig <- plot_ly() %>%
  add_trace(
    data = movies_data,
    x = ~decade,
    y = ~contagem,
    name = "Filmes",
    type = "scatter",
    mode = "lines+markers",
    line = list(color = "orange"),
    marker = list(color = "orange")
  ) %>%
  add_trace(
    data = tv_data,
    x = ~decade,
    y = ~contagem,
    name = "Séries",
    type = "scatter",
    mode = "lines+markers", # lines AND points
    line = list(color = "blue"),
    marker = list(color = "blue")
  ) %>%
  layout(
    # One tick every 10 units, i.e. one per decade.
    xaxis = list(title = "Década", dtick = 10),
    yaxis = list(title = "Qnd. Conteúdo")
  )

fig

Questão 10

library(stringr)

# Movies released 2000-2010, counted per year and per main genre, where the
# main genre is the first entry of the comma-separated `listed_in` field and
# only the three genres listed below are kept.
genre_data <- netflix_titles %>%
  filter(type == "Movie", between(release_year, 2000, 2010)) %>%
  mutate(
    main_genre = vapply(
      str_split(listed_in, ", "),
      function(generos) generos[1],
      character(1)
    )
  ) %>%
  filter(main_genre %in% c("Dramas", "Action & Adventure", "Comedies")) %>%
  count(release_year, main_genre, name = "contagem") %>%
  # A factor x variable gives one discrete axis position per year.
  mutate(release_year = as.factor(release_year))

# Fixed colour per genre.
color_map <- c(
  "Dramas" = "blue",
  "Action & Adventure" = "orange",
  "Comedies" = "green"
)

# Grouped bars: one cluster per year, one bar per genre.
fig <- plot_ly(
  data = genre_data,
  type = "bar",
  x = ~release_year,
  y = ~contagem,
  color = ~main_genre,
  colors = color_map
) %>%
  layout(
    barmode = "group",
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Qnt. de Lançamentos")
  )

fig