Questões

— 1. Definir os dados —

# Response-time series, one numeric vector per scenario. The suffix matches
# the legend labels used by the charts below: 1F = "1 Fog", 3F = "3 Fogs",
# 5F = "5 Fogs", 10F = "10 Fogs", 15F = "15 Fogs", sem_F = "w/o Fog".
# NOTE(review): "MRT" presumably stands for mean response time -- confirm.
# Every vector holds 7 values, positionally aligned with `clock`.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009,
               0.6060645101029295, 0.612167181646899, 0.6146761002685637,
               0.6096747087200697, 0.6125810476877268)
# Values used on the x axis of the charts below, whose axis label reads
# "Time between Things requests (seconds)".
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Chart 1: every scenario's response time against the request interval,
# drawn as point-and-line series on one shared linear axis.
series_q1 <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
rotulos_q1 <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
cores_q1 <- c("black", "yellow", "red", "blue", "purple", "green")
simbolos_q1 <- c(4, 11, 1, 2, 5, 4)

# The first series opens the plot and fixes the y range to its own maximum.
plot(
  clock, series_q1[[1]],
  type = "o",
  col = cores_q1[1],
  pch = simbolos_q1[1],
  ylim = c(0, max(series_q1[[1]])),
  xlab = "Time between Things requests (seconds)",
  ylab = "Response Time (sec.)",
  main = "Tempo de Resposta vs Intervalo de Requisições"
)

# The remaining series are overlaid in legend order.
for (idx_q1 in seq_along(series_q1)[-1]) {
  lines(
    clock, series_q1[[idx_q1]],
    type = "o",
    col = cores_q1[idx_q1],
    pch = simbolos_q1[idx_q1]
  )
}

legend(
  "topright",
  legend = rotulos_q1,
  col = cores_q1,
  pch = simbolos_q1,
  lwd = 1
)

# Chart 2: one 2 x 7 matrix per fog scenario. Row 1 is the "w/o Fog"
# baseline and row 2 is the scenario itself, so barplot(beside = TRUE) draws
# them as side-by-side pairs. deparse.level = 0 keeps the matrices free of
# row names.
dados_1F <- rbind(MRT_sem_F, MRT_1F, deparse.level = 0)
dados_3F <- rbind(MRT_sem_F, MRT_3F, deparse.level = 0)
dados_5F <- rbind(MRT_sem_F, MRT_5F, deparse.level = 0)
dados_10F <- rbind(MRT_sem_F, MRT_10F, deparse.level = 0)
dados_15F <- rbind(MRT_sem_F, MRT_15F, deparse.level = 0)

# 3 x 2 grid of panels, filled row by row.
par(mfrow = c(3, 2))

# Draw one grouped bar chart comparing the "w/o Fog" baseline with a fog
# scenario on a logarithmic y axis.
#
# dados:  two-row matrix; row 1 is drawn light grey (baseline), row 2 dark
#         grey (scenario). Columns are labelled with the global `clock`.
# titulo: legend label for the second row.
#
# Returns the value of legend() invisibly.
criar_barplot <- function(dados, titulo) {
  cores <- c("#E6E6E6", "#666666")
  barplot(
    dados,
    beside = TRUE,
    col = cores,
    names.arg = clock,
    log = "y",
    xlab = "Time between Things requests (seconds)",
    ylab = "Response time (s)"
  )
  legend(
    "topright",
    legend = c("w/o Fog", titulo),
    fill = cores
  )
}

# One panel per fog scenario, in ascending number of fogs. invisible() keeps
# the list returned by Map() out of the output.
invisible(Map(
  criar_barplot,
  list(dados_1F, dados_3F, dados_5F, dados_10F, dados_15F),
  c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs")
))

# Restore the default single-panel layout.
par(mfrow = c(1, 1))

Questão 2

# Percentages by quality rating (rows) and price category (columns).
dados_q2 <- rbind(
  "Good" = c(53.8, 33.9, 2.6, 0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c(2.6, 11.9, 36.8, 78.6)
)
colnames(dados_q2) <- c("$10-19", "$20-29", "$30-39", "$40-49")

# One fill colour per quality rating, in row order.
cores_q2 <- c("#FDBF6F", "#B2DF8A", "#A6CEE3")

# Stacked bars: one bar per price category, segments are the ratings.
# The negative inset pushes the box-less legend outside the plot region.
posicao_legenda_q2 <- list(
  x = "bottomleft",
  bty = "n",
  inset = c(-0.12, -0.35)
)

barplot(
  dados_q2,
  col = cores_q2,
  main = "Qualidade da Refeição por Categoria de Preço",
  xlab = "Categoria de Preço",
  ylab = "Porcentagem (%)",
  legend.text = rownames(dados_q2),
  args.legend = posicao_legenda_q2
)

Questão 3

data(airquality)

# May temperatures from `airquality`, converted from Fahrenheit to Celsius.
temp_f_maio <- airquality$Temp[airquality$Month == 5]
temp_c_maio <- (temp_f_maio - 32) / 1.8

# Density-scaled histogram, so the kernel density curve added below shares
# its y scale. The argument is spelled `probability` in full instead of
# relying on partial matching of `prob`, and the y axis is labelled as a
# density because the bars no longer show counts.
hist(temp_c_maio,
     main = "Histograma das Temperaturas em Maio",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     col = "lightblue",
     probability = TRUE
)

# Kernel density estimate overlaid on the histogram.
lines(density(temp_c_maio, na.rm = TRUE), col = "red", lwd = 2)

Questão 4

# Read the sales data (whitespace-separated text with a header row).
Sales <- read.table("https://training-course-material.com/images/8/8f/Sales.txt", header = TRUE)

# Total sales per country and each country's share of the grand total.
vendas_por_pais <- aggregate(SALES ~ COUNTRY, data = Sales, FUN = sum)
vendas_por_pais$Pct <- vendas_por_pais$SALES / sum(vendas_por_pais$SALES)

# Format the shares as percentages with one decimal place. `scales` is used
# when it is installed; otherwise base sprintf() produces the labels. The
# script never installs packages at run time, because install.packages()
# fails in a non-interactive session that has no CRAN mirror configured.
if (requireNamespace("scales", quietly = TRUE)) {
  library(scales)
  pct_labels <- percent(vendas_por_pais$Pct, accuracy = 0.1)
} else {
  pct_labels <- sprintf("%.1f%%", 100 * vendas_por_pais$Pct)
}
pie_labels <- paste0(vendas_por_pais$COUNTRY, " (", pct_labels, ")")

# One colour per country.
cores_pie <- rainbow(length(vendas_por_pais$COUNTRY))

pie(vendas_por_pais$SALES,
    labels = pie_labels,
    col = cores_pie,
    main = "Vendas Totais por País"
)

legend("topright",
       legend = vendas_por_pais$COUNTRY,
       fill = cores_pie,
       cex = 0.8
)

Questão 5

data(InsectSprays)

# One yellow box per spray type; points beyond the whiskers are not drawn
# (outline = FALSE).
formula_q5 <- count ~ spray
boxplot(
  formula_q5,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE,
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos"
)

Questão 6

# Convert memory-size strings such as "1.5GB" or "512 KB" to megabytes.
#
# memory_str: character vector (any length, including zero). Each element
#             starts with a number, optionally followed by a unit.
#
# Returns a numeric vector of the same length, in MB. Units are matched
# case-insensitively and all use binary (1024-based) factors, so TB is
# consistent with GB and KB. When no unit is recognised the value is assumed
# to already be in MB. Elements whose leading text is not numeric give NA
# (with the usual coercion warning).
convert_to_mb <- function(memory_str) {
  memory_str <- as.character(memory_str)
  value <- as.numeric(gsub("([0-9\\.]+).*", "\\1", memory_str))

  # Later assignments win, which reproduces the TB > GB > KB precedence.
  multiplier <- rep(1, length(memory_str))
  multiplier[grepl("KB", memory_str, ignore.case = TRUE)] <- 1 / 1024
  multiplier[grepl("GB", memory_str, ignore.case = TRUE)] <- 1024
  multiplier[grepl("TB", memory_str, ignore.case = TRUE)] <- 1024^2

  value * multiplier
}

# Read one monitoring CSV and add the columns used for plotting.
#
# filepath: path to a CSV file with `usedMemory` (size strings understood by
#           convert_to_mb) and `currentTime` ("%Y-%m-%d %H:%M:%OS") columns.
#
# Returns the data frame with two extra columns:
#   usedMemory_MB - `usedMemory` converted to megabytes (numeric)
#   Time_hour     - hours elapsed since the earliest `currentTime`
# `currentTime` itself is replaced by its POSIXct parse.
process_data <- function(filepath) {
  df <- read.csv(filepath)
  # vapply() always yields an unnamed numeric vector, even for a zero-row
  # file, where sapply() would return an empty list.
  df$usedMemory_MB <- vapply(
    df$usedMemory, convert_to_mb, numeric(1),
    USE.NAMES = FALSE
  )
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")
  start_time <- min(df$currentTime, na.rm = TRUE)
  df$Time_hour <- as.numeric(difftime(df$currentTime, start_time, units = "hours"))
  df
}

# Load the four monitoring runs (no workload, then workloads 0.1, 0.5, 1.0).
df_none <- process_data("monitoringCloudData_NONE.csv")
df_0.1 <- process_data("monitoringCloudData_0.1.csv")
df_0.5 <- process_data("monitoringCloudData_0.5.csv")
df_1.0 <- process_data("monitoringCloudData_1.csv")

# 2 x 2 grid filled row by row, with tighter margins than the default.
layout(matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE))
par(mar = c(4.1, 4.1, 3.1, 1.1))

# Line plot of used memory over elapsed time for one run.
# dados: data frame returned by process_data(); titulo: panel title;
# limites: y-axis range for this panel.
plotar_memoria <- function(dados, titulo, limites) {
  plot(
    dados$Time_hour, dados$usedMemory_MB,
    type = "l",
    main = titulo,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)",
    ylim = limites
  )
}

plotar_memoria(df_none, "Memory Analysis (None Workload)", c(96, 106))
plotar_memoria(df_0.1, "Memory Analysis (Workload of 0.1)", c(0, 3500))
plotar_memoria(df_0.5, "Memory Analysis (Workload of 0.5)", c(400, 1200))
plotar_memoria(df_1.0, "Memory Analysis (Workload of 1.0)", c(242, 254))

# Back to a single panel with the default margins.
par(mfrow = c(1, 1), mar = c(5.1, 4.1, 4.1, 2.1))

Questão 7

library(dplyr)
library(plotly)

# Load the Netflix catalogue. Empty fields and the literal text "NA" are read
# as missing. The marker is written as the string "NA" rather than a bare NA,
# so the intent does not depend on how NA is coerced to character.
df_netflix <- read.csv("netflix_titles.csv", na.strings = c("", "NA"), stringsAsFactors = FALSE)

# Ten countries with the most titles, counting only titles attributed to a
# single country (rows whose `country` contains a comma are dropped).
top_10_countries <- df_netflix %>%
  filter(!is.na(country)) %>%
  filter(!grepl(",", country)) %>%
  group_by(country) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total)) %>%
  slice_head(n = 10)

# Pie chart of the ten countries; each slice shows its label and percentage.
plot_ly(top_10_countries,
        labels = ~country,
        values = ~Total,
        type = 'pie',
        textinfo = 'percent+label',
        insidetextorientation = 'radial') %>%
  layout(title = "Top 10 Países com Mais Conteúdo na Netflix (País Único)",
         margin = list(l = 90, r = 90, b = 90, t = 60)
         )

Questão 8

# Same ten countries as the pie chart, with display-ready column names.
tabela_data <- rename(
  top_10_countries,
  País = country,
  "Total de conteúdos" = Total
)

# Header row: white text on grey, centred.
cabecalho_q8 <- list(
  values = colnames(tabela_data),
  align = "center",
  fill = list(color = "grey"),
  font = list(color = "white", size = 12)
)

# Body: one unnamed list element per column, on a light grey background.
celulas_q8 <- list(
  values = unname(as.list(tabela_data)),
  align = "center",
  fill = list(color = "#F5F5F5")
)

plot_ly(type = 'table', header = cabecalho_q8, cells = celulas_q8)

Questão 9

# Number of titles per release decade (1940s onwards) and content type.
content_by_decade <- df_netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(Decada = floor(release_year / 10) * 10) %>%
  filter(Decada >= 1940) %>%
  count(Decada, type, name = "Quantidade")

# One line with markers per content type.
cores_tipo_q9 <- c("Movie" = "orange", "TV Show" = "blue")

grafico_q9 <- plot_ly(
  content_by_decade,
  x = ~Decada,
  y = ~Quantidade,
  color = ~type,
  colors = cores_tipo_q9,
  type = 'scatter',
  mode = 'lines+markers'
)

layout(
  grafico_q9,
  title = "Quantidade de Conteúdo por Década",
  xaxis = list(title = "Década"),
  yaxis = list(title = "Quantidade de Conteúdo"),
  legend = list(title = list(text = 'Tipo'))
)

Questão 10

# Genres compared in the chart.
generos_interesse <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010, classified by the first genre listed in
# `listed_in` (the text before the first comma), counted per year and genre.
movie_genres_by_year <- df_netflix %>%
  filter(
    type == "Movie",
    release_year >= 2000,
    release_year <= 2010
  ) %>%
  mutate(Primeiro_Genero = sub(",.*", "", listed_in)) %>%
  filter(Primeiro_Genero %in% generos_interesse) %>%
  group_by(release_year, Primeiro_Genero) %>%
  summarise(Quantidade = n(), .groups = 'drop')

# Grouped bars: one cluster per year, one bar per genre.
grafico_q10 <- plot_ly(
  movie_genres_by_year,
  x = ~release_year,
  y = ~Quantidade,
  color = ~Primeiro_Genero,
  type = 'bar'
)

layout(
  grafico_q10,
  title = "Filmes Lançados por Gênero (2000-2010)",
  xaxis = list(title = "Ano de Lançamento"),
  yaxis = list(title = "Quantidade de Lançamentos"),
  barmode = 'group',
  legend = list(title = list(text = 'Gênero'))
)

Exercício 12

Vinícius Souza Pereira de Lima

07-11-2025

Questões

— 1. Definir os dados —

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10