library(dplyr)
library(tidyr)
library(stringr)
library(readr)
library(plotly)

# Exercício 12

# Questão 1 ----

# Response-time series for Question 1. Each vector holds seven measurements,
# one per entry of `clock` (defined at the end of this section). The plots
# built from these vectors label the values as response times in seconds.

# 1 fog node
MRT_1F <- c(
  517.1468515630205,
  85.13094142168089,
  30.333207896694553,
  12.694776264558937,
  3.3041601673945418,
  1.1823111717498882,
  1.1892293502386786
)

# 3 fog nodes
MRT_3F <- c(
  156.68929936163462,
  11.540837783562276,
  0.4512835621696538,
  0.4509797929766453,
  0.4502068233039181,
  0.4496185276300172,
  0.4543157082191288
)

# 5 fog nodes
MRT_5F <- c(
  83.90319666471157,
  0.3068151086494968,
  0.30522314133037304,
  0.3072588968084928,
  0.30655265997285697,
  0.3055812715727718,
  0.3053297166713006
)

# 10 fog nodes
MRT_10F <- c(
  29.55430642951759,
  0.19832832665772515,
  0.1971923924717474,
  0.19796648905716516,
  0.19615594370806338,
  0.2034569237883263,
  0.19617420889447737
)

# 15 fog nodes
MRT_15F <- c(
  11.317736530583566,
  0.167364215666193,
  0.16172168266811013,
  0.16701085329580515,
  0.1598052657153692,
  0.1645934043532696,
  0.16216563797118075
)

# Baseline without fog nodes
MRT_sem_F <- c(
  11.93430909937736,
  0.6095414637034009,
  0.6060645101029295,
  0.612167181646899,
  0.6146761002685637,
  0.6096747087200697,
  0.6125810476877268
)

# Time between requests (x axis of every Question 1 plot), in seconds.
clock <- c(
  0.1,
  0.5, 1, 1.5,
  2, 2.5, 3
)

# Question 1 figure layout: a 4 x 2 grid where panel 1 (the line chart drawn
# below) spans the whole first row, panels 2-6 are left for the bar charts
# that follow, and the last cell (0) stays empty.
# plotly, attached at the top of this file, exports its own layout() that
# masks the base graphics function; the explicit namespace keeps this call
# independent of which packages happen to be attached.
graphics::layout(
  matrix(c(1, 1, 2, 3, 4, 5, 6, 0), nrow = 4, byrow = TRUE)
)

# Legend label -> series and the matching colours live side by side so the
# drawn curves and the legend cannot drift apart.
series_q1 <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F,
  "No Fog" = MRT_sem_F
)
cores_q1 <- c("black", "red", "blue", "purple", "green", "darkgray")

# The first series creates the plot; the y range covers every series rather
# than assuming the first one holds the overall maximum.
plot(clock, series_q1[[1]], type = "b", col = cores_q1[1],
     ylim = c(0, max(unlist(series_q1))),
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec)",
     main = "Response Time by Fog Configuration")

# Remaining series are overlaid on the same axes.
for (indice_serie in seq_along(series_q1)[-1]) {
  lines(clock, series_q1[[indice_serie]], type = "b",
        col = cores_q1[indice_serie])
}

legend("topright",
       legend = names(series_q1),
       col = cores_q1,
       lty = 1, pch = 1, cex = 0.7)

# Question 1 bar charts: one panel per fog configuration, each comparing the
# no-fog baseline (light bars) with that configuration (dark bars) on a
# logarithmic y axis. The five panels differ only in the series and its
# label, so they are generated from a single list instead of five copies.
configuracoes_fog <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F
)
cores_barras <- c("#E6E6E6", "#666666")

for (rotulo_fog in names(configuracoes_fog)) {
  barplot(rbind(MRT_sem_F, configuracoes_fog[[rotulo_fog]]),
          beside = TRUE, log = "y",
          col = cores_barras,
          names.arg = clock,
          main = paste("No Fog x", rotulo_fog),
          xlab = "Time between Things requests",
          ylab = "Response time (s)")
  legend("topright", legend = c("No Fog", rotulo_fog),
         fill = cores_barras, cex = 0.7)
}

# Back to one plot per page. Without this, every base-graphics figure drawn
# later in the script would be squeezed into a panel of the multi-panel
# layout set up for Question 1.
graphics::layout(1)

# Questão 2 ----

# Question 2: stacked bar chart of meal-quality ratings per price category.
# Rows are the quality ratings, columns are the price categories, and the
# cells are percentages (each column adds up to roughly 100).
tabela_refeicao <- matrix(
  c(
    53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# One colour per rating, shared by the bars and the legend.
cores_qualidade <- c("lightblue", "orange", "lightgreen")

# Every stack reaches the top of the plot, so a legend placed inside the
# default plotting region would cover part of the last bar. With the default
# bar width (1) and spacing (0.2) the bars end at ncol * 1.2; the extra
# x-range to the right of that is reserved for the legend.
barplot(
  tabela_refeicao,
  beside = FALSE,
  col = cores_qualidade,
  main = "Qualidade da refeição por categoria de preço",
  xlab = "Categoria de preço",
  ylab = "Percentual (%)",
  ylim = c(0, 100),
  xlim = c(0, ncol(tabela_refeicao) * 1.2 + 2.5)
)

legend(
  "topright",
  legend = rownames(tabela_refeicao),
  fill = cores_qualidade,
  title = "Quality Rating"
)

# Questão 3 ----

# Question 3: density histogram of the May temperatures from `airquality`,
# converted to Celsius, with a kernel density curve on top.
dados_maio <- airquality[airquality$Month == 5, ]

temperatura_celsius <- (dados_maio$Temp - 32) / 1.8

# Compute the histogram and the density estimate before drawing so the y axis
# can be sized for whichever of the two is taller; hist() alone sizes the
# axis from the bars only, which can clip the curve.
histograma_temp <- hist(temperatura_celsius, plot = FALSE)
densidade_temp <- density(temperatura_celsius)

hist(
  temperatura_celsius,
  probability = TRUE,
  ylim = c(0, max(histograma_temp$density, densidade_temp$y)),
  col = "lightblue",
  main = "Histograma das temperaturas de maio",
  xlab = "Temperatura em graus Celsius",
  ylab = "Densidade"
)

lines(
  densidade_temp,
  col = "red",
  lwd = 2
)

# Questão 4 ----

# Question 4: pie chart with each country's share of total sales.
# The data set is downloaded from the course site; the first line of the
# file holds the column names.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# One row per country with the sum of its SALES values.
total_pais <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)

# Share of the grand total, in percent, rounded to one decimal place.
porcentagem <- with(total_pais, round(100 * SALES / sum(SALES), 1))

# Slice labels: country name followed by its percentage.
rotulos <- with(total_pais, paste(COUNTRY, porcentagem, "%"))

# One rainbow colour per country.
cores <- rainbow(nrow(total_pais))

pie(x = total_pais$SALES, labels = rotulos, col = cores,
    main = "Porcentagem de vendas por país")

legend(x = "topright", legend = total_pais$COUNTRY, fill = cores, cex = 0.8)

# Questão 5 ----

# Question 5: insect counts per spray type from the built-in `InsectSprays`
# data set, one box per spray. `outline = FALSE` suppresses the individual
# outlier points.
boxplot(count ~ spray, data = InsectSprays,
        main = "Contagem de insetos por tipo de inseticida",
        xlab = "Tipo de inseticida",
        ylab = "Contagem de insetos",
        col = "yellow",
        outline = FALSE)

# Questão 6 ----

#' Convert memory sizes written as text (e.g. "1.5GB") to megabytes.
#'
#' The first run of digits/dots in each element is taken as the value and the
#' first run of letters as the unit. Units are matched case-insensitively, so
#' "kB", "Gb" or "mb" are recognised. Recognised units: TB/T, GB/G, MB/M,
#' KB/K and B. A bare number (no unit) is taken to be in MB already. Any other
#' unit is also passed through as MB, but with a warning, because the result
#' is then likely to be wrong.
#'
#' @param memoria Character vector (or anything coercible with
#'   `as.character()`) of memory sizes.
#' @return Numeric vector, same length as `memoria`, with the sizes in MB.
#'   Elements without a parseable number are `NA`.
converter_memoria <- function(memoria) {
  memoria <- as.character(memoria)

  # First match of `padrao` in every element; NA where nothing matches.
  extrair_primeiro <- function(padrao) {
    posicao <- regexpr(padrao, memoria)
    encontrado <- !is.na(posicao) & posicao > 0
    resultado <- rep(NA_character_, length(memoria))
    resultado[encontrado] <- regmatches(memoria, posicao)
    resultado
  }

  valor <- as.numeric(extrair_primeiro("[0-9.]+"))
  unidade <- toupper(extrair_primeiro("[A-Za-z]+"))

  # Multiplier that turns a value in the given unit into MB.
  # NOTE(review): TB uses a decimal factor (1000000) while every other unit
  # is 1024-based (1 TB would be 1048576 MB). Kept as it was; confirm
  # against the exercise statement before changing.
  fator_mb <- c(
    TB = 1000000, T = 1000000,
    GB = 1024, G = 1024,
    MB = 1, M = 1,
    KB = 1 / 1024, K = 1 / 1024,
    B = 1 / (1024 * 1024)
  )
  fator <- unname(fator_mb[unidade])

  # A unit was present but is not in the table: keep the historical
  # "treat as MB" behaviour, but do not stay silent about it.
  desconhecida <- !is.na(unidade) & is.na(fator)
  if (any(desconhecida)) {
    warning(
      "Unrecognized memory unit(s) treated as MB: ",
      paste(unique(unidade[desconhecida]), collapse = ", "),
      call. = FALSE
    )
  }

  # Bare numbers and unknown units pass through unchanged.
  fator[is.na(fator)] <- 1

  valor * fator
}

#' Read a monitoring CSV and add the columns used by the memory plots.
#'
#' @param arquivo Path to a CSV file that has at least the columns
#'   `currentTime` (timestamp text) and `usedMemory` (size text with unit).
#' @return The data frame read from `arquivo` with `currentTime` converted to
#'   POSIXct and two extra columns: `tempo_hora` (hours elapsed since the
#'   earliest timestamp) and `memoria_mb` (used memory in MB, obtained from
#'   `converter_memoria()`).
preparar_monitoramento <- function(arquivo) {
  dados <- read.csv(arquivo, stringsAsFactors = FALSE)

  # Fail with a message that names the file instead of an obscure error
  # further down when an expected column is absent.
  faltando <- setdiff(c("currentTime", "usedMemory"), names(dados))
  if (length(faltando) > 0) {
    stop(
      "Missing column(s) in ", arquivo, ": ",
      paste(faltando, collapse = ", "),
      call. = FALSE
    )
  }

  # Parse in a fixed zone: only differences between timestamps are used, and
  # a zone without daylight-saving shifts keeps those differences independent
  # of the machine's locale settings.
  dados$currentTime <- as.POSIXct(dados$currentTime, tz = "UTC")

  # na.rm = TRUE so that a single unparseable timestamp does not turn the
  # whole elapsed-time column into NA.
  inicio <- min(dados$currentTime, na.rm = TRUE)
  dados$tempo_hora <- as.numeric(
    difftime(dados$currentTime, inicio, units = "hours")
  )

  dados$memoria_mb <- converter_memoria(dados$usedMemory)

  dados
}

# Question 6: used memory over time for four workloads, one panel each.
dados_none <- preparar_monitoramento("monitoringCloudData_NONE.csv")
dados_01 <- preparar_monitoramento("monitoringCloudData_0.1.csv")
dados_05 <- preparar_monitoramento("monitoringCloudData_0.5.csv")
dados_1 <- preparar_monitoramento("monitoringCloudData_1.csv")

# 2 x 2 grid, filled row by row. plotly (attached at the top of this file)
# exports its own layout() that masks the base graphics function, hence the
# explicit namespace.
graphics::layout(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE))

# Panel title -> data set. The four panels differ only in these two things,
# so they are drawn by one loop.
paineis_memoria <- list(
  "Memory Analysis (None Workload)" = dados_none,
  "Memory Analysis (Workload of 0.1)" = dados_01,
  "Memory Analysis (Workload of 0.5)" = dados_05,
  "Memory Analysis (Workload of 1.0)" = dados_1
)

for (titulo_painel in names(paineis_memoria)) {
  dados_painel <- paineis_memoria[[titulo_painel]]
  plot(
    dados_painel$tempo_hora,
    dados_painel$memoria_mb,
    type = "l",
    main = titulo_painel,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )
}

# Back to one plot per page so later base-graphics figures are not drawn
# into a quarter-size panel.
graphics::layout(1)

# Questão 7 ----

# Question 7: pie chart of the ten countries with the most Netflix titles,
# counting only titles whose `country` field names a single country.
netflix <- read.csv(file = "netflix_titles.csv", stringsAsFactors = FALSE)

# Keep rows with a non-missing, non-empty country that contains no comma
# (a comma separates several countries in the same field).
netflix_um_pais <- netflix %>%
  filter(
    !is.na(country),
    country != "",
    !str_detect(country, ",")
  )

# Titles per country, most frequent first, truncated to the first ten rows.
top10_paises <- netflix_um_pais %>%
  count(country, sort = TRUE) %>%
  slice_head(n = 10)

plot_ly(top10_paises, labels = ~country, values = ~n, type = "pie") %>%
  layout(title = "Top 10 países com mais conteúdos na Netflix")

# Questão 8 ----

# Question 8: the top-10 country counts shown as a plotly table.
# The columns are renamed to the headings displayed in the table.
tabela_paises <- rename(
  top10_paises,
  País = country,
  `Total de conteúdos` = n
)

# Header: white text on a gray background; header and cells are centred.
cabecalho_tabela <- list(
  values = c("País", "Total de conteúdos"),
  fill = list(color = "gray"),
  font = list(color = "white"),
  align = "center"
)

# Cells: one list element per table column.
celulas_tabela <- list(
  values = list(
    tabela_paises[["País"]],
    tabela_paises[["Total de conteúdos"]]
  ),
  align = "center"
)

plot_ly(type = "table", header = cabecalho_tabela, cells = celulas_tabela)

# Questão 9 ----

# Question 9: number of titles per decade, one line for TV shows and one for
# movies.
# Rows need a release year and a type of "Movie" or "TV Show"; the decade is
# the release year rounded down to a multiple of ten.
netflix_decada <- netflix %>%
  filter(
    !is.na(release_year),
    !is.na(type),
    type %in% c("Movie", "TV Show")
  ) %>%
  mutate(decada = floor(release_year / 10) * 10) %>%
  count(decada, type)

plot_ly() %>%
  add_lines(
    data = filter(netflix_decada, type == "TV Show"),
    x = ~decada, y = ~n,
    name = "TV Series",
    line = list(color = "blue")
  ) %>%
  add_lines(
    data = filter(netflix_decada, type == "Movie"),
    x = ~decada, y = ~n,
    name = "Movies",
    line = list(color = "yellow")
  ) %>%
  layout(
    title = "Quantidade de conteúdos da Netflix por década",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Qtd. Conteúdo")
  )

# Questão 10 ----

# Question 10: grouped bar chart of movies released from 2000 to 2010, per
# year, for three genres.
generos_interesse <- c("Dramas", "Action & Adventure", "Comedies")

# A movie's genre is the text of `listed_in` before the first comma (trimmed),
# i.e. only the first genre listed is considered.
netflix_generos <- netflix %>%
  filter(
    type == "Movie",
    release_year >= 2000,
    release_year <= 2010
  ) %>%
  mutate(genero_principal = str_trim(str_extract(listed_in, "^[^,]+"))) %>%
  filter(genero_principal %in% generos_interesse) %>%
  count(release_year, genero_principal)

# One bar colour per genre; bars for the same year sit side by side.
plot_ly(netflix_generos, x = ~release_year, y = ~n,
        color = ~genero_principal, type = "bar") %>%
  layout(
    title = "Quantidade de filmes por gênero entre 2000 e 2010",
    xaxis = list(title = "Ano de lançamento"),
    yaxis = list(title = "Qtd. de lançamentos"),
    barmode = "group"
  )