# Questão 1 ----

# Response-time measurements in seconds (the unit used on the plot axes
# further down). Each vector holds one value per entry of `clock`, in the
# same order. "MRT" presumably stands for mean response time.

# One fog node.
MRT_1F <- c(
  517.1468515630205,
  85.13094142168089,
  30.333207896694553,
  12.694776264558937,
  3.3041601673945418,
  1.1823111717498882,
  1.1892293502386786
)

# Three fog nodes.
MRT_3F <- c(
  156.68929936163462,
  11.540837783562276,
  0.4512835621696538,
  0.4509797929766453,
  0.4502068233039181,
  0.4496185276300172,
  0.4543157082191288
)

# Five fog nodes.
MRT_5F <- c(
  83.90319666471157,
  0.3068151086494968,
  0.30522314133037304,
  0.3072588968084928,
  0.30655265997285697,
  0.3055812715727718,
  0.3053297166713006
)

# Ten fog nodes.
MRT_10F <- c(
  29.55430642951759,
  0.19832832665772515,
  0.1971923924717474,
  0.19796648905716516,
  0.19615594370806338,
  0.2034569237883263,
  0.19617420889447737
)

# Fifteen fog nodes.
MRT_15F <- c(
  11.317736530583566,
  0.167364215666193,
  0.16172168266811013,
  0.16701085329580515,
  0.1598052657153692,
  0.1645934043532696,
  0.16216563797118075
)

# Without fog ("sem" is Portuguese for "without").
MRT_sem_F <- c(
  11.93430909937736,
  0.6095414637034009,
  0.6060645101029295,
  0.612167181646899,
  0.6146761002685637,
  0.6096747087200697,
  0.6125810476877268
)

# Time between Things requests, in seconds (x axis of the Q1 plots).
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Page layout with six regions: a full-width panel on the first and last
# rows and two side-by-side panels on each of the two middle rows.
panel_grid <- rbind(c(1, 1),
                    c(2, 3),
                    c(4, 5),
                    c(6, 6))
layout(panel_grid, heights = c(2.5, 2, 2, 2))

# The wide right margin leaves room for the legend, which is drawn outside
# the plot region.
par(mar = c(4, 5, 2, 9))

# One entry per curve, in drawing order. Labels, colours and symbols are
# shared between the curves and the legend so they cannot drift apart.
fog_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
fog_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
fog_cols   <- c("black", "orange", "red", "purple", "blue", "green")
fog_pch    <- c(4, 17, 8, 5, 5, 8)

# The first curve sets up the axes; the y range is taken from MRT_1F.
plot(clock, fog_series[[1]],
     type = "b", pch = fog_pch[1], col = fog_cols[1],
     ylim = c(0, max(MRT_1F)),
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)")

# Remaining curves are added on top of the existing axes.
for (k in seq_along(fog_series)[-1]) {
  lines(clock, fog_series[[k]], type = "b", pch = fog_pch[k], col = fog_cols[k])
}

# Negative inset plus xpd = TRUE pushes the legend into the right margin.
legend("topright",
       legend = fog_labels,
       col    = fog_cols,
       pch    = fog_pch,
       lty    = 1,
       inset  = c(-0.16, 0),
       xpd    = TRUE,
       bty    = "n",
       cex    = 0.85)

# Draw a grouped bar chart (log-scaled y axis) comparing response times
# without fog against one fog configuration.
#
# Arguments:
#   fog_data  numeric vector of response times for the fog configuration.
#   sem_data  numeric vector of response times without fog; same length as
#             `fog_data`.
#   fog_label legend label for the fog series.
#   times     labels for the bar groups, one per element of the data vectors.
#             Defaults to the script-level `clock` vector, which is what the
#             function used implicitly before this argument existed.
#
# Returns the value of barplot() (the bar midpoints), invisibly.
bar_plot <- function(fog_data, sem_data, fog_label, times = clock) {
  # rbind() would silently recycle a shorter vector, so fail early instead.
  stopifnot(
    length(fog_data) == length(sem_data),
    length(times) == length(fog_data)
  )

  # Row order fixes bar order within each group: "w/o Fog" first, fog second.
  # The colours and legend entries below follow the same order.
  mat <- rbind(sem_data, fog_data)
  barplot(mat,
          beside      = TRUE,
          names.arg   = times,
          log         = "y",
          col         = c("#E6E6E6", "#666666"),
          xlab        = "Time between Things requests",
          ylab        = "Response time (s)",
          legend.text = c("w/o Fog", fog_label),
          args.legend = list(x = "topright", bty = "n", cex = 0.8))
}

# Fill the remaining five layout regions with one bar chart per fog
# configuration, each compared against the run without fog. The bar charts
# carry their legend inside the plot, so the right margin can be narrow.
par(mar = c(4, 5, 2, 2))
bar_plot(MRT_1F,  MRT_sem_F, "1 Fog")
bar_plot(MRT_3F,  MRT_sem_F, "3 Fogs")
bar_plot(MRT_5F,  MRT_sem_F, "5 Fogs")
bar_plot(MRT_10F, MRT_sem_F, "10 Fogs")
bar_plot(MRT_15F, MRT_sem_F, "15 Fogs")

# Return to a single-panel page. Without this the six-region layout stays
# active and the plots of the following questions are squeezed into its cells.
# The graphics:: prefix keeps this working if plotly (which masks layout())
# is already attached from an earlier run of the script.
graphics::layout(1)


# Questão 2 ----

# Meal quality ratings and the fill colour used for each, in stacking order
# (bottom to top).
quality  <- c("Good", "Very Good", "Excellent")
cores_q2 <- c("#2ECC71", "#3498DB", "#E74C3C")

# Percentages per price range: one row per quality rating, one column per
# price range.
price_data <- matrix(
  c(53.8, 33.9,  2.6,  0.0,
    43.6, 54.2, 60.5, 21.4,
     2.6, 11.9, 36.8, 78.6),
  nrow = 3, byrow = TRUE,
  dimnames = list(quality, c("$10-19", "$20-29", "$30-39", "$40-49"))
)

# Widen the right margin for the legend and remember the previous settings so
# they can be restored afterwards; otherwise the margin leaks into every
# later plot.
old_par <- par(mar = c(5, 5, 4, 9))

barplot(price_data,
        beside = FALSE,
        col    = cores_q2,
        main   = "Qualidade da Refeição por Faixa de Preço",
        xlab   = "Faixa de Preço",
        ylab   = "Percentual (%)",
        ylim   = c(0, 100),
        border = "white")

# The legend sits just right of the plot region. Clipping is disabled for
# this call only (xpd = TRUE) instead of globally through par(). Entries are
# reversed so they read top-down in the same order as the stacked segments.
legend(x      = par("usr")[2] + 0.3,
       y      = 100,
       legend = rev(quality),
       fill   = rev(cores_q2),
       border = "white",
       bty    = "n",
       title  = "Qualidade",
       cex    = 0.95,
       xpd    = TRUE)

par(old_par)


# Questão 3 ----

# Rows of the built-in airquality data set recorded in May (Month == 5).
# which() drops NA comparisons, just as subset() does.
may_data <- airquality[which(airquality$Month == 5), ]

# Fahrenheit to Celsius.
temp_c <- (may_data[["Temp"]] - 32) / 1.8

# Density-scaled histogram (freq = FALSE) so the kernel density estimate
# drawn afterwards shares the same y scale.
hist(temp_c,
     freq   = FALSE,
     col    = "steelblue",
     border = "white",
     main   = "Temperaturas em Maio (°C) — airquality",
     xlab   = "Temperatura (°C)",
     ylab   = "Densidade")

lines(density(temp_c), lwd = 2, col = "red")


# Questão 4 ----

# Load the sales table from the course web site. If the download or parsing
# fails, fall back to a small hard-coded sample so the chart can still be
# drawn, and emit a warning so nobody mistakes the sample for the real data.
sales <- tryCatch(
  read.table("https://training-course-material.com/images/8/8f/Sales.txt",
             header = TRUE, sep = "\t", col.names = c("Country", "Sales")),
  error = function(e) {
    warning("Could not read Sales.txt (", conditionMessage(e),
            "); using the hard-coded fallback sample instead.",
            call. = FALSE)
    data.frame(
      Country = c("US", "UK", "France", "Poland", "Japan", "China"),
      Sales   = c(340, 290, 510, 820, 120, 780)
    )
  }
)

# Normalise column types; going through character also handles a factor
# column correctly.
sales$Country <- as.character(sales$Country)
sales$Sales   <- as.numeric(as.character(sales$Sales))

# Total sales per country, and each country's share of the grand total.
total_by_country <- tapply(sales$Sales, sales$Country, sum)
pct   <- round(100 * total_by_country / sum(total_by_country), 1)
lbls  <- paste0(pct, "%")
cores <- rainbow(length(total_by_country))

# Slices are labelled with percentages only; the legend maps colours to
# country names.
pie(total_by_country,
    labels = lbls,
    col    = cores,
    main   = "Total de Vendas por País")

legend("bottomleft",
       legend = names(total_by_country),
       fill   = cores,
       bty    = "n",
       cex    = 0.85)


# Questão 5 ----

# Insect counts from the built-in InsectSprays data set, one box per spray
# type. Points beyond the whiskers are not drawn (outline = FALSE).
boxplot(with(InsectSprays, split(count, spray)),
        main    = "Contagem de Insetos por Inseticida",
        xlab    = "Tipo de Inseticida",
        ylab    = "Contagem de Insetos",
        col     = "yellow",
        outline = FALSE)


# Questão 6 ----

# Convert memory sizes written as text (e.g. "1.5 GB", "512MB", "2048 KiB")
# to megabytes, using 1024-based steps between units.
#
# Arguments:
#   x  character vector of sizes (other types are coerced to character).
#      The unit is matched case-insensitively anywhere in the string. A value
#      with no recognised unit is taken to be in megabytes already.
#
# Returns a numeric vector the same length as `x`. Entries with no parsable
# number are NA.
conv_mb <- function(x) {
  x   <- trimws(x)
  # Keep only digits and the decimal point, then parse what is left.
  num <- as.numeric(gsub("[^0-9.]", "", x))

  # Multiplier to megabytes. Units are assigned from lowest to highest
  # priority so that, should a string match more than one pattern, the
  # larger unit wins.
  to_mb <- rep(1, length(x))
  to_mb[grepl("KB|KiB", x, ignore.case = TRUE)] <- 1 / 1024
  to_mb[grepl("MB|MiB", x, ignore.case = TRUE)] <- 1
  to_mb[grepl("GB|GiB", x, ignore.case = TRUE)] <- 1024
  # 1 TB = 1024 GB = 1024^2 MB, consistent with the other 1024-based steps
  # (this factor used to be 1e6).
  to_mb[grepl("TB|TiB", x, ignore.case = TRUE)] <- 1024^2

  num * to_mb
}

# Read one monitoring CSV and add two derived columns.
#
# Arguments:
#   path  path to a CSV file with (at least) the columns `currentTime`
#         (timestamps formatted as "%Y-%m-%d %H:%M:%OS") and `usedMemory`
#         (a size that conv_mb() can convert).
#
# Returns the data frame read from `path` with two extra columns:
#   timeHours  hours elapsed since the timestamp in the first row
#   usedMB     `usedMemory` converted to megabytes by conv_mb()
read_monitor <- function(path) {
  monitor <- read.csv(path, stringsAsFactors = FALSE)

  # Parse every timestamp once; the first one is the time origin.
  stamps <- as.POSIXct(monitor$currentTime, format = "%Y-%m-%d %H:%M:%OS")
  elapsed <- difftime(stamps, stamps[1], units = "hours")

  monitor$timeHours <- as.numeric(elapsed)
  monitor$usedMB    <- conv_mb(monitor$usedMemory)
  monitor
}

# Directory that holds the monitoring CSV files (machine-specific path).
base_path <- "/Users/alanalins/Desktop/CAPD/"

# One monitoring file per workload, listed in the order they are plotted.
monitor_files <- c("monitoringCloudData_NONE.csv",
                   "monitoringCloudData_0.1.csv",
                   "monitoringCloudData_0.5.csv",
                   "monitoringCloudData_1.csv")
titles <- c("Memory Analysis (None Workload)",
            "Memory Analysis (Workload of 0.1)",
            "Memory Analysis (Workload of 0.5)",
            "Memory Analysis (Workload of 1.0)")

datasets <- lapply(monitor_files,
                   function(f) read_monitor(paste0(base_path, f)))

# Keep the individual data frames available under their own names as well.
df_none <- datasets[[1]]
df_01   <- datasets[[2]]
df_05   <- datasets[[3]]
df_1    <- datasets[[4]]

# 2 x 2 grid, filled row by row: one panel per workload.
layout(matrix(1:4, nrow = 2, byrow = TRUE))
par(mar = c(4, 5, 3, 2))

# Used memory over elapsed time, one line plot per data set.
invisible(Map(
  function(d, panel_title) {
    plot(d$timeHours, d$usedMB,
         type = "l", col = "black",
         main = panel_title,
         xlab = "Time (hour)",
         ylab = "Used Memory (MB)")
  },
  datasets, titles
))


# Questão 7 ----

library(plotly)
library(dplyr)

# Netflix catalogue (machine-specific path).
netflix <- read.csv(file = "/Users/alanalins/Desktop/CAPD/netflix_titles.csv",
                    stringsAsFactors = FALSE)

# Titles attributed to exactly one country: the field is non-blank and
# contains no comma (a comma separates multiple countries).
single_country <- filter(netflix,
                         trimws(country) != "",
                         !grepl(",", country))

# The ten countries with the most titles, most frequent first. count() names
# the tally column `n`.
top10 <- slice_head(count(single_country, country, sort = TRUE), n = 10)

# Pie chart with one slice per country, labelled with name and percentage.
layout(
  plot_ly(top10,
          type     = "pie",
          labels   = ~country,
          values   = ~n,
          textinfo = "label+percent"),
  title = "Top 10 Países com Mais Conteúdo na Netflix"
)

# Questão 8 ----

# The `top10` counts rendered as a two-column plotly table: country name and
# number of titles.
layout(
  plot_ly(
    type = "table",
    # Header row: bold labels, white text on a grey background.
    header = list(
      values = c("<b>País</b>", "<b>Total de Conteúdos</b>"),
      font   = list(size = 13, color = "white"),
      fill   = list(color = "grey"),
      align  = "center"
    ),
    # Body: one list element per table column.
    cells = list(
      values = list(top10$country, top10$n),
      font   = list(size = 12),
      align  = "center"
    )
  ),
  title = "Top 10 Países — Total de Conteúdos na Netflix"
)

# Questão 9 ----

# Number of titles per release decade and content type. Rows without a
# release year are dropped; the decade is the year rounded down to a
# multiple of ten.
netflix_decade <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = 10 * floor(release_year / 10)) %>%
  count(decade, type)

# One table per content type, so each can be drawn as its own trace.
tv    <- filter(netflix_decade, type == "TV Show")
movie <- filter(netflix_decade, type == "Movie")

# Two line-plus-marker traces on shared axes: TV shows in blue, movies in
# orange.
plot_ly() %>%
  add_trace(x = ~decade, y = ~n, data = tv,
            name   = "TV Series",
            type   = "scatter", mode = "lines+markers",
            marker = list(color = "blue"),
            line   = list(color = "blue")) %>%
  add_trace(x = ~decade, y = ~n, data = movie,
            name   = "Movies",
            type   = "scatter", mode = "lines+markers",
            marker = list(color = "orange"),
            line   = list(color = "orange")) %>%
  layout(title = "Quantidade de Conteúdo por Década na Netflix",
         yaxis = list(title = "Qnd. Conteúdo"),
         xaxis = list(title = "Década"))

# Questão 10 ----

# Genres compared in the chart.
genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010 (inclusive), counted per year and genre.
# A title's genre is the first entry of its comma-separated `listed_in`
# field; titles whose first genre is not of interest are dropped.
genre_year <- netflix %>%
  mutate(first_genre = trimws(sub(",.*", "", listed_in))) %>%
  filter(type == "Movie",
         release_year >= 2000,
         release_year <= 2010,
         first_genre %in% genres_of_interest) %>%
  count(release_year, first_genre)

# Fixed colour per genre.
genre_colors <- c(
  "Dramas"             = "steelblue",
  "Action & Adventure" = "orange",
  "Comedies"           = "green"
)

# Grouped bars: one group per release year, one bar per genre.
plot_ly(genre_year,
        type   = "bar",
        x      = ~release_year,
        y      = ~n,
        color  = ~first_genre,
        colors = genre_colors) %>%
  layout(title   = "Filmes por Gênero (2000–2010)",
         barmode = "group",
         legend  = list(title = list(text = "Gênero")),
         xaxis   = list(title = "Ano de Lançamento", dtick = 1),
         yaxis   = list(title = "Qnt. de Lançamentos"))