Atividade 12 - Visualizacao de Dados

Questao 1

# Questao 1: MRT series plotted against the clock setting, first as lines and
# then as grouped bars on a log scale. Each MRT_* vector holds seven values,
# one per entry of `clock`.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009,
               0.6060645101029295, 0.612167181646899, 0.6146761002685637,
               0.6096747087200697, 0.6125810476877268)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Legend labels and line colours, in the same order as the series are drawn.
series_rotulos <- c("1F", "3F", "5F", "10F", "15F", "Sem F")
series_cores <- c("red", "blue", "green", "orange", "purple", "brown")

# Save the graphical parameters so they can be restored once both panels are
# drawn. This is top-level code (no enclosing function), so on.exit() cannot
# defer the restore; par(op) is called explicitly at the end instead.
op <- par(no.readonly = TRUE)
# graphics:: is spelled out so the call does not depend on which attached
# package currently owns the name `layout`.
graphics::layout(matrix(c(1, 2), nrow = 2))

# Panel 1: one line per series on a shared linear axis.
plot(clock, MRT_1F, type = "b", col = series_cores[1], ylim = c(0, 550),
     xlab = "Clock (s)", ylab = "MRT (ms)",
     main = "Mean Response Time por Frequencia de Requisicoes",
     pch = 16, lwd = 2)
lines(clock, MRT_3F, type = "b", col = series_cores[2], pch = 16, lwd = 2)
lines(clock, MRT_5F, type = "b", col = series_cores[3], pch = 16, lwd = 2)
lines(clock, MRT_10F, type = "b", col = series_cores[4], pch = 16, lwd = 2)
lines(clock, MRT_15F, type = "b", col = series_cores[5], pch = 16, lwd = 2)
lines(clock, MRT_sem_F, type = "b", col = series_cores[6], pch = 16, lwd = 2)
legend("topright", legend = series_rotulos, col = series_cores,
       lty = 1, pch = 16, cex = 0.8)

# Panel 2: grouped bars, one group per clock value, log-scaled y axis.
mrt_matrix <- rbind(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
# One distinct grey per series (light to dark) so every legend entry maps to
# exactly one bar fill; alternating only two greys made them indistinguishable.
cores_barras_mrt <- gray.colors(nrow(mrt_matrix), start = 0.9, end = 0.4)
barplot(mrt_matrix, beside = TRUE, log = "y",
        names.arg = clock,
        xlab = "Clock (s)", ylab = "MRT (ms) - escala log",
        main = "MRT por Frequencia (Escala Logaritmica)",
        col = cores_barras_mrt)
legend("topright", legend = series_rotulos, fill = cores_barras_mrt,
       cex = 0.8)

# Restore the saved parameters so later plots get a single full-size panel.
par(op)

Questao 2

# Questao 2: stacked bars of meal quality (percentages) per price band.
# Rows are price bands, columns are quality ratings; each row sums to 100.
qualidade <- rbind(
  Baixo = c(30, 50, 20),
  Medio = c(20, 30, 50),
  Alto = c(10, 20, 70)
)
colnames(qualidade) <- c("Ruim", "Regular", "Bom")

# One fill colour per quality rating, in column order.
cores <- c("#FF6B6B", "#FFD93D", "#6BCB77")

# barplot() stacks the rows of its input, so the matrix is transposed to get
# one bar per price band with the ratings stacked inside it.
barplot(
  t(qualidade),
  beside = FALSE,
  col = cores,
  ylim = c(0, 100),
  main = "Qualidade de Refeicao por Faixa de Preco",
  xlab = "Faixa de Preco",
  ylab = "Porcentagem (%)"
)
legend("topright", title = "Qualidade",
       legend = colnames(qualidade), fill = cores)

Questao 3

# Questao 3: density-scaled histogram of the May temperatures from the
# built-in `airquality` data set, converted from Fahrenheit to Celsius, with
# a kernel density curve drawn on top.
maio <- subset(airquality, Month == 5)
temp_celsius <- na.omit((maio$Temp - 32) / 1.8)

# Compute the histogram (without drawing) and the density estimate first, so
# the y axis can be made tall enough for both the bars and the curve.
hist_temp <- hist(temp_celsius, plot = FALSE)
densidade_temp <- density(temp_celsius)

hist(temp_celsius,
     main = "Distribuicao das Temperaturas em Maio",
     xlab = "Temperatura (C)",
     # freq = FALSE puts densities, not counts, on the y axis, so the label
     # says "Densidade" rather than "Frequencia".
     ylab = "Densidade",
     col = "steelblue",
     freq = FALSE,
     border = "white",
     ylim = c(0, max(hist_temp$density, densidade_temp$y)))

lines(densidade_temp, col = "red", lwd = 2)

Questao 4

# Questao 4: pie chart of each country's share of total sales.
# The data is embedded as whitespace-separated text with a header row.
sales_text <- "Country Sales
Germany 2885
UK 2153
France 1671
Benelux 1395
Italy 1280
Spain 1050
Switzerland 680
Austria 450
Hungary 320
Poland 210"

sales <- read.table(
  text = sales_text,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Sum sales per country; aggregate() returns one row per distinct Country.
totais_pais <- aggregate(Sales ~ Country, data = sales, FUN = sum)

# Share of the grand total, in percent, rounded to one decimal place.
porcentagens <- with(totais_pais, round(Sales / sum(Sales) * 100, 1))

# Slice labels of the form "Country (12.3%)".
labels_pie <- paste0(totais_pais[["Country"]], " (", porcentagens, "%)")
cores_pie <- rainbow(nrow(totais_pais))

pie(
  totais_pais[["Sales"]],
  labels = labels_pie,
  col = cores_pie,
  main = "Porcentagem de Vendas por Pais"
)

Questao 5

# Questao 5: insect counts per spray type from the built-in `InsectSprays`
# data set. outline = FALSE suppresses the drawing of outlier points.
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagem de Insetos por Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos",
  col = "yellow",
  outline = FALSE
)

Questao 6

#' Convert size strings such as "1.5GB" or "700 MB" to megabytes
#'
#' For each element the first number (decimal point or decimal comma) and the
#' first run of letters are extracted; the letters are upper-cased and used
#' as the unit. Binary multiples are assumed (1 GB = 1024 MB).
#'
#' Uses base R only, so it does not depend on stringr or dplyr being attached.
#'
#' @param x Vector of size values; coerced with `as.character()`.
#' @return Numeric vector, same length as `x`, in MB. Elements with no number
#'   yield `NA`. A missing or unrecognised unit is treated as MB (factor 1).
convert_to_mb <- function(x) {
  x <- trimws(as.character(x))

  # First match of `padrao` in each element, NA where there is no match
  # (regmatches() alone silently drops the non-matching elements).
  primeiro_match <- function(padrao) {
    pos <- regexpr(padrao, x)
    achou <- !is.na(pos) & pos > 0
    saida <- rep(NA_character_, length(x))
    saida[achou] <- regmatches(x, pos)
    saida
  }

  # Accept a decimal comma by turning it into a point before parsing.
  numero <- primeiro_match("[0-9]+[.,]?[0-9]*")
  valor <- as.numeric(sub(",", ".", numero, fixed = TRUE))
  unidade <- toupper(primeiro_match("[A-Za-z]+"))

  # Multiplier that turns each known unit into MB. Plain bytes ("B") are
  # included so they are no longer mistaken for megabytes.
  fatores <- c(
    B = 1 / (1024 * 1024),
    KB = 1 / 1024,
    MB = 1,
    GB = 1024,
    TB = 1024 * 1024
  )
  fator <- unname(fatores[unidade])
  # Missing or unknown unit: keep the value as-is (assumed to be MB already).
  fator[is.na(fator)] <- 1

  valor * fator
}

#' Read a monitoring CSV and derive elapsed hours and memory in MB
#'
#' @param caminho Path to a CSV file with `currentTime` and `usedMemory`
#'   columns. `NULL`, a zero-length or multi-element value, `NA`, or a path
#'   that does not exist all mean "no file available".
#' @return `NULL` when no file is available; otherwise the data frame read
#'   from the file with `currentTime` parsed as POSIXct plus two new columns:
#'   `hora` (hours elapsed since the first row's timestamp) and `memMB`
#'   (`usedMemory` passed through `convert_to_mb()`).
#' @details Stops with an error when the file exists but lacks one of the
#'   required columns.
processar_dados <- function(caminho) {
  # The length check comes first: is.na(NULL) is logical(0) and a vector of
  # paths is not a scalar, and either would make the `if` condition fail
  # instead of falling back to NULL.
  if (length(caminho) != 1L || is.na(caminho) || !file.exists(caminho)) {
    return(NULL)
  }

  df <- read.csv(caminho, stringsAsFactors = FALSE)

  if (!all(c("currentTime", "usedMemory") %in% names(df))) {
    stop(paste0("Arquivo ", caminho, " nao contem colunas obrigatorias: currentTime e usedMemory."))
  }

  # Timestamps that do not match the format become NA.
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")
  # Elapsed time is measured from the first row, not from the minimum.
  t0 <- df$currentTime[1]
  df$hora <- as.numeric(difftime(df$currentTime, t0, units = "hours"))
  df$memMB <- convert_to_mb(df$usedMemory)

  df
}

#' Build a synthetic memory-usage series covering 0 to 24 hours
#'
#' The series is a starting level plus a linear ramp plus a sine wave; all
#' three components grow with `carga`.
#'
#' @param carga Numeric load factor (default 0).
#' @return Data frame with columns `hora` (0 to 24 in steps of 0.5) and
#'   `memMB`.
gerar_dados_exemplo <- function(carga = 0) {
  instantes <- seq(0, 24, by = 0.5)

  nivel_inicial <- 700 + (carga * 300)
  # Linear ramp from 0 up to its final value over the whole time span.
  rampa <- seq(0, 180 + carga * 120, length.out = length(instantes))
  onda <- sin(instantes / 2) * (20 + carga * 10)

  data.frame(
    hora = instantes,
    memMB = nivel_inicial + rampa + onda
  )
}

# Questao 6: used memory over time for four workloads, drawn in a 2x2 grid.
# NOTE(review): buscar_arquivo() is defined outside this view; presumably it
# resolves a file name to a path that processar_dados() can check - confirm.
dados_none <- processar_dados(buscar_arquivo("monitoringCloudData_NONE.csv"))
dados_01 <- processar_dados(buscar_arquivo("monitoringCloudData_0_1.csv"))
dados_05 <- processar_dados(buscar_arquivo("monitoringCloudData_0_5.csv"))
dados_1 <- processar_dados(buscar_arquivo("monitoringCloudData_1.csv"))

# processar_dados() returns NULL when a file is unavailable; fall back to a
# synthetic series so every panel still has something to draw.
if (is.null(dados_none)) dados_none <- gerar_dados_exemplo(0)
if (is.null(dados_01)) dados_01 <- gerar_dados_exemplo(0.1)
if (is.null(dados_05)) dados_05 <- gerar_dados_exemplo(0.5)
if (is.null(dados_1)) dados_1 <- gerar_dados_exemplo(1.0)

# Draw one memory-vs-time panel; only the data and the title vary.
plotar_memoria <- function(dados, titulo) {
  plot(dados$hora, dados$memMB, type = "l",
       main = titulo,
       xlab = "Time (hour)", ylab = "Used Memory (MB)", col = "black")
}

# Save the graphical parameters and restore them explicitly after the last
# panel. This is top-level code, so on.exit() has no function exit to hook
# into and cannot be relied on to undo the layout.
op <- par(no.readonly = TRUE)
# graphics:: is spelled out so the call does not depend on which attached
# package currently owns the name `layout`.
graphics::layout(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE))

plotar_memoria(dados_none, "Memory Analysis (None Workload)")
plotar_memoria(dados_01, "Memory Analysis (Workload of 0.1)")
plotar_memoria(dados_05, "Memory Analysis (Workload of 0.5)")
plotar_memoria(dados_1, "Memory Analysis (Workload of 1.0)")

par(op)

Questao 7

# Questao 7: pie chart of the ten countries with the most titles, counting
# only titles attributed to a single country.
# NOTE(review): carregar_netflix() is defined outside this view; it is assumed
# to return a data frame with a `country` column - confirm.
netflix <- carregar_netflix()

# Keep rows whose country is present, non-empty and has no comma (a comma
# marks a multi-country entry).
netflix_1pais <- filter(
  netflix,
  !is.na(country),
  country != "",
  !grepl(",", country)
)

# Titles per country, largest first, top ten only.
contagem_paises <- count(netflix_1pais, country, name = "total")
top10 <- contagem_paises %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

top10 %>%
  plot_ly(labels = ~country, values = ~total, type = "pie") %>%
  layout(title = "Top 10 Paises com Mais Conteudo na Netflix")

Questao 8

# Questao 8: the same top-ten ranking rendered as a plotly table.
# Rebuild `top10` only when it is not already defined, so this section can
# also be run on its own.
if (!exists("top10")) {
  netflix <- carregar_netflix()
  top10 <- netflix %>%
    filter(!is.na(country) & country != "" & !grepl(",", country)) %>%
    count(country, name = "total") %>%
    arrange(desc(total)) %>%
    slice_head(n = 10)
}

# Header row: bold column titles, white text on a grey fill.
cabecalho_tabela <- list(
  values = c("<b>Pais</b>", "<b>Total de Conteudos</b>"),
  fill = list(color = "grey"),
  font = list(color = "white", size = 13),
  align = "center"
)

# Body: one list element per table column.
celulas_tabela <- list(
  values = list(top10$country, top10$total),
  align = "center",
  font = list(size = 12)
)

plot_ly(type = "table", header = cabecalho_tabela, cells = celulas_tabela)

Questao 9

# Questao 9: number of titles per release decade, one line for TV shows and
# one for movies.
if (!exists("netflix")) {
  netflix <- carregar_netflix()
}

# Bucket each title into its decade (e.g. 1994 -> 1990) and count per
# decade and content type.
netflix_com_decada <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = floor(release_year / 10) * 10)

netflix_decada <- netflix_com_decada %>%
  group_by(decada, type) %>%
  summarise(total = n(), .groups = "drop")

series <- filter(netflix_decada, type == "TV Show")
filmes <- filter(netflix_decada, type == "Movie")

grafico_decadas <- plot_ly()

grafico_decadas <- add_trace(
  grafico_decadas,
  data = series, x = ~decada, y = ~total,
  type = "scatter", mode = "lines+markers",
  name = "TV Series",
  line = list(color = "blue"),
  marker = list(color = "blue")
)

grafico_decadas <- add_trace(
  grafico_decadas,
  data = filmes, x = ~decada, y = ~total,
  type = "scatter", mode = "lines+markers",
  name = "Movies",
  line = list(color = "orange"),
  marker = list(color = "orange")
)

grafico_decadas %>%
  layout(title = "Quantidade de Conteudo por Decada na Netflix",
         xaxis = list(title = "Decada"),
         yaxis = list(title = "Qtd. Conteudo"))

Questao 10

# Questao 10: grouped bars with the number of movies per year (2000-2010)
# for three genres, using only the first genre listed for each title.
if (!exists("netflix")) {
  netflix <- carregar_netflix()
}

generos_interesse <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010 that have a genre listing.
filmes_periodo <- netflix %>%
  filter(type == "Movie",
         release_year >= 2000 & release_year <= 2010,
         !is.na(listed_in))

# The first genre is everything before the first comma in `listed_in`.
netflix_q10 <- filmes_periodo %>%
  mutate(genero_1 = str_trim(str_extract(listed_in, "^[^,]+"))) %>%
  filter(genero_1 %in% generos_interesse) %>%
  count(release_year, genero_1, name = "total")

# Fixed colour per genre, keyed by genre name.
cores_barras <- c(
  "Dramas" = "steelblue",
  "Action & Adventure" = "tomato",
  "Comedies" = "seagreen"
)

grafico_generos <- plot_ly(
  netflix_q10,
  x = ~as.factor(release_year),
  y = ~total,
  color = ~genero_1,
  colors = cores_barras,
  type = "bar"
)

layout(
  grafico_generos,
  barmode = "group",
  title = "Filmes por Genero (2000-2010)",
  xaxis = list(title = "Ano"),
  yaxis = list(title = "Quantidade de Filmes"),
  legend = list(title = list(text = "Genero"))
)