Visualização de Dados

Tradicional

Questão 1

# Response-time series (seconds, per the axis label used below), one vector per
# configuration. Each vector holds seven values, one for each entry of `clock`.
# NOTE(review): "MRT" presumably stands for "mean response time" - confirm.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

# Series without any fog node ("w/o Fog" in the legends).
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Time between Things requests, in seconds (x axis of every chart below).
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# The first series creates the plot, so it also fixes the axis ranges; it is
# drawn in the default colour, which the legend lists as "black".
plot(
  clock, MRT_1F,
  type = "o", pch = 4,
  xlab = "Time between Things requests (seconds)",
  ylab = "Response Time (sec.)"
)

# Overlay the remaining series, each with its own marker and colour.
invisible(Map(
  function(series, marker, colour) {
    lines(clock, series, type = "o", pch = marker, col = colour)
  },
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c(11, 1, 2, 5, 4),
  c("yellow", "red", "blue", "purple", "green")
))

legend(
  "topright",
  legend = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog"),
  col = c("black", "yellow", "red", "blue", "purple", "green"),
  pch = c(4, 11, 1, 2, 5, 4),
  lty = 1
)

# Geometric mean of every value supplied in `a` and `b` taken together,
# computed as exp(mean(log(.))). For two positive scalars this equals
# sqrt(a * b).
gmean <- function(a, b) {
  log_values <- log(c(a, b))
  exp(mean(log_values))
}

# Draw a grouped bar chart, on a logarithmic y axis, that compares the
# no-fog series with one other series.
#
# Args:
#   matrix:        numeric vector with the series to compare against the
#                  baseline; it is laid out as the second row of a two-row
#                  matrix, so it should have one value per entry of `clock`.
#   matrix_legend: legend label for that series.
#   axis_at:       y positions that receive a labelled tick.
#
# Reads the globals `MRT_sem_F` (baseline row) and `clock` (bar labels) and
# calls `gmean()`, all defined elsewhere in this file.
#
# Returns (invisibly) the value of the final axis() call.
bargraph <- function(matrix, matrix_legend, axis_at) {
  # The parameter is called `matrix`; qualify the constructor so the call is
  # unambiguous to the reader.
  data_matrix <- base::matrix(c(MRT_sem_F, matrix), nrow = 2, byrow = TRUE)

  barplot(
    data_matrix,
    names.arg = clock,
    xlab = "Time between Things requests (seconds)",
    ylab = "Response Time (sec.)",
    beside = TRUE,
    log = "y",
    col = c("#E6E6E6", "#666666"),
    legend.text = c("w/o Fog", matrix_legend),
    args.legend = list(x = "topright"),
    axes = FALSE
  )

  # Unlabelled minor ticks sit at the geometric mean of each pair of
  # consecutive labelled ticks (the visual midpoint on a log scale); the last
  # labelled tick is paired with the tallest bar.
  upper_bounds <- c(axis_at[-1], max(data_matrix))
  minor_at <- vapply(
    seq_along(axis_at),
    function(i) gmean(axis_at[i], upper_bounds[i]),
    numeric(1)
  )
  all_at <- sort(c(axis_at, minor_at))

  # Labelled ticks first, then every tick position without labels.
  axis(side = 2, at = axis_at)
  axis(side = 2, at = all_at, labels = FALSE)
}

# Split the device into a 3 x 2 grid filled row by row. Only five charts are
# drawn below, so the sixth cell stays empty.
layout(matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE))

# Margins are c(bottom, left, top, right); the top margin is reduced because
# the charts carry no title.
par(mar = c(5, 4, 1, 2) + 0.1)

# One chart per fog configuration; the last argument lists the labelled
# y-axis ticks chosen for that chart.
bargraph(MRT_1F, "1 Fog", c(1, 5, 50, 500))

bargraph(MRT_3F, "3 Fog", c(0.5, 5.0, 50.0))

bargraph(MRT_5F, "5 Fog", c(0.5, 5.0, 50.0))

bargraph(MRT_10F, "10 Fog", c(0.2, 1.0, 5.0))

bargraph(MRT_15F, "15 Fog", c(0.2, 1.0, 5.0))

Questão 2

# Margins are c(bottom, left, top, right).
par(mar = c(5, 5, 1, 1) + 0.1)

# Percentages per quality rating (rows) and price range (columns).
meal_price <- matrix(
  c(
    53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("good", "very good", "excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# Stacked bars, one per price range; barplot() returns the x position of the
# centre of each bar, which is reused to place the labels.
bar_meal <- barplot(
  meal_price,
  col = c("lightgreen", "lightblue", "gold"),
  legend.text = rownames(meal_price),
  xlab = "Faixa de Preço",
  main = "Distribuição da Qualidade por Faixa de Preço",
  args.legend = list(x = "topright"),
  names.arg = colnames(meal_price),
  axes = FALSE
)

# Write each percentage at the vertical centre of its segment: the top of a
# segment is the running column total, and its centre is half a segment below.
segment_tops <- apply(meal_price, 2, cumsum)
segment_mids <- segment_tops - meal_price / 2

text(
  x = rep(bar_meal, each = nrow(meal_price)),
  y = as.vector(segment_mids),
  labels = sprintf("%.1f%%", as.vector(meal_price)),
  cex = 0.8,
  col = "black"
)

Questão 3

# Keep only the May rows (Month == 5) of the built-in airquality data set.
may_data <- airquality[which(airquality$Month == 5), ]

# Convert the temperature from Fahrenheit to Celsius.
may_data$Temp_C <- (may_data$Temp - 32) / 1.8

# Density-scaled histogram (freq = FALSE) so the density curve drawn
# afterwards shares the same y scale.
hist(
  may_data$Temp_C,
  freq = FALSE,
  breaks = 10,
  col = "lightblue",
  xlab = "Temperatura (°C)",
  ylab = "Frequência",
  main = "Histograma das Temperaturas em Maio (°C)"
)

# Kernel density estimate drawn over the histogram.
lines(density(may_data$Temp_C), lwd = 2, col = "red")

Questão 4

# Download the sales table; the code below uses its COUNTRY and SALES columns.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of each row in the total sales, as a percentage with one decimal.
percentages <- round(100 * sales$SALES / sum(sales$SALES), 1)

# One colour per slice.
# NOTE(review): six colours are listed - assumes the table has six rows; confirm.
colors <- c("lightblue", "lightgreen", "lightcoral", "gold", "purple", "orange")

# Slices are labelled with their percentage; country names go in the legend.
pie(
  sales$SALES,
  labels = paste0(percentages, "%"),
  col = colors,
  main = "Total de Vendas por País"
)

legend("topright", legend = sales$COUNTRY, fill = colors)

Questão 5

# Box plot of the insect count for each spray in the built-in InsectSprays
# data set. outline = FALSE hides the points beyond the whiskers.
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE,
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos"
)

Questão 6

library(tidyverse)

# Make the project folder the working directory so that the relative CSV
# file names passed to process_data() below resolve. The path is specific to
# the author's machine.
setwd("~/Documentos/CPAD/Visualização de Dados")

# Convert memory sizes written as text (e.g. "512MB", "2GB", "1TB") to
# megabytes.
#
# Args:
#   memory: character vector of sizes, or a numeric vector. Values without a
#           TB/GB/MB suffix are taken to be in MB already.
#
# Returns:
#   Numeric vector of sizes in MB, with the same length as `memory`. Text that
#   cannot be parsed as a number yields NA, with the usual as.numeric()
#   coercion warning.
convert_to_mb <- function(memory) {
  # Plain numbers carry no unit suffix, so they pass through unchanged.
  if (is.numeric(memory)) {
    return(as.numeric(memory))
  }

  memory <- as.character(memory)
  is_tb <- grepl("TB", memory, fixed = TRUE)
  is_gb <- !is_tb & grepl("GB", memory, fixed = TRUE)

  # Binary multiples throughout: 1 GB = 1024 MB, so 1 TB = 1024^2 MB.
  multiplier <- rep(1, length(memory))
  multiplier[is_gb] <- 1024
  multiplier[is_tb] <- 1024^2

  as.numeric(gsub("TB|GB|MB", "", memory)) * multiplier
}

# Reduce each string to the "YYYY-MM-DD HH:MM:SS.ffffff" timestamp (six
# fractional digits) it contains, discarding any text around it. Strings that
# contain no such timestamp are returned unchanged.
clean_datetime <- function(datetime) {
  timestamp_pattern <-
    "^.*([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{6}).*$"
  sub(timestamp_pattern, "\\1", datetime)
}

# Read one monitoring CSV and prepare it for plotting.
#
# Args:
#   file: path of the CSV file. The code uses its `currentTime` and
#         `usedMemory` columns.
#
# Returns:
#   The data sorted by `currentTime`, with `currentTime` parsed as POSIXct
#   (UTC) and two added columns: `relativeTime` (hours elapsed since the
#   earliest row, as a difftime) and `usedMemoryMB` (numeric, in MB).
process_data <- function(file) {
  df <- read_csv(file)

  # clean_datetime() is built on sub(), which is vectorised, so the whole
  # column can be cleaned in one call.
  df$currentTime <- as.POSIXct(
    clean_datetime(df$currentTime),
    format = "%Y-%m-%d %H:%M:%OS",
    tz = "UTC"
  )

  df %>%
    arrange(currentTime) %>%
    mutate(
      relativeTime = difftime(currentTime, min(currentTime), units = "hours"),
      # vapply() always returns an unnamed numeric vector, including when the
      # file has no rows.
      usedMemoryMB = vapply(
        usedMemory, convert_to_mb, numeric(1),
        USE.NAMES = FALSE
      )
    )
}

# Line chart of used memory against elapsed time.
#
# Args:
#   data:  table with the columns `relativeTime` (x axis) and `usedMemoryMB`
#          (y axis).
#   title: main title of the chart.
create_plot_basic <- function(data, title) {
  elapsed <- data$relativeTime
  used_mb <- data$usedMemoryMB

  plot(
    elapsed, used_mb,
    type = "l",
    main = title,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )
}

# Load and pre-process the four monitoring logs, one per workload setting.
data_none <- process_data("monitoringCloudData_NONE.csv")
data_0.1 <- process_data("monitoringCloudData_0.1.csv")
data_0.5 <- process_data("monitoringCloudData_0.5.csv")
data_1.0 <- process_data("monitoringCloudData_1.csv")

# 2 x 2 grid of charts, filled row by row.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

# Margins are c(bottom, left, top, right).
par(mar = c(5, 4, 1, 2) + 0.1)

create_plot_basic(data_none, "Memory Analysis (None Workload)")
create_plot_basic(data_0.1, "Memory Analysis (Workload of 0.1)")
create_plot_basic(data_0.5, "Memory Analysis (Workload of 0.5)")
create_plot_basic(data_1.0, "Memory Analysis (Workload of 1.0)")

Plotly

Questão 7

library(plotly)
library(dplyr)

# Resolve the relative CSV path below against the project folder
# (machine-specific path).
setwd("~/Documentos/CPAD/Visualização de Dados")

data <- read.csv("netflix_titles.csv")

# Keep titles attributed to exactly one country: drop blank entries and
# comma-separated lists of several countries.
data_filtered <- data %>%
  filter(country != "", !grepl(",", country))

# Number of titles per country, largest first.
country_counts <- data_filtered %>%
  dplyr::count(country, name = "count", sort = TRUE)

# with_ties = FALSE caps the result at exactly ten rows; by default
# slice_max() would return more when several countries tie for tenth place.
top_10_countries <- country_counts %>%
  slice_max(count, n = 10, with_ties = FALSE)

fig <- plot_ly(
  top_10_countries,
  labels = ~country,
  values = ~count,
  type = "pie"
) %>%
  layout(title = "Top 10 Países com Mais Conteúdo na Netflix")

fig

Questão 8

library(plotly)
library(dplyr)

# Resolve the relative CSV path below against the project folder
# (machine-specific path).
setwd("~/Documentos/CPAD/Visualização de Dados")

data <- read.csv("netflix_titles.csv")

# Keep titles attributed to exactly one country: drop blank entries and
# comma-separated lists of several countries.
data_filtered <- data %>%
  filter(country != "", !grepl(",", country))

# Number of titles per country, largest first.
country_counts <- data_filtered %>%
  dplyr::count(country, name = "count", sort = TRUE)

# with_ties = FALSE caps the result at exactly ten rows; by default
# slice_max() would return more when several countries tie for tenth place.
top_10_countries <- country_counts %>%
  slice_max(count, n = 10, with_ties = FALSE)

# Two-column table: country name and number of titles.
fig <- plot_ly(
  type = "table",
  header = list(
    values = c("País", "Total de conteúdos"),
    align = c("center", "center"),
    line = list(width = 1, color = "black"),
    fill = list(color = "#666666"),
    font = list(color = "white", size = 14)
  ),
  cells = list(
    # Each row of this matrix supplies one table column; rbind() turns the
    # counts into text alongside the country names.
    values = rbind(top_10_countries$country, top_10_countries$count),
    align = c("center", "center"),
    line = list(color = "black", width = 1),
    fill = list(color = "#E6E6E6"),
    font = list(size = 12)
  )
)

fig

Questão 9

library(plotly)
library(dplyr)

# Resolve the relative CSV path below against the project folder
# (machine-specific path).
setwd("~/Documentos/CPAD/Visualização de Dados")

data <- read.csv("netflix_titles.csv")

# Tag every title with the decade of its release year (e.g. 1997 -> 1990).
data <- data %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = floor(release_year / 10) * 10)

# Number of titles per decade and content type. count() returns an ungrouped
# result and does not emit the regrouping message that summarise() prints
# after a multi-column group_by().
content_by_decade <- data %>%
  dplyr::count(decade, type, name = "count")

tv_shows <- content_by_decade %>% filter(type == "TV Show")
movies <- content_by_decade %>% filter(type == "Movie")

# One line per content type.
fig <- plot_ly() %>%
  add_trace(
    x = tv_shows$decade,
    y = tv_shows$count,
    type = "scatter",
    mode = "lines+markers",
    name = "TV Series",
    line = list(color = "blue"),
    marker = list(color = "blue")
  ) %>%
  add_trace(
    x = movies$decade,
    y = movies$count,
    type = "scatter",
    mode = "lines+markers",
    name = "Movies",
    line = list(color = "yellow"),
    marker = list(color = "yellow")
  ) %>%
  layout(
    xaxis = list(title = "Década"),
    yaxis = list(title = "Qnd. Conteúdo"),
    showlegend = TRUE
  )

fig

Questão 10

library(plotly)
library(dplyr)

# Resolve the relative CSV path below against the project folder
# (machine-specific path).
setwd("~/Documentos/CPAD/Visualização de Dados")

data <- read.csv("netflix_titles.csv")

# Movies released from 2000 to 2010 whose first listed genre is one of the
# three genres of interest.
data_filmes <- data %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010) %>%
  mutate(
    # The primary genre is the text before the first comma of `listed_in`.
    # vapply() keeps the result a character vector even when no rows remain.
    primary_genre = vapply(
      strsplit(as.character(listed_in), ","),
      function(genres) genres[1],
      character(1)
    )
  ) %>%
  filter(primary_genre %in% c("Dramas", "Action & Adventure", "Comedies"))

# Translate the genre names used for display.
data_filmes <- data_filmes %>%
  mutate(primary_genre = recode(
    primary_genre,
    "Dramas" = "Drama",
    "Action & Adventure" = "Ação e Aventura",
    "Comedies" = "Comédia"
  ))

# Number of movies per release year and genre. count() returns an ungrouped
# result and does not emit the regrouping message that summarise() prints
# after a multi-column group_by().
conteudo_por_genero <- data_filmes %>%
  dplyr::count(release_year, primary_genre, name = "count")

# Grouped bars: one bar per genre within each release year.
fig <- plot_ly(
  data = conteudo_por_genero,
  x = ~release_year,
  y = ~count,
  color = ~primary_genre,
  type = "bar",
  colors = c(
    "Drama" = "#1f78b4",
    "Ação e Aventura" = "#fe7f0e",
    "Comédia" = "#2ba02d"
  )
) %>%
  layout(
    barmode = "group",
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Qnt. de Lançamentos"),
    legend = list(title = list(text = ""))
  )

fig