Exercício 12 - Visualização de dados - Gabriel Gomes

Questão 1

# Response times (presumably mean response time, in seconds) for each fog
# configuration. Element i of every MRT_* vector pairs with element i of
# `clock`, the time between requests in seconds.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

# Baseline measured without any fog node.
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# x-axis values shared by every series above.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Line chart of response time against the time between requests, one series
# per fog configuration. The "1 Fog" series opens the plot (default colour,
# i.e. the "black" entry of the legend); the rest are overlaid in order.
plot(
  clock, MRT_1F,
  type = "o",
  pch = 4,
  xlab = "Time between requests (seconds)",
  ylab = "Response Time (sec)",
  xlim = c(0, 3),
  ylim = c(0, 518)
)

# Overlay the remaining series; symbol and colour vectors are index-aligned
# with the list of series.
invisible(Map(
  function(values, symbol, colour) {
    lines(clock, values, type = "o", pch = symbol, col = colour)
  },
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c(11, 1, 2, 5, 4),
  c("yellow", "red", "blue", "purple", "green")
))

legend(
  "topright",
  legend = c("1 Fog", "3 Fog", "5 Fog", "10 Fog", "15 Fog", "w/o Fog"),
  pch = c(4, 11, 1, 2, 5, 4),
  col = c("black", "yellow", "red", "blue", "purple", "green")
)

# Panel grid for the five "w/o Fog vs N Fog" bar charts: each chart spans two
# matrix rows, giving a 3 x 2 arrangement (the sixth cell stays empty).
# The matrix has 12 entries, so it needs nrow = 6; with nrow = 3 R truncated
# it (with a "data length differs from size of matrix" warning) and only four
# panels were defined for five plots.
layout(matrix(c(1, 2,
                1, 2,
                3, 4,
                3, 4,
                5, 6,
                5, 6), nrow = 6, ncol = 2, byrow = TRUE))
#layout.show(n=5)

# Draws one grouped bar chart comparing the no-fog baseline with one fog
# configuration, followed by its legend.
#   fog_values: numeric vector aligned with `clock`.
#   fog_label:  legend text for the fog series.
#   log_axis:   passed to barplot(log = ...); "" keeps a linear axis.
# The same two colours are used for bars and legend so they always agree
# (without `col`, barplot() falls back to its own default grey palette).
plot_fog_comparison <- function(fog_values, fog_label, log_axis = "") {
  bar_colours <- c("#E6E6E6", "#666666")
  barplot(rbind(MRT_sem_F, fog_values), log = log_axis, col = bar_colours,
          beside = TRUE, xlab = "Time between things requests",
          ylab = "Response time (s)", names.arg = clock)
  legend("topright", pch = c(15, 15), col = bar_colours,
         legend = c("w/o Fog", fog_label))
}

# Only the 1-fog panel uses a logarithmic y-axis, as in the original script.
plot_fog_comparison(MRT_1F, "1 Fog", log_axis = "y")
plot_fog_comparison(MRT_3F, "3 Fog")
plot_fog_comparison(MRT_5F, "5 Fog")
plot_fog_comparison(MRT_10F, "10 Fog")
plot_fog_comparison(MRT_15F, "15 Fog")

# Return to a single-panel device so later base plots are not squeezed into
# the leftover cell of this grid.
layout(1)

Questão 2

# Percentage of each quality rating (rows) within each meal-price band
# (columns). matrix() fills column by column, so each line of values below is
# one price band.
Meal_Price <- matrix(
  c(53.8, 43.6, 2.6,
    33.9, 54.2, 11.9,
    2.6, 60.5, 36.8,
    0, 21.4, 78.6),
  nrow = 3,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# Stacked bars: one bar per price band, one segment per rating.
barplot(
  Meal_Price,
  names.arg = colnames(Meal_Price),
  legend.text = rownames(Meal_Price),
  xlab = "Preço",
  ylab = "Porcentagem (%)"
)

Questão 3

data(airquality)

# Fahrenheit -> Celsius, stored as a new column on the data set.
airquality$Temp_Celsius <- (airquality$Temp - 32) / 1.8

# The chart is titled as May temperatures, so restrict the data to Month == 5
# (previously every month in the data set was plotted under the May title).
temp_maio <- airquality$Temp_Celsius[airquality$Month == 5]

# probability = TRUE draws densities, so the y-axis is labelled as density
# and the density curve shares the same scale as the bars.
hist_plot <- hist(temp_maio, breaks = 10, col = "green",
                  main = "Temperaturas no mês de Maio (°C)",
                  xlab = "Temperatura (°C)", ylab = "Densidade",
                  probability = TRUE)

lines(density(temp_maio), col = "red", lwd = 2)
legend("topright", legend = c("Curva de Densidade"), col = c("red"), lwd = 2)

Questão 4

library(dplyr)

# Sales table downloaded from the course site; the code below uses its
# COUNTRY and SALES columns.
sales <- read.table("https://training-course-material.com/images/8/8f/Sales.txt", header = TRUE)

# Share of total sales per row, in percent. Columns are referenced through
# dplyr's data mask (SALES) rather than sales$SALES, so the expression always
# operates on the data flowing through the pipe.
sales_percent <- sales %>%
  mutate(Percentage = (SALES / sum(SALES)) * 100)

# One palette, shared by the slices and the legend so they cannot diverge.
slice_colours <- rainbow(length(sales_percent$COUNTRY))

pie(sales_percent$Percentage,
    labels = paste(sales_percent$COUNTRY, "\n", round(sales_percent$Percentage, 1), "%"),
    col = slice_colours)

title("% de Vendas por País")

legend("topright", legend = sales_percent$COUNTRY, fill = slice_colours, title = "País")

Questão 5

data(InsectSprays)

# One box per spray type; outline = FALSE suppresses the outlier points.
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagens de Insetos por Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos",
  col = "yellow",
  outline = FALSE
)

Questão 6

library(patchwork)
library(ggplot2)

# Monitoring logs, one CSV per run; paths are relative to the working
# directory. The file-name suffix (0.1, 0.5, 1, NONE) presumably identifies
# the workload of each run - confirm against the data source.
prim  <- read.csv(file = "./monitoringCloudData_0.1.csv")
seg   <- read.csv(file = "./monitoringCloudData_0.5.csv")
terc  <- read.csv(file = "./monitoringCloudData_1.csv")
quart <- read.csv(file = "./monitoringCloudData_NONE.csv")

# Normalises one monitoring data frame for plotting.
#
# Args:
#   df: data frame with a `usedMemory` column holding values suffixed with
#       "MB" or "GB" (e.g. "512MB", "1.5GB") and a `currentTime` column
#       formatted as "%Y-%m-%d %H:%M:%OS".
#
# Returns:
#   `df` with `usedMemory` as numeric megabytes (GB values multiplied by
#   1024), `currentTime` as POSIXct, and a new numeric column
#   `hours_since_start` holding the hours elapsed since the earliest
#   parseable timestamp.
convert <- function(df) {
  raw_memory <- as.character(df$usedMemory)
  in_gb <- grepl("GB", raw_memory, fixed = TRUE)

  # Strip the unit once and scale the GB rows numerically, instead of writing
  # numbers back into a character column and re-parsing them.
  memory_mb <- as.numeric(sub("[GM]B", "", raw_memory))
  memory_mb[in_gb] <- memory_mb[in_gb] * 1024
  df$usedMemory <- memory_mb

  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")

  # na.rm = TRUE: a single unparseable timestamp must not turn every elapsed
  # time into NA; only the affected rows stay NA.
  start_time <- min(df$currentTime, na.rm = TRUE)
  df$hours_since_start <- as.numeric(difftime(df$currentTime, start_time, units = "hours"))

  df
}

prim <- convert(prim)
terc <- convert(terc)
quart <- convert(quart)

# `seg` is parsed inline rather than through convert(): elapsed time is
# measured from its first row, and rows whose time or memory could not be
# parsed are dropped before plotting.
gb_indices <- grepl("GB", seg$usedMemory)
seg$usedMemory[gb_indices] <- as.numeric(sub("GB", "", seg$usedMemory[gb_indices])) * 1024
seg$usedMemory <- as.numeric(sub("MB", "", seg$usedMemory))
seg$currentTime <- as.POSIXct(seg$currentTime, format = "%Y-%m-%d %H:%M:%OS")
seg$hours_since_start <- as.numeric(difftime(seg$currentTime, seg$currentTime[1], units = "hours"))
seg <- seg[complete.cases(seg$hours_since_start, seg$usedMemory), ]

# Builds one "used memory over time" line chart.
#   df:             data frame with `hours_since_start` and `usedMemory`.
#   workload_label: text placed inside the parentheses of the bold title.
#   y_breaks:       tick positions for the y-axis.
# A single black rectangle is drawn around the data extent with annotate();
# geom_rect(aes(...)) would draw one identical rectangle per data row.
plot_memory <- function(df, workload_label, y_breaks) {
  x_range <- range(df$hours_since_start, na.rm = TRUE)
  y_range <- range(df$usedMemory, na.rm = TRUE)
  plot_title <- paste0("Memory Analysis (", workload_label, ")")

  ggplot(df, aes(x = hours_since_start, y = usedMemory, group = 1)) +
    geom_line() +
    labs(title = bquote(bold(.(plot_title))),
         x = "Time (hour)",
         y = "Used Memory (MB)") +
    annotate("rect", xmin = x_range[1], xmax = x_range[2],
             ymin = y_range[1], ymax = y_range[2],
             color = "black", fill = NA, linetype = "solid") +
    theme(panel.background = element_rect(fill = "white")) +
    scale_x_continuous(breaks = seq(0, 70, 10)) +
    scale_y_continuous(breaks = y_breaks)
}

# Titles follow the file each data frame was read from
# (prim = 0.1, seg = 0.5, terc = 1, quart = NONE); previously they were
# shifted by one, e.g. the 0.1 run was titled "None Workload".
graph_prim <- plot_memory(prim, "Workload 0.1", c(500, 1500, 2500, 3500))
graph_seg <- plot_memory(seg, "Workload 0.5", c(400, 800, 1200))
graph_terc <- plot_memory(terc, "Workload 1.0", c(242, 246, 250, 254))
graph_quart <- plot_memory(quart, "None Workload", c(96, 98, 102, 106))

# patchwork's `+` arranges the four ggplots in a grid. Base-graphics
# layout()/par() calls do not affect ggplot output, so they are not used here.
all_plot <- graph_quart + graph_prim + graph_seg + graph_terc
all_plot

Questão 7

library(dplyr)
library(plotly)

# Netflix catalogue; empty strings are read as NA.
netflix_titles <- read.csv(file = "netflix_titles.csv",
                            header = TRUE,
                            strip.white = TRUE,
                            na.strings = "")

# Keep only titles attributed to exactly one country: drop missing/empty
# values and comma-separated country lists. (This filter used to be applied
# twice in a row with identical conditions.)
netflix_titles <- netflix_titles %>%
  filter(!is.na(country) & country != "" & !grepl(",", country))

# Ten countries with the most titles, largest first.
top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# Donut chart (pie with a hole). Figure size is given to plot_ly(), since
# passing width/height through layout() is deprecated in plotly.
plot_ly(labels = top_countries$country, values = top_countries$count, type = "pie",
        textinfo = "label+percent", insidetextfont = list(color = "#FFFFFF"),
        hoverinfo = "label+percent", hole = 0.6,
        width = 900, height = 800) %>%
  layout(title = "Top 10 Países com Mais Conteúdo na Netflix",
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))

Questão 8

library(dplyr)
library(plotly)

# Ten countries with the most titles in `netflix_titles`, largest first.
top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(n = 10)

# Two-column plotly table: country name and number of titles.
tabela <- local({
  header_spec <- list(
    values = c("País", "Total de Conteúdos"),
    fill = list(color = "#666666"),
    align = "center",
    font = list(color = "white", size = 15)
  )
  cell_spec <- list(
    values = list(top_countries$country, top_countries$count),
    align = "center",
    font = list(color = c("black", "black"), size = 12)
  )
  plot_ly(type = "table", header = header_spec, cells = cell_spec)
})

tabela

Questão 9

library(dplyr)
library(plotly)
library(stringr)

netflix <- read.csv("netflix_titles.csv")

# Decade of release, e.g. 1994 -> 1990.
netflix <- netflix %>%
  mutate(decade = 10 * (release_year %/% 10))

# Number of movies per decade.
filmesPorDecada <- netflix %>%
  filter(type == "Movie") %>%
  group_by(decade) %>%
  summarise(num_filmes = n())

# Number of TV shows per decade.
seriesPorDecada <- netflix %>%
  filter(type == "TV Show") %>%
  group_by(decade) %>%
  summarise(num_series = n())

# Decades that have movies but no TV shows come out of the join as NA; they
# are counted as 0. (A hard-coded `num_series[2] <- 1` used to patch one row
# by position, which invents a count and breaks if the row order changes.)
seriesFilmes <- left_join(filmesPorDecada, seriesPorDecada, by = "decade") %>%
  mutate(num_series = coalesce(num_series, 0L))

# Explicit type = "scatter" avoids plotly's "No trace type specified" warning.
fig <- plot_ly(seriesFilmes, x = ~decade) %>%
  add_trace(y = ~num_series, name = 'TV Series', type = 'scatter', mode = 'lines+markers') %>%
  add_trace(y = ~num_filmes, name = 'Movies', type = 'scatter', mode = 'lines+markers')

fig

Questão 10

library(stringr)

netflix <- read.csv("netflix_titles.csv")

# Genres shown in the chart.
cats <- c("Action & Adventure", "Comedies", "Dramas")

# Movies released from 2000 to 2010 (inclusive), with their genre list.
df_filtrado <- netflix %>%
  filter(between(release_year, 2000, 2010) & type == "Movie") %>%
  select(release_year, listed_in)

# Movies per year and first listed genre. `listed_in` is a comma-separated
# list; everything from the first comma on is removed, and values without a
# comma are kept whole. .groups = "drop" returns an ungrouped table and
# silences summarise()'s regrouping message.
contagem_categorias_por_ano <- df_filtrado %>%
  mutate(primeira_categoria = sub(",.*", "", listed_in)) %>%
  group_by(release_year, primeira_categoria) %>%
  summarise(num_filmes = n(), .groups = "drop")

df_final <- contagem_categorias_por_ano %>%
  filter(primeira_categoria %in% cats)

# Yearly counts for one genre as a two-column table (release_year, <coluna>),
# ready to be joined onto the full list of years. Genres are matched exactly
# rather than by regular expression.
contagem_da_categoria <- function(categoria, coluna) {
  contagem <- df_final %>%
    filter(primeira_categoria == categoria) %>%
    select(release_year, num_filmes)
  names(contagem)[names(contagem) == "num_filmes"] <- coluna
  contagem
}

# One row per year from 2000 to 2010, one column per genre; years with no
# movie in a genre stay NA and are simply drawn without a bar.
df_grafico <- data.frame(release_year = 2000:2010) %>%
  left_join(contagem_da_categoria("Comedies", "Comedies"), by = "release_year") %>%
  left_join(contagem_da_categoria("Dramas", "Dramas"), by = "release_year") %>%
  left_join(contagem_da_categoria("Action & Adventure", "ActionAdventure"), by = "release_year")

# Bar chart with one trace per genre.
fig <- plot_ly(df_grafico, x = ~release_year, y = ~Comedies, type = 'bar', name = 'Comédia') %>%
  add_trace(y = ~Dramas, name = 'Drama') %>%
  add_trace(y = ~ActionAdventure, name = 'Ação e Aventura') %>%
  layout(
    yaxis = list(title = 'Qnt. de Lançamentos'),
    xaxis = list(title = 'Ano de Lançamento')
  )

fig