Exercício 12 [Visualização de Dados]

Questão 01

# Data: `clock` holds the x-axis values (labelled below as the time between
# Things requests, in seconds). Each MRT_* vector holds the response times
# plotted for one fog configuration; MRT_sem_F is the "w/o Fogs" series.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Chart 01: all series on a single line chart. The parallel vectors below
# describe each series (data, legend label, point symbol, colour) in the
# order in which they are drawn.
q1_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
q1_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fogs")
q1_symbols <- c(4, 11, 1, 2, 5, 4)
q1_colours <- c("black", "yellow", "red", "blue", "purple", "green")

# The first series opens the plot, so it alone determines the axis ranges.
plot(
  clock,
  q1_series[[1]],
  type = "o",
  pch = q1_symbols[1],
  col = q1_colours[1],
  xlab = "Time between Things request (seconds)",
  ylab = "Response Time (sec.)"
)

# Overlay the remaining series on the existing axes.
for (q1_idx in seq_along(q1_series)[-1]) {
  lines(
    clock,
    q1_series[[q1_idx]],
    type = "o",
    pch = q1_symbols[q1_idx],
    col = q1_colours[q1_idx]
  )
}

legend(
  "topright",
  legend = q1_labels,
  col = q1_colours,
  pch = q1_symbols,
  lty = 1
)

# Charts 02 - 06: one grouped bar chart per fog configuration. Each matrix
# has the run without fogs in its first row and one fog configuration in its
# second row, with one column per value of `clock`.
g2 <- rbind(MRT_sem_F, MRT_1F)
g3 <- rbind(MRT_sem_F, MRT_3F)
g4 <- rbind(MRT_sem_F, MRT_5F)
g5 <- rbind(MRT_sem_F, MRT_10F)
g6 <- rbind(MRT_sem_F, MRT_15F)

# Legend label of the fog configuration -> matrix to plot, in drawing order.
fog_comparisons <- list(
  "1 Fog" = g2,
  "3 Fogs" = g3,
  "5 Fogs" = g4,
  "10 Fogs" = g5,
  "15 Fogs" = g6
)

# First colour = without fogs (row 1), second colour = with fogs (row 2).
bar_colours <- c("#E6E6E6", "#666666")

# 3 x 2 grid filled row by row; the sixth cell (0) is left empty.
# graphics:: is spelled out because other packages may mask layout().
graphics::layout(matrix(c(1, 2,
                          3, 4,
                          5, 0), 3, 2, byrow = TRUE))

# Compact margins (bottom, left, top, right); the previous value is kept so
# it can be restored once the grid has been drawn.
previous_par <- par(mar = c(4, 4, 2, 1))

for (fog_label in names(fog_comparisons)) {
  barplot(fog_comparisons[[fog_label]],
          beside = TRUE,
          xlab = "Time between Things request",
          ylab = "Response Time (s)",
          names.arg = clock,
          log = "y",  # logarithmic y axis
          col = bar_colours)
  legend("topright", col = bar_colours,
         legend = c("w/o Fog", fog_label), pch = c(15, 15))
}

# Restore the margins and the single-figure layout; otherwise every later
# base-graphics plot would be squeezed into one cell of the 3 x 2 grid.
par(previous_par)
graphics::layout(1)

Questão 02

# Data: `data` has one row per quality rating (same order as `quality`) and
# one column per meal-price range (same order as `mealPrice`).
quality <- c("Good", "Very Good", "Excellent")
mealPrice <- c("$10-19", "$20-29", "$30-39", "$40-49")
l1 <- c(53.8, 33.9, 2.6, 0)
l2 <- c(43.6, 54.2, 60.5, 21.4)
l3 <- c(2.6, 11.9, 36.8, 78.6)
data <- rbind(l1, l2, l3)

# One colour per quality rating, shared by the bars and the legend.
quality_colours <- rev(heat.colors(3))

# Chart: grouped bars, one group per price range.
barplot(
  data,
  beside = TRUE,
  names.arg = mealPrice,
  col = quality_colours,
  ylim = c(0, 100),
  main = "Classificação de Qualidade das Refeições por Faixa de Preço",
  xlab = "Faixa de Preço",
  ylab = "Percentuais"
)
legend(
  "top",
  legend = quality,
  col = quality_colours,
  pch = 15,
  title = "Classificação de Qualidade"
)

Questão 03

library(dplyr)

# Keep only the rows for May (Month == 5) and convert the temperature to
# Celsius
data <- airquality %>% filter(Month == 5)
data$Temp <- (data$Temp - 32) / 1.8

# Histogram on the density scale, so the kernel density curve added below
# shares the same y axis. `density = 10` hatches the bars with shading lines.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
hist(data$Temp,
     col = "#406da3",
     density = 10,
     probability = TRUE,
     main = "Temperaturas P/Mês - Maio",
     ylab = "Densidade",
     xlab = "Temperatura")
lines(density(data$Temp), col = "black")

Questão 04

# Load the sales table and compute each row's share of the total sales,
# as a percentage rounded to two decimals.
data <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)
qtd <- nrow(data)
sales_share <- data$SALES / sum(data$SALES)
percentage <- round(sales_share * 100, 2)
labels <- paste0(percentage, "%")

# One colour per row, shared by the slices and the legend.
slice_colours <- rainbow(qtd)

# Pie chart: slices are labelled with the percentage, the legend maps each
# colour to its country.
pie(
  data$SALES,
  labels = labels,
  col = slice_colours,
  main = "Porcentagem de Vendas p/País"
)
legend(
  "topleft",
  legend = data$COUNTRY,
  col = slice_colours,
  pch = 15,
  title = "País"
)

Questão 05

# Load the data set
data <- InsectSprays

# Box plot of the insect count for each spray; points beyond the whiskers
# are not drawn (outline = FALSE).
boxplot(
  count ~ spray,
  data = data,
  outline = FALSE,
  col = "yellow",
  main = "Quantidade de Insetos p/Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Quantidade"
)

Questão 06

# Install the "anytime" package when it is not available yet.
# requireNamespace() only checks availability (library() below does the
# attaching), and the package is fetched over HTTPS rather than plain HTTP.
if (!requireNamespace("anytime", quietly = TRUE)) {
  install.packages("anytime", repos = "https://cran.us.r-project.org")
}

# Load libraries
library(anytime)
library(tidyr)
library(dplyr)

# Read one monitoring CSV file and prepare it for plotting.
#
# path: path of the CSV file. It must contain the columns `usedMemory`
#   (text whose last two characters are the unit, e.g. "GB") and
#   `currentTime`.
#
# Returns the data frame with:
#   usedMemory           - numeric value; rows whose unit is "GB" are
#                          multiplied by 1024 (other units are left as they
#                          are - presumably already MB; TODO confirm)
#   umUnit               - the two-character unit split off `usedMemory`
#   convertedCurrentTime - `currentTime` parsed by anytime()
#   continuousTime       - hours elapsed since the earliest
#                          convertedCurrentTime in the file
load_monitoring_data <- function(path) {
  mon <- read.csv(path) %>%
    separate(col = "usedMemory", into = c("usedMemory", "umUnit"), sep = -2)

  mon$usedMemory <- as.double(mon$usedMemory)

  # which() drops NA units, which are not allowed in a logical subscript on
  # the left-hand side of an assignment.
  gb_rows <- which(mon$umUnit == "GB")
  mon$usedMemory[gb_rows] <- mon$usedMemory[gb_rows] * 1024

  mon$convertedCurrentTime <- anytime(mon$currentTime)
  mon$continuousTime <- difftime(mon$convertedCurrentTime,
                                 min(mon$convertedCurrentTime),
                                 units = "hours")
  mon
}

# Load the four monitoring data sets
monCloudDataNone <- load_monitoring_data("monitoringCloudData_NONE.csv")
monCloudData01 <- load_monitoring_data("monitoringCloudData_0.1.csv")
monCloudData05 <- load_monitoring_data("monitoringCloudData_0.5.csv")
monCloudData1 <- load_monitoring_data("monitoringCloudData_1.csv")

# Chart title -> data set, in drawing order.
workload_charts <- list(
  "Memory Analysis (None Workload)" = monCloudDataNone,
  "Memory Analysis (Workload of 0.1)" = monCloudData01,
  "Memory Analysis (Workload of 0.5)" = monCloudData05,
  "Memory Analysis (Workload of 1.0)" = monCloudData1
)

# Four equally sized charts on a 2 x 2 arrangement, filled row by row.
# graphics:: is spelled out because other packages may mask layout().
graphics::layout(matrix(c(1, 1, 2, 2,
                          1, 1, 2, 2,
                          3, 3, 4, 4,
                          3, 3, 4, 4), 4, 4, byrow = TRUE))

# Set the margins (bottom, left, top, right); the previous value is kept so
# it can be restored once the charts have been drawn.
previous_par <- par(mar = c(6, 4, 3, 5))

for (chart_title in names(workload_charts)) {
  mon <- workload_charts[[chart_title]]
  plot(mon$continuousTime, mon$usedMemory,
       type = "l",
       main = chart_title,
       xlab = "Time (hour)",
       ylab = "Used Memory (MB)",
       font.main = 2)
}

# Restore the margins and the single-figure layout so later base-graphics
# plots are not drawn into this grid.
par(previous_par)
graphics::layout(1)

Questão 07

# Install the "plotly" package when it is not available yet.
# requireNamespace() only checks availability (library() below does the
# attaching), and the package is fetched over HTTPS rather than plain HTTP.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.us.r-project.org")
}
# Load libraries
library(dplyr)
library(plotly)

# Load the data
data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only titles with exactly one country: drop missing/empty values and
# any value containing a comma.
data_filtered <- data %>%
  filter(!is.na(country) & country != "") %>%
  filter(!grepl(",", country))

# Number of titles per country, keeping the ten largest counts
content_per_country <- data_filtered %>%
  count(country) %>%
  arrange(desc(n)) %>%
  slice(1:10)

# Pie chart with one slice per country
plot_ly(content_per_country, labels = ~country, values = ~n, type = 'pie') %>%
  layout(
    title = 'Top 10 países com mais conteúdo na plataforma Netflix',
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    margin = list(l = 10, r = 10, b = 10, t = 50),  # Margins around the chart
    autosize = TRUE,  # Size the chart automatically
    showlegend = TRUE  # Display the legend
  )

Questão 08

# Copy the per-country counts into a plain data frame whose column names are
# the headers shown in the table.
df <- setNames(
  data.frame(content_per_country),
  c("País", "Número de conteúdos")
)

# One list per table column
country <- as.list(df[["País"]])
total <- as.list(df[["Número de conteúdos"]])

# Header and body definitions of the table
table_header <- list(
  values = names(df),
  fill = list(color = "gray"),
  font = list(color = "white"),
  align = "center"
)
table_cells <- list(
  values = list(country, total),
  align = "center"
)

plot_ly(type = "table", header = table_header, cells = table_cells)

Questão 09

# Decade of each title: the first three characters of the release year,
# as a number, times ten (e.g. 1994 -> 1990).
data$decade <- 10 * as.numeric(substr(data$release_year, 1, 3))

# Number of titles for every (decade, type) pair, ignoring rows without a
# release year.
titles_with_year <- filter(data, !is.na(release_year))
titles_by_decade <- group_by(titles_with_year, decade, type)
data_per_decade <- ungroup(summarise(titles_by_decade, count = n()))

# One line (with markers) per content type.
decade_chart <- plot_ly(
  data_per_decade,
  x = ~decade,
  y = ~count,
  color = ~type,
  colors = c("lightblue", "orange"),
  type = "scatter",
  mode = "lines+markers"
)

# No chart title is set; the original kept one commented out:
# title = 'Quantidade de Conteúdo p/Década na Netflix'
layout(
  decade_chart,
  xaxis = list(title = "Década"),
  yaxis = list(title = "Qtd. Conteúdo"),
  margin = list(l = 10, r = 10, b = 10, t = 50),  # Margins around the chart
  autosize = TRUE,  # Size the chart automatically
  showlegend = TRUE,  # Display the legend
  legend = list(
    title = "Tipo de Conteúdo",
    x = 1,
    y = 0.95
  )
)

Questão 10

# Movies released strictly after 2000 and strictly before 2010.
# NOTE(review): the chart title below reads "entre 2000 e 2010" while this
# filter keeps 2001-2009 only - confirm whether the bounds should be
# inclusive.
movies <- data %>%
  filter(type == "Movie" & release_year > 2000 & release_year < 2010)

# First genre listed for each movie (everything before the first comma),
# stored as a factor restricted to the three genres of interest and labelled
# with the names shown in the chart. Any other genre becomes NA.
movies$firstGen <- factor(
  sub(",.*", "", movies$listed_in),
  levels = c("Dramas", "Action & Adventure", "Comedies"),
  labels = c("Drama", "Ação e Aventura", "Comédia")
)

# Number of movies per (release year, genre), for the three genres only.
groupData <- movies %>%
  filter(firstGen %in% c("Drama", "Ação e Aventura", "Comédia")) %>%
  group_by(release_year, firstGen) %>%
  summarise(contagem = n()) %>%
  ungroup()

# Grouped bar chart: one group per release year, one bar per genre.
# NOTE(review): `width = 0.5` is an argument of plot_ly() itself here, not of
# the bar trace - confirm it has the intended effect.
genre_chart <- plot_ly(
  groupData,
  x = ~release_year,
  y = ~contagem,
  color = ~firstGen,
  type = "bar",
  width = 0.5
)

layout(
  genre_chart,
  title = "Quantidade de Filmes por Gênero entre 2000 e 2010",
  xaxis = list(title = "Ano de Lançamento"),
  yaxis = list(title = "Qtd. de Lançamentos"),
  barmode = "group",
  legend = list(title = "Gênero")
)