Exercício 12 [Visualização de Dados]

Questões

Q1

Dados Iniciais

# Input data: one MRT series per flag configuration, each with one value per
# entry of `clock` (presumably response times in seconds -- the comparison
# charts label the axis "Response time (s)").
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
# Baseline series measured without any flag ("sem F").
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
# x-axis values shared by every series above.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

Primeiro Gráfico - Todas as Configurações

# First chart: overlay every configuration on a single line plot.
all_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_labels <- c("1F", "3F", "5F", "10F", "15F", "sem F")
series_colors <- c("red", "blue", "green", "purple", "orange", "brown")

# Use one y-range covering all series so that no curve falls outside the plot.
y_limits <- range(unlist(all_series))

# The first series opens the plot; the remaining ones are added on top of it.
plot(clock, all_series[[1]],
     type = "l", col = series_colors[1], lwd = 2,
     xlab = "Clock", ylab = "MRT",
     ylim = y_limits)
for (series_idx in seq_along(all_series)[-1]) {
  lines(clock, all_series[[series_idx]], col = series_colors[series_idx], lwd = 2)
}

legend("topright",
       legend = series_labels,
       col = series_colors,
       lwd = 2)

Gráficos Comparativos

# Set up a 3 x 2 grid of panels. Each panel number is repeated twice so that a
# panel spans two adjacent columns of the 3 x 4 layout matrix.
layout_matrix <- matrix(rep(1:6, each = 2), nrow = 3, byrow = TRUE)
layout(layout_matrix)

#' Draw a grouped bar chart comparing one flag configuration with the
#' no-flag baseline.
#'
#' The y axis uses a logarithmic scale (`log = "y"`), so every value in
#' `data1` and `data_sem_F` must be strictly positive.
#'
#' @param data1 Numeric vector with the values of the flag configuration.
#' @param data_sem_F Numeric vector with the baseline (no flag) values; must
#'   have the same length as `data1`.
#' @param title Plot title such as "3_Flag". The text before the first "_" is
#'   reused to build the first legend entry.
#' @param times Labels placed under each pair of bars. Defaults to the
#'   script-level `clock` vector, which is what the function always used.
#' @return The (invisible) value returned by `legend()`.
create_time_comparison <- function(data1, data_sem_F, title, times = clock) {
  # Set the panel margins and restore the previous ones on exit, so the
  # function does not leak graphical state into later plots.
  old_par <- par(mar = c(4.5, 4.5, 3, 2))
  on.exit(par(old_par), add = TRUE)

  # One row per series; with beside = TRUE each column becomes a pair of bars.
  heights <- rbind(data1, data_sem_F)

  barplot(heights,
          beside = TRUE,
          col = c("#666666", "#E6E6E6"),
          names.arg = times,
          xlab = "Time between Things requests",
          ylab = "Response time (s)",
          log = "y",
          cex.axis = 0.8,
          cex.names = 0.8,
          cex.lab = 0.9,
          main = title)

  # Legend: "<prefix>_Flag" for the configuration, "sem_Flag" for the baseline.
  legend("topright",
         legend = c(paste0(strsplit(title, "_")[[1]][1], "_Flag"), "sem_Flag"),
         fill = c("#666666", "#E6E6E6"),
         bty = "n",
         cex = 0.8)
}

# Draw the five comparison panels, one per flag configuration, each against
# the no-flag baseline.
create_time_comparison(MRT_1F, MRT_sem_F, "1_Flag")
create_time_comparison(MRT_3F, MRT_sem_F, "3_Flag")
create_time_comparison(MRT_5F, MRT_sem_F, "5_Flag")
create_time_comparison(MRT_10F, MRT_sem_F, "10_Flag")
create_time_comparison(MRT_15F, MRT_sem_F, "15_Flag")

# Reset the multi-panel layout so that later plots get a full page instead of
# being drawn into a leftover panel.
layout(1)

Q2

Imagem para ser convertida

Qualidade de Refeição por Categoria de Preço

Q3

# Load the airquality dataset
data(airquality)

# Keep only the temperatures recorded in May (Month == 5)
temperaturas_fahrenheit <- airquality$Temp[airquality$Month == 5]

# Convert Fahrenheit to Celsius
temperaturas_celsius <- (temperaturas_fahrenheit - 32) / 1.8
par(mar = c(5, 5, 4, 2) + 0.1)

# Pre-compute the histogram and the density estimate so the y-axis can be made
# tall enough for the bar labels and the density curve (the default limit only
# covers the bars, which clips anything drawn above the tallest one).
hist_info <- hist(temperaturas_celsius, plot = FALSE)
density_curve <- density(temperaturas_celsius)
y_max <- max(hist_info$density, density_curve$y)

# Histogram on the density scale (probability = TRUE); labels = TRUE prints
# each bar's value above it. The y-axis therefore shows density, not counts.
hist(temperaturas_celsius,
     col = "skyblue",
     main = "Histograma das Temperaturas de Maio (Celsius)",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     ylim = c(0, y_max * 1.15),
     probability = TRUE,
     labels = TRUE)

# Overlay the kernel density estimate and describe it in the legend
lines(density_curve, col = "red", lwd = 2)
legend("topright", legend = "Densidade", col = "red", lwd = 2)

Q4

# Download the sales table (the code below uses its COUNTRY and SALES columns).
sales_url <- "https://training-course-material.com/images/8/8f/Sales.txt"
sales <- read.table(sales_url, header = TRUE)

# Share of total sales per row, in percent, sorted from largest to smallest.
sales$percent <- with(sales, SALES / sum(SALES) * 100)
sales <- sales[order(sales$percent, decreasing = TRUE), ]

# Values reused by both the pie slices and the legend.
rounded_percent <- round(sales$percent, 1)
slice_colors <- rainbow(nrow(sales))

pie(sales$percent,
    labels = paste(sales$COUNTRY, "\n", rounded_percent, "%"),
    main = "Porcentagem de Vendas por País",
    col = slice_colors)

legend("topright",
       legend = paste(sales$COUNTRY, " - ", rounded_percent, "%"),
       fill = slice_colors,
       cex = 0.8)

Q5

# Load the InsectSprays dataset
data(InsectSprays)

# Box plot of insect counts per spray type; outline = FALSE hides the outlier
# points. The former `labels = TRUE` argument was dropped because it is not a
# documented boxplot() argument.
boxplot(count ~ spray, data = InsectSprays, outline = FALSE, col = "yellow",
        main = "Contagens de Insetos por Tipo de Inseticida",
        xlab = "Tipo de Inseticida", ylab = "Contagem de Insetos")

Q6

# Input files: one CSV per workload setting, all in the same directory.
workload_suffixes <- c("0.1", "0.5", "1", "NONE")
files <- file.path(
  "monitoringCloudData",
  paste0("monitoringCloudData_", workload_suffixes, ".csv")
)

#' Convert memory strings to megabytes using base R only.
#'
#' The numeric part is whatever remains after removing every character that is
#' not a digit or a dot. Values containing "GB" are multiplied by 1024 and
#' values containing "TB" by 1024 * 1024 (the same binary convention as GB);
#' anything else is returned unchanged, i.e. treated as already in MB.
#'
#' @param memory Character vector (or single string) such as "512MB" or
#'   "1.5GB".
#' @return Numeric vector of the same length as `memory`, in MB. Entries with
#'   no numeric part become `NA`.
convert_to_MB <- function(memory) {
  memory <- as.character(memory)

  # Keep only digits and the decimal point, then parse the number.
  num <- as.numeric(gsub("[^0-9.]", "", memory))

  # Vectorised unit factor; "GB" is checked first, as in the scalar version.
  multiplier <- ifelse(grepl("GB", memory), 1024,
                       ifelse(grepl("TB", memory), 1024 * 1024, 1))

  num * multiplier
}

# Containers for the processed series, keyed by file path
time_data <- list()
memory_data <- list()

# Read and process each file
for (file in files) {
  df <- read.csv(file, stringsAsFactors = FALSE)

  # Parse currentTime as POSIXct (%OS accepts fractional seconds)
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")

  # Convert usedMemory to MB. vapply() guarantees a numeric vector even when
  # the file has no rows, where sapply() would silently return a list.
  df$usedMemory <- vapply(df$usedMemory, convert_to_MB, numeric(1))

  # Hours elapsed since the first sample of this file
  elapsed_time <- as.numeric(difftime(df$currentTime, df$currentTime[1], units = "hours"))

  # Store the processed series
  time_data[[file]] <- elapsed_time
  memory_data[[file]] <- df$usedMemory
}

# Arrange the four charts on a 2 x 2 grid, filled row by row.
layout(matrix(1:4, nrow = 2, byrow = TRUE))

# Panels in drawing order; `idx` is the position of the series in `files`
# (the NONE workload is the fourth file but is drawn first).
memory_panels <- list(
  list(idx = 4, col = "purple", main = "Workload - NONE"),
  list(idx = 1, col = "blue", main = "Workload - 0.1"),
  list(idx = 2, col = "red", main = "Workload - 0.5"),
  list(idx = 3, col = "green", main = "Workload - 1")
)

# One base-R line chart per panel.
for (panel in memory_panels) {
  plot(time_data[[panel$idx]], memory_data[[panel$idx]],
       type = "l", col = panel$col,
       xlab = "Tempo (Horas)", ylab = "Memória Usada (MB)",
       main = panel$main)
}

# Back to a single-panel layout.
layout(1)

Q7

# Carregar Plotly
library(plotly)

# Load the dataset
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Drop rows with a missing/empty country or with more than one country listed
netflix_data <- netflix_data[!is.na(netflix_data$country) & !grepl(",", netflix_data$country) & netflix_data$country != "", ]

# Count the titles of each country
country_counts <- table(netflix_data$country)

# Sort and keep the 10 countries with the most titles. The index is bounded by
# the number of countries so that a smaller dataset does not yield NA entries.
sorted_counts <- sort(country_counts, decreasing = TRUE)
top_10 <- sorted_counts[seq_len(min(10, length(sorted_counts)))]

# Build the pie chart with Plotly; the hover text shows "<country> - <n> títulos"
fig <- plot_ly(labels = names(top_10), values = as.numeric(top_10), type = "pie",
               textinfo = "label+percent", hoverinfo = "text",
               text = paste(names(top_10), "-", top_10, "títulos"))

# Set the chart title
fig <- fig %>% layout(title = "Top 10 Países com Mais Conteúdo Único na Netflix")

# Display the chart
fig

Q8

# Carregar Plotly
library(plotly)

# Load the dataset
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Drop rows with a missing/empty country or with more than one country listed
netflix_data <- netflix_data[!is.na(netflix_data$country) & !grepl(",", netflix_data$country) & netflix_data$country != "", ]

# Count the titles of each country
country_counts <- table(netflix_data$country)

# Sort and keep the 10 countries with the most titles. The index is bounded by
# the number of countries so that a smaller dataset does not yield NA rows.
sorted_counts <- sort(country_counts, decreasing = TRUE)
top_10 <- sorted_counts[seq_len(min(10, length(sorted_counts)))]

# Build the content table with Plotly: one column with the country names and
# one with the title counts
fig <- plot_ly(
  type = 'table',
  header = list(
    values = c("País", "Total de conteúdos"),
    align = c('center', 'center'),
    font = list(color = 'white', size = 14),
    fill = list(color = 'gray')
  ),
  cells = list(
    values = list(names(top_10), as.numeric(top_10)),
    align = c('center', 'center'),
    font = list(color = 'black', size = 12),
    fill = list(color = c('white', 'lightgray'))
  )
)

# Display the table
fig

Q9

# Carregar o Plotly
library(plotly)

# Load the dataset
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only rows where both 'release_year' and 'type' are present
has_year_and_type <- !is.na(netflix_data$release_year) & !is.na(netflix_data$type)
netflix_data <- netflix_data[has_year_and_type, ]

# Decade of release: the year rounded down to a multiple of 10
netflix_data$decade <- floor(netflix_data$release_year / 10) * 10

# Tabulate how many rows fall in each decade and return the result as a data
# frame (decade, count, type) ready for Plotly.
count_by_decade <- function(rows, type_label) {
  per_decade <- table(rows$decade)
  data.frame(decade = as.numeric(names(per_decade)),
             count = as.numeric(per_decade),
             type = type_label)
}

# Split into movies and TV shows, then count each per decade
movies_data <- netflix_data[netflix_data$type == "Movie", ]
tv_shows_data <- netflix_data[netflix_data$type == "TV Show", ]
df_movies <- count_by_decade(movies_data, "Movies")
df_tv_shows <- count_by_decade(tv_shows_data, "TV Shows")

# Both series stacked in a single data frame
df_combined <- rbind(df_movies, df_tv_shows)

# Build the line chart step by step: one trace per content type, then layout
fig <- plot_ly()
fig <- add_trace(fig, data = df_movies, x = ~decade, y = ~count,
                 type = "scatter", mode = "lines+markers",
                 name = "Movies",
                 line = list(color = "orange"), marker = list(color = "orange"))
fig <- add_trace(fig, data = df_tv_shows, x = ~decade, y = ~count,
                 type = "scatter", mode = "lines+markers",
                 name = "TV Shows",
                 line = list(color = "blue"), marker = list(color = "blue"))
fig <- layout(fig,
              title = "Conteúdo por Década na Netflix",
              xaxis = list(title = "Década", tickmode = "array"),
              yaxis = list(title = "Quantidade de Conteúdo"),
              legend = list(title = list(text = "Tipo de Conteúdo")))

# Display the chart
fig

Q10

# Carregar o Plotly
library(plotly)

# Load the dataset
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only movies with a known 'release_year' and 'listed_in'
netflix_data <- netflix_data[!is.na(netflix_data$release_year) & !is.na(netflix_data$listed_in) & netflix_data$type == "Movie", ]

# Keep only movies released between 2000 and 2010 (inclusive)
netflix_data <- netflix_data[netflix_data$release_year >= 2000 & netflix_data$release_year <= 2010, ]

# The primary genre is the first entry of the comma-separated 'listed_in' field
netflix_data$primary_genre <- sapply(strsplit(netflix_data$listed_in, ", "), `[`, 1)

# Keep the genres of interest: "Dramas", "Action & Adventure" and "Comedies"
genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")
filtered_data <- netflix_data[netflix_data$primary_genre %in% genres_of_interest, ]

# Count the movies per release year and genre
genre_counts <- table(filtered_data$release_year, filtered_data$primary_genre)

# Convert to a long data frame (one row per year/genre pair)
df_genre_counts <- as.data.frame(genre_counts)
colnames(df_genre_counts) <- c("Ano", "Gênero", "Quantidade")

# Translate the genre names. as.data.frame() on a table yields factor columns,
# and indexing a named vector with a factor uses its integer codes, which
# would attach the wrong translation to each genre. Look up by name instead.
genre_translation <- c("Dramas" = "Drama", "Action & Adventure" = "Ação e Aventura", "Comedies" = "Comédia")
df_genre_counts$Gênero <- unname(genre_translation[as.character(df_genre_counts$Gênero)])

# Build the side-by-side bar chart with Plotly. 'barmode' is a layout
# attribute, so it is set in layout() rather than on the trace.
fig <- plot_ly(df_genre_counts, x = ~Ano, y = ~Quantidade, type = 'bar', color = ~Gênero,
               colors = c("blue", "orange", "green")) %>%
  layout(title = "Quantidade de Filmes por Gênero (2000-2010)",
         barmode = 'group',
         xaxis = list(title = "Ano"),
         yaxis = list(title = "Quantidade de Filmes"),
         legend = list(title = list(text = 'Gênero'),
                       traceorder = 'normal'))  # keep the legend in trace order

# Display the chart
fig

Exercício 12 [Visualização de Dados]

Marina Mota

2025-01-22

Questões

Q1

Dados Iniciais

Primeiro Gráfico - Todas as Configurações

Gráficos Comparativos

Q2

Q3

Q4

Q5

Q6

Q7

Q8

Q9

Q10