Visualização de Dados

Questão 1

# MRT measurements for each configuration (1F, 3F, 5F, 10F, 15F and "sem F"),
# one value per entry of `clock`.
# NOTE(review): MRT presumably stands for mean response time; confirm.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937, 3.3041601673945418, 1.1823111717498882, 1.1892293502386786)

MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453, 0.4502068233039181, 0.4496185276300172, 0.4543157082191288)

MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928, 0.30655265997285697, 0.3055812715727718, 0.3053297166713006)

MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516, 0.19615594370806338, 0.2034569237883263, 0.19617420889447737)

MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515, 0.1598052657153692, 0.1645934043532696, 0.16216563797118075)

MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899, 0.6146761002685637, 0.6096747087200697, 0.6125810476877268)

clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# One row per configuration, one column per clock value. Both charts are
# driven from this matrix so that they always show the same series.
data_matrix <- rbind(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_labels <- c("1F", "3F", "5F", "10F", "15F", "Sem F")
line_cols <- c("red", "blue", "green", "purple", "orange", "black")

# One distinct shade per series, interpolated between the two original greys.
# Passing only two colours made barplot() recycle them, so the six legend
# entries could not be told apart.
bar_cols <- colorRampPalette(c("#E6E6E6", "#666666"))(nrow(data_matrix))

# Stack the two charts vertically on the same device.
layout(matrix(c(1, 2), nrow = 2, byrow = TRUE))

# Chart 1: lines on a logarithmic y axis. The upper limit is taken from all
# series instead of assuming that MRT_1F holds the maximum.
plot(clock, data_matrix[1, ], type = "o", col = line_cols[1], pch = 16,
     ylim = c(0.1, max(data_matrix)), log = "y",
     ylab = "Valores", xlab = "Clock")
for (i in seq_len(nrow(data_matrix))[-1]) {
  lines(clock, data_matrix[i, ], type = "o", col = line_cols[i], pch = 16)
}
legend("topright", legend = series_labels, col = line_cols, lty = 1, pch = 16)

# Chart 2: grouped bars on a logarithmic y axis.
barplot(data_matrix, beside = TRUE, col = bar_cols, log = "y",
        names.arg = clock, legend.text = series_labels,
        args.legend = list(x = "topright"), ylab = "Valores", xlab = "Clock")

# Return to a single-panel layout so later plots are not squeezed into the
# two-row grid.
layout(1)

Questão 2

# Category labels: quality ratings (stacked segments) and price bands (bars).
qualidade <- c("Good", "Very Good", "Excellent")
precos <- c("$10-19", "$20-29", "$30-39", "$40-49")

# Percentages: one row per quality rating, one column per price band.
dados <- rbind(
  c(53.8, 33.9, 2.6, 0.0),    # Good
  c(43.6, 54.2, 60.5, 21.4),  # Very Good
  c(2.6, 11.9, 36.8, 78.6)    # Excellent
)

# One fill colour per quality rating, in the same order as the rows of `dados`.
cores <- c("#E6E6E6", "#666666", "#2E86C1")

# Stacked bars: each bar is a price band split by quality rating.
barplot(
  dados,
  beside = FALSE,
  names.arg = precos,
  col = cores,
  ylim = c(0, 100),
  xlab = "Faixa de Preço da Refeição",
  ylab = "Porcentagem (%)",
  main = "Qualidade da Refeição por Faixa de Preço"
)

legend("topright", title = "Qualidade", legend = qualidade, fill = cores)

Questão 3

data(airquality)

# Temperatures of the rows recorded in May (Month == 5).
temp_maio <- airquality$Temp[airquality$Month == 5]

# Fahrenheit -> Celsius conversion.
temp_maio_celsius <- (temp_maio - 32) / 1.8

# Compute the kernel density and the histogram bars up front so the y axis
# can be sized to fit both; otherwise the curve may be clipped by the
# histogram's automatic limits.
densidade <- density(temp_maio_celsius)
barras <- hist(temp_maio_celsius, plot = FALSE)
limite_y <- c(0, max(barras$density, densidade$y))

# probability = TRUE draws densities, not counts, so the axis is labelled
# accordingly (it previously read "Frequência").
hist(temp_maio_celsius,
     main = "Histograma das Temperaturas de Maio (°C)",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     col = "lightblue",
     border = "black",
     probability = TRUE,
     ylim = limite_y)

# Overlay the estimated density curve.
lines(densidade, col = "red", lwd = 2)

Questão 4

# Download the sales table; the first line of the file holds the column names.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of each row in the overall sales total, in percent.
total_sales <- sum(sales$SALES)
sales$Percentage <- sales$SALES / total_sales * 100

# One colour per row, shared by the pie slices and the legend.
colors <- rainbow(n = nrow(sales))

# Slices are sized by raw sales and labelled with the rounded percentage.
pie(
  x = sales$SALES,
  labels = paste0(round(sales$Percentage, digits = 1), "%"),
  main = "Porcentagem de Vendas por País",
  col = colors
)

legend("topright", title = "Países", legend = sales$COUNTRY, fill = colors)

Questão 5

data("InsectSprays")

# One box per spray type; points beyond the whiskers are not drawn
# (outline = FALSE).
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos",
  main = "Contagem de Insetos por Tipo de Inseticida"
)

Questão 6

# Load the four monitoring data sets from the working directory, one data
# frame per file.
df_NONE <- read.csv(file = "monitoringCloudData_NONE.csv")
df_1    <- read.csv(file = "monitoringCloudData_1.csv")
df_0.5  <- read.csv(file = "monitoringCloudData_0.5.csv")
df_0.1  <- read.csv(file = "monitoringCloudData_0.1.csv")

# Convert memory sizes written as text with a unit suffix (e.g. "1.5GB") to
# megabytes.
#
# Args:
#   memory: character vector (or a single string) containing a number
#           together with one of the units "TB", "GB" or "MB".
#
# Returns:
#   Numeric vector of the same length as `memory` holding the size in MB.
#   Entries without a recognised unit (including NA) yield NA.
convert_to_mb <- function(memory) {
  memory <- as.character(memory)

  # Units are binary multiples: 1 GB = 1024 MB and 1 TB = 1024 * 1024 MB.
  # The TB factor used to be 1000000, which was inconsistent with the GB one.
  # Assignment order keeps the original precedence: TB over GB over MB.
  factor_mb <- rep(NA_real_, length(memory))
  factor_mb[grepl("MB", memory)] <- 1
  factor_mb[grepl("GB", memory)] <- 1024
  factor_mb[grepl("TB", memory)] <- 1024^2

  # Only parse entries with a known unit; the rest stay NA without triggering
  # a coercion warning.
  amount <- rep(NA_real_, length(memory))
  known <- !is.na(factor_mb)
  amount[known] <- as.numeric(gsub("[TGM]B", "", memory[known]))

  amount * factor_mb
}

# Prepare a monitoring data frame for plotting.
#
# Args:
#   df: data frame with a `currentTime` column (text in the format
#       "%Y-%m-%d %H:%M:%S") and a `usedMemory` column accepted by
#       convert_to_mb().
#
# Returns:
#   `df` with `currentTime` parsed to POSIXct plus two new columns:
#   `hours` (time elapsed since the first row, in hours) and `usedMemoryMB`
#   (the result of convert_to_mb() for each `usedMemory` entry).
adjust_data <- function(df) {
  # No tz is given, so timestamps are interpreted in the session time zone.
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")

  # The first row is the time origin for the elapsed-hours axis.
  df$hours <- as.numeric(
    difftime(df$currentTime, df$currentTime[1], units = "hours")
  )

  # vapply() always returns a numeric vector, whereas sapply() returned a
  # list for zero-row input and so produced a list column.
  df$usedMemoryMB <- vapply(
    df$usedMemory, convert_to_mb, numeric(1), USE.NAMES = FALSE
  )

  df
}


# Run the same preparation step on every monitoring data set, replacing each
# data frame with its adjusted version.
df_NONE <- adjust_data(df = df_NONE)
df_1    <- adjust_data(df = df_1)
df_0.5  <- adjust_data(df = df_0.5)
df_0.1  <- adjust_data(df = df_0.1)


# Draw used memory against elapsed time as a line chart.
#
# Args:
#   df:    data frame providing the `hours` and `usedMemoryMB` columns.
#   title: main title shown above the plot.
plot_memory_usage <- function(df, title) {
  plot(
    x = df$hours,
    y = df$usedMemoryMB,
    type = "l",
    main = title,
    xlab = "Tempo (horas)",
    ylab = "Memória Usada (MB)"
  )
}


# Arrange the four charts on a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards so later plots are not forced into
# the grid.
old_par <- par(mfrow = c(2, 2))

plot_memory_usage(df_0.1, "Memória Usada (0.1)")
plot_memory_usage(df_0.5, "Memória Usada (0.5)")
plot_memory_usage(df_1, "Memória Usada (1)")
plot_memory_usage(df_NONE, "Memória Usada (NONE)")

par(old_par)

Questão 7

library(dplyr)
library(plotly)

netflix_data <- read.csv("netflix_titles.csv")

# Keep only titles attributed to exactly one country. read.csv() loads blank
# text fields as "" rather than NA, so empty strings must be excluded
# explicitly; otherwise they form a nameless "country" in the ranking.
netflix_data <- netflix_data %>%
  filter(
    !is.na(country),
    trimws(country) != "",
    !grepl(",", country, fixed = TRUE)
  )

# Number of titles per country, largest first.
country_counts <- netflix_data %>%
  group_by(country) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count))

top_10_countries <- head(country_counts, 10)

# Pie chart with one slice per country, sized by its number of titles.
plot_ly(top_10_countries, labels = ~country, values = ~count, type = "pie") %>%
  layout(title = "Top 10 Países com Mais Conteúdos na Netflix (2019)",
         showlegend = TRUE)

Questão 8

library(dplyr)
library(plotly)

netflix_data <- read.csv("netflix_titles.csv")

# Keep only titles attributed to exactly one country. read.csv() loads blank
# text fields as "" rather than NA, so empty strings must be excluded
# explicitly; otherwise they form a nameless "country" in the ranking.
netflix_data <- netflix_data %>%
  filter(
    !is.na(country),
    trimws(country) != "",
    !grepl(",", country, fixed = TRUE)
  )

# Number of titles per country, largest first.
country_counts <- netflix_data %>%
  group_by(country) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count))

top_10_countries <- head(country_counts, 10)

# Display names for the two table columns.
colnames(top_10_countries) <- c("País", "Total de Conteúdos")

# plotly table: each row of the `cells$values` matrix becomes one table
# column, matching the order of the header values.
tabela <- plot_ly(
  type = "table",
  header = list(
    values = c("<b>País</b>", "<b>Total de Conteúdos</b>"),
    align = c("center", "center"),
    fill = list(color = "gray"),
    font = list(color = "white", size = 14)
  ),
  cells = list(
    values = rbind(top_10_countries$País, top_10_countries$`Total de Conteúdos`),
    align = c("center", "center"),
    font = list(size = 12)
  )
)

tabela

Questão 9

library(dplyr)
library(plotly)

# Load the catalogue, drop rows without a release year and tag every title
# with the decade it was released in (e.g. 1994 -> 1990).
netflix_data <- read.csv("netflix_titles.csv") %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = floor(release_year / 10) * 10)

# Number of titles for every decade / content type combination.
content_by_decade <- netflix_data %>%
  group_by(decade, type) %>%
  summarise(count = n(), .groups = 'drop')

# Separate series, one per content type.
movies_data <- filter(content_by_decade, type == "Movie")
series_data <- filter(content_by_decade, type == "TV Show")

# Two line-and-marker traces on shared axes: TV shows in blue, movies in
# yellow.
grafico <- plot_ly() %>%
  add_trace(
    data = series_data,
    x = ~decade,
    y = ~count,
    name = 'Séries',
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'blue'),
    marker = list(color = 'blue')
  ) %>%
  add_trace(
    data = movies_data,
    x = ~decade,
    y = ~count,
    name = 'Filmes',
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'yellow'),
    marker = list(color = 'yellow')
  ) %>%
  layout(
    title = "Quantidade de Conteúdo por Década na Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de Conteúdo"),
    hovermode = "x unified"
  )

grafico

Questão 10

library(dplyr)
library(plotly)
library(stringr)

netflix_data <- read.csv("netflix_titles.csv")

# Genres compared in the chart.
generos_interesse <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010, labelled with the first genre listed in
# `listed_in` (the text before the first comma) and restricted to the genres
# of interest.
filmes_2000_2010 <- netflix_data %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010) %>%
  mutate(primeiro_genero = str_split(listed_in, ",", simplify = TRUE)[, 1]) %>%
  filter(primeiro_genero %in% generos_interesse)

# Number of movies for every year / genre combination.
filmes_por_ano_genero <- filmes_2000_2010 %>%
  group_by(release_year, primeiro_genero) %>%
  summarise(quantidade = n(), .groups = 'drop')

# Grouped bar chart: one group per year, one bar per genre.
grafico <- filmes_por_ano_genero %>%
  plot_ly(
    x = ~release_year,
    y = ~quantidade,
    color = ~primeiro_genero,
    colors = c("Dramas" = "blue", "Action & Adventure" = "red", "Comedies" = "green"),
    type = "bar"
  ) %>%
  layout(
    barmode = "group",  # bars side by side
    title = "Quantidade de Filmes Lançados por Gênero (2000-2010)",
    xaxis = list(title = "Ano"),
    yaxis = list(title = "Quantidade de Filmes")
  )

grafico

Visualização de Dados

Raphael Melo

2025-02-06

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10