Introdução

Resolução da lista 12 da disciplina de Computação para Análise de Dados no semestre 2024.1.

Questões

Questão 1

Dados

# Clock values shared by every series below (x axis of both charts).
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# MRT measurements, one value per clock setting, for each scenario
# (1, 3, 5, 10 and 15 fogs, and without fog).
MRT_1F <- c(
  517.1468515630205, 85.13094142168089,
  30.333207896694553, 12.694776264558937,
  3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276,
  0.4512835621696538, 0.4509797929766453,
  0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968,
  0.30522314133037304, 0.3072588968084928,
  0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515,
  0.1971923924717474, 0.19796648905716516,
  0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193,
  0.16172168266811013, 0.16701085329580515,
  0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009,
  0.6060645101029295, 0.612167181646899,
  0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

Gráfico de Linhas

# Split the device into two side-by-side panels; this line chart fills the
# first panel and the next high-level plot fills the second.
layout(matrix(c(1, 2), nrow = 1, ncol = 2))

# Per-series styling kept in parallel vectors so that the legend is built from
# exactly the same colours and symbols used to draw the lines.
mrt_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
series_cols <- c("red", "blue", "green", "purple", "orange", "black")
series_pch <- c(16, 17, 18, 19, 15, 8)

# The first series opens the plot; the y range runs from 0 to its maximum.
plot(clock, mrt_series[[1]], type = "o",
     col = series_cols[1], pch = series_pch[1],
     ylim = range(0, max(MRT_1F)),
     xlab = "Clock", ylab = "MRT", main = "Gráfico de Linhas")

# Remaining series are overlaid on the same axes.
for (i in seq_along(mrt_series)[-1]) {
  lines(clock, mrt_series[[i]], type = "o",
        col = series_cols[i], pch = series_pch[i])
}

legend("topright", legend = series_labels,
       col = series_cols, pch = series_pch, lty = 1)

Gráfico de Barras

# Grouped bar chart of the six MRT series on a log-scaled y axis.
# Each column of the matrix is one clock value, so there is one group of six
# bars per clock value.
mrt_matrix <- rbind(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)

barplot(height = mrt_matrix, beside = TRUE,
        log = "y",
        # One grey level per series, ramping from "#E6E6E6" to "#666666".
        col = gray.colors(nrow(mrt_matrix), start = 0.9, end = 0.4),
        # One label per group (length == ncol): each group is tagged with its
        # clock value instead of cycling the clock values across single bars.
        names.arg = clock,
        legend.text = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog"),
        args.legend = list(x = "topright", cex = 0.8),
        xlab = "Clock", ylab = "MRT (log)", main = "Gráfico de Barras")

# Back to a single-panel device so later plots are not drawn in half-width
# panels of a leftover multi-panel layout.
layout(1)

Questão 2

Dados

# Price ranges (one bar each) and the percentage of meals rated at each
# quality level within every price range.
precos <- c("R$10-19", "R$20-29", "R$30-39", "R$40-49")
qualidade_bom <- c(53.8, 33.9, 2.6, 0)
qualidade_muito_bom <- c(43.6, 54.2, 60.5, 21.4)
qualidade_excelente <- c(2.6, 11.9, 36.8, 78.6)

# check.names = FALSE keeps the column name "Muito Bom" as written; the
# default would silently rewrite it to "Muito.Bom".
dados <- data.frame(
  Excelente = qualidade_excelente,
  `Muito Bom` = qualidade_muito_bom,
  Bom = qualidade_bom,
  check.names = FALSE
)

# barplot() stacks the rows of a matrix within each column, so transpose to
# get one row per quality level and one column per price range.
dados_matriz <- t(as.matrix(dados))

Gráfico

# Stacked bar chart: one bar per price range, one segment per quality level.
# Colours and labels are listed in the row order of dados_matriz.
cores_qualidade <- c("lightblue", "lightgreen", "lightcoral")
niveis_qualidade <- c("Excelente", "Muito Bom", "Bom")

barplot(dados_matriz,
        beside = FALSE,
        col = cores_qualidade,
        names.arg = precos,
        main = "Classificação da Qualidade da Refeição por Preço",
        xlab = "Preços",
        ylab = "Qualidade (%)")

# Single-column legend, nudged slightly inside the top-left corner.
legend("topleft",
       legend = niveis_qualidade,
       fill = cores_qualidade,
       ncol = 1,
       inset = 0.05)

Questão 3

Dados

# Load the built-in airquality dataset.
data(airquality)

# Keep the rows for month 5 and convert Temp with (Temp - 32) / 1.8
# (Fahrenheit to Celsius).
maio_data <- subset(airquality, Month == 5)
maio_data <- transform(maio_data, Temp = (Temp - 32) / 1.8)

Gráfico

# Density-scaled histogram of the May temperatures with a kernel density
# curve drawn on top.
temp_maio <- maio_data$Temp
curva_densidade <- density(temp_maio)

# Compute the histogram without drawing it so the y axis can be sized to fit
# both the tallest bar and the peak of the density curve; otherwise the curve
# could be clipped at the top of the plot.
hist_previa <- hist(temp_maio, plot = FALSE)
limite_y <- c(0, max(hist_previa$density, curva_densidade$y))

hist(temp_maio,
     main = "Histograma das Temperaturas em Maio",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     col = "orange",
     freq = FALSE,
     ylim = limite_y)

lines(curva_densidade, col = "red", lwd = 2)

Questão 4

Dados

# Download the sales table (the first line of the file holds the column names).
sales_url <- "https://training-course-material.com/images/8/8f/Sales.txt"
sales <- read.table(sales_url, header = TRUE)

# Sum SALES within each COUNTRY.
total_sales <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)

# Share of each country in the overall total, in percent with one decimal.
percentages <- round(100 * prop.table(total_sales$SALES), digits = 1)

Gráfico

# Slice colours, reused for the legend so that both stay in sync.
colors <- c("red", "blue", "green", "yellow", "orange", "purple")

# Each slice is labelled with its percentage; country names go in the legend.
rotulos_percentuais <- paste(percentages, "%")
pie(total_sales$SALES,
    labels = rotulos_percentuais,
    col = colors,
    main = "Total de Vendas por País")

legend("topright",
       legend = total_sales$COUNTRY,
       fill = colors)

Questão 5

Dados

# Load the built-in InsectSprays dataset into the workspace.
data(InsectSprays)

Gráfico

# One box per insecticide type; outlier points are not drawn
# (outline = FALSE).
contagens_por_spray <- with(InsectSprays, split(count, spray))

boxplot(contagens_por_spray,
        main = "Contagem de Insetos por Tipo de Inseticida",
        xlab = "Inseticida",
        ylab = "Contagem de Insetos",
        col = "yellow",
        outline = FALSE)

Questão 7

Pacotes

# Install plotly and dplyr only when they are missing, so the packages are not
# reinstalled on every run and an already provisioned machine needs no network
# access.
for (pacote in c("plotly", "dplyr")) {
  if (!requireNamespace(pacote, quietly = TRUE)) {
    install.packages(pacote)
  }
}
library(plotly)
library(dplyr)

Dados

netflix_data <- read.csv("netflix_titles.csv")

# Keep titles credited to exactly one country.
# read.csv() loads blank character fields as "" rather than NA, so blank
# countries are excluded explicitly; otherwise the empty string would be
# counted as if it were a country and could show up in the ranking.
netflix_data_filtered <- netflix_data %>%
  filter(
    !is.na(country),
    nzchar(trimws(country)),
    !grepl(",", country, fixed = TRUE)
  )

# Number of titles per country, largest first.
country_counts <- netflix_data_filtered %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Ten countries with the most titles (ties at the cut-off are kept).
top_10_countries <- country_counts %>%
  slice_max(count, n = 10)

Gráfico

# Interactive pie chart of the top-10 countries: each slice shows the country
# label and its percentage, with the inside text oriented radially.
fig <- top_10_countries %>%
  plot_ly(
    labels = ~country,
    values = ~count,
    type = "pie",
    textinfo = "label+percent",
    insidetextorientation = "radial"
  ) %>%
  layout(title = "Top 10 Países com Mais Conteúdos na Netflix (2019)")

# Print the widget.
fig

Questão 8

Pacotes

library(plotly)
library(dplyr)

Dados

netflix_data <- read.csv("netflix_titles.csv")

# Keep titles credited to exactly one country.
# read.csv() loads blank character fields as "" rather than NA, so blank
# countries are excluded explicitly; otherwise the empty string would be
# counted as if it were a country and could show up in the table.
netflix_data_filtered <- netflix_data %>%
  filter(
    !is.na(country),
    nzchar(trimws(country)),
    !grepl(",", country, fixed = TRUE)
  )

# Number of titles per country, largest first.
country_counts <- netflix_data_filtered %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Ten countries with the most titles (ties at the cut-off are kept).
top_10_countries <- country_counts %>%
  slice_max(count, n = 10)

Tabela

# Header row: two centred column titles, white text on a grey fill.
cabecalho_tabela <- list(
  values = c("País", "Total de conteúdos"),
  align = c("center", "center"),
  line = list(width = 1, color = "black"),
  fill = list(color = "grey"),
  font = list(family = "Arial", size = 12, color = "white")
)

# Body cells: rbind() stacks the two vectors as matrix rows, the first holding
# the country names and the second the counts.
celulas_tabela <- list(
  values = rbind(top_10_countries$country, top_10_countries$count),
  align = c("center", "center"),
  line = list(color = "black", width = 1),
  fill = list(color = c("white")),
  font = list(family = "Arial", size = 12, color = c("black"))
)

# Interactive table of the top-10 countries.
fig <- plot_ly(
  type = "table",
  header = cabecalho_tabela,
  cells = celulas_tabela
)

fig

Questão 9

Pacotes

library(plotly)
library(dplyr)

Dados

netflix_data <- read.csv("netflix_titles.csv")

# Decade of release: the release year rounded down to a multiple of 10.
netflix_data <- netflix_data %>%
  mutate(decade = floor(release_year / 10) * 10)

# Number of titles (column n) per decade and content type.
# .groups = "drop" returns ungrouped data: a result still grouped by decade
# would make plotly break line traces at every group boundary, leaving
# markers without connecting lines.
content_counts <- netflix_data %>%
  group_by(decade, type) %>%
  summarise(n = n(), .groups = "drop")

Gráfico

# Fixed colour per content type.
custom_colors <- c(Movie = "yellow", `TV Show` = "blue")

# One lines+markers trace per content type: decade on x, title count on y.
line_chart <- content_counts %>%
  plot_ly(
    x = ~decade,
    y = ~n,
    color = ~type,
    colors = custom_colors,
    type = "scatter",
    mode = "lines+markers"
  )

line_chart <- line_chart %>%
  layout(
    title = "Content Count by Decade on Netflix",
    xaxis = list(title = "Decade"),
    yaxis = list(title = "Content Count")
  )

line_chart

Questão 10

Pacotes

library(plotly)
library(dplyr)

Dados

netflix_data <- read.csv("netflix_titles.csv")

# Movies released from 2000 to 2010 (inclusive). The primary genre is the
# first comma-separated entry of listed_in.
# vapply() guarantees a character vector with one value per row (NA when the
# field has no entry), whereas sapply() may change its return type with the
# input.
filtered_data <- netflix_data %>%
  filter(type == "Movie" & release_year >= 2000 & release_year <= 2010) %>%
  mutate(
    primary_genre = vapply(
      strsplit(as.character(listed_in), ","),
      `[`,
      character(1),
      1
    )
  )

# Restrict to the three genres being compared.
selected_genres <- c("Dramas", "Action & Adventure", "Comedies")
filtered_data <- filtered_data %>%
  filter(primary_genre %in% selected_genres)

# Movies per release year and genre; .groups = "drop" returns ungrouped data
# and avoids the regrouping message summarise() would otherwise print.
genre_counts <- filtered_data %>%
  group_by(release_year, primary_genre) %>%
  summarise(count = n(), .groups = "drop")

Gráficos

# Bar chart of movie counts per release year, one coloured trace per genre.
fig <- plot_ly(data = genre_counts,
               x = ~release_year,
               y = ~count,
               color = ~primary_genre,
               colors = c("Dramas" = "blue", "Action & Adventure" = "green", "Comedies" = "red"),
               type = 'bar')

# barmode is a layout attribute, not a trace attribute, so it is set in
# layout(); passing it to plot_ly() triggers an unknown-attribute warning.
fig <- fig %>%
  layout(barmode = 'group',
         title = 'Quantidade de Filmes por Gênero',
         xaxis = list(title = 'Ano de Lançamento'),
         yaxis = list(title = 'Quantidade de Filmes'),
         legend = list(title = list(text = '<b>Gênero</b>')))

fig