atividade12-analisededados

Exercício 12

Questão 1

# Response-time series used by the two Question 1 plots. Each vector holds one
# value per entry of `clock` (seven points). The suffix matches the legend
# labels used below: 1F = "1 Fog", 3F = "3 Fogs", 5F = "5 Fogs",
# 10F = "10 Fogs", 15F = "15 Fogs", sem_F = "w/o Fog".
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937, 3.3041601673945418, 1.1823111717498882, 1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453, 0.4502068233039181, 0.4496185276300172, 0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928, 0.30655265997285697, 0.3055812715727718, 0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516, 0.19615594370806338, 0.2034569237883263, 0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515, 0.1598052657153692, 0.1645934043532696, 0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899, 0.6146761002685637, 0.6096747087200697, 0.6125810476877268)
# X-axis values shared by every series; the plots label this axis
# "Time between Things requests (seconds)".
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# ==============================================================================
# PLOT 1: Multiple-line chart
# ==============================================================================
# Draws the six response-time series against `clock` on a single set of axes.
#
# `graphics::layout()` is namespaced on purpose: this file attaches plotly
# further down, and plotly exports its own `layout()` generic that masks the
# graphics function. Re-running this section in the same session would
# otherwise send the bare `layout(1)` call to plotly instead of graphics.
graphics::layout(1)
par(mar = c(5, 5, 4, 2))

# One entry per curve. The same vectors drive both the curves and the legend,
# so labels, colours and symbols cannot drift apart.
series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
rotulos_series <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
cores_series <- c("black", "yellow", "red", "blue", "purple", "green")
simbolos_series <- c(4, 8, 1, 2, 5, 3)

# The first series creates the plot region; the default x axis is suppressed
# so that tick labels can be printed with one decimal place.
plot(clock, series[[1]], type = "o",
     pch = simbolos_series[1], col = cores_series[1], lwd = 1.2,
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     xlim = c(0, 3), ylim = c(0, 550), xaxt = "n")

marcas_x <- seq(0, 3, by = 0.5)
axis(1, at = marcas_x, labels = sprintf("%.1f", marcas_x))

# Remaining series are overlaid in the same order as the legend entries.
for (k in seq_along(series)[-1]) {
  lines(clock, series[[k]], type = "o",
        pch = simbolos_series[k], col = cores_series[k], lwd = 1.2)
}

legend("topright",
       legend = rotulos_series,
       col = cores_series,
       pch = simbolos_series, lty = 1, bty = "o", bg = "white", cex = 0.9)

# ==============================================================================
# PLOT 2: Panel of grouped bar charts with logarithmic scale
# ==============================================================================
# 3 x 2 grid filled row by row: cells 1-5 receive one bar chart each and the
# last cell (0) is left empty.
mat_layout <- matrix(c(1:5, 0), nrow = 3, ncol = 2, byrow = TRUE)
layout(mat_layout)
par(mar = c(4.5, 4.5, 2, 1))

# Fill colours for the two bar groups: light grey first, dark grey second.
cores_barras <- c("#E6E6E6", "#666666")

# Draw one grouped bar chart (log-scaled y axis) comparing a baseline series
# with one fog configuration, and add a legend in the top-right corner.
#
# Args:
#   mrt_fog_data: numeric vector of response times for the fog configuration.
#   label_fog:    legend label for `mrt_fog_data` (e.g. "3 Fogs").
#   mrt_base:     baseline series drawn as the first bar of each group;
#                 defaults to the global `MRT_sem_F`.
#   intervalos:   labels printed under each group of bars; defaults to the
#                 global `clock`.
#   cores:        two fill colours (baseline, fog); defaults to the global
#                 `cores_barras`.
#
# The three defaults are evaluated lazily, so existing two-argument calls keep
# reading the globals exactly as before. Because the y axis is logarithmic,
# all values must be strictly positive.
#
# Returns: the value of `legend()`, invisibly.
gerar_subplot_barras <- function(mrt_fog_data, label_fog,
                                 mrt_base = MRT_sem_F,
                                 intervalos = clock,
                                 cores = cores_barras) {
  # Row 1 = baseline, row 2 = fog; with beside = TRUE each column of the
  # matrix becomes one group of two bars.
  dados_combinados <- rbind(mrt_base, mrt_fog_data)
  barplot(dados_combinados,
          beside = TRUE,
          log = "y",
          col = cores,
          names.arg = intervalos,
          xlab = "Time between Things requests",
          ylab = "Response time (s)",
          las = 1)
  legend("topright",
         legend = c("w/o Fog", label_fog),
         fill = cores,
         bty = "o", bg = "white", cex = 0.8)
}

# Fill the five panels, one fog configuration per panel, in legend order.
paineis_fog <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F
)
for (rotulo_fog in names(paineis_fog)) {
  gerar_subplot_barras(paineis_fog[[rotulo_fog]], rotulo_fog)
}

# Back to a single-panel device. The call is namespaced because plotly, which
# this file attaches later, exports a `layout()` generic that masks the
# graphics function when the section is re-run in the same session.
graphics::layout(1)

Questão 2

# Percentage of meals per quality rating (rows) within each price band
# (columns), entered one price band at a time.
dados_qualidade <- cbind(
  "$10-19" = c(53.8, 43.6, 2.6),
  "$20-29" = c(33.9, 54.2, 11.9),
  "$30-39" = c(2.6, 60.5, 36.8),
  "$40-49" = c(0.0, 21.4, 78.6)
)
rownames(dados_qualidade) <- c("Good", "Very Good", "Excellent")

# One fill colour per rating, in row order.
cores_qualidade <- c("#F2A6A2", "#B3CDE3", "#8C96C6")

par(mar = c(5, 5, 4, 2))

# Stacked bars, one per price band. The y axis runs past 100 so the
# horizontal legend fits above the bars.
barplot(
  dados_qualidade,
  beside = FALSE,
  col = cores_qualidade,
  border = "white",
  las = 1,
  ylim = c(0, 125),
  main = "Meal Quality Rating by Price Category",
  xlab = "Meal Price",
  ylab = "Percentage (%)"
)

legend(
  "top",
  legend = rownames(dados_qualidade),
  fill = cores_qualidade,
  horiz = TRUE,
  bty = "n",
  cex = 1
)

Questão 3

# May temperatures from the built-in `airquality` data set, converted from
# Fahrenheit to Celsius.
temp_maio_f <- with(airquality, Temp[Month == 5])
temp_maio_c <- (temp_maio_f - 32) / 1.8

# Kernel density estimate, overlaid on the histogram below.
linhas_densidade <- density(temp_maio_c)

par(mar = c(5, 5, 4, 2))

# freq = FALSE puts the histogram on the density scale so the curve and the
# bars share the same y axis.
hist(
  temp_maio_c,
  freq = FALSE,
  col = "#99D8C9",
  border = "white",
  las = 1,
  ylim = c(0, 0.12),
  main = "Daily Temperature Distribution in May",
  xlab = "Temperature (°C)",
  ylab = "Density"
)

lines(linhas_densidade, col = "#005824", lwd = 2.5)

Questão 4

# Question 4: share of total sales per country, drawn as a pie chart with the
# legend placed in the right margin.
sales <- read.table("https://training-course-material.com/images/8/8f/Sales.txt", header = TRUE)

# Slice labels: each country's percentage of the total, one decimal place.
porcentagem <- round(sales$SALES / sum(sales$SALES) * 100, 1)
rotulos_fatias <- paste0(porcentagem, "%")

# NOTE(review): six fixed colours - assumes the file has six rows; pie()
# recycles colours if there are more. Confirm against the data.
cores_paises <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F")

# A wide right margin plus xpd = TRUE lets the legend be drawn outside the
# plot region. The previous parameters are saved so that xpd = TRUE does not
# leak into every plot drawn afterwards on the same device.
par_anterior <- par(mar = c(3, 3, 4, 7), xpd = TRUE)

pie(sales$SALES,
    labels = rotulos_fatias,
    col = cores_paises,
    main = "Percentage of Total Sales by Country",
    clockwise = TRUE,
    border = "white",
    cex = 1.1)

# The negative horizontal inset pushes the legend into the right margin.
legend("topright",
       inset = c(-0.15, 0.1),
       legend = sales$COUNTRY,
       fill = cores_paises,
       title = "Countries",
       bty = "o",
       bg = "white",
       cex = 0.9)

par(par_anterior)

Questão 5

# Question 5: insect counts per spray type from the built-in `InsectSprays`
# data set, one box per spray. outline = FALSE hides the outlier points.
par(mar = c(5, 5, 4, 2))

boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Insect Counts by Insecticide Spray Type",
  xlab = "Insecticide Spray Type",
  ylab = "Insect Count",
  col = "yellow",
  border = "#4D4D4D",
  outline = FALSE,
  pch = 16,
  las = 1
)

Questão 6

# Convert memory readings such as "512MB", "1.5GB" or "2TB" to numeric
# megabytes.
#
# Args:
#   vetor_memoria: character vector; each element is a number followed by a
#                  unit. Letters and spaces are stripped to obtain the number.
#   mb_por_gb:     megabytes per gigabyte. Default 1024.
#   mb_por_tb:     megabytes per terabyte. Default 1000000.
#                  NOTE(review): the defaults mix a binary factor (GB) with a
#                  decimal one (TB). They are kept as they were for backward
#                  compatibility; pass mb_por_tb = 1024^2 for a consistent
#                  binary conversion.
#
# Returns: numeric vector, same length as `vetor_memoria`. Values whose unit
#   contains neither "GB" nor "TB" are returned unscaled (treated as MB).
limpar_memoria_mb <- function(vetor_memoria, mb_por_gb = 1024, mb_por_tb = 1000000) {
  valores_numericos <- as.numeric(gsub("[A-Za-z ]", "", vetor_memoria))
  unidades <- gsub("[0-9\\.]", "", vetor_memoria)

  # Start at 1 (already MB) and overwrite where a larger unit is found. GB is
  # applied last so it takes precedence when both patterns match.
  multiplicadores <- rep(1, length(vetor_memoria))
  multiplicadores[grepl("TB", unidades)] <- mb_por_tb
  multiplicadores[grepl("GB", unidades)] <- mb_por_gb

  valores_numericos * multiplicadores
}

# Question 6: 2 x 2 panel of used memory over time, one panel per monitoring
# file. `graphics::layout()` is namespaced because plotly, which this file
# attaches later, exports a `layout()` generic that masks the graphics
# function when the section is re-run in the same session.
graphics::layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))
par(mar = c(5, 5, 3.5, 2))

# Input files, panel titles and line colours are matched by position.
arquivos <- c("monitoringCloudData_0.1.csv",
              "monitoringCloudData_0.5.csv",
              "monitoringCloudData_1.csv",
              "monitoringCloudData_NONE.csv")

titulos <- c("Cloud Monitoring (0.1)",
             "Cloud Monitoring (0.5)",
             "Cloud Monitoring (1.0)",
             "Cloud Monitoring (NONE)")

cores_linhas <- c("#1B9E77", "#D95F02", "#7570B3", "#E7298A")

# seq_along() rather than 1:length(): it yields an empty sequence for an empty
# file list instead of iterating over c(1, 0).
for (i in seq_along(arquivos)) {
  dados <- read.csv(arquivos[i])

  # X axis: hours elapsed since the first sample of the file.
  tempo_posix <- as.POSIXct(dados$currentTime, format = "%Y-%m-%d %H:%M:%OS")
  dados$tempo_horas <- as.numeric(difftime(tempo_posix, tempo_posix[1], units = "hours"))

  # Y axis: memory readings normalised to megabytes.
  dados$memoria_mb <- limpar_memoria_mb(dados$usedMemory)

  plot(dados$tempo_horas, dados$memoria_mb,
       type = "l",
       col = cores_linhas[i],
       lwd = 1.5,
       main = titulos[i],
       xlab = "Time (hours)",
       ylab = "Used Memory (MB)",
       las = 1)
}

# Back to a single-panel device.
graphics::layout(1)

Questão 7

library(plotly)

# Question 7: pie chart of the ten countries with the most titles, counting
# only titles attributed to exactly one country.
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Drop rows with a missing or empty country, then rows whose country field
# contains a comma (i.e. lists more than one country).
tem_pais <- !is.na(netflix_data$country) & netflix_data$country != ""
netflix_valid_country <- netflix_data[tem_pais, ]
varios_paises <- grepl(",", netflix_valid_country$country)
netflix_single_country <- netflix_valid_country[!varios_paises, ]

# Frequency table of countries, largest count first.
tabela_paises <- as.data.frame(table(netflix_single_country$country))
names(tabela_paises) <- c("Country", "Content_Count")
tabela_paises <- tabela_paises[order(-tabela_paises$Content_Count), ]

top10_paises <- head(tabela_paises, n = 10)

# Both axes are hidden: grid, zero line and tick labels are all switched off.
eixo_oculto <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)

plot_ly(
  top10_paises,
  type = "pie",
  labels = ~Country,
  values = ~Content_Count,
  textinfo = "label+percent",
  insidetextorientation = "radial",
  marker = list(line = list(color = "#FFFFFF", width = 1))
) %>%
  layout(
    title = list(text = "<b>Top 10 Países com Mais Conteúdo Exclusivo na Netflix</b>", y = 0.95),
    showlegend = TRUE,
    xaxis = eixo_oculto,
    yaxis = eixo_oculto
  )

Questão 8

library(plotly)

# Question 8: the same top-10 single-country ranking, rendered as a plotly
# table instead of a chart.
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep rows with a non-missing, non-empty country that names one country only
# (no comma in the field).
pais_informado <- !is.na(netflix_data$country) & netflix_data$country != ""
netflix_valid_country <- netflix_data[pais_informado, ]
pais_unico <- !grepl(",", netflix_valid_country$country)
netflix_single_country <- netflix_valid_country[pais_unico, ]

# Count titles per country and keep the ten largest counts.
tabela_paises <- as.data.frame(table(netflix_single_country$country))
names(tabela_paises) <- c("Country", "Content_Count")
tabela_paises <- tabela_paises[order(-tabela_paises$Content_Count), ]
top10_paises <- head(tabela_paises, n = 10)

# Header row: white bold text on dark grey.
cabecalho_tabela <- list(
  values = c("<b>País</b>", "<b>Total de conteúdos</b>"),
  fill = list(color = "#666666"),
  font = list(color = "white", size = 14),
  align = "center"
)

# Body: one list element per table column.
celulas_tabela <- list(
  values = list(top10_paises$Country, top10_paises$Content_Count),
  align = "center",
  font = list(color = "black", size = 12),
  fill = list(color = "#F9F9F9"),
  height = 30
)

plot_ly(type = "table", header = cabecalho_tabela, cells = celulas_tabela)

Questão 9

library(plotly)

# Question 9: number of titles per release decade, one line for movies and
# one for TV shows.
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Round each release year down to the first year of its decade.
netflix_data$decade <- floor(netflix_data$release_year / 10) * 10

# Count titles for every decade/type combination.
contagem_decada <- aggregate(show_id ~ decade + type, data = netflix_data, FUN = length)
names(contagem_decada) <- c("Decade", "Type", "Count")

# One data frame per content type, sorted by decade so the lines are drawn
# left to right.
dados_filmes <- contagem_decada[which(contagem_decada$Type == "Movie"), ]
dados_filmes <- dados_filmes[order(dados_filmes$Decade), ]
dados_series <- contagem_decada[which(contagem_decada$Type == "TV Show"), ]
dados_series <- dados_series[order(dados_series$Decade), ]

# One tick per decade between the earliest and latest decade present,
# labelled with an "s" suffix.
decadas_eixo <- seq(min(contagem_decada$Decade), max(contagem_decada$Decade), by = 10)
cor_grade <- "#E9E9E9"

plot_ly() %>%
  add_trace(
    data = dados_filmes,
    x = ~Decade,
    y = ~Count,
    type = "scatter",
    mode = "lines+markers",
    name = "Filmes",
    line = list(color = "#FFC107", width = 3),
    marker = list(color = "#FFC107", size = 8)
  ) %>%
  add_trace(
    data = dados_series,
    x = ~Decade,
    y = ~Count,
    type = "scatter",
    mode = "lines+markers",
    name = "Séries",
    line = list(color = "#007BFF", width = 3),
    marker = list(color = "#007BFF", size = 8)
  ) %>%
  layout(
    title = list(text = "<b>Evolução de Conteúdos na Netflix por Década de Lançamento</b>", y = 0.97),
    xaxis = list(
      title = "Décadas",
      tickvals = decadas_eixo,
      ticktext = paste0(decadas_eixo, "s"),
      showgrid = TRUE,
      gridcolor = cor_grade
    ),
    yaxis = list(
      title = "Quantidade de Conteúdo",
      showgrid = TRUE,
      gridcolor = cor_grade
    ),
    hovermode = "x unified",
    legend = list(orientation = "h", x = 0.5, xanchor = "center", y = -0.15)
  )

Questão 10

library(plotly)

# Question 10: movies released per year (2000-2010) for three primary genres,
# drawn as grouped bars.
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

netflix_movies <- subset(netflix_data, type == "Movie")

# Primary genre = first comma-separated entry of `listed_in`. vapply() pins
# the result to a character vector even when there are no movie rows, where
# sapply() would return an empty list.
netflix_movies$first_genre <- vapply(
  strsplit(netflix_movies$listed_in, ","),
  function(generos) trimws(generos[1]),
  character(1)
)

movies_filtered <- subset(netflix_movies, release_year >= 2000 & release_year <= 2010)

genres_alvo <- c("Dramas", "Action & Adventure", "Comedies")
movies_target_genres <- subset(movies_filtered, first_genre %in% genres_alvo)

# One row per (year, genre) combination that actually occurs.
contagem_filmes <- aggregate(show_id ~ release_year + first_genre, data = movies_target_genres, FUN = length)
colnames(contagem_filmes) <- c("Year", "Genre", "Count")

anos_intervalo <- 2000:2010
df_grafico <- data.frame(Year = anos_intervalo)

# Counts for one genre aligned to `anos_intervalo`; years with no row in
# `contagem_filmes` get 0. Replaces three copy-pasted sapply() closures.
contar_por_ano <- function(genero) {
  do_genero <- contagem_filmes[contagem_filmes$Genre == genero, ]
  contagens <- do_genero$Count[match(anos_intervalo, do_genero$Year)]
  contagens[is.na(contagens)] <- 0
  contagens
}

df_grafico$Dramas <- contar_por_ano("Dramas")
df_grafico$Action <- contar_por_ano("Action & Adventure")
df_grafico$Comedies <- contar_por_ano("Comedies")

plot_ly(data = df_grafico) %>%
  add_trace(x = ~Year, y = ~Dramas, type = 'bar', name = 'Dramas',
            marker = list(color = '#E41A1C')) %>%
  add_trace(x = ~Year, y = ~Action, type = 'bar', name = 'Action & Adventure',
            marker = list(color = '#377EB8')) %>%
  add_trace(x = ~Year, y = ~Comedies, type = 'bar', name = 'Comedies',
            marker = list(color = '#4DAF4A')) %>%
  layout(
    title = list(text = '<b>Volume de Filmes Lançados por Gênero Principal (2000 - 2010)</b>', y = 0.96),
    xaxis = list(
      title = "Ano de Lançamento",
      tickvals = anos_intervalo,
      tickmode = "array"
    ),
    yaxis = list(
      title = "Quantidade de Filmes Lançados",
      showgrid = TRUE
    ),
    barmode = 'group',
    legend = list(orientation = 'h', x = 0.5, xanchor = 'center', y = -0.15)
  )

atividade12-analisededados

19-05-2026

Exercício 12

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10