Resolução de Exercícios 12

Questão 1

# Input data: one response-time series per configuration (1, 3, 5, 10 and
# 15 fogs, plus the run without fog), sampled at the intervals in `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Single figure filling the whole device (every cell of the 3x1 layout matrix
# points at figure 1). The call is namespace-qualified because plotly, which
# is attached further down in this file, masks `layout`; a bare call fails
# when the script is re-run in a session where plotly is already loaded.
graphics::layout(matrix(1, nrow = 3, byrow = TRUE), heights = c(2, 1, 1))

# Per-series drawing attributes, in legend order
series_values <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
series_cols <- c("black", "yellow", "red", "blue", "purple", "green")
series_pch <- c(4, 8, 1, 2, 5, 16)

# Plot 1: all series on the same axes. The first series sets up the axes,
# the remaining ones are overlaid on top of it.
plot(clock, series_values[[1]],
     type = "o", col = series_cols[1], pch = series_pch[1],
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     main = "Response Time vs. Time")
for (i in seq_along(series_values)[-1]) {
  lines(clock, series_values[[i]],
        type = "o", col = series_cols[i], pch = series_pch[i])
}
legend("topright", legend = series_labels, col = series_cols,
       lty = 1, pch = series_pch)

# Switch to a 3x2 grid of panels with a wider left margin
par(mfrow = c(3, 2), mar = c(5, 6, 4, 2) + 0.1)

# Fill colours shared by every panel: light grey = w/o Fog, dark grey = Fog
bar_fill <- c("#E6E6E6", "#666666")

# One entry per panel: the fog series, its legend label and the upper limit
# of the (logarithmic) y axis.
panels <- list(
  list(series = MRT_1F,  label = "1 Fog",   top = 550),
  list(series = MRT_3F,  label = "3 Fogs",  top = 200),
  list(series = MRT_5F,  label = "5 Fogs",  top = 100),
  list(series = MRT_10F, label = "10 Fogs", top = 50),
  list(series = MRT_15F, label = "15 Fogs", top = 20)
)

# Plots 1-5: grouped bars comparing the run without fog against each fog
# configuration, with a reduced-size legend.
for (panel in panels) {
  barplot(rbind(MRT_sem_F, panel$series),
          beside = TRUE, names.arg = clock,
          col = bar_fill, border = "black",
          xlab = "Time between Things requests",
          ylab = "Response time (s)",
          main = paste("w/o Fog vs", panel$label),
          ylim = c(0.1, panel$top), log = "y")
  legend("topright", legend = c("w/o Fog", panel$label),
         fill = bar_fill, cex = 0.7)
}

# Leave the sixth cell of the grid empty
plot.new()

Questão 2

# Table data: one row per quality rating, one column per meal-price range
qualidade_refeicao <- rbind(
  "Good"      = c(53.8, 33.9, 2.6, 0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c(2.6, 11.9, 36.8, 78.6)
)
colnames(qualidade_refeicao) <- c("$10–19", "$20–29", "$30–39", "$40–49")

# Colours of the stacked segments, in row order
rating_fill <- c("#E6E6E6", "#666666", "#FF9999")

# Widen the right margin and allow drawing outside the plot region so the
# legend can sit next to the bars instead of over them.
par(mar = c(5, 5, 4, 7), xpd = TRUE)

# Stacked bar chart (no built-in legend)
barplot(qualidade_refeicao,
        beside = FALSE, col = rating_fill,
        main = "Quality Rating by Meal Price",
        xlab = "Meal Price", ylab = "Percentage",
        cex.names = 0.8, cex.axis = 0.8, cex.lab = 0.8)

# Legend placed outside the plot area (negative horizontal inset)
legend("topright", inset = c(-0.2, 0),
       legend = rownames(qualidade_refeicao),
       fill = rating_fill, cex = 0.8)

Questão 3

# Load the built-in airquality dataset
data("airquality")

# Keep only the temperature readings taken in May (Month == 5)
temp_may <- airquality$Temp[airquality$Month == 5]

# Convert from Fahrenheit to Celsius
temp_may_celsius <- (temp_may - 32) / 1.8

# Histogram on the density scale (freq = FALSE) so that the density curve
# added below shares the same y axis. The axis is therefore labelled
# "Density": with freq = FALSE the bar heights are densities, not counts.
hist(temp_may_celsius,
     breaks = 10,
     col = "#FFA07A",
     main = "Histogram of Temperatures in May (Celsius)",
     xlab = "Temperature (°C)",
     ylab = "Density",
     border = "black",
     freq = FALSE)

# Overlay the kernel density estimate
lines(density(temp_may_celsius), col = "blue", lwd = 2)

Questão 4

# Download the sales table (first line holds the column names)
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Make sure the sales column is numeric before summing
sales$SALES <- as.numeric(sales$SALES)

# Total sales per country
sales_sum <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)

# Share of each country in the overall total, in percent with one decimal
total_sales <- sum(sales_sum$SALES)
sales_percentage <- round(100 * sales_sum$SALES / total_sales, 1)

# Slice labels of the form "<country>-<percent>%"
labels <- paste0(sales_sum$COUNTRY, "-", sales_percentage, "%")

# One rainbow colour per country, shared by the slices and the legend
slice_cols <- rainbow(length(sales_percentage))

# Pie chart
pie(sales_percentage, labels = labels, col = slice_cols,
    main = "Percentage of Total Sales by Country")

# Legend listing the countries
legend("topright", legend = sales_sum$COUNTRY, fill = slice_cols, cex = 0.8)

Questão 5

# Load the built-in InsectSprays dataset
data("InsectSprays")

# Box plot of insect counts for each spray type; outline = FALSE suppresses
# the drawing of outlier points.
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  xlab = "Spray Type",
  ylab = "Insect Count",
  main = "Insect Count by Spray Type"
)

Questão 6

# Directory that holds the monitoring CSV files
file_path <- "C:/Users/Niciu/Downloads/monitoringCloudData/"

# Reads "monitoringCloudData_<workload>.csv" from `file_path`
read_monitoring_csv <- function(workload) {
  read.csv(paste0(file_path, "monitoringCloudData_", workload, ".csv"))
}

# One data frame per workload file
data_0.1 <- read_monitoring_csv("0.1")
data_0.5 <- read_monitoring_csv("0.5")
data_1 <- read_monitoring_csv("1")
data_NONE <- read_monitoring_csv("NONE")

# Convert memory strings with a unit suffix (e.g. "512MB", "1.5GB", "2TB")
# to a numeric amount in megabytes.
#
# Args:
#   memory_col: character vector of memory values; surrounding whitespace is
#     ignored and everything except digits and "." is stripped before parsing.
#
# Returns:
#   Numeric vector of the same length as `memory_col`. Values containing "TB"
#   are multiplied by 1024 * 1024, values containing "GB" by 1024, and all
#   other values are returned as parsed. Entries without a parseable number
#   yield NA.
convert_to_mb <- function(memory_col) {
  memory_col <- trimws(memory_col)

  # Parse the numeric part once instead of once per unit branch
  magnitude <- as.numeric(gsub("[^0-9.]", "", memory_col))

  # Binary multipliers throughout, so TB is consistent with the GB factor
  # (the previous TB factor of 1,000,000 mixed decimal and binary units).
  multiplier <- ifelse(grepl("TB", memory_col), 1024 * 1024,
                       ifelse(grepl("GB", memory_col), 1024, 1))

  magnitude * multiplier
}

# Adds the usedMemory_MB column (via convert_to_mb) and parses currentTime
# into POSIXct (UTC). Timestamps that do not match the format become NA.
parse_monitoring <- function(df) {
  df$usedMemory_MB <- convert_to_mb(df$usedMemory)
  df$currentTime <- as.POSIXct(df$currentTime,
                               format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  df
}

# Drops rows whose timestamp could not be parsed, adds Time_Hours (hours
# elapsed since the earliest timestamp) and keeps only rows where both
# Time_Hours and usedMemory_MB are finite.
# The NA timestamps are removed BEFORE taking min(): otherwise a single NA
# makes min() return NA, every Time_Hours becomes NA and all rows are lost.
add_elapsed_hours <- function(df) {
  df <- df[!is.na(df$currentTime), ]
  df$Time_Hours <- as.numeric(
    difftime(df$currentTime, min(df$currentTime), units = "hours")
  )
  df[is.finite(df$Time_Hours) & is.finite(df$usedMemory_MB), ]
}

# Line plot of used memory against elapsed time, y axis spanning the data
plot_used_memory <- function(df, title) {
  plot(df$Time_Hours, df$usedMemory_MB, type = "l", col = "black",
       main = title, xlab = "Time (hour)", ylab = "Used Memory (MB)",
       ylim = range(df$usedMemory_MB))
}

# Convert memory to MB and parse the timestamps
data_0.1 <- parse_monitoring(data_0.1)
data_0.5 <- parse_monitoring(data_0.5)
data_1 <- parse_monitoring(data_1)
data_NONE <- parse_monitoring(data_NONE)

# Keep the records of data_0.5 with an unparseable timestamp for inspection
na_current_time_0_5 <- data_0.5[is.na(data_0.5$currentTime), ]

# Same cleaning for every data set (previously only data_0.5 had its NA
# timestamps removed before computing the elapsed time)
data_0.1 <- add_elapsed_hours(data_0.1)
data_0.5 <- add_elapsed_hours(data_0.5)
data_1 <- add_elapsed_hours(data_1)
data_NONE <- add_elapsed_hours(data_NONE)

# Show the 4 plots on a 2x2 grid
par(mfrow = c(2, 2), mar = c(5, 5, 4, 2) + 0.1)

# Plot 1: monitoringCloudData_NONE.csv
plot_used_memory(data_NONE, "Memory Analysis (None Workload)")

# Plot 2: monitoringCloudData_0.1.csv
plot_used_memory(data_0.1, "Memory Analysis (Workload of 0.1)")

# Plot 3: monitoringCloudData_0.5.csv
plot_used_memory(data_0.5, "Memory Analysis (Workload of 0.5)")

# Plot 4: monitoringCloudData_1.csv
plot_used_memory(data_1, "Memory Analysis (Workload of 1.0)")

Questão 7

#carregar as bibliotecas necessárias
library(plotly)

## Carregando pacotes exigidos: ggplot2

## 
## Anexando pacote: 'plotly'

## O seguinte objeto é mascarado por 'package:ggplot2':
## 
##     last_plot

## O seguinte objeto é mascarado por 'package:stats':
## 
##     filter

## O seguinte objeto é mascarado por 'package:graphics':
## 
##     layout

library(dplyr)

## 
## Anexando pacote: 'dplyr'

## Os seguintes objetos são mascarados por 'package:stats':
## 
##     filter, lag

## Os seguintes objetos são mascarados por 'package:base':
## 
##     intersect, setdiff, setequal, union

# Location of the Netflix titles CSV, and the data read from it
file_path <- "C:/Users/Niciu/Downloads/netflix_titles.csv"
netflix_data <- read.csv(file_path)

# Keep titles whose country field names exactly ONE country: no comma in
# the field, and the field is not empty.
single_country_data <- netflix_data %>%
  filter(!grepl(",", country), country != "")

# Number of titles per country, largest first
country_counts <- single_country_data %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# The 10 countries with the most titles
top_countries <- country_counts %>% head(10)

# Pie chart showing label and percentage on each slice
plot_ly(
  data = top_countries,
  type = "pie",
  labels = ~country,
  values = ~count,
  textinfo = "label+percent",
  title = "Top 10 Países com Mais Conteúdos na Netflix (2019)"
)

Questão 8

#carregar as bibliotecas necessárias
library(plotly)
library(dplyr)

# Location of the Netflix titles CSV, and the data read from it
file_path <- "C:/Users/Niciu/Downloads/netflix_titles.csv"
netflix_data <- read.csv(file_path)

# Keep titles whose country field names exactly ONE country: no comma in
# the field, and the field is not empty.
single_country_data <- netflix_data %>%
  filter(!grepl(",", country), country != "")

# Number of titles per country, largest first
country_counts <- single_country_data %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# The 10 countries with the most titles
top_countries <- country_counts %>% head(10)

# Header row: white 14pt text, centred, on a grey background
table_header <- list(
  values = c("País", "Total de Conteúdos"),
  align = "center",
  font = list(size = 14, color = "white"),
  fill = list(color = "gray")
)

# Body: one column of country names and one of counts, centred, 12pt
table_cells <- list(
  values = list(top_countries$country, top_countries$count),
  align = "center",
  font = list(size = 12)
)

# Render the formatted table with Plotly
plot_ly(type = "table", header = table_header, cells = table_cells)

Questão 9

# Carregar as bibliotecas necessárias
library(plotly)
library(dplyr)

# Location of the Netflix titles CSV, and the data read from it
file_path <- "C:/Users/Niciu/Downloads/netflix_titles.csv"
netflix_data <- read.csv(file_path)

# Decade of release: the release year rounded down to a multiple of 10
netflix_data$decade <- floor(netflix_data$release_year / 10) * 10

# Number of titles per decade and content type, ordered by decade, with
# "TV Show" relabelled as "TV Series" ("Movie" is kept as is)
content_data <- netflix_data %>%
  group_by(decade, type) %>%
  summarise(content_count = n(), .groups = "drop") %>%
  arrange(decade) %>%
  mutate(type = recode(type, "TV Show" = "TV Series", "Movie" = "Movie"))

# Fixed colour for each content type
type_colours <- c("TV Series" = "blue", "Movie" = "orange")

# Line chart with markers, one line per content type, plus title and
# axis labels
fig <- plot_ly(
  content_data,
  x = ~decade, y = ~content_count, color = ~type,
  colors = type_colours,
  type = "scatter", mode = "lines+markers",
  line = list(width = 2),
  marker = list(size = 8)
) %>%
  layout(
    title = "Quantidade de Conteúdo por Década na Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de Conteúdo")
  )

# Display the chart
fig

Questão 10

# Carregar as bibliotecas necessárias
library(plotly)
library(dplyr)

# Location of the Netflix titles CSV, and the data read from it
file_path <- "C:/Users/Niciu/Downloads/netflix_titles.csv"
netflix_data <- read.csv(file_path)

# Genres to display, using their translated (Portuguese) names
genres_of_interest <- c("Dramas", "Ação e Aventura", "Comédias")

# Movies released from 2000 to 2010 (inclusive), reduced to the first genre
# listed for each title, with that genre translated to Portuguese and then
# restricted to the genres of interest.
filtered_data <- netflix_data %>%
  filter(release_year >= 2000 & release_year <= 2010, type == "Movie") %>%
  mutate(first_genre = trimws(sub(",.*", "", listed_in))) %>%
  mutate(first_genre = case_when(
    first_genre == "Dramas" ~ "Dramas",
    first_genre == "Action & Adventure" ~ "Ação e Aventura",
    first_genre == "Comedies" ~ "Comédias",
    TRUE ~ first_genre
  )) %>%
  filter(first_genre %in% genres_of_interest)

# Number of movies per release year and genre
genre_counts <- filtered_data %>%
  group_by(release_year, first_genre) %>%
  summarise(count = n(), .groups = "drop")

# Grouped bar chart: start from an empty figure and add one bar trace per
# genre, in the order given by genres_of_interest.
fig <- plot_ly()
for (genre in genres_of_interest) {
  genre_rows <- genre_counts[genre_counts$first_genre == genre, ]
  fig <- add_trace(
    fig,
    x = genre_rows$release_year,
    y = genre_rows$count,
    type = "bar",
    name = genre
  )
}

# Title, axis labels, legend title and side-by-side bars (barmode = "group")
fig <- layout(
  fig,
  title = "Quantidade de Filmes por Gênero (2000-2010)",
  xaxis = list(title = "Ano"),
  yaxis = list(title = "Quantidade de Filmes"),
  barmode = "group",
  legend = list(title = list(text = "Gênero"))
)

# Display the chart
fig

Resolução de Exercícios 12 - Vinicius Nário

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10