Introdução

Este relatório, elaborado como resposta à Atividade 12, tem como objetivo demonstrar a manipulação de dados, a plotagem de gráficos, a conversão de tabelas em gráficos e o domínio de pacotes R.

Questão 1

# Data: one MRT vector per fog configuration (1, 3, 5, 10 and 15 fogs, plus
# the run without fog); each vector holds one value per entry of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Shared x values for every MRT series above
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Plot layout: one row with two panels, the left one twice as wide as the right
layout(matrix(c(1, 2), nrow = 1, ncol = 2), widths = c(2, 1))

# First chart: one line with point markers per fog configuration.
# The vectors below are parallel: entry i describes series i.
mrt_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
series_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
series_colors <- c("black", "yellow", "red", "blue", "purple", "green")
series_symbols <- c(4, 11, 1, 2, 5, 4)

# The first series opens the plot; the y axis spans 0 to the largest MRT_1F value
plot(
  clock, mrt_series[[1]],
  type = "l", col = series_colors[1], pch = series_symbols[1], lwd = 2,
  ylim = c(0, max(MRT_1F)),
  xlab = "Clock", ylab = "MRT", main = "Gráfico de Linhas"
)
points(clock, mrt_series[[1]], col = series_colors[1], pch = series_symbols[1])

# Remaining series are overlaid on the same axes, in their original order
for (i in seq_along(mrt_series)[-1]) {
  lines(clock, mrt_series[[i]], col = series_colors[i], lwd = 2)
  points(clock, mrt_series[[i]], col = series_colors[i], pch = series_symbols[i])
}

legend(
  "topright",
  legend = series_labels, col = series_colors,
  lty = 1, lwd = 2, pch = series_symbols
)

# Second figure: grouped bar charts on a logarithmic y axis,
# arranged on a 3 x 2 grid of panels
par(mfrow = c(3, 2))

# Each matrix pairs one fog configuration (row 1) with the run without fog
# (row 2). w_1 must use MRT_1F; it was previously built from MRT_15F, which
# made the "1 Fog" panel a duplicate of the "15 Fog" one.
w_1 <- rbind(MRT_1F, MRT_sem_F)
w_3 <- rbind(MRT_3F, MRT_sem_F)
w_5 <- rbind(MRT_5F, MRT_sem_F)
w_10 <- rbind(MRT_10F, MRT_sem_F)
w_15 <- rbind(MRT_15F, MRT_sem_F)

# Panels in drawing order; the list names are used as the legend labels
fog_panels <- list(
  "1 Fog" = w_1,
  "3 Fog" = w_3,
  "5 Fog" = w_5,
  "10 Fog" = w_10,
  "15 Fog" = w_15
)

# The knitted output of the original report recorded, after every barplot()
# call: "Warning in yinch(0.1): y log scale: yinch() is nonsense".
for (fog_label in names(fog_panels)) {
  barplot(
    fog_panels[[fog_label]],
    beside = TRUE, log = "y", col = c("#E6E6E6", "#666666"),
    names.arg = clock,
    xlab = "Time between Things requests", ylab = "Response time (s)",
    legend.text = c(fog_label, "W/O Fog"),
    args.legend = list(x = "topright")
  )
}

Questão 2

# Table data: percentage of each quality rating within every price range
price <- c("$10-19", "$20-29", "$30-39", "$40-49")
Good <- c(53.8, 33.9, 2.6, 0.0)
Very_Good <- c(43.6, 54.2, 60.5, 21.4)
Excellent <- c(2.6, 11.9, 36.9, 78.6)

# One row per rating, one column per price range
quality_by_price <- rbind(Good, Very_Good, Excellent)

# Stacked bars (beside = FALSE): one bar per price range, one segment per rating
barplot(
  height = quality_by_price,
  names.arg = price,
  beside = FALSE,
  col = c("blue", "orange", "green"),
  main = "Qualidade da Refeição por Faixa de Preço",
  xlab = "Faixa de Preço",
  ylab = "Porcentagem (%)",
  legend.text = c("Good", "Very Good", "Excellent"),
  args.legend = list(title = "Qualidade", x = "topright")
)

Questão 3

# Load the airquality dataset
data("airquality")

# Keep only the rows for May (Month == 5)
may_data <- subset(airquality, Month == 5)

# Convert the temperatures from Fahrenheit to Celsius
may_data$Temp_C <- (may_data$Temp - 32) / 1.8

# Histogram on the density scale so the density curve added below shares the
# same y axis. `freq = FALSE` is spelled out instead of the partially matched
# `prob = TRUE`, and the y label names what is actually plotted (a density,
# not a frequency count).
hist(may_data$Temp_C,
     main = "Histograma das Temperaturas em Maio",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     col = "lightblue",
     border = "black",
     freq = FALSE)

# Overlay the kernel density estimate of the same temperatures
lines(density(may_data$Temp_C), col = "red", lwd = 2)

Questão 4

# Data: sales table downloaded from the URL below (first line is the header)
sales_url <- "https://training-course-material.com/images/8/8f/Sales.txt"
sales <- read.table(sales_url, header = TRUE)

# Share of each row in the total sales, as a percentage with one decimal place
percentual <- round(sales$SALES / sum(sales$SALES) * 100, 1)

# Pie chart: each slice is labelled with the country and its percentage
cores <- c("red", "blue", "green", "yellow", "purple", "orange")
slice_labels <- paste(sales$COUNTRY, percentual, "%")
pie(
  sales$SALES,
  labels = slice_labels,
  col = cores,
  main = "Percentual de Vendas por País"
)

# Legend mapping each fill colour to its country
legend("topright", legend = sales$COUNTRY, fill = cores)

Questão 5

# Load the InsectSprays dataset
data(InsectSprays)

# One box per spray type; outline = FALSE keeps outlier points off the plot
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "orange",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos",
  main = "Contagem de Insetos por Tipo de Inseticida"
)

Questão 6

# Load the data.
# The four CSV files live in the same folder; the folder is named once here so
# that only this line has to change when the script runs on another machine.
monitoring_dir <- "C:/Users/damar/OneDrive/Área de Trabalho/Atividades - R/monitoringCloudData (3)"

monitoringCloudData_0.1 <- read.csv(file.path(monitoring_dir, "monitoringCloudData_0.1.csv"))
monitoringCloudData_0.5 <- read.csv(file.path(monitoring_dir, "monitoringCloudData_0.5.csv"))
monitoringCloudData_1 <- read.csv(file.path(monitoring_dir, "monitoringCloudData_1.csv"))
monitoringCloudData_NONE <- read.csv(file.path(monitoring_dir, "monitoringCloudData_NONE.csv"))

# Convert memory-size strings such as "512MB", "1.5GB" or "2TB" to megabytes.
#
# memory: character vector (or anything coercible with as.character()) holding
#   a number followed by an optional "TB", "GB" or "MB" suffix. A value with
#   no suffix is taken as already being in MB.
# Returns a numeric vector with one value in MB per element of `memory`.
#   Elements whose numeric part cannot be parsed become NA (as.numeric() emits
#   its usual coercion warning).
#
# The numeric part is parsed with as.numeric() instead of eval(parse()), so
# text read from a file is never executed as R code, and the function works on
# whole vectors as well as on single strings.
convert_to_mb <- function(memory) {
  memory <- trimws(as.character(memory))

  # Unit multipliers are the ones the original implementation used.
  # NOTE(review): TB uses a decimal factor (1000000) while GB uses a binary
  # one (1024) -- confirm this mix is intended.
  multiplier <- rep(1, length(memory))
  multiplier[grepl("TB", memory, fixed = TRUE)] <- 1000000
  multiplier[grepl("GB", memory, fixed = TRUE)] <- 1024

  # Strip the unit suffix and parse what is left as a plain number
  value <- as.numeric(gsub("TB|GB|MB", "", memory))

  value * multiplier
}

# Prepare one monitoring data frame for plotting:
#   * usedMemory  -> numeric, converted element by element with convert_to_mb()
#   * currentTime -> POSIXct
#   * timeDiff    -> hours elapsed since the earliest currentTime in the frame
# Returns the modified data frame.
prepare_monitoring_data <- function(monitoring) {
  # vapply() guarantees a numeric vector (sapply() would silently return a
  # list for zero-row input or non-scalar results)
  monitoring$usedMemory <- vapply(monitoring$usedMemory, convert_to_mb, numeric(1))

  # NOTE(review): relies on as.POSIXct()'s default parsing and time zone --
  # confirm against the timestamp format used in the CSV files
  monitoring$currentTime <- as.POSIXct(monitoring$currentTime)

  monitoring$timeDiff <- as.numeric(
    difftime(monitoring$currentTime, min(monitoring$currentTime), units = "hours")
  )

  monitoring
}

# Apply the same preparation to the four data sets
monitoringCloudData_0.1 <- prepare_monitoring_data(monitoringCloudData_0.1)
monitoringCloudData_0.5 <- prepare_monitoring_data(monitoringCloudData_0.5)
monitoringCloudData_1 <- prepare_monitoring_data(monitoringCloudData_1)
monitoringCloudData_NONE <- prepare_monitoring_data(monitoringCloudData_NONE)

# Four panels on a 2 x 2 grid; matrix(1:4, ncol = 2) numbers them column by column
layout(matrix(1:4, ncol = 2))

# Data sets and their panel titles, in drawing order
monitoring_sets <- list(
  monitoringCloudData_0.1,
  monitoringCloudData_0.5,
  monitoringCloudData_1,
  monitoringCloudData_NONE
)
panel_titles <- c(
  "Monitoramento 0.1",
  "Monitoramento 0.5",
  "Monitoramento 1",
  "Monitoramento NONE"
)

# One line chart per data set: used memory against elapsed time
for (i in seq_along(monitoring_sets)) {
  current_set <- monitoring_sets[[i]]
  plot(
    current_set$timeDiff, current_set$usedMemory,
    type = "l", col = "black",
    main = panel_titles[i],
    xlab = "Tempo (horas)", ylab = "Memória Usada (MB)"
  )
}

Questão 7

library(ggplot2)
library(plotly)
library(dplyr)
library(stringr)

netflix_csv <- "C:/Users/damar/OneDrive/Área de Trabalho/Atividades - R/netflix_titles.csv"
netflix_titles <- read.csv(netflix_csv)

# Keep rows whose country is not NA and contains no comma
# (the pattern also rejects empty strings)
filtrado <- netflix_titles %>%
  filter(!is.na(country) & str_detect(country, "^[^,]+$"))

# Number of titles per country
titles_per_country <- filtrado %>%
  group_by(country) %>%
  summarise(count = n())

# Ten countries with the most titles, largest first
conteudo_por_pais <- titles_per_country %>%
  arrange(desc(count)) %>%
  slice_head(n = 10)

# Pie chart with Plotly; both axes get the same "hide everything" settings
hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
fig <- plot_ly(
  conteudo_por_pais,
  labels = ~country, values = ~count,
  type = "pie"
)
fig <- fig %>%
  layout(
    title = "Top 10 Países com Mais Conteúdo",
    xaxis = hidden_axis,
    yaxis = hidden_axis
  )

# Show the interactive chart
fig

Questão 8

# Table header: two column titles, white text on a grey background
table_header <- list(
  values = c("País", "Total de conteúdos"),
  fill = list(color = "grey"),
  font = list(color = "white", size = 12),
  align = "center"
)

# Table body: countries and their counts stacked with rbind()
table_cells <- list(
  values = rbind(conteudo_por_pais$country, conteudo_por_pais$count),
  fill = list(color = "lightgrey"),
  align = "center"
)

# Build the table with Plotly
fig <- plot_ly(type = "table", header = table_header, cells = table_cells)

# Show the interactive table
fig

Questão 9

# Add a column with the decade of each release year (e.g. 1997 -> 1990)
netflix_titles <- netflix_titles %>%
  mutate(decade = floor(release_year / 10) * 10)

# Keep only titles with a single country of origin (no comma in `country`)
filtrado <- netflix_titles %>%
  filter(!is.na(country) & str_detect(country, "^[^,]+$"))

# Count titles per decade and type.
# `.groups = "drop"` returns an ungrouped result, which silences the
# "summarise() has grouped output by 'decade'" message and keeps leftover
# grouping out of the data handed to plotly below.
conteudo_por_decada <- filtrado %>%
  group_by(decade, type) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(decade)

# Split the counts by type
series <- conteudo_por_decada %>% filter(type == "TV Show")
filmes <- conteudo_por_decada %>% filter(type == "Movie")

# Line chart with markers using Plotly. Each trace receives its own data frame
# through `data =`, so the formulas refer to plain column names.
fig <- plot_ly() %>%
  add_trace(
    data = series, x = ~decade, y = ~count,
    type = "scatter", mode = "lines+markers",
    name = "Séries", line = list(color = "blue")
  ) %>%
  add_trace(
    data = filmes, x = ~decade, y = ~count,
    type = "scatter", mode = "lines+markers",
    name = "Filmes", line = list(color = "yellow")
  ) %>%
  layout(
    title = "Quantidade de Conteúdo por Década no Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de Conteúdo"),
    legend = list(title = list(text = "Tipo de Conteúdo"))
  )

# Show the interactive chart
fig

Questão 10

# Keep only movies released from 2000 to 2010 (both years included)
filmes_2000_2010 <- netflix_titles %>%
  filter(type == "Movie" & release_year >= 2000 & release_year <= 2010)

# Use only the first genre listed in `listed_in`.
# vapply() guarantees a character vector even when the filter above returns
# no rows (sapply() would return an empty list in that case).
filmes_2000_2010 <- filmes_2000_2010 %>%
  mutate(primary_genre = vapply(str_split(listed_in, ","), `[`, character(1), 1))

# Keep the three genres of interest
filmes_2000_2010 <- filmes_2000_2010 %>%
  filter(primary_genre %in% c("Dramas", "Action & Adventure", "Comedies"))

# Count movies per year and genre.
# `.groups = "drop"` returns an ungrouped result, which silences the
# "summarise() has grouped output by 'release_year'" message and keeps
# leftover grouping out of the data handed to plotly below.
conteudo_por_ano_genero <- filmes_2000_2010 %>%
  group_by(release_year, primary_genre) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(release_year)

# Side-by-side bar chart using Plotly: one trace per genre
fig <- plot_ly() %>%
  add_trace(
    data = conteudo_por_ano_genero %>% filter(primary_genre == "Dramas"),
    x = ~release_year, y = ~count, type = "bar",
    name = "Dramas", marker = list(color = "blue")
  ) %>%
  add_trace(
    data = conteudo_por_ano_genero %>% filter(primary_genre == "Action & Adventure"),
    x = ~release_year, y = ~count, type = "bar",
    name = "Action & Adventure", marker = list(color = "orange")
  ) %>%
  add_trace(
    data = conteudo_por_ano_genero %>% filter(primary_genre == "Comedies"),
    x = ~release_year, y = ~count, type = "bar",
    name = "Comedies", marker = list(color = "green")
  ) %>%
  layout(
    barmode = "group",
    title = "Quantidade de Filmes por Gênero (2000-2010)",
    xaxis = list(title = "Ano"),
    yaxis = list(title = "Quantidade de Filmes"),
    legend = list(title = list(text = "Gênero"))
  )

# Show the interactive chart
fig

Visualização de Dados - 12

Damaris Rodrigues

2024-08-26

Introdução

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10