Resultados

Questão 1

# Response times in seconds (presumably mean response time, per the "MRT"
# prefix) for each fog configuration. Element i of every vector pairs with
# element i of `clock`, the time between requests in seconds.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Per-series styling, kept in one place so the line plot, its legend and the
# bar charts cannot drift apart.
fog_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
fog_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
fog_cols <- c("gold", "orange", "magenta", "blue", "green", "black")
fog_pch <- c(8, 6, 17, 15, 4, 1)
bar_cols <- c("#E6E6E6", "#666666")

# Only the margins are set through par(): an mfrow setting would be discarded
# by layout() below. The previous margins are restored at the end.
old_par <- par(mar = c(4, 4, 2, 1))

# 3 x 2 grid filled row by row, first row twice as tall as the others. One
# height per row: extra entries are not used. The call is namespace-qualified
# because plotly, attached later in this document, masks layout().
graphics::layout(matrix(1:6, ncol = 2, byrow = TRUE), heights = c(2, 1, 1))

# Panel 1: every configuration on a shared linear scale.
plot(clock, fog_series[[1]],
     type = "o", col = fog_cols[1], pch = fog_pch[1], ylim = c(0, 520),
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     main = "")
for (i in seq_along(fog_series)[-1]) {
  lines(clock, fog_series[[i]], type = "o", col = fog_cols[i], pch = fog_pch[i])
}
legend("topright", legend = fog_labels, col = fog_cols, pch = fog_pch,
       bty = "n")

# Panels 2-6: each fog configuration against the no-fog baseline. The y axis
# is logarithmic because the values span several orders of magnitude.
for (i in seq_len(length(fog_series) - 1L)) {
  barplot(rbind(MRT_sem_F, fog_series[[i]]),
          beside = TRUE, names.arg = clock,
          col = bar_cols, log = "y",
          main = "",
          xlab = "Time between requests (s)", ylab = "Response Time (s)")
  legend("topright", legend = c("w/o Fog", fog_labels[i]), fill = bar_cols,
         bty = "n")
}

# Return to a single-panel device with the original margins so that later
# plots are not squeezed into this grid.
graphics::layout(1)
par(old_par)

Questão 2

# Percentage of each quality rating (rows) within each meal price range
# (columns).
dados <- rbind(
  "Good"      = c(53.8, 33.9, 2.6, 0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c(2.6, 11.9, 36.8, 78.6)
)
colnames(dados) <- c("$10–19", "$20–29", "$30–39", "$40–49")

# One shade of blue per rating, lightest for the lowest rating.
paleta <- c("#B3CDE3", "#6497B1", "#005B96")

# Stacked bars; the y axis runs past 100 to leave room for the legend.
barplot(
  dados,
  col = paleta,
  main = "Meal Quality Rating by Price Category",
  xlab = "Meal Price Range (USD)",
  ylab = "Percentage (%)",
  legend.text = rownames(dados),
  args.legend = list(x = "topright", bty = "n"),
  ylim = c(0, 120)
)

# Light horizontal guides every 20 percentage points.
niveis <- seq(0, 100, 20)
abline(h = niveis, col = "gray90", lty = 2)

Questão 3

data("airquality")

# May observations only, with temperature converted from Fahrenheit to Celsius.
maio <- subset(airquality, Month == 5)
temp_celsius <- (maio$Temp - 32) / 1.8

# Compute the density curve and the histogram bins up front so the y axis can
# be sized to fit both; otherwise the curve may be clipped at the top.
densidade <- density(temp_celsius, na.rm = TRUE)
histograma <- hist(temp_celsius, plot = FALSE)

# freq = FALSE draws densities rather than counts, so the y axis is labelled
# as density.
hist(temp_celsius,
     main = "Distribuição das Temperaturas em Maio (°C)",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     col = "#69b3a2",
     border = "white",
     freq = FALSE,
     ylim = c(0, max(histograma$density, densidade$y)))

# Kernel density estimate over the bars, plus a dashed line at the mean.
lines(densidade, col = "darkblue", lwd = 2)
abline(v = mean(temp_celsius, na.rm = TRUE),
       col = "red", lwd = 2, lty = 2)

Questão 4

# Download the sales table; the first row holds the column names.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Coerce the SALES column to numeric in case it was not read as such.
sales$SALES <- as.numeric(sales$SALES)

# Total sales per country.
vendas_pais <- tapply(X = sales$SALES, INDEX = sales$COUNTRY, FUN = sum)

# Each country's share of the grand total, in percent with one decimal place.
total_geral <- sum(vendas_pais)
porcentagens <- round(100 * vendas_pais / total_geral, 1)

# Slice labels ("<country> - <share> %") and one rainbow colour per country.
paises <- names(vendas_pais)
rotulos <- paste(paises, "-", porcentagens, "%")
cores <- rainbow(length(vendas_pais))

pie(
  vendas_pais,
  labels = rotulos,
  col = cores,
  main = "Porcentagem total de vendas por país"
)

legend("topright", legend = paises, fill = cores)

Questão 5

data(InsectSprays)

# Distribution of insect counts for each spray type. outline = FALSE hides
# points beyond the whiskers, so outliers are not drawn.
boxplot(count ~ spray,
        data = InsectSprays,
        main = "Contagem de insetos por tipo de inseticida",
        xlab = "Tipo de inseticida",
        ylab = "Número de insetos",
        col = "yellow",
        outline = FALSE)

Questão 6

# Reads one monitoring log; every file has a header row.
ler_log <- function(arquivo) {
  read.csv(arquivo, header = TRUE)
}

# One log per workload setting (see the file-name suffix).
data_none <- ler_log("monitoringCloudData_NONE.csv")
data_01 <- ler_log("monitoringCloudData_0.1.csv")
data_05 <- ler_log("monitoringCloudData_0.5.csv")
data_1 <- ler_log("monitoringCloudData_1.csv")

# Prepare a monitoring log for plotting.
#
# Parses `currentTime`, derives the hours elapsed since the first row and
# converts the `usedMemory` strings to megabytes.
#
# Args:
#   df: data frame with a `currentTime` column formatted as
#       "%Y-%m-%d %H:%M:%S" and a `usedMemory` column whose values are numbers
#       optionally followed by a TB, GB or MB suffix. Commas are treated as
#       thousands separators and removed.
#
# Returns:
#   `df` with `currentTime` converted to POSIXct (session time zone),
#   `usedMemory` as character without commas, and two added numeric columns:
#   `elapsedHours` and `memoryMB`. Values without a unit suffix are taken to
#   be megabytes already.
prepare_data <- function(df) {
  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")
  df$elapsedHours <- as.numeric(
    difftime(df$currentTime, df$currentTime[1], units = "hours")
  )

  df$usedMemory <- gsub(",", "", as.character(df$usedMemory))

  # Binary multiples throughout, so TB is consistent with GB (1 GB = 1024 MB).
  # Units are tried in this order, first match wins.
  unit_to_mb <- c(TB = 1024 * 1024, GB = 1024, MB = 1)

  memory_text <- df$usedMemory
  memory_mb <- rep(NA_real_, length(memory_text))
  pending <- rep(TRUE, length(memory_text))

  # Convert each element only with the unit it actually carries. A nested
  # ifelse() would run as.numeric() on every element for every unit and warn
  # about NAs for all the strings that carry a different suffix.
  for (unit in names(unit_to_mb)) {
    hit <- pending & grepl(unit, memory_text, fixed = TRUE)
    memory_mb[hit] <-
      as.numeric(gsub(unit, "", memory_text[hit], fixed = TRUE)) *
      unit_to_mb[[unit]]
    pending <- pending & !hit
  }

  # Remaining values have no recognised suffix: read them as plain numbers.
  memory_mb[pending] <- as.numeric(memory_text[pending])

  df$memoryMB <- memory_mb
  df
}

# Replace each raw log with its prepared version (parsed timestamps plus the
# elapsedHours and memoryMB columns added by prepare_data()).
#
# The `##` lines below are recorded console output, kept verbatim. They are the
# "NAs introduced by coercion" warnings raised by the as.numeric(gsub(...))
# calls inside prepare_data()'s nested ifelse(): ifelse() evaluates a branch
# over the whole vector, so strings carrying a different unit suffix fail to
# parse in that branch.
data_none <- prepare_data(data_none)
data_01   <- prepare_data(data_01)

## Warning in ifelse(grepl("GB", df$usedMemory), as.numeric(gsub("GB", "", : NAs
## introduzidos por coerção

## Warning in ifelse(grepl("MB", df$usedMemory), as.numeric(gsub("MB", "", : NAs
## introduzidos por coerção
## Warning in ifelse(grepl("MB", df$usedMemory), as.numeric(gsub("MB", "", : NAs
## introduzidos por coerção

data_05   <- prepare_data(data_05)

## Warning in ifelse(grepl("GB", df$usedMemory), as.numeric(gsub("GB", "", : NAs
## introduzidos por coerção
## Warning in ifelse(grepl("GB", df$usedMemory), as.numeric(gsub("GB", "", : NAs
## introduzidos por coerção
## Warning in ifelse(grepl("GB", df$usedMemory), as.numeric(gsub("GB", "", : NAs
## introduzidos por coerção

data_1    <- prepare_data(data_1)

# 2 x 2 grid filled row by row: one memory-usage curve per workload.
layout(matrix(1:4, nrow = 2, byrow = TRUE))

# Each panel pairs a prepared log with its title, in drawing order.
paineis <- list(
  list(dados = data_none, titulo = "Memory Usage - No Workload"),
  list(dados = data_01, titulo = "Memory Usage - Workload 0.1"),
  list(dados = data_05, titulo = "Memory Usage - Workload 0.5"),
  list(dados = data_1, titulo = "Memory Usage - Workload 1.0")
)

for (painel in paineis) {
  registro <- painel[["dados"]]
  plot(
    registro$elapsedHours, registro$memoryMB,
    type = "l",
    xlab = "Time (hours)", ylab = "Used Memory (MB)",
    main = painel[["titulo"]]
  )
}

Questão 7

library(plotly)

## Warning: pacote 'plotly' foi compilado no R versão 4.5.2

## Carregando pacotes exigidos: ggplot2

## 
## Anexando pacote: 'plotly'

## O seguinte objeto é mascarado por 'package:ggplot2':
## 
##     last_plot

## O seguinte objeto é mascarado por 'package:stats':
## 
##     filter

## O seguinte objeto é mascarado por 'package:graphics':
## 
##     layout

library(dplyr)

## 
## Anexando pacote: 'dplyr'

## Os seguintes objetos são mascarados por 'package:stats':
## 
##     filter, lag

## Os seguintes objetos são mascarados por 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)

## Warning: pacote 'readr' foi compilado no R versão 4.5.2

netflix <- read_csv("netflix_titles.csv")

## Rows: 7787 Columns: 12

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Keep titles attributed to exactly one country: drop missing values and
# comma-separated country lists.
netflix_filtrado <- filter(
  netflix,
  !is.na(country),
  !grepl(",", country)
)

# Number of titles per country, largest first.
contagem <- count(netflix_filtrado, country, name = "total", sort = TRUE)

# The ten countries with the most titles.
top10 <- head(contagem, 10)

# Pie chart labelled with country name and share.
fig_pizza <- plot_ly(
  top10,
  labels = ~country,
  values = ~total,
  type = "pie",
  textinfo = "label+percent",
  insidetextorientation = "radial"
)
fig_pizza <- layout(
  fig_pizza,
  title = "Top 10 países com mais conteúdos na Netflix (um país por título)"
)

fig_pizza

Questão 8

# Header row: bold column titles in white on a gray background.
cabecalho <- list(
  values = c("<b>País</b>", "<b>Total de conteúdos</b>"),
  fill = list(color = "gray"),
  font = list(color = "white", size = 14),
  align = "center"
)

# Table body: one column with the countries, one with their totals.
celulas <- list(
  values = list(top10$country, top10$total),
  align = "center"
)

fig_tabela <- plot_ly(type = "table", header = cabecalho, cells = celulas)
fig_tabela <- layout(
  fig_tabela,
  title = "Tabela - Top 10 países com mais conteúdos na Netflix"
)

fig_tabela

Questão 9

netflix <- read_csv("netflix_titles.csv")

## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Titles per decade and content type. The decade is the release year rounded
# down to a multiple of ten; rows without a release year are dropped.
com_ano <- filter(netflix, !is.na(release_year))
com_decada <- mutate(com_ano, decada = floor(release_year / 10) * 10)
netflix_decada <- com_decada %>%
  count(decada, type, name = "qtd_conteudo") %>%
  arrange(decada)

# One line per content type across the decades.
fig <- plot_ly(
  netflix_decada,
  x = ~decada,
  y = ~qtd_conteudo,
  color = ~type,
  colors = c("blue", "orange"),
  type = "scatter",
  mode = "lines+markers"
)
fig <- layout(
  fig,
  title = "Quantidade de conteúdo por década na Netflix",
  xaxis = list(title = "Década"),
  yaxis = list(title = "Qtd. Conteúdo"),
  legend = list(title = list(text = ""))
)

fig

Questão 10

library(stringr)

netflix <- read_csv("netflix_titles.csv")

## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Movies released from 2000 through 2010, tagged with the first genre named in
# `listed_in` (the text before the first comma, trimmed).
filmes <- netflix %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010) %>%
  mutate(primeiro_genero = str_trim(str_split_fixed(listed_in, ",", 2)[, 1]))

# Restrict to the three genres being compared.
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")
filmes_filtrados <- filter(filmes, primeiro_genero %in% generos_alvo)

# Releases per year and genre.
contagem <- count(filmes_filtrados, release_year, primeiro_genero, name = "qtd")

# Grouped bars: one bar per genre within each release year.
fig <- plot_ly(
  contagem,
  x = ~release_year,
  y = ~qtd,
  color = ~primeiro_genero,
  colors = c("blue", "orange", "green"),
  type = "bar"
)
fig <- layout(
  fig,
  barmode = "group",
  title = "Quantidade de lançamentos por gênero (2000–2010)",
  xaxis = list(title = "Ano de Lançamento"),
  yaxis = list(title = "Qtd. de Lançamentos"),
  legend = list(title = list(text = "Gênero"))
)

fig

Exercício 12 [Visualização de Dados]

Maria Gabrielly Anísio de Santana

Resultados

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10