# Questões

# Questão 1 ----

# Data: one response-time series per fog configuration (seven values each).
# The i-th value of every series pairs with the i-th entry of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

# Series measured without any fog node ("w/o Fog" in the legends).
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Request intervals used as the x axis: 0.1, then 0.5 to 3 in steps of 0.5.
clock <- c(0.1, seq(from = 0.5, to = 3, by = 0.5))

# Figure grid: panel 1 spans the whole first row (line chart); panels 2-7 sit
# in a 3 x 2 grid underneath. The first row is taller than the others.
layout(
  rbind(c(1, 1),
        c(2, 3),
        c(4, 5),
        c(6, 7)),
  heights = c(3, 2, 2, 2)
)

# Margins (bottom, left, top, right) for the line chart.
par(mar = c(4, 4, 3, 2))

# Per-series styling, kept in parallel vectors so the curves and the legend
# are guaranteed to use the same label / colour / symbol.
rotulos_fog <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
cores_fog <- c("black", "yellow", "red", "blue", "purple", "green")
simbolos_fog <- c(4, 9, 1, 5, 16, 4)
curvas_fog <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)

# The first series opens the plot; its maximum fixes the top of the y axis.
plot(
  x = clock,
  y = curvas_fog[[1]],
  type = "o",
  pch = simbolos_fog[1],
  col = cores_fog[1],
  ylim = c(0, max(MRT_1F)),
  xlab = "Time Between Things requests (seconds)",
  ylab = "Response Time (sec.)"
)

# Remaining series are overlaid on the same axes, in legend order.
for (i in seq_along(curvas_fog)[-1]) {
  lines(clock, curvas_fog[[i]],
        type = "o",
        pch = simbolos_fog[i],
        col = cores_fog[i])
}

legend(
  "topright",
  legend = rotulos_fog,
  col = cores_fog,
  pch = simbolos_fog,
  lty = 1,
  cex = 0.8
)

# Bar charts
# Two fill colours: the first is used for the no-fog reference bars, the
# second for the fog configuration being compared.
cores <- c("#E6E6E6", "#666666")

# Draw one grouped bar chart (log-scaled y axis) comparing a fog configuration
# against a reference series, with a two-entry legend.
#
# Args:
#   dados:      numeric vector with the series for the fog configuration.
#   titulo:     legend label for `dados` (e.g. "1 Fog").
#   referencia: reference series, drawn as the first bar of every pair.
#               Defaults to the global `MRT_sem_F`.
#   intervalos: labels for the bar groups, one per element of `dados`.
#               Defaults to the global `clock`.
#   paleta:     two fill colours (reference, `dados`). The default is spelled
#               out instead of reading the global `cores`, because `cores` is
#               reassigned to a different palette further down this script.
#
# Returns:
#   Invisibly, the value returned by `legend()`.
#
# Side effects:
#   Draws on the current graphics device. The plot margins are changed only
#   for the duration of the call and restored on exit.
grafico_barras <- function(dados, titulo,
                           referencia = MRT_sem_F,
                           intervalos = clock,
                           paleta = c("#E6E6E6", "#666666")) {
  # rbind() would silently recycle a shorter series; fail loudly instead.
  stopifnot(
    length(dados) == length(referencia),
    length(dados) == length(intervalos)
  )

  margens_anteriores <- par(mar = c(4, 4, 2, 1))
  on.exit(par(margens_anteriores), add = TRUE)

  # One row per series; beside = TRUE places the two bars of each interval
  # next to each other instead of stacking them.
  barplot(rbind(referencia, dados),
          beside = TRUE,
          col = paleta,
          log = "y",
          names.arg = intervalos,
          xlab = "Requests interval (s)",
          ylab = "Response Time (s)")

  legenda <- legend("topright",
                    legend = c("w/o Fog", titulo),
                    fill = paleta,
                    cex = 0.8)

  invisible(legenda)
}

# One bar chart per fog configuration, each compared with the no-fog series.
# These fill panels 2-6 of the seven-panel layout opened for this figure.
grafico_barras(MRT_1F, "1 Fog")
grafico_barras(MRT_3F, "3 Fogs")
grafico_barras(MRT_5F, "5 Fogs")
grafico_barras(MRT_10F, "10 Fogs")
grafico_barras(MRT_15F, "15 Fogs")

# Return the device to a single panel with R's default margins. Without this
# the next base-graphics plot in the script is drawn into the still-empty
# seventh panel, and later ones inherit the multi-panel grid.
# graphics:: is explicit because plotly (attached further down) also exports
# a function named layout().
graphics::layout(matrix(1))
par(mar = c(5, 4, 4, 2) + 0.1)

# Questão 2 ----

# Table data: one row per quality rating, one column per meal-price category.
# Values are percentages (see the y-axis label below).
dados <- rbind(
  "Good"      = c(53.8, 33.9,  2.6,  0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c( 2.6, 11.9, 36.8, 78.6)
)

# Column names are the price categories shown under each bar.
colnames(dados) <- c("$10–19", "$20–29", "$30–39", "$40–49")

# One colour per rating, shared by the bars and the legend.
paleta_avaliacao <- c("lightblue", "gold", "lightgreen")

# beside = FALSE stacks the three ratings within each price category.
barplot(
  dados,
  beside = FALSE,
  col = paleta_avaliacao,
  ylim = c(0, 100),
  main = "Meal Quality Rating by Price Category",
  xlab = "Meal Price",
  ylab = "Percentage (%)"
)

legend(
  "topright",
  title = "Quality Rating",
  legend = rownames(dados),
  fill = paleta_avaliacao
)

# Questão 3 ----

# Load the built-in air-quality dataset.
data(airquality)

# Keep only the observations from May (Month == 5).
maio <- airquality[airquality$Month == 5, ]

# Convert the temperatures from Fahrenheit to Celsius.
temp_celsius <- with(maio, (Temp - 32) / 1.8)

# Histogram on the density scale, so the density curve below shares its axis.
hist(
  temp_celsius,
  freq = FALSE,
  main = "Histograma das Temperaturas de Maio",
  xlab = "Temperatura (°C)",
  ylab = "Densidade",
  col = "lightblue",
  border = "white"
)

# Overlay the kernel density estimate.
curva_densidade <- density(temp_celsius)
lines(curva_densidade, lwd = 2)

# Dashed vertical line at the mean temperature.
abline(v = mean(temp_celsius), lty = 2, lwd = 2)

# Questão 4 ----

# Read the sales table from the course site; the first line holds the column
# names. The code below uses its COUNTRY and SALES columns.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of total sales per row, as a percentage rounded to one decimal place.
porcentagem <- round(100 * prop.table(sales$SALES), 1)

# Slice labels of the form "<country> - <percentage> %".
rotulos <- paste(sales$COUNTRY, "-", porcentagem, "%")

# Slice colours (shared by the pie and its legend).
cores <- c(
  "lightblue", "lightgreen", "pink",
  "orange", "violet", "yellow"
)

# Pie chart of sales per country.
pie(
  sales$SALES,
  main = "Porcentagem Total de Vendas por País",
  labels = rotulos,
  col = cores
)

# Legend mapping each colour to its country.
legend(
  "topright",
  title = "Países",
  legend = sales$COUNTRY,
  fill = cores
)

# Questão 5 ----

# Load the built-in InsectSprays dataset (columns used here: count, spray).
data(InsectSprays)

# One box per spray type. `outline = FALSE` hides the outlier points; it is
# spelled out because `F` is an ordinary variable that can be reassigned,
# whereas `FALSE` is a reserved word.
boxplot(count ~ spray,
        data = InsectSprays,
        outline = FALSE,
        col = "yellow",
        main = "Contagem de Insetos por Tipo de Inseticida",
        xlab = "Tipo de Inseticida",
        ylab = "Quantidade de Insetos")

# Questão 6 ----

# Read the four monitoring logs, one per workload level. The files are looked
# up relative to the current working directory.
cloud_none <- read.csv(file = "monitoringCloudData_NONE.csv")
cloud_01 <- read.csv(file = "monitoringCloudData_0.1.csv")
cloud_05 <- read.csv(file = "monitoringCloudData_0.5.csv")
cloud_10 <- read.csv(file = "monitoringCloudData_1.csv")

# Convert size strings such as "512MB", "1.5GB" or "1.5 GB" to megabytes.
#
# Args:
#   x: character vector (or anything coercible with as.character) where each
#      element is a number followed by one of the upper-case units
#      TB, GB, MB, KB or B. Whitespace around or between the two parts is
#      ignored.
#
# Returns:
#   Numeric vector, same length as `x`, with each size expressed in MB using
#   1024-based steps between units. Elements with a missing or unrecognised
#   unit, or with a number that cannot be parsed, are NA.
converter_para_mb <- function(x) {
  texto <- trimws(as.character(x))

  # Numeric part: whatever is left once the upper-case unit is removed.
  valor <- as.numeric(gsub("[A-Z]+", "", texto))

  # Unit part: only the upper-case letters, so a space between number and
  # unit does not prevent the lookup below from matching.
  unidade <- gsub("[^A-Z]", "", texto)

  # Multipliers to MB. Every step is a factor of 1024, including TB (which
  # used to be 1e6, inconsistent with the other units).
  fator_mb <- c(
    TB = 1024^2,
    GB = 1024,
    MB = 1,
    KB = 1 / 1024,
    B  = 1 / 1024^2
  )

  # Indexing by an unknown or empty unit yields NA, which propagates.
  unname(valor * fator_mb[unidade])
}

# Add the derived columns used for plotting to one monitoring data frame.
#
# Args:
#   df: data frame with a `currentTime` column holding timestamps formatted
#       as "YYYY-mm-dd HH:MM:SS" and a `usedMemory` column holding size
#       strings accepted by converter_para_mb().
#
# Returns:
#   `df` with `currentTime` replaced by its POSIXct parse, plus two new
#   columns: `timeHour` (hours elapsed since the first row) and
#   `usedMemoryMB` (used memory in MB).
preparar_dados <- function(df) {
  # Parse the timestamps; no time zone is given, so the session's is used.
  instantes <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")
  df$currentTime <- instantes

  # Elapsed time is measured from the first row, not from the earliest value.
  decorrido <- difftime(instantes, instantes[1], units = "hours")
  df$timeHour <- as.numeric(decorrido)

  # Used memory expressed in MB.
  df$usedMemoryMB <- converter_para_mb(df$usedMemory)

  df
}

# Add the elapsed-time and memory-in-MB columns to every dataset.
cloud_none <- preparar_dados(cloud_none)
cloud_01 <- preparar_dados(cloud_01)
cloud_05 <- preparar_dados(cloud_05)
cloud_10 <- preparar_dados(cloud_10)

# 2 x 2 grid of panels, filled row by row.
layout(matrix(1:4, nrow = 2, byrow = TRUE))

par(mar = c(4, 4, 3, 1))

# Each panel pairs a dataset with its title; the list order is the order in
# which the panels are filled (none, 0.1, 0.5, 1.0).
paineis <- list(
  list(dados = cloud_none, titulo = "Memory Analysis (None Workload)"),
  list(dados = cloud_01, titulo = "Memory Analysis (Workload of 0.1)"),
  list(dados = cloud_05, titulo = "Memory Analysis (Workload of 0.5)"),
  list(dados = cloud_10, titulo = "Memory Analysis (Workload of 1.0)")
)

# Same line chart for every workload: used memory against elapsed hours.
for (painel in paineis) {
  plot(
    x = painel$dados$timeHour,
    y = painel$dados$usedMemoryMB,
    type = "l",
    lwd = 2,
    main = painel$titulo,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )
}

# Questão 7 ----

# install.packages("plotly")  # run once if plotly is not installed

library(plotly)

netflix <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Normalise the country names once, up front, so the filter and the tally
# below see exactly the same strings (a padded name would otherwise be
# counted as a separate country).
netflix$country <- trimws(netflix$country)

# Keep titles with exactly one country: drop missing or empty values and
# comma-separated lists of several countries.
netflix <- subset(
  netflix,
  !is.na(country) &
    country != "" &
    !grepl(",", country)
)

# Number of titles per country, most frequent first.
country_count <- sort(
  table(netflix$country),
  decreasing = TRUE
)

# Ten countries with the most titles.
top10 <- head(country_count, 10)

# Pie chart: one slice per country, labelled with its name and percentage.
plot_ly(
  labels = names(top10),
  values = as.numeric(top10),
  type = "pie",
  textinfo = "label+percent",
  insidetextorientation = "radial"
) %>%
  layout(
    title = "Top 10 Países com Mais Conteúdo na Netflix",
    legend = list(title = list(text = "<b>Países</b>"))
  )

# Questão 8 ----

library(plotly)

netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE
)

# Trim before filtering: a whitespace-only country would otherwise pass the
# `!= ""` test below and then be counted as an empty-named country.
netflix$country <- trimws(netflix$country)

# Keep titles with exactly one country: drop missing or empty values and
# comma-separated lists of several countries.
netflix <- netflix[
  !is.na(netflix$country) &
    netflix$country != "" &
    !grepl(",", netflix$country),
]

# Number of titles per country, most frequent first.
country_count <- sort(
  table(netflix$country),
  decreasing = TRUE
)

# Ten countries with the most titles, as a two-column data frame.
top10 <- head(country_count, 10)

top10_df <- data.frame(
  Pais = names(top10),
  Total = as.numeric(top10),
  stringsAsFactors = FALSE
)

# Render the ranking as a plotly table: grey header with white text, white
# cells with black text, everything centred.
plot_ly(
  type = "table",
  header = list(
    values = c("País", "Total de conteúdos"),
    fill = list(color = "gray"),
    font = list(color = "white", size = 14),
    align = "center"
  ),
  cells = list(
    # One list element per table column.
    values = list(
      top10_df$Pais,
      top10_df$Total
    ),
    fill = list(color = "white"),
    align = "center",
    font = list(color = "black", size = 12)
  )
)

# Questão 9 ----

library(plotly)

netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE
)

# Decade of release, e.g. 1994 -> 1990.
netflix$decade <- floor(netflix$release_year / 10) * 10

movies <- netflix[netflix$type == "Movie", ]
tvshows <- netflix[netflix$type == "TV Show", ]

# Titles per decade for each type. table() orders its categories, so the
# data frames below are already in ascending decade order.
movies_count <- table(movies$decade)
tvshows_count <- table(tvshows$decade)

movies_df <- data.frame(
  decade = as.numeric(names(movies_count)),
  total = as.numeric(movies_count)
)

tvshows_df <- data.frame(
  decade = as.numeric(names(tvshows_count)),
  total = as.numeric(tvshows_count)
)

# Every decade present in the data; used as the x-axis tick positions.
decadas <- sort(unique(netflix$decade))

# add_trace() is used instead of add_lines() because add_lines() already
# sets mode = "lines" itself, which conflicts with the "lines+markers" mode
# wanted here. Marker colours are set explicitly to match the lines.
plot_ly() %>%
  add_trace(
    data = tvshows_df,
    x = ~decade,
    y = ~total,
    type = "scatter",
    mode = "lines+markers",
    name = "TV Series",
    line = list(color = "blue"),
    marker = list(color = "blue")
  ) %>%
  add_trace(
    data = movies_df,
    x = ~decade,
    y = ~total,
    type = "scatter",
    mode = "lines+markers",
    name = "Movies",
    line = list(color = "yellow"),
    marker = list(color = "yellow")
  ) %>%
  layout(
    xaxis = list(
      title = "Década",
      tickmode = "array",
      tickvals = decadas,
      ticktext = decadas
    ),
    yaxis = list(
      title = "Quantidade de Conteúdo"
    )
  )

# Questão 10 ----

library(plotly)

netflix <- read.csv(file = "netflix_titles.csv", stringsAsFactors = FALSE)

# Movies released from 2000 to 2010, both years included.
eh_filme <- netflix$type == "Movie"
no_periodo <- netflix$release_year >= 2000 & netflix$release_year <= 2010
movies <- netflix[eh_filme & no_periodo, ]

# Main genre = the first entry of the comma-separated `listed_in` field.
primeiro_genero <- sub(",.*", "", movies$listed_in)
movies$main_genre <- trimws(primeiro_genero)

# Only these three genres are compared.
genres <- c("Dramas", "Action & Adventure", "Comedies")

movies_filtered <- movies[movies$main_genre %in% genres, ]

# Year x genre contingency table, flattened to one row per (year, genre).
count_data <- table(
  movies_filtered$release_year,
  movies_filtered$main_genre
)

count_df <- setNames(
  as.data.frame(count_data),
  c("year", "genre", "total")
)

# One data frame per genre, each feeding its own bar series.
dramas_df <- subset(count_df, genre == "Dramas")
action_df <- subset(count_df, genre == "Action & Adventure")
comedy_df <- subset(count_df, genre == "Comedies")

# Build the figure one series at a time, then show it.
figura <- plot_ly()
figura <- add_bars(figura, data = dramas_df,
                   x = ~year, y = ~total, name = "Drama")
figura <- add_bars(figura, data = action_df,
                   x = ~year, y = ~total, name = "Ação e Aventura")
figura <- add_bars(figura, data = comedy_df,
                   x = ~year, y = ~total, name = "Comédia")

# barmode = "group" places the three genres side by side within each year.
figura <- layout(
  figura,
  barmode = "group",
  xaxis = list(title = "Ano de Lançamento"),
  yaxis = list(title = "Qnt. de Lançamentos")
)

figura