# Atividade Lista 12

# Questão 1

# Line chart: one response-time series per fog configuration, plotted against
# the interval between Things requests.

# Axis limits used by the plot() call below.
xlim <- c(0, 3)
ylim <- c(0, 518)

# Interval between Things requests (x axis); every MRT_* vector has one value
# per entry of `clock`.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Response-time measurements; the name suffix is the fog configuration
# (presumably number of fog nodes, `sem_F` = without fog -- confirm with the
# experiment description).
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937, 3.3041601673945418, 1.1823111717498882, 1.1892293502386786)

MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453, 0.4502068233039181, 0.4496185276300172, 0.4543157082191288)

MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928, 0.30655265997285697, 0.3055812715727718, 0.3053297166713006)

MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516, 0.19615594370806338, 0.2034569237883263, 0.19617420889447737)

MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515, 0.1598052657153692, 0.1645934043532696, 0.16216563797118075)

MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899, 0.6146761002685637, 0.6096747087200697, 0.6125810476877268)

# `clock` is on the x axis and the response times on the y axis, so the axis
# titles are assigned accordingly (they were previously swapped).
plot(clock, MRT_1F,
  type = "o", pch = 0, xlim = xlim, ylim = ylim,
  xlab = "Time between Things requests (seconds)",
  ylab = "Time Response (sec.)"
)

lines(clock, MRT_3F, type = "o", col = "yellow", pch = 1)
lines(clock, MRT_5F, type = "o", col = "red", pch = 2)
lines(clock, MRT_10F, type = "o", col = "blue", pch = 3)
lines(clock, MRT_15F, type = "o", col = "purple", pch = 4)
lines(clock, MRT_sem_F, type = "o", col = "green", pch = 5)

# Without a legend the six series cannot be told apart; colours and symbols
# mirror the plot()/lines() calls above.
legend("topright",
  legend = c("1 Fog", "3 Fog", "5 Fog", "10 Fog", "15 Fog", "w/aFog"),
  col = c("black", "yellow", "red", "blue", "purple", "green"),
  pch = 0:5, lty = 1
)

# Six equally sized panels in a 3 x 2 arrangement. Each panel id is listed on
# two consecutive rows, so the matrix needs 6 rows to hold all 12 entries
# (with nrow = 3 only the first six values were used and panels 5-6 never
# existed). `graphics::` is explicit because plotly, attached later in this
# script, masks `layout`.
graphics::layout(matrix(c(1, 2,
                          1, 2,
                          3, 4,
                          3, 4,
                          5, 6,
                          5, 6), nrow = 6, ncol = 2, byrow = TRUE))

colors <- c("#E6E6E6", "#666666")

# Draw one grouped bar chart for a response-time vector.
#
# mrt        numeric vector with one value per entry of `intervals`
# fog_label  legend text for the second bar of each pair
# main       optional panel title
# intervals  bar-group labels (defaults to the script-level `clock`)
# fill       the two bar colours (defaults to the script-level `colors`)
#
# Returns (invisibly) the 2-row matrix that was plotted: row 1 is `mrt`,
# row 2 is log(mrt).
#
# NOTE(review): the second bar is the natural log of the same series, yet the
# legend labels the pair "w/aFog" vs "<n> Fog". Confirm whether the first
# series was meant to be MRT_sem_F instead.
plot_mrt_panel <- function(mrt, fog_label, main = NULL,
                           intervals = clock, fill = colors) {
  valores <- rbind(mrt, log(mrt), deparse.level = 0)
  barplot(valores,
    main = main, names.arg = intervals, col = fill, beside = TRUE,
    xlab = "Time between Things requests", ylab = "Response time (s)"
  )
  legend("topright",
    legend = c("w/aFog", fog_label), col = fill, pch = c(15, 15)
  )
  invisible(valores)
}

# Each panel now plots its own series (the 3F and 15F panels previously
# re-plotted the 1F matrix, and the 5F matrix was built from the 1F data).
val_MRT_1F <- plot_mrt_panel(MRT_1F, "1 Fog")
val_MRT_3F <- plot_mrt_panel(MRT_3F, "3 Fog")
val_MRT_5F <- plot_mrt_panel(MRT_5F, "5 Fog")
val_MRT_10F <- plot_mrt_panel(MRT_10F, "10 Fog")
val_MRT_15F <- plot_mrt_panel(MRT_15F, "15 Fog", main = "Grafico name")

# Questão 2

# Install ggplot2 (with its dependencies) only when it is not already
# available, then attach it. Installing unconditionally re-downloads the
# package on every run, and installing again after library() tries to replace
# a package that is already loaded.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2",
    repos = "http://cran.rstudio.com/", dependencies = TRUE
  )
}
library(ggplot2)

# Grouped bar chart: percentage of meals per quality rating, one bar per
# price band.
quality_ratings <- c("Good", "Very Good", "Excellent")
metal_prices <- c("$10-19", "$20-29", "$30-39", "$40-49")

# Percentages only (rows = quality rating, columns = price band). Keeping the
# labels out of the matrix keeps it numeric, so no character -> numeric
# round trip is needed afterwards.
matriz <- matrix(
  c(
    53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6
  ),
  nrow = length(quality_ratings), byrow = TRUE
)
colnames(matriz) <- metal_prices

# check.names = FALSE preserves the "$10-19"-style column names verbatim.
df <- data.frame(
  Quality_Rating = quality_ratings, matriz,
  check.names = FALSE, row.names = NULL
)

# One colour per price band, shared by the bars and the legend.
bar_colors <- c("plum4", "orchid", "purple", "purple4")

barplot(
  t(as.matrix(df[, -1])), # transpose: one group of bars per quality rating
  beside = TRUE, # bars side by side rather than stacked
  col = bar_colors,
  main = "Qualidade de refeição",
  xlab = "Quality Rating",
  ylab = "Porcentagem (%)",
  names.arg = df$Quality_Rating,
  ylim = c(0, 100)
)
# The legend is drawn separately so that it can carry a title.
legend("topleft", title = " Metal Prices ", legend = metal_prices, fill = bar_colors)

# Questão 3

# Density histogram of the May temperatures of `airquality`, in Celsius.
data(airquality)

# Convert every reading from Fahrenheit to Celsius.
airquality$Temp_Celsius <- (airquality$Temp - 32) / 1.8

# The chart title announces May ("Maio"), so only rows with Month == 5 are
# plotted (previously all months were included).
temp_maio <- airquality$Temp_Celsius[airquality$Month == 5]

# probability = TRUE puts densities on the y axis, hence the "Densidade"
# label; it also makes the histogram comparable with the density curve.
hist_plot <- hist(temp_maio,
  breaks = 20, col = "lightblue",
  main = "Histograma das Temperaturas em Graus Celsius (Maio)",
  xlab = "Temperatura (°C)", ylab = "Densidade", probability = TRUE
)

lines(density(temp_maio), col = "blue", lwd = 2)

legend("topright", legend = c("Densidade"), col = c("blue"), lwd = 2)

# Questão 4

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Pie chart with each row's share of the total sales.
sales <- read.table("https://training-course-material.com/images/8/8f/Sales.txt", header = TRUE)

# Refer to the column by its bare name so mutate() works on the data flowing
# through the pipe rather than on the global `sales` object.
sales_percent <- sales %>%
  mutate(Percentage = SALES / sum(SALES) * 100)

# NOTE(review): one slice is drawn per row; this assumes each COUNTRY appears
# only once in the file -- confirm, or aggregate by COUNTRY first.
slice_colors <- rainbow(nrow(sales_percent))
slice_labels <- paste(
  sales_percent$COUNTRY, "\n", round(sales_percent$Percentage, 1), "%"
)

pie(sales_percent$Percentage, labels = slice_labels, col = slice_colors)

title("Porcentagem Total de Vendas por País")

legend("topright", legend = sales_percent$COUNTRY, fill = slice_colors, title = "País")

# Questão 5

# Box plot of insect counts for each spray in the InsectSprays dataset.
data(InsectSprays)

boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE, # do not draw the outlier points
  col = "yellow",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos",
  main = "Boxplot de Contagens de Insetos por Tipo de Inseticida"
)

# Questão 6

# Load the four monitoring logs from the working directory. The file-name
# suffix (0.1, 0.5, 1, NONE) identifies the run each log belongs to.
prim <- read.csv(file = "monitoringCloudData_0.1.csv")
seg <- read.csv(file = "monitoringCloudData_0.5.csv")
terc <- read.csv(file = "monitoringCloudData_1.csv")
quart <- read.csv(file = "monitoringCloudData_NONE.csv")


#' Prepare a monitoring log for plotting.
#'
#' @param df Data frame with a `usedMemory` column whose values carry an
#'   "MB" or "GB" suffix, and a `currentTime` column formatted as
#'   "%Y-%m-%d %H:%M:%OS".
#' @return `df` with `usedMemory` converted to numeric megabytes,
#'   `currentTime` parsed to POSIXct, and a new numeric column
#'   `hours_since_start` holding the hours elapsed since the earliest
#'   parseable timestamp (NA for rows whose timestamp could not be parsed).
transformar_dataframe <- function(df) {
  memoria <- as.character(df$usedMemory)
  em_gb <- grepl("GB", memoria, fixed = TRUE)

  # Strip the unit, then scale the GB readings to MB in a single numeric
  # step instead of writing numbers back into the character column.
  valor <- as.numeric(sub("GB|MB", "", memoria))
  df$usedMemory <- valor * ifelse(em_gb, 1024, 1)

  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")

  # A single unparseable timestamp must not turn every elapsed time into NA,
  # so missing values are ignored when looking for the start of the log.
  inicio <- if (all(is.na(df$currentTime))) {
    as.POSIXct(NA)
  } else {
    min(df$currentTime, na.rm = TRUE)
  }
  df$hours_since_start <- as.numeric(
    difftime(df$currentTime, inicio, units = "hours")
  )

  df
}



# Parse the memory and time columns of all four logs with the shared
# transformer (seg previously duplicated that logic inline).
prim <- transformar_dataframe(prim)
seg <- transformar_dataframe(seg)
terc <- transformar_dataframe(terc)
quart <- transformar_dataframe(quart)

# seg is additionally stripped of rows that cannot be plotted. Elapsed time
# is measured from the earliest parseable timestamp so that unparseable rows
# do not turn the whole column into NA before the filter runs.
seg$hours_since_start <- as.numeric(difftime(
  seg$currentTime, min(seg$currentTime, na.rm = TRUE),
  units = "hours"
))
seg <- seg[complete.cases(seg$hours_since_start, seg$usedMemory), ]

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
library(patchwork)

# Build the memory-over-time line chart for one monitoring log.
#
# df        data frame with numeric `hours_since_start` and `usedMemory`
# workload  text placed inside the parentheses of the bold title
# y_breaks  tick positions for the y axis
#
# Returns a ggplot object. The black rectangle frames the extent of the data.
plot_memory <- function(df, workload, y_breaks) {
  titulo <- paste0("Memory Analysis (", workload, ")")
  ggplot(df, aes(x = hours_since_start, y = usedMemory, group = 1)) +
    geom_line() +
    labs(
      title = bquote(bold(.(titulo))),
      x = "Time (hour)",
      y = "Used Memory (MB)"
    ) +
    # annotate() draws the frame once; mapping constants through aes() in
    # geom_rect() would draw one rectangle per data row.
    annotate("rect",
      xmin = min(df$hours_since_start, na.rm = TRUE),
      xmax = max(df$hours_since_start, na.rm = TRUE),
      ymin = min(df$usedMemory, na.rm = TRUE),
      ymax = max(df$usedMemory, na.rm = TRUE),
      color = "black", fill = NA, linetype = "solid"
    ) +
    theme(panel.background = element_rect(fill = "white")) +
    scale_x_continuous(breaks = seq(0, 70, 10)) +
    scale_y_continuous(breaks = y_breaks)
}

# Titles follow the file each data frame was read from:
# prim = *_0.1.csv, seg = *_0.5.csv, terc = *_1.csv, quart = *_NONE.csv.
# (They were previously shifted by one, e.g. the 0.1 log was titled
# "None Workload".)
plot_prim <- plot_memory(prim, "Workload 0.1", c(500, 1500, 2500, 3500))
plot_seg <- plot_memory(seg, "Workload 0.5", c(400, 800, 1200))
plot_terc <- plot_memory(terc, "Workload 1.0", c(242, 246, 250, 254))
plot_quart <- plot_memory(quart, "None Workload", c(96, 98, 102, 106))

# patchwork arranges the ggplot objects; base-graphics layout()/par() do not
# affect them, so the grid is requested through plot_layout() instead.
graficos <- plot_quart + plot_prim + plot_seg + plot_terc +
  plot_layout(ncol = 2)

graficos

# Questão 7

if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

library(dplyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#dplyr::filter(data_frame, condition)

# Donut chart of the ten countries with the most Netflix titles.
# Empty strings are read as NA so that missing countries can be filtered out.
netflix_titles <- read.csv(
  file = "netflix_titles.csv",
  header = TRUE,
  strip.white = TRUE,
  na.strings = ""
)

# Keep only titles attributed to exactly one country (no comma-separated
# lists). The filter is applied once; the second identical pass was a no-op.
netflix_titles <- netflix_titles %>%
  filter(!is.na(country) & country != "" & !grepl(",", country))

top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# width/height are set in plot_ly(); passing them to layout() is deprecated.
# `plotly::` is explicit because graphics also provides a `layout` function.
plot_ly(
  top_countries,
  labels = ~country, values = ~count, type = "pie",
  textinfo = "label+percent", insidetextfont = list(color = "#FFFFFF"),
  hoverinfo = "label+percent", hole = 0.6,
  width = 900, height = 800
) %>%
  plotly::layout(
    title = "Top 10 Países com Mais Conteúdo na Netflix",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )

# Questão 8

if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

library(dplyr)
library(plotly)
# Ten countries with the most rows in `netflix_titles`, largest first.
top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(-count) %>%
  slice_head(n = 10)

# Two-column plotly table: country name and its title count.
table <- plot_ly(
  type = "table",
  header = list(
    values = c("País", "Total de Conteúdos"),
    align = "center",
    fill = list(color = "#a9a9a9"),
    font = list(color = "white", size = 15)
  ),
  cells = list(
    values = list(top_countries$country, top_countries$count),
    align = "center",
    font = list(color = c("black", "black"), size = 12)
  )
)

table

# Questão 9

# Line chart: number of movies and of TV shows released per decade.
netflix <- read.csv("netflix_titles.csv")
library(dplyr)
library(plotly)
library(stringr)

# Decade of release, e.g. 1994 -> 1990.
netflix <- netflix %>%
  mutate(decade = 10 * (release_year %/% 10))

filmesPorDecada <- netflix %>%
  filter(type == "Movie") %>%
  group_by(decade) %>%
  summarise(qtd_conteúdo = n())

seriesPorDecada <- netflix %>%
  filter(type == "TV Show") %>%
  group_by(decade) %>%
  summarise(num_series = n())

# full_join keeps decades that appear in only one of the two tables; a
# missing count means "no titles", so it becomes 0. This replaces the
# hard-coded `num_series[2] <- 1`, which wrote a made-up value into a fixed
# row position.
seriesFilmes <- full_join(filmesPorDecada, seriesPorDecada, by = "decade") %>%
  mutate(
    qtd_conteúdo = coalesce(qtd_conteúdo, 0L),
    num_series = coalesce(num_series, 0L)
  ) %>%
  arrange(decade)

# The trace type is given on each add_trace() call; leaving it out makes
# plotly guess and print a "No trace type specified" message per trace.
fig <- plot_ly(
  seriesFilmes,
  x = ~decade
) %>%
  add_trace(
    y = ~qtd_conteúdo,
    name = 'Filmes',
    type = 'scatter',
    mode = 'lines+markers'
  ) %>%
  add_trace(
    y = ~num_series,
    name = 'Séries',
    type = 'scatter',
    mode = 'lines+markers'
  )
fig

# Questão 10

# Grouped bar chart: movies released per year (2000-2010) whose first listed
# genre is Comedies, Dramas or Action & Adventure.
library(stringr)
netflix <- read.csv("netflix_titles.csv")

df_filtrado <- netflix %>%
  filter(between(release_year, 2000, 2010) & type == "Movie") %>%
  select(release_year, listed_in)

# word() returns the whole string when the separator is absent, so titles
# with a single genre need no special case. `.groups = "drop"` returns an
# ungrouped result and silences the summarise() regrouping message.
contagem_categorias_por_ano <- df_filtrado %>%
  mutate(primeira_categoria = word(listed_in, 1, sep = fixed(", "))) %>%
  group_by(release_year, primeira_categoria) %>%
  summarise(num_filmes = n(), .groups = "drop")

categorias <- c("Action & Adventure", "Comedies", "Dramas")
df_final <- contagem_categorias_por_ano %>%
  filter(primeira_categoria %in% categorias)

# Yearly counts of one genre, with the count column renamed to `coluna` so
# that it can be joined side by side with the other genres.
contagem_da_categoria <- function(categoria, coluna) {
  contagem <- df_final %>%
    filter(primeira_categoria == categoria) %>%
    select(release_year, num_filmes)
  names(contagem)[names(contagem) == "num_filmes"] <- coluna
  contagem
}

# Start from every year in the range so that years without any matching
# movie still get a row (their counts stay NA and draw no bar).
df_grafico <- data.frame(release_year = 2000:2010) %>%
  left_join(contagem_da_categoria("Comedies", "Comedies"),
    by = "release_year"
  ) %>%
  left_join(contagem_da_categoria("Dramas", "Dramas"),
    by = "release_year"
  ) %>%
  left_join(contagem_da_categoria("Action & Adventure", "ActionAdventure"),
    by = "release_year"
  )

# `plotly::` is explicit because graphics also provides a `layout` function.
fig <- plot_ly(
  df_grafico,
  x = ~release_year,
  y = ~Comedies,
  type = 'bar',
  name = 'Comédia'
) %>%
  add_trace(
    y = ~Dramas,
    name = 'Drama'
  ) %>%
  add_trace(
    y = ~ActionAdventure,
    name = 'Ação e Aventura'
  ) %>%
  plotly::layout(
    yaxis = list(title = 'Qnt. de Lançamentos'),
    xaxis = list(title = 'Ano de Lançamento')
  )

# Display the chart.
fig