Introdução

Este arquivo Markdown servirá como minha resposta para a atividade 12 da disciplina de Computação para Análise de Dados, do Programa de Pós-Graduação em Informática Aplicada (PPGIA) da Universidade Federal Rural de Pernambuco (UFRPE).

Questões

Questão 1

Carregando os Dados

# Response-time series used by the plots of question 1. Every vector holds
# seven values, one per entry of `clock` (defined at the end of this block).

# 1 fog
MRT_1F <- c(
  517.1468515630205,
  85.13094142168089,
  30.333207896694553,
  12.694776264558937,
  3.3041601673945418,
  1.1823111717498882,
  1.1892293502386786
)

# 3 fogs
MRT_3F <- c(
  156.68929936163462,
  11.540837783562276,
  0.4512835621696538,
  0.4509797929766453,
  0.4502068233039181,
  0.4496185276300172,
  0.4543157082191288
)

# 5 fogs
MRT_5F <- c(
  83.90319666471157,
  0.3068151086494968,
  0.30522314133037304,
  0.3072588968084928,
  0.30655265997285697,
  0.3055812715727718,
  0.3053297166713006
)

# 10 fogs
MRT_10F <- c(
  29.55430642951759,
  0.19832832665772515,
  0.1971923924717474,
  0.19796648905716516,
  0.19615594370806338,
  0.2034569237883263,
  0.19617420889447737
)

# 15 fogs
MRT_15F <- c(
  11.317736530583566,
  0.167364215666193,
  0.16172168266811013,
  0.16701085329580515,
  0.1598052657153692,
  0.1645934043532696,
  0.16216563797118075
)

# Without fog (baseline)
MRT_sem_F <- c(
  11.93430909937736,
  0.6095414637034009,
  0.6060645101029295,
  0.612167181646899,
  0.6146761002685637,
  0.6096747087200697,
  0.6125810476877268
)

# Values used on the x axis of every plot of question 1.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

Fazendo os Plots

Plot 1

# Question 1, plot 1: every MRT series against `clock` on a single chart.
#
# The series, their colours and their point symbols are declared once, so the
# curves and the legend are always built from the same values.
mrt_series <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F,
  "w/o Fog" = MRT_sem_F
)
mrt_cols <- c("blue", "red", "green", "purple", "orange", "black")
# One distinct symbol per series. "w/o Fog" previously reused symbol 2 (the
# one of "15 Fogs"), leaving colour as the only way to tell them apart.
mrt_pch <- c(16, 17, 15, 1, 2, 4)

# The first series opens the plot; ylim spans all series so none is clipped.
plot(
  x = clock,
  y = mrt_series[[1]],
  type = "b",
  pch = mrt_pch[1],
  col = mrt_cols[1],
  ylim = range(unlist(mrt_series)),
  xlab = "Clock",
  ylab = "Valores",
  main = "Comparacao entre os MRTs"
)

# The remaining series are overlaid on the same axes.
for (i in seq_along(mrt_series)[-1]) {
  lines(
    x = clock,
    y = mrt_series[[i]],
    type = "b",
    pch = mrt_pch[i],
    col = mrt_cols[i]
  )
}

legend(
  "topright",
  legend = names(mrt_series),
  col = mrt_cols,
  pch = mrt_pch,
  lty = 1
)

Plot 2

# Question 1, plot 2: one grouped bar chart per fog configuration, each one
# comparing that configuration with the "w/o fog" series on a log y axis.

# Put several plots on the same device (4 x 2 grid). The previous graphical
# parameters are kept so they can be restored after the last panel; otherwise
# later base plots would be drawn into the leftover cells of this grid.
# NOTE(review): only five panels are drawn, so three cells stay empty --
# confirm that the 4 x 2 grid is intentional.
old_par <- par(mfrow = c(4, 2))

# Bar colours: first row of the matrix (fog), second row (w/o fog).
colors <- c("#666666", "#E6E6E6")

# Draw one panel.
#
# mrt        2-row matrix: fog series on top of the "w/o fog" series.
# fog_label  legend text for the first row.
# times      labels written under each pair of bars.
# bar_cols   two colours, one per row of `mrt`.
plot_fog_vs_baseline <- function(mrt, fog_label,
                                 times = clock, bar_cols = colors) {
  barplot(
    mrt,
    main = NULL,
    names.arg = times,
    xlab = "Time between Things requests",
    ylab = "Response time (s)",
    col = bar_cols,
    beside = TRUE,
    log = "y"
  )
  # A single pch value is recycled over the two legend entries.
  legend(
    "topright",
    legend = c(fog_label, "w/o fog"),
    col = bar_cols,
    pch = 15
  )
}

m1 <- rbind(MRT_1F, MRT_sem_F)
plot_fog_vs_baseline(m1, "1 Fog")

m2 <- rbind(MRT_3F, MRT_sem_F)
plot_fog_vs_baseline(m2, "3 Fog")

m3 <- rbind(MRT_5F, MRT_sem_F)
plot_fog_vs_baseline(m3, "5 Fog")

m4 <- rbind(MRT_10F, MRT_sem_F)
plot_fog_vs_baseline(m4, "10 Fog")

m5 <- rbind(MRT_15F, MRT_sem_F)
plot_fog_vs_baseline(m5, "15 Fog")

# Back to the graphical parameters that were active before this block.
par(old_par)

Questão 2

# Question 2: stacked bar chart of meal quality (%) within each price range.

# Rows are the quality ratings, columns are the price ranges.
valores <- matrix(
  c(53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("Boa", "Muito Boa", "Excelente"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# Rescale each column so that it adds up to 100.
valores_prop <- 100 * prop.table(valores, margin = 2)

# barplot() returns the x position of every bar; kept for the labels below.
bp <- barplot(
  valores_prop,
  main = "Qualidade da Refeicao por Faixa de Preco",
  xlab = "Faixa de Preco",
  ylab = "Percentual (%)",
  col = c("skyblue", "orange", "green"),
  legend.text = rownames(valores)
)

# Segments of 5% or less get an empty label.
pct_text <- paste0(round(valores_prop, 1), "%")
labels <- ifelse(valores_prop > 5, pct_text, "")

# Each label sits at the vertical centre of its segment: the running total up
# to the segment minus half of the segment height.
stack_top <- apply(valores_prop, 2, cumsum)
text(
  x = rep(bp, each = nrow(valores_prop)),
  y = stack_top - valores_prop / 2,
  labels = labels,
  cex = 0.8
)

Questão 3

# Question 3: histogram of the May temperatures of `airquality`, converted to
# Celsius, with a kernel density curve drawn on top.
data(airquality)

# Keep only the rows of month 5.
df_maio <- airquality[airquality$Month == 5, ]

# Fahrenheit-to-Celsius conversion of the Temp column.
temp_maio <- df_maio$Temp
df_maio_c <- (temp_maio - 32) / 1.8

# probability = TRUE puts the bars on the density scale, the same scale as
# the curve added afterwards.
hist(
  df_maio_c,
  main = "Histograma das Temperaturas de Maio",
  xlab = "Temperatura (°C)",
  ylab = "Densidade",
  probability = TRUE,
  col = "navyblue",
  border = "white"
)

curva_densidade <- density(df_maio_c, na.rm = TRUE)
lines(curva_densidade, col = "red", lwd = 2)

Questão 4

# Question 4: pie chart of sales per country, labelled with each country's
# share of the total.

COUNTRY <- c("US", "UK", "France", "Poland", "Japan", "China")

SALES <- c(340, 290, 510, 820, 120, 780)

# Share of the total, rounded to a whole percentage.
pct <- round(SALES / sum(SALES) * 100)

# Slice labels in the form "<country> <pct> %".
lbls <- paste(COUNTRY, pct, "%")

# One colour per slice, derived from the data instead of a fixed count.
pie(
  SALES,
  main = "Venda dos Países",
  col = rainbow(length(SALES)),
  labels = lbls
)

Questão 5

# Question 5: box plots of the insect count for each spray of `InsectSprays`.
df_inseto <- data.frame(InsectSprays)

# With the formula count ~ spray the sprays are the groups on the x axis and
# the counts are on the y axis, so the axis titles follow that order.
# outline = FALSE leaves the outlier points out of the drawing.
boxplot(
  count ~ spray,
  data = df_inseto,
  main = "Comparando Diferentes Inseticidas",
  xlab = "Inseticida",
  ylab = "Contagem de Insetos",
  outline = FALSE,
  col = "Yellow"
)

Questão 6

Carregando e ajeitando os dados

library(readr)
library(plotly)

# Question 6: load the four monitoring logs and normalise the two columns that
# are plotted afterwards.

# Normalise one monitoring data frame.
#
# usedMemory   The digits and dots are read as the amount, the remaining
#              characters as the unit. Amounts whose unit is "GB" are
#              multiplied by 1024; every other amount is kept unchanged.
#              NOTE(review): presumably the only other unit in the files is
#              MB -- confirm; any further unit needs its own factor.
# currentTime  Parsed with the format "%Y-%m-%d %H:%M:%S" and replaced by the
#              number of hours elapsed since the first row.
#
# Returns `df` with both columns overwritten by their numeric versions.
prepare_monitoring_data <- function(df) {
  raw_memory <- df$usedMemory
  amount <- as.numeric(gsub("[A-Z]", "", raw_memory))
  unit <- gsub("[0-9\\.]", "", raw_memory)
  df$usedMemory <- ifelse(unit == "GB", amount * 1024, amount)

  timestamps <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")
  # Every timestamp is measured against the first one. Unlike a running sum of
  # consecutive gaps, one unparseable timestamp does not turn all later rows
  # into NA, and an empty data frame is handled as well.
  df$currentTime <- as.numeric(
    difftime(timestamps, timestamps[1], units = "hours")
  )

  df
}

df1 <- prepare_monitoring_data(
  read_csv("monitoringCloudData/monitoringCloudData_0.1.csv")
)
df2 <- prepare_monitoring_data(
  read_csv("monitoringCloudData/monitoringCloudData_0.5.csv")
)
df3 <- prepare_monitoring_data(
  read_csv("monitoringCloudData/monitoringCloudData_1.csv")
)
df4 <- prepare_monitoring_data(
  read_csv("monitoringCloudData/monitoringCloudData_NONE.csv")
)

Fazendo os Plots

# Question 6 plots: used memory over time, one panel per monitoring log.

# 2 x 2 grid; the matrix is filled by column, so panels 1 and 2 form the left
# column and panels 3 and 4 the right one. The graphics:: prefix is needed
# because plotly, attached earlier in this file, also exports a layout().
graphics::layout(matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2))

# Draw the memory curve of one data frame under the given title.
plot_memory_usage <- function(df, title) {
  plot(
    df$currentTime,
    df$usedMemory,
    type = "l",
    col = "blue",
    lwd = 2,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)",
    main = title
  )
}

plot_memory_usage(df1, "Memory Analysis (Workload of 0.1)")
plot_memory_usage(df2, "Memory Analysis (Workload of 0.5)")
plot_memory_usage(df3, "Memory Analysis (Workload of 1)")
plot_memory_usage(df4, "Memory Analysis (None Workload)")

# Back to a single panel so that later base plots use the whole device.
graphics::layout(1)

Questão 7

library(plotly)
library(dplyr)
library(stringr)

# Question 7: pie chart of the ten countries with the most titles.
df_netflix <- read_csv("netflix_titles.csv")

# Keep the rows whose country field has no comma; rows for which the test is
# NA are dropped by filter() as well.
df_net_fil <- filter(df_netflix, !str_detect(country, ","))

# Titles per country, largest first, cut to the first ten rows.
country_totals <- count(df_net_fil, country, sort = TRUE)
top10 <- slice(country_totals, 1:10)

top10 %>%
  plot_ly(
    labels = ~country,
    values = ~n,
    type = "pie"
  ) %>%
  layout(title = "Ranking dos Países com mais conteudos")

Questão 8

# Question 8: the top-10 ranking shown as a plotly table.

# Formatting shared by the header and the body of the table.
table_align <- c("center", "center")
table_border <- list(width = 1.5, color = "black")
table_font <- list(family = "Arial", size = 12, color = "black")

plot_ly(
  type = "table",
  columnwidth = c(50, 50),
  columnorder = c(0, 1),
  header = list(
    values = c("País", "Total de conteúdos"),
    align = table_align,
    line = table_border,
    font = table_font
  ),
  cells = list(
    # Two-row matrix: country names in the first row, totals in the second.
    values = rbind(top10$country, top10$n),
    align = table_align,
    line = table_border,
    font = table_font
  )
)

Questão 9

# Question 9: number of movies and TV shows per release decade.

# First year of the decade each title was released in.
df_netflix$decada <- 10 * floor(df_netflix$release_year / 10)

# One row per (decade, type) pair with the number of titles.
dados_plot <- summarise(
  group_by(df_netflix, decada, type),
  qtd = n(),
  .groups = "drop"
)

filmes <- filter(dados_plot, type == "Movie")
series <- filter(dados_plot, type == "TV Show")

# The figure is built step by step: one line per content type, then the axis
# titles, then the mode bar is switched off.
fig <- plot_ly()
fig <- add_lines(
  fig,
  data = series,
  x = ~decada,
  y = ~qtd,
  name = "Series",
  line = list(color = "blue")
)
fig <- add_lines(
  fig,
  data = filmes,
  x = ~decada,
  y = ~qtd,
  name = "Filmes",
  line = list(color = "yellow")
)
fig <- layout(
  fig,
  title = "Quantidade de Conteudo por Decada",
  xaxis = list(title = "Decadas"),
  yaxis = list(title = "Quantidade de Conteudo")
)

config(fig, displayModeBar = FALSE)

Questão 10

# Question 10: movies released from 2000 to 2010, counted per year for three
# genres and drawn as grouped bars.

# A title's genre is the first entry of its comma-separated `listed_in`
# field. vapply() guarantees a character vector whatever the input size,
# whereas sapply() returns a list when there are no rows.
primeiro_genero <- vapply(
  strsplit(df_netflix$listed_in, ","),
  `[`,
  character(1),
  1
)
df_netflix$genero <- str_trim(primeiro_genero)

# One row per (year, genre) pair with the number of movies.
dados <- df_netflix %>%
  filter(
    type == "Movie",
    release_year >= 2000,
    release_year <= 2010,
    genero %in% c(
      "Dramas",
      "Action & Adventure",
      "Comedies"
    )
  ) %>%
  group_by(release_year, genero) %>%
  summarise(
    qtd = n(),
    .groups = "drop"
  )

plot_ly(
  data = dados,
  x = ~release_year,
  y = ~qtd,
  color = ~genero,
  type = "bar"
) %>%
  layout(
    title = "Quantidade de Filmes por Genero (2000-2010)",
    xaxis = list(title = "Ano"),
    yaxis = list(title = "Quantidade de Filmes"),
    barmode = "group"
  ) %>%
  config(displayModeBar = FALSE)