Análise de Dados

Questão 1

# Request intervals used as the x axis of every Questão 1 chart; each MRT_*
# vector below holds one value per entry of `clock`.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Response-time series (presumably mean response time, in seconds) for each
# Fog configuration; the suffix is the number of Fog nodes.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

# Series measured without any Fog node (labelled "w/o Fog" in the charts).
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)


### Line chart of every series against the request interval

# Two side-by-side panels (width ratio 2:1); only the first panel is drawn
# before the layout is redefined further down.
layout(matrix(1:2, nrow = 1), widths = c(2, 1))

# Per-series styling, kept in parallel vectors so the legend and the lines
# cannot drift apart.
fog_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
fog_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
fog_colors <- c("black", "yellow", "red", "blue", "magenta", "green")
fog_symbols <- c(4, 11, 1, 2, 5, 4)

# The first series opens the plot; the y axis is sized from MRT_1F.
plot(clock, fog_series[[1]],
     type = "o", col = fog_colors[1], pch = fog_symbols[1], lty = 1, lwd = 2,
     ylim = c(0, max(MRT_1F)),
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)",
     main = "")

# Remaining series are overlaid on the same axes.
for (i in seq_along(fog_series)[-1]) {
  lines(clock, fog_series[[i]],
        type = "o", col = fog_colors[i], pch = fog_symbols[i],
        lty = 1, lwd = 2)
}

legend("topright",
       legend = fog_labels,
       col = fog_colors,
       pch = fog_symbols,
       lty = 1, lwd = 2, cex = 0.8)

# Bar colours (light grey / dark grey) and a 2 x 3 grid of panels filled
# row by row.
cores <- c("#E6E6E6", "#666666")
layout(matrix(1:6, nrow = 2, byrow = TRUE))

# Draw a grouped bar chart (log-scaled y axis) comparing one Fog
# configuration against the baseline series.
#
# Arguments:
#   fog       numeric vector with the series to compare.
#   label     legend text for `fog`.
#   baseline  numeric vector drawn first and labelled "w/o Fog"; defaults to
#             the script-level `MRT_sem_F`.
#   intervals x-axis group labels; defaults to the script-level `clock`.
#   bar_cols  two fill colours (baseline, fog). The default is fixed here
#             rather than read from the global `cores`, because the script
#             reassigns `cores` later and the chart would otherwise change
#             depending on where the function is called.
#
# Returns (invisibly) whatever `legend()` returns.
plot_comparativo <- function(fog, label,
                             baseline = MRT_sem_F,
                             intervals = clock,
                             bar_cols = c("#E6E6E6", "#666666")) {
  # rbind() would silently recycle a shorter series; fail early instead.
  stopifnot(length(fog) == length(baseline))

  dados <- rbind(baseline, fog)
  ymax <- max(dados)

  # On a log axis the bars start at ylim[1]. Keep the original floor of 0.1
  # whenever every value is above it, and lower it only when a value would
  # otherwise fall at or below the floor and be invisible.
  menor <- min(dados)
  y_floor <- if (menor > 0.1) 0.1 else menor / 2

  barplot(dados,
          beside = TRUE,
          log = "y",
          col = bar_cols,
          names.arg = intervals,
          ylim = c(y_floor, ymax),
          xlab = "Time between Things requests",
          ylab = "Response time (s)",
          main = NULL)
  legend("topright", legend = c("w/o Fog", label),
         fill = bar_cols, border = "black", bty = "n")
}

# One comparison panel per Fog configuration, in increasing number of Fogs.
fog_panels <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F
)
for (panel_label in names(fog_panels)) {
  plot_comparativo(fog_panels[[panel_label]], panel_label)
}

Questão 2

# Percentages per quality rating (rows) and price range (columns).
dados <- rbind(
  "Good" = c(53.8, 33.9, 2.6, 0.0),
  "Very Good" = c(43.6, 54.2, 60.5, 21.4),
  "Excellent" = c(2.6, 11.9, 36.8, 78.6)
)
colnames(dados) <- c("$10-19", "$20-29", "$30-39", "$40-49")

# One colour per row; the wide right margin leaves room for the legend,
# which is drawn outside the plot region.
cores <- c("yellow", "blue", "green")
par(mar = c(5, 4, 4, 10))

# Stacked bars, one per price range.
barplot(dados,
        beside = FALSE,
        legend.text = FALSE,
        col = cores,
        ylim = c(0, 100),
        main = "Qualidade das Refeições por Faixa de Preço",
        xlab = "Faixa de Preço",
        ylab = "Porcentagem (%)")

# Legend entries are listed top-down in the same order the segments are
# stacked, hence the reversed rows and colours. The negative inset, together
# with xpd = TRUE, pushes the box into the right margin.
legend("topright",
       title = "Classificação de Qualidade",
       legend = rev(rownames(dados)),
       fill = rev(cores),
       xpd = TRUE,
       inset = c(-0.46, 0))

Questão 3

# Load the built-in airquality data, keep the rows for May (Month == 5) and
# convert the temperatures from Fahrenheit to Celsius.
data(airquality)
dados_maio <- airquality[airquality$Month == 5, ]
temperaturas_celsius <- (dados_maio$Temp - 32) / 1.8

# Kernel density estimate, computed up front so the y axis can be sized to
# fit both the bars and the curve (otherwise the curve's peak may be clipped).
densidade <- density(temperaturas_celsius)
hist_maio <- hist(temperaturas_celsius, plot = FALSE)

# Density-scaled histogram. `freq = FALSE` is the fully spelled-out form of
# the former `prob = TRUE`, and the y label now names what is plotted
# (density, not counts).
hist(temperaturas_celsius,
     col = "skyblue",
     border = "white",
     main = "Histograma das Temperaturas em Maio",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     freq = FALSE,
     ylim = c(0, max(hist_maio$density, densidade$y)))

# Overlay the density curve on the histogram.
lines(densidade,
      col = "darkred",
      lwd = 2)

legend("topright",
       legend = c("Densidade"),
       col = c("darkred"),
       lwd = 2,
       bty = "n")

Questão 4

# Read the sales table (first line is the header) straight from the URL.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Total SALES per COUNTRY, with friendlier column names.
vendas_por_pais <- setNames(
  aggregate(sales$SALES, by = list(Country = sales$COUNTRY), FUN = sum),
  c("Pais", "Total_Vendas")
)

# Share of each country in the overall total, rounded to one decimal place.
total_geral <- sum(vendas_por_pais$Total_Vendas)
vendas_por_pais$Porcentagem <- round(
  vendas_por_pais$Total_Vendas / total_geral * 100, 1
)

# The same "Country (xx%)" text is used for the slices and for the legend.
rotulos <- paste0(
  vendas_por_pais$Pais, " (", vendas_por_pais$Porcentagem, "%)"
)

cores <- c("red", "lightgreen", "salmon", "gold", "violet", "grey")

# Wide right margin plus xpd = TRUE so the legend can sit outside the pie.
par(mar = c(1, 1, 4, 10), xpd = TRUE)

pie(vendas_por_pais$Total_Vendas,
    labels = rotulos,
    col = cores,
    main = "Porcentagem de Vendas por País")

legend("right",
       legend = rotulos,
       fill = cores,
       title = "Países",
       inset = c(-0.3, 0),
       cex = 0.8)

Questão 5

# Load the built-in InsectSprays data and set the plot margins.
data(InsectSprays)
par(mar = c(5, 4, 4, 2))

# One box per spray type; outlier points are suppressed (outline = FALSE).
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Número de Insetos"
)

Questão 6

# Read one monitoring CSV and add two derived columns.
#
# Arguments:
#   file_path  path to a CSV file that has at least the columns `currentTime`
#              (a timestamp `as.POSIXct()` can parse) and `usedMemory`
#              (text made of a number followed by a unit such as "GB").
#
# Returns the data frame read from the file, with `currentTime` converted to
# POSIXct plus:
#   time_hours     hours elapsed since the earliest `currentTime`.
#   usedMemory_mb  `usedMemory` expressed in megabytes.
process_data <- function(file_path) {
  monitor <- read.csv(file_path)
  monitor$currentTime <- as.POSIXct(monitor$currentTime)

  start_time <- min(monitor$currentTime)
  monitor$time_hours <- as.numeric(
    difftime(monitor$currentTime, start_time, units = "hours")
  )

  # Vectorised conversion of "<number><unit>" strings to megabytes.
  # Binary multiples are used for every unit so TB, GB and KB stay consistent
  # with each other; text with none of these units is taken to be MB already.
  convert_to_mb <- function(memory_str) {
    value <- as.numeric(gsub("[^0-9.]", "", memory_str))

    # Later assignments win, giving the precedence TB > GB > KB > MB.
    unit_factor <- rep(1, length(memory_str))
    unit_factor[grepl("KB", memory_str, ignore.case = TRUE)] <- 1 / 1024
    unit_factor[grepl("GB", memory_str, ignore.case = TRUE)] <- 1024
    unit_factor[grepl("TB", memory_str, ignore.case = TRUE)] <- 1024^2

    value * unit_factor
  }

  monitor$usedMemory_mb <- convert_to_mb(as.character(monitor$usedMemory))

  monitor
}


# Load the four monitoring runs (no workload, then workloads 0.1, 0.5, 1).
data_none <- process_data("monitoringCloudData_NONE.csv")
data_01 <- process_data("monitoringCloudData_0.1.csv")
data_05 <- process_data("monitoringCloudData_0.5.csv")
data_10 <- process_data("monitoringCloudData_1.csv")

# 2 x 2 grid filled row by row, with compact margins.
layout(matrix(1:4, nrow = 2, byrow = TRUE))
par(mar = c(4, 4, 2, 1))

# One line chart per run: used memory (MB) against elapsed time (hours).
memory_runs <- list(data_none, data_01, data_05, data_10)
memory_titles <- c(
  "Memory Analysis (None Workload)",
  "Memory Analysis (Workload of 0.1)",
  "Memory Analysis (Workload of 0.5)",
  "Memory Analysis (Workload of 1.0)"
)

for (i in seq_along(memory_runs)) {
  run <- memory_runs[[i]]
  plot(run$time_hours, run$usedMemory_mb,
       type = "l",
       main = memory_titles[i],
       xlab = "Time (hour)",
       ylab = "Used Memory (MB)",
       col = "black")
}

Questão 7

# Load the Netflix catalogue.
netflix_data <- read_csv("netflix_titles.csv", show_col_types = FALSE)

# TRUE for every element of `country_str` that contains no comma, i.e. that
# names a single country. Vectorised: accepts a whole column at once.
has_single_country <- function(country_str) {
  !grepl(",", country_str)
}

# Keep only titles whose `country` is present and lists exactly one country.
# grepl() is already vectorised, so the helper is applied to the column
# directly instead of element by element through sapply().
pais_unico <- netflix_data[
  !is.na(netflix_data$country) & has_single_country(netflix_data$country),
]

# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

pais_unico$country_clean <- trim(pais_unico$country)

# Number of titles per country, as a data frame sorted from most to fewest.
country_table <- table(pais_unico$country_clean)
country_df <- data.frame(
  country = names(country_table),
  count = as.numeric(country_table)
)
country_df <- country_df[order(-country_df$count), ]

# Ten countries with the most titles.
top_10 <- head(country_df, 10)

# Share of each country within the top 10 only (not within the catalogue).
total_count <- sum(top_10$count)
top_10$percentage <- round(top_10$count / total_count * 100, 2)


# Pie chart of the ten countries with the most titles.
# The figure size is passed to plot_ly() rather than layout(): giving
# width/height to layout() is deprecated and raised a warning on every render.
plot_ly(top_10,
        labels = ~country,
        values = ~count,
        type = 'pie',
        textinfo = 'label+percent',
        insidetextorientation = 'radial',
        textposition = 'outside',
        texttemplate = '%{label}<br>%{percent}',
        width = 900,
        height = 600) %>%
  layout(
    title = 'Top 10 Países com Mais Conteúdo na Netflix',
    showlegend = TRUE,
    # Horizontal legend centred below the chart.
    legend = list(orientation = "h", x = 0.5, y = -0.2, xanchor = "center"),
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    autosize = FALSE
  )

Questão 8

# Table with the top-10 countries and their number of titles.
# Header: white text on a grey background, centred.
table_header <- list(
  values = c("País", "Total de conteúdos"),
  align = 'center',
  fill = list(color = 'gray'),
  font = list(color = 'white', size = 14)
)

# Body: one column per entry of `values`, centred.
table_cells <- list(
  values = list(top_10$country, top_10$count),
  align = 'center'
)

plot_ly(type = 'table', header = table_header, cells = table_cells)

Questão 9

# Decade of release: the release year rounded down to a multiple of 10.
netflix_data$decade <- floor(netflix_data$release_year / 10) * 10

# Number of titles per decade, computed separately for movies and TV shows.
only_movies <- netflix_data[netflix_data$type == "Movie", ]
only_shows <- netflix_data[netflix_data$type == "TV Show", ]

movies <- aggregate(type ~ decade, data = only_movies, FUN = length)
series <- aggregate(type ~ decade, data = only_shows, FUN = length)

# The aggregated column holds counts, so rename it accordingly.
names(movies)[2] <- "count"
names(series)[2] <- "count"

# Build the figure step by step: TV series first, then movies, then layout.
decade_fig <- plot_ly()
decade_fig <- add_trace(
  decade_fig,
  x = series$decade,
  y = series$count,
  type = 'scatter',
  mode = 'lines+markers',
  name = 'TV Series',
  line = list(color = 'blue')
)
decade_fig <- add_trace(
  decade_fig,
  x = movies$decade,
  y = movies$count,
  type = 'scatter',
  mode = 'lines+markers',
  name = 'Movies',
  line = list(color = 'gold')
)
layout(
  decade_fig,
  title = "Quantidade de Conteúdo por Década na Netflix",
  xaxis = list(title = "Década", gridcolor = 'lightgray'),
  yaxis = list(title = "Qtd. Conteúdo", gridcolor = 'lightgray'),
  plot_bgcolor = 'white'
)

Questão 10

# Movies released from 2000 to 2010, inclusive.
in_window <- netflix_data$type == "Movie" &
  netflix_data$release_year >= 2000 &
  netflix_data$release_year <= 2010
filmes <- netflix_data[in_window, ]

# Genre = first entry of the comma-separated `listed_in` field, trimmed.
genre_lists <- strsplit(as.character(filmes$listed_in), ",")
filmes$genero <- sapply(genre_lists, function(parts) trimws(parts[1]))

# Keep only the three genres being compared.
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")
filmes_filtrados <- filmes[filmes$genero %in% generos_alvo, ]

# Releases per year and genre, reshaped to one row per (year, genre) pair.
contagem <- table(filmes_filtrados$release_year, filmes_filtrados$genero)
df_contagem <- as.data.frame(contagem)
names(df_contagem) <- c("Ano", "Genero", "Quantidade")

# Fixed colour for each genre.
genre_colors <- c("Dramas" = "blue",
                  "Action & Adventure" = "orange",
                  "Comedies" = "green")

# Grouped bars: one group per year, one bar per genre; ticks every two years.
genre_fig <- plot_ly(df_contagem,
                     x = ~Ano,
                     y = ~Quantidade,
                     color = ~Genero,
                     type = 'bar',
                     colors = genre_colors)
layout(genre_fig,
       title = "",
       xaxis = list(title = "Ano de Lançamento",
                    tickmode = "array",
                    tickvals = seq(2000, 2010, by = 2)),
       yaxis = list(title = "Qtd. de Lançamentos"),
       barmode = 'group',
       legend = list(title = ""))