CPAD - Atividade Visualização de dados

Atividades

Questão 1

# Question 1 data: one response-time series (seconds) per scenario, with one
# value per entry of `clock`. NOTE(review): the "MRT" prefix presumably stands
# for mean response time -- confirm against the assignment text.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Time between requests (seconds); shared x axis of every Question 1 plot.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# All series, in the same order as the legend labels in `nomes`.
series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
dados <- c(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
nomes <- c("1 Frog", "3 Frogs", "5 Frogs", "10 Frogs", "15 Frogs", "W/O Frogs")

# Panel 1 spans the whole first row; panels 2-6 fill the remaining cells and
# the last cell (0) stays empty.
layout(matrix(c(1, 1,
                2, 3,
                4, 5,
                6, 0), nrow = 4, byrow = TRUE))

# Axis titles are drawn on margin line 3, so the bottom and left margins need
# about four lines; one-line margins clipped every xlab/ylab requested below.
par(mar = c(4, 4, 1, 1))

# The first series creates the axes; ylim covers every series so that none of
# the lines added afterwards is cut off.
plot(
  clock, series[[1]],
  type = "o",
  pch = 1,
  col = 1,
  xlim = range(clock),
  ylim = range(dados),
  ylab = "Response time (seconds)",
  xlab = "Time between things requests (seconds)"
)

# Remaining series reuse their position as both plotting symbol and colour,
# matching the legend below.
for (k in seq_along(series)[-1]) {
  lines(clock, series[[k]], type = "o", pch = k, col = k)
}

legend(
  "topright",
  legend = nomes,
  pch = seq_along(nomes),
  col = seq_along(nomes)
)

# One grouped bar chart per frog scenario, each placing the scenario next to
# the "W/o Frog" series on a log-scaled y axis. The list names double as the
# legend label of the scenario being compared.
cenarios <- list(
  "1 Frog" = MRT_1F,
  "3 Frog" = MRT_3F,
  "5 Frog" = MRT_5F,
  "10 Frog" = MRT_10F,
  "15 Frog" = MRT_15F
)

for (rotulo in names(cenarios)) {
  barplot(
    rbind(MRT_sem_F, cenarios[[rotulo]]),
    beside = TRUE,
    names.arg = clock,
    log = "y",
    col = c("#E6E6E6", "#666666"),
    xlab = "Time between things requests",
    ylab = "Response time (seconds)",
    legend.text = c("W/o Frog", rotulo),
    args.legend = list(x = "topright")
  )
}

Questão 2

# Percentage of meals per quality rating (rows) within each price band
# (columns). The values are written one price band per line, so the matrix is
# filled column-wise (byrow = FALSE).
dados <- matrix(
  c(53.8, 43.6, 2.6,
    33.9, 54.2, 11.9,
    2.6,  60.5, 36.8,
    0.0,  21.4, 78.6),
  nrow = 3,
  byrow = FALSE,
  dimnames = list(
    # "Excelente" was misspelled "Exelente", which showed up in the legend.
    c("Bom", "Muito bom", "Excelente"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)
cores <- c("lightblue", "lightgreen", "lightyellow")

# Stacked bars (one per price band). The y axis runs past 100 to leave room
# for the horizontal legend above the bars.
barplot(
  dados,
  main = "Qualidade da refeição por categoria de preço",
  xlab = "Preço da refeição",
  ylab = "Percentual (%)",
  col = cores,
  legend.text = rownames(dados),
  ylim = c(0, 120),
  args.legend = list(x = "top", bty = "n", ncol = 3),
  space = 0.5
)

Questão 3

# May temperatures from the built-in `airquality` data set, with missing
# readings dropped and values converted from Fahrenheit to Celsius.
temp_maio_f <- airquality$Temp[airquality$Month == 5]
temp <- (temp_maio_f[!is.na(temp_maio_f)] - 32) / 1.8

# Density-scaled histogram (freq = FALSE) so the kernel density curve drawn
# afterwards shares the same y axis.
hist(
  temp,
  freq = FALSE,
  col = "blue",
  ylim = c(0, 0.2),
  main = "Histograma das temperaturas de maio (Cº)",
  xlab = "Temperatura (Cº)",
  ylab = "Densidade"
)

curva_densidade <- density(temp)
lines(curva_densidade, col = "red", lwd = 2)

Questão 4

# Sales table downloaded from the course site; the first line of the file is
# used as the column header.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of total sales per row, rounded to a whole percentage, and the slice
# labels in the form "<COUNTRY> <pct>%".
pct <- round(sales$SALES / sum(sales$SALES) * 100)
lbls <- paste0(sales$COUNTRY, " ", pct, "%")

# One rainbow colour per row of the table.
paleta <- rainbow(nrow(sales))
pie(
  sales$SALES,
  labels = lbls,
  col = paleta,
  main = "Vendas por país"
)

Questão 5

# Insect counts grouped by spray type, taken from the built-in `InsectSprays`
# data set. Splitting the counts by spray yields one box per spray level, just
# like the formula interface does; outliers are not drawn (outline = FALSE).
contagens_por_spray <- split(InsectSprays$count, InsectSprays$spray)

boxplot(
  contagens_por_spray,
  outline = FALSE,
  col = "yellow",
  main = "Insetos por inseticida",
  xlab = "Tipo de inseticida",
  ylab = "Número de insetos"
)

Questão 6

# Read one monitoring CSV and derive the columns used for plotting.
#
# Args:
#   filepath: path to a CSV file that has `currentTime` and `usedMemory`
#     columns. `usedMemory` is a number followed by a unit, e.g. "512MB".
#
# Returns:
#   The data frame read from `filepath`, with `currentTime` converted to
#   POSIXct and two added columns:
#     memoria_mb  - `usedMemory` expressed in MB;
#     tempo_horas - hours elapsed since the earliest `currentTime`.
processar_dados <- function(filepath) {
  dados <- read.csv(filepath, stringsAsFactors = FALSE)

  # Split each "<number><unit>" value into its numeric part and its unit.
  valores_num <- as.numeric(gsub("([0-9.]+).*", "\\1", dados$usedMemory))
  unidades <- gsub("[0-9.]+(.*)", "\\1", dados$usedMemory)

  # Normalise the unit so "2 GB" or "1.5gb" are not silently treated as MB.
  unidades <- toupper(trimws(unidades))

  # Multipliers to MB; any other unit (including "MB" itself) is taken as MB.
  # NOTE(review): TB uses a decimal factor (1e6) while GB uses a binary one
  # (1024). Kept as in the original -- confirm which convention is required.
  fatores_mb <- c(TB = 1000000, GB = 1024)
  fator <- unname(fatores_mb[unidades])
  fator[is.na(fator)] <- 1
  dados$memoria_mb <- valores_num * fator

  dados$currentTime <- as.POSIXct(dados$currentTime)

  # na.rm keeps one unparseable timestamp from turning every offset into NA.
  tempo_inicio <- min(dados$currentTime, na.rm = TRUE)

  dados$tempo_horas <- as.numeric(
    difftime(dados$currentTime, tempo_inicio, units = "hours")
  )

  dados
}


# Monitoring CSVs to plot, one per scenario. The directory is an absolute path
# on the author's machine; file.path() produces the same strings as before.
dir_dados <- "/home/rick/Downloads/monitoringCloudData"
arquivos <- file.path(
  dir_dados,
  c(
    "monitoringCloudData_0.1.csv",
    "monitoringCloudData_0.5.csv",
    "monitoringCloudData_1.csv",
    "monitoringCloudData_NONE.csv"
  )
)

# Panel titles, in the same order as `arquivos`.
titulos <- c(
  "Cenário: 0.1",
  "Cenário: 0.5",
  "Cenário: 1.0",
  "Cenário: NONE"
)

# Every file needs exactly one title; fail early instead of plotting a panel
# with an NA title.
stopifnot(length(arquivos) == length(titulos))

# 2 x 2 grid, filled row by row, with margins wide enough for axis titles.
layout(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE))
par(mar = c(4.5, 4.5, 2.5, 1.5))

# seq_along() rather than 1:length(): the latter iterates over c(1, 0) when
# the vector is empty.
for (i in seq_along(arquivos)) {
  dados_plot <- processar_dados(arquivos[i])

  plot(
    dados_plot$tempo_horas,
    dados_plot$memoria_mb,
    type = "l",
    main = titulos[i],
    xlab = "Tempo (horas)",
    ylab = "Memória Usada (MB)",
    col = "blue",
    lwd = 2
  )
}

Questão 7

library(plotly)
library(dplyr)

df_netflix <- read.csv("netflix_titles.csv")

# Ten countries with the most titles. Rows with a missing or empty country are
# dropped, as are rows whose `country` contains a comma.
top_10_countries <- df_netflix %>%
  filter(!is.na(country), country != "", !grepl(",", country)) %>%
  group_by(country) %>%
  summarise(total_conteudo = n()) %>%
  arrange(desc(total_conteudo)) %>%
  slice_head(n = 10)

# The figure size is given to plot_ly(): passing width/height to layout() is
# deprecated and raised a warning on every render.
fig <- plot_ly(
  data = top_10_countries,
  labels = ~country,
  values = ~total_conteudo,
  type = "pie",
  textinfo = "percent",
  textposition = "inside",
  insidetextorientation = "radial",
  width = 500,
  height = 300
)

fig <- fig %>% layout(
  title = "10 Países com mais conteúdo na netflix",
  showlegend = TRUE
)
fig

Questão 8

library(plotly)
library(dplyr)

df_netflix <- read.csv("netflix_titles.csv")

# Ten countries with the most titles, counting only rows whose `country` is
# present, non-empty and contains no comma.
top_10_countries <- df_netflix %>%
  filter(!is.na(country), country != "", !grepl(",", country)) %>%
  count(country, name = "total_conteudo", sort = TRUE) %>%
  slice_head(n = 10)

# Header and body of the table are built separately to keep the plot_ly() call
# short.
cabecalho <- list(
  values = c("País", "Total de conteúdos"),
  align = "center",
  fill = list(color = "grey"),
  font = list(color = "white", size = 12)
)

celulas <- list(
  values = list(top_10_countries$country, top_10_countries$total_conteudo),
  align = "center",
  fill = list(color = c("white", "lightgrey")),
  height = 30
)

fig <- plot_ly(type = "table", header = cabecalho, cells = celulas) %>%
  layout(title = "10 Países com mais conteúdo")
fig

Questão 9

library(plotly)
library(dplyr)

df_netflix <- read.csv("netflix_titles.csv")

# Number of titles per release decade and content type.
# - `release_year` is used arithmetically below, so an NA check is enough; the
#   former comparison with "" was redundant.
# - `.groups = "drop"` returns an ungrouped result directly, which avoids the
#   regrouping message from summarise() and the separate ungroup() step.
data_processed <- df_netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = floor(release_year / 10) * 10) %>%
  group_by(decade, type) %>%
  summarise(total_conteudo = n(), .groups = "drop") %>%
  filter(type %in% c("Movie", "TV Show")) %>%
  arrange(decade) %>%
  # A factor makes plotly treat decades as ordered categories on the x axis.
  mutate(decade = as.factor(decade))

data_movies <- data_processed %>%
  filter(type == "Movie")

data_tv_shows <- data_processed %>%
  filter(type == "TV Show")

# One line-and-marker trace per content type on shared axes.
fig <- plot_ly() %>%
  add_trace(
    data = data_movies,
    x = ~decade,
    y = ~total_conteudo,
    type = "scatter",
    mode = "lines+markers",
    name = "Filmes",
    line = list(color = "gold"),
    marker = list(color = "gold")
  ) %>%
  add_trace(
    data = data_tv_shows,
    x = ~decade,
    y = ~total_conteudo,
    type = "scatter",
    mode = "lines+markers",
    name = "Séries",
    line = list(color = "blue"),
    marker = list(color = "blue")
  ) %>%
  layout(
    title = "Quantidade de conteúdo por década",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de conteúdo")
  )
fig

Questão 10

library(plotly)
library(dplyr)


df_netflix <- read.csv("netflix_titles.csv")
generos_alvo <- c("Dramas", "Action & Adventure", "Comedies")

# Movies released from 2000 to 2010, counted per year and per first genre
# listed in `listed_in`, restricted to the genres in `generos_alvo`.
data_plot <- df_netflix %>%
  filter(type == "Movie") %>%
  filter(release_year >= 2000 & release_year <= 2010) %>%
  mutate(
    # vapply() always returns a character vector; sapply() returns list() when
    # no rows survive the filters above, which breaks the later steps.
    primeiro_genero = vapply(
      strsplit(listed_in, ", ", fixed = TRUE),
      function(x) x[1],
      character(1)
    )
  ) %>%
  filter(primeiro_genero %in% generos_alvo) %>%
  group_by(release_year, primeiro_genero) %>%
  summarise(total_filmes = n(), .groups = "drop") %>%
  # A factor makes plotly treat each year as its own category on the x axis.
  mutate(release_year = as.factor(release_year))

# Bars are grouped side by side within each year, one colour per genre.
fig <- plot_ly(
  data = data_plot,
  x = ~release_year,
  y = ~total_filmes,
  color = ~primeiro_genero,
  type = "bar"
)
fig <- fig %>% layout(
  title = "Filmes lançados por ano e gênero (2000-2010)",
  xaxis = list(title = "Ano de lançamento"),
  yaxis = list(title = "Quantidade de filmes"),
  barmode = "group"
)
fig