Exercício 12: Visualização de Dados

Questão 01

Parte 1

# Response times (seconds, per the y-axis label) for each fog configuration.
# Every vector holds one value per entry of `clock`, in the same order.
# NOTE(review): "MRT" presumably stands for mean response time - confirm.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Interval between requests in seconds; shared x axis of every chart below.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# The first series opens the plot, so the axis limits are derived from MRT_1F.
plot(
  x = clock,
  y = MRT_1F,
  type = "o",
  pch = 4,
  col = "black",
  xlab = "Time between Things request (seconds)",
  ylab = "Response Time (sec.)"
)

# Overlay the remaining series, each with its own marker and colour.
invisible(Map(
  function(values, symbol, colour) {
    lines(clock, values, type = "o", pch = symbol, col = colour)
  },
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c(11, 1, 2, 5, 4),
  c("yellow", "red", "blue", "pink", "green")
))

# Legend entries follow the drawing order: 1 Fog first, then the overlays.
legend(
  "topright",
  legend = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fogs"),
  col = c("black", "yellow", "red", "blue", "pink", "green"),
  pch = c(4, 11, 1, 2, 5, 4),
  lty = 1
)

Parte 2

# Pair the no-fog baseline with each fog configuration. In every matrix,
# row 1 is the baseline and row 2 the fog series, one column per `clock` value.
g2 <- rbind(MRT_sem_F, MRT_1F)
g3 <- rbind(MRT_sem_F, MRT_3F)
g4 <- rbind(MRT_sem_F, MRT_5F)
g5 <- rbind(MRT_sem_F, MRT_10F)
g6 <- rbind(MRT_sem_F, MRT_15F)

# Save the graphics parameters so the multi-panel layout can be undone below;
# otherwise every later base-graphics plot would be drawn into this 3x2 grid.
old_par <- par(no.readonly = TRUE)
layout(matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE))

# Light grey = baseline (row 1), dark grey = fog configuration (row 2).
bar_cols <- c("#E6E6E6", "#666666")

# Legend label of the fog series -> matrix to plot, in panel order.
panels <- list(
  "1 Fog" = g2,
  "3 Fogs" = g3,
  "5 Fogs" = g4,
  "10 Fogs" = g5,
  "15 Fogs" = g6
)

for (fog_label in names(panels)) {
  # Grouped bars on a logarithmic y axis.
  barplot(panels[[fog_label]],
          beside = TRUE,
          xlab = "Time between Things request",
          ylab = "Response Time (s)",
          names.arg = clock,
          log = "y",
          col = bar_cols)
  legend("topright",
         col = bar_cols,
         legend = c("w/o Fog", fog_label),
         pch = 15)
}

# Return to the previous (single-figure) layout for whatever is plotted next.
par(old_par)

Questão 02

# Quality ratings (legend entries) and meal price ranges (bar groups).
quality <- c("Good", "Very Good", "Excellent")
mealPrice <- c("$10-19", "$20-29", "$30-39", "$40-49")

# Percentages per price range: l1 = "Good", l2 = "Very Good", l3 = "Excellent".
l1 <- c(53.8, 33.9, 2.6, 0)
l2 <- c(43.6, 54.2, 60.5, 21.4)
l3 <- c(2.6, 11.9, 36.8, 78.6)

# One row per rating and one column per price range, as barplot() expects
# for grouped bars.
dados <- rbind(l1, l2, l3)

barplot(dados,
        beside = TRUE,
        names.arg = mealPrice,
        col = rainbow(3),
        ylab = "Percentuais",
        ylim = c(0, 100),
        main = "Meal Price")
# Same palette as the bars, in the same order as the rows of `dados`.
legend("topleft",
       legend = quality,
       col = rainbow(3),
       pch = 15,
       title = "Quality Rating")

Questão 03

# Keep only the May rows of the built-in `airquality` data set. Base
# subsetting is used so this step does not require dplyr to be attached;
# which() drops any NA comparison, matching dplyr::filter() semantics.
dados <- airquality[which(airquality$Month == 5), ]

# Fahrenheit-to-Celsius conversion.
dados$Temp <- (dados$Temp - 32) / 1.8

# Density-scaled histogram so the kernel density curve shares its y axis.
hist(dados$Temp,
     col = rainbow(10),
     density = 80,
     probability = TRUE,
     main = "Temperaturas - Maio",
     ylab = "Densidade",
     xlab = "Temperaturas")
lines(density(dados$Temp), col = "black")

Questão 04

# Download the sales table (first line holds the column names).
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Number of rows, used to size the colour palette.
qtd <- nrow(sales)

# Share of each row in the total sales, as a percentage with two decimals.
pct <- with(sales, round(SALES / sum(SALES) * 100, 2))
lbls <- paste0(pct, "%")

# Slices are labelled with percentages; the legend maps colours to countries.
pie(sales$SALES,
    labels = lbls,
    col = rainbow(qtd),
    main = "Vendas por País")
legend("bottomleft",
       legend = sales$COUNTRY,
       title = "País",
       pch = 15,
       col = rainbow(qtd))

Questão 05

# Built-in data set with the columns `count` and `spray`.
dados <- InsectSprays

# One box per spray type; points flagged as outliers are not drawn.
boxplot(count ~ spray,
        data = dados,
        main = "Quantidade de Insetos por Tipo de Inseticida",
        xlab = "Tipos de Inseticidas",
        ylab = "Quantidade",
        col = "yellow",
        outline = FALSE)

Questão 06

# Directory holding the monitoring CSV files; change it here only.
data_dir <- "C:/Users/Thiago/Documents"

# Read one monitoring log and prepare it for plotting.
#
# path: location of a comma-separated monitoring file that has at least the
#       columns `usedMemory` and `currentTime`.
# Returns the data frame with `usedMemory` as a number, plus three columns:
#   umUnit - the last two characters split off `usedMemory`,
#   ct     - `currentTime` parsed by anytime(),
#   ct2    - hours elapsed since the earliest `ct` in the file.
read_memory_log <- function(path) {
  # sep = -2 splits the last two characters (the unit suffix) off the value.
  mem <- read.csv2(file = path, sep = ",") %>%
    separate(col = "usedMemory", into = c("usedMemory", "umUnit"), sep = -2)

  mem$usedMemory <- as.double(mem$usedMemory)

  # Rescale GB readings by 1024; which() skips rows whose unit is NA.
  # NOTE(review): every other unit is left untouched, i.e. assumed to be MB
  # already (the plots label the axis in MB) - confirm against the data.
  in_gb <- which(mem$umUnit == "GB")
  mem$usedMemory[in_gb] <- mem$usedMemory[in_gb] * 1024

  mem$ct <- anytime(mem$currentTime)
  mem$ct2 <- difftime(mem$ct, min(mem$ct), units = "hours")
  mem
}

f1 <- read_memory_log(file.path(data_dir, "monitoringCloudData_NONE.csv"))
f2 <- read_memory_log(file.path(data_dir, "monitoringCloudData_0.1.csv"))
f3 <- read_memory_log(file.path(data_dir, "monitoringCloudData_0.5.csv"))
f4 <- read_memory_log(file.path(data_dir, "monitoringCloudData_1.csv"))

# Save the graphics parameters so the 2x2 grid does not outlive this section.
old_par <- par(no.readonly = TRUE)
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

# Panel title -> prepared log, in drawing order (row by row).
panels <- list(
  "Memory Analysis (None Workload)" = f1,
  "Memory Analysis (Workload of 0.1)" = f2,
  "Memory Analysis (Workload of 0.5)" = f3,
  "Memory Analysis (Workload of 1.0)" = f4
)

for (panel_title in names(panels)) {
  mem <- panels[[panel_title]]
  plot(mem$ct2, mem$usedMemory,
       type = "l",
       main = panel_title,
       xlab = "Time (hour)",
       ylab = "Used Memory (MB)",
       font.main = 2)
}

par(old_par)

Questão 07

# Load the Netflix catalogue (comma-separated file).
netflix_data <- read.csv2(
  file = "C:/Users/Thiago/Documents/netflix_titles.csv",
  sep = ","
)

# Drop titles whose country is missing or empty.
# NOTE(review): this overwrites `netflix_data`, so every later analysis that
# reads it also excludes these titles - confirm that this is intended.
netflix_data <- subset(netflix_data, !is.na(country) & country != "")

# Titles with a single country: no comma in the `country` field.
single_country <- netflix_data %>%
  filter(!is.na(country), !grepl(",", country))

# Ten countries with the most titles, largest first.
country_count <- single_country %>%
  count(country, sort = TRUE) %>%
  head(10)

# Pie chart with one slice per country, sized by its number of titles.
fig_pie <- plot_ly(
  data = country_count,
  labels = ~country,
  values = ~n,
  type = "pie"
)
fig_pie <- layout(
  fig_pie,
  title = "Top 10 Países com Mais Conteúdo na Netflix"
)
fig_pie

Questão 08

# Show the country ranking computed for the pie chart as a table.
table_data <- country_count

fig_table <- plot_ly(
  type = "table",
  # Header row: white text on a gray background with a thin black border.
  header = list(
    values = c("País", "Total de conteúdos"),
    align = rep("center", 2),
    font = list(size = 12, color = "white"),
    fill = list(color = "gray"),
    line = list(color = "black", width = 1)
  ),
  # Body: rbind() stacks the two columns as rows of one matrix, so the
  # counts are coerced to text alongside the country names.
  cells = list(
    values = rbind(table_data$country, table_data$n),
    align = rep("center", 2),
    font = list(size = 12),
    fill = list(color = c("white", "lightgray")),
    line = list(width = 1, color = "black")
  )
)
fig_table

Questão 09

# Decade of release: the year rounded down to a multiple of ten.
netflix_data[["decade"]] <- 10 * floor(netflix_data[["release_year"]] / 10)

# Number of titles for every (decade, type) pair; rows without a decade
# are left out.
decade_count <- netflix_data %>%
  filter(!is.na(decade)) %>%
  group_by(decade, type) %>%
  summarise(Total = n(), .groups = "drop")

# One line per content type, with markers at each decade.
fig_line <- plot_ly(
  data = decade_count,
  x = ~decade,
  y = ~Total,
  color = ~type,
  type = "scatter",
  mode = "lines+markers"
)
fig_line <- layout(
  fig_line,
  title = "Conteúdo Disponível por Década",
  xaxis = list(title = "Década"),
  yaxis = list(title = "Quantidade de Conteúdo")
)
fig_line

Questão 10

# Genres kept for the chart, as spelled in the `listed_in` column.
genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")

# Titles released from 2000 to 2010 whose first listed genre is one of the
# genres of interest. `genre` is the text before the first comma of
# `listed_in`; vapply() guarantees a character vector even when no rows match
# (sapply() would return a list there).
# NOTE(review): the chart title speaks of "Filmes" but `type` is not filtered;
# presumably these three labels only occur on movies - confirm.
filtered_data <- netflix_data %>%
  filter(release_year >= 2000 & release_year <= 2010) %>%
  mutate(genre = vapply(strsplit(listed_in, ","), `[`, character(1), 1)) %>%
  filter(genre %in% genres_of_interest)

# Translate the labels shown in the legend; "Dramas" is the same in both
# languages, and recode() leaves unlisted values unchanged.
filtered_data$genre <- recode(filtered_data$genre,
                              "Action & Adventure" = "Ação e Aventura",
                              "Comedies" = "Comédias")

# Number of titles per release year and genre.
genre_count <- filtered_data %>%
  group_by(release_year, genre) %>%
  summarise(Total = n(), .groups = "drop")

# Grouped bars: one group per year, one bar per genre.
fig_bar <- plot_ly(genre_count,
                   x = ~release_year,
                   y = ~Total,
                   color = ~genre,
                   type = "bar") %>%
  layout(title = "Quantidade de Filmes por Gênero (2000-2010)",
         barmode = "group",
         xaxis = list(title = "Ano"),
         yaxis = list(title = "Quantidade de Filmes"))
fig_bar