Exercício 12 - Visualização de dados - Gabriel Gomes
Questão 1
# Response-time series for each configuration (presumably "MRT" = mean
# response time -- confirm). Element i of every vector is plotted against
# element i of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Series drawn on top of the "1 Fog" curve, with their symbol and colour.
overlay_series <- list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
overlay_pch <- c(11, 1, 2, 5, 4)
overlay_col <- c("yellow", "red", "blue", "purple", "green")

# The "1 Fog" series opens the plot and fixes the axes for all the others.
plot(
  clock, MRT_1F,
  type = "o", pch = 4,
  xlim = c(0, 3), ylim = c(0, 518),
  xlab = "Time between requests (seconds)",
  ylab = "Response Time (sec)"
)
for (k in seq_along(overlay_series)) {
  lines(clock, overlay_series[[k]],
        type = "o", pch = overlay_pch[k], col = overlay_col[k])
}
legend(
  "topright",
  pch = c(4, overlay_pch),
  col = c("black", overlay_col),
  legend = c("1 Fog", "3 Fog", "5 Fog", "10 Fog", "15 Fog", "w/o Fog")
)

# One grouped bar chart per fog configuration, each compared against the run
# without fog, arranged on a 3 x 2 panel grid (the sixth panel stays empty).

# Bar colours, in row order: "w/o Fog" first, then the fog configuration.
# They are passed to every barplot() so the bars always match the legend
# (barplot's default grey palette does not use these two colours).
bar_colours <- c("#E6E6E6", "#666666")

# Draws one comparison chart.
#   fog_series: response times of the fog configuration (same length as clock)
#   fog_label:  legend text for that configuration
#   log:        passed to barplot(); "y" gives a logarithmic response-time axis
plot_fog_comparison <- function(fog_series, fog_label, log = "") {
  barplot(
    rbind(MRT_sem_F, fog_series),
    log = log, col = bar_colours, beside = TRUE,
    xlab = "Time between things requests",
    ylab = "Response time (s)",
    names.arg = clock
  )
  legend("topright", pch = c(15, 15), col = bar_colours,
         legend = c("w/o Fog", fog_label))
}

# The matrix must hold exactly nrow * ncol panel ids; extra values are
# dropped with a warning and leave fewer panels than charts.
# graphics:: is explicit because plotly, attached later in this file, masks
# layout().
graphics::layout(matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE))
# layout.show(n = 5)  # uncomment to preview the panel grid

plot_fog_comparison(MRT_1F, "1 Fog", log = "y")
plot_fog_comparison(MRT_3F, "3 Fog")
plot_fog_comparison(MRT_5F, "5 Fog")
plot_fog_comparison(MRT_10F, "10 Fog")
plot_fog_comparison(MRT_15F, "15 Fog")

# Back to a single panel so later base-graphics plots get the whole device.
graphics::layout(1)

Questão 2
# Percentages per price range (columns) and rating (rows).
# Values are listed column by column, matching matrix()'s default fill order.
Meal_Price <- matrix(
  c(
    53.8, 43.6,  2.6,
    33.9, 54.2, 11.9,
     2.6, 60.5, 36.8,
     0,   21.4, 78.6
  ),
  nrow = 3,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# Stacked bars: one bar per price range, one segment per rating.
barplot(
  Meal_Price,
  names.arg = colnames(Meal_Price),
  legend.text = rownames(Meal_Price),
  xlab = "Preço",
  ylab = "Porcentagem (%)"
)

Questão 3
# Histogram of the May temperatures in Celsius with a kernel density overlay.
data(airquality)

# The dataset stores Temp in Fahrenheit; keep the converted column on the
# data frame.
airquality$Temp_Celsius <- (airquality$Temp - 32) / 1.8

# The chart is titled as May, so restrict to Month == 5 instead of plotting
# every month in the dataset.
temp_maio <- airquality$Temp_Celsius[airquality$Month == 5]

# probability = TRUE draws densities (so the density curve shares the scale),
# hence the y axis is labelled as density rather than frequency.
hist_plot <- hist(
  temp_maio,
  breaks = 10,
  col = "green",
  main = "Temperaturas no mês de Maio (°C)",
  xlab = "Temperatura (°C)",
  ylab = "Densidade",
  probability = TRUE
)
lines(density(temp_maio), col = "red", lwd = 2)
legend("topright", legend = c("Curva de Densidade"), col = c("red"), lwd = 2)

Questão 4
library(dplyr)
# Pie chart with each country's share of total sales.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Inside mutate() columns are referenced directly; reaching back into
# `sales$...` bypasses the data mask and breaks as soon as the pipeline
# filters or groups the rows.
sales_percent <- sales %>%
  mutate(Percentage = (SALES / sum(SALES)) * 100)

# One colour per row, shared by the slices and the legend.
cores_paises <- rainbow(nrow(sales_percent))

pie(
  sales_percent$Percentage,
  labels = paste(sales_percent$COUNTRY, "\n", round(sales_percent$Percentage, 1), "%"),
  col = cores_paises
)
title("% de Vendas por País")
legend("topright", legend = sales_percent$COUNTRY, fill = cores_paises, title = "País")

Questão 5
# Box plot of insect counts for each spray type in the InsectSprays dataset.
data(InsectSprays)

boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,  # outlier points are not drawn
  col = "yellow",
  main = "Contagens de Insetos por Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos"
)

Questão 6
library(patchwork)
library(ggplot2)
# Monitoring logs, one CSV per workload setting (the setting is the file-name
# suffix: 0.1, 0.5, 1 and NONE).
prim  <- read.csv(file = "./monitoringCloudData_0.1.csv")
seg   <- read.csv(file = "./monitoringCloudData_0.5.csv")
terc  <- read.csv(file = "./monitoringCloudData_1.csv")
quart <- read.csv(file = "./monitoringCloudData_NONE.csv")
# Normalises a monitoring data frame for plotting.
#
# df: data frame with
#   - usedMemory:  values such as "512MB" or "1.5GB"
#   - currentTime: timestamps formatted as "%Y-%m-%d %H:%M:%OS"
#
# Returns df with
#   - usedMemory converted to numeric megabytes (GB values multiplied by 1024),
#   - currentTime parsed to POSIXct,
#   - a new hours_since_start column: hours elapsed since the earliest
#     parseable timestamp.
convert <- function(df) {
  raw_memory <- as.character(df$usedMemory)
  is_gb <- grepl("GB", raw_memory)

  # Strip the unit suffix once and scale the GB rows numerically, instead of
  # writing numbers back into a character column and re-parsing them.
  memory_mb <- as.numeric(sub("MB", "", sub("GB", "", raw_memory)))
  memory_mb[is_gb] <- memory_mb[is_gb] * 1024
  df$usedMemory <- memory_mb

  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%OS")

  # na.rm = TRUE: a single unparseable timestamp must not turn the reference
  # time, and with it every elapsed-hours value, into NA.
  start_time <- min(df$currentTime, na.rm = TRUE)
  df$hours_since_start <- as.numeric(
    difftime(df$currentTime, start_time, units = "hours")
  )

  df
}
# Normalise memory units and timestamps of the frames handled by convert().
prim <- convert(prim)
terc <- convert(terc)
quart <- convert(quart)

# Memory over time for `prim`, which is read from monitoringCloudData_0.1.csv,
# so the panel is titled as the 0.1 workload.
graph_prim <- ggplot(prim, aes(x = hours_since_start, y = usedMemory, group = 1)) +
  geom_line() +
  labs(
    title = expression(bold("Memory Analysis (Workload 0.1)")),
    x = "Time (hour)",
    y = "Used Memory (MB)"
  ) +
  # Black frame around the extent of the data.
  annotate(
    "rect",
    xmin = min(prim$hours_since_start), xmax = max(prim$hours_since_start),
    ymin = min(prim$usedMemory), ymax = max(prim$usedMemory),
    color = "black", fill = NA, linetype = "solid"
  ) +
  theme(panel.background = element_rect(fill = "white")) +
  scale_x_continuous(breaks = seq(0, 70, 10)) +
  scale_y_continuous(breaks = c(500, 1500, 2500, 3500))
# `seg` is prepared inline rather than through convert(): elapsed hours are
# measured from the first row's timestamp and incomplete rows are dropped.
gb_indices <- grepl("GB", seg$usedMemory)
seg$usedMemory[gb_indices] <- as.numeric(sub("GB", "", seg$usedMemory[gb_indices])) * 1024
seg$usedMemory <- as.numeric(sub("MB", "", seg$usedMemory))
seg$currentTime <- as.POSIXct(seg$currentTime, format = "%Y-%m-%d %H:%M:%OS")
seg$hours_since_start <- as.numeric(difftime(seg$currentTime, seg$currentTime[1], units = "hours"))
seg <- seg[complete.cases(seg$hours_since_start, seg$usedMemory), ]

# Memory over time for `seg`, which is read from monitoringCloudData_0.5.csv,
# so the panel is titled as the 0.5 workload.
graph_seg <- ggplot(seg, aes(x = hours_since_start, y = usedMemory, group = 1)) +
  geom_line() +
  labs(
    title = expression(bold("Memory Analysis (Workload 0.5)")),
    x = "Time (hour)",
    y = "Used Memory (MB)"
  ) +
  # Single black frame around the extent of the data. annotate() draws it
  # once; a geom_rect() with aes() would draw one rectangle per data row.
  annotate(
    "rect",
    xmin = min(seg$hours_since_start), xmax = max(seg$hours_since_start),
    ymin = min(seg$usedMemory), ymax = max(seg$usedMemory),
    color = "black", fill = NA, linetype = "solid"
  ) +
  theme(panel.background = element_rect(fill = "white")) +
  scale_x_continuous(breaks = seq(0, 70, 10)) +
  scale_y_continuous(breaks = c(400, 800, 1200))
# Memory over time for `terc`, which is read from monitoringCloudData_1.csv,
# so the panel is titled as the 1.0 workload.
graph_terc <- ggplot(terc, aes(x = hours_since_start, y = usedMemory, group = 1)) +
  geom_line() +
  labs(
    title = expression(bold("Memory Analysis (Workload 1.0)")),
    x = "Time (hour)",
    y = "Used Memory (MB)"
  ) +
  # Black frame around the extent of the data.
  annotate(
    "rect",
    xmin = min(terc$hours_since_start), xmax = max(terc$hours_since_start),
    ymin = min(terc$usedMemory), ymax = max(terc$usedMemory),
    color = "black", fill = NA, linetype = "solid"
  ) +
  theme(panel.background = element_rect(fill = "white")) +
  scale_x_continuous(breaks = seq(0, 70, 10)) +
  scale_y_continuous(breaks = c(242, 246, 250, 254))
# Memory over time for `quart`, which is read from
# monitoringCloudData_NONE.csv, so the panel is titled as the run without
# workload.
graph_quart <- ggplot(quart, aes(x = hours_since_start, y = usedMemory, group = 1)) +
  geom_line() +
  labs(
    title = expression(bold("Memory Analysis (None Workload)")),
    x = "Time (hour)",
    y = "Used Memory (MB)"
  ) +
  # Black frame around the extent of the data.
  annotate(
    "rect",
    xmin = min(quart$hours_since_start), xmax = max(quart$hours_since_start),
    ymin = min(quart$usedMemory), ymax = max(quart$usedMemory),
    color = "black", fill = NA, linetype = "solid"
  ) +
  theme(panel.background = element_rect(fill = "white")) +
  scale_x_continuous(breaks = seq(0, 70, 10)) +
  scale_y_continuous(breaks = c(96, 98, 102, 106))
# Combine the four panels into a 2 x 2 grid, filled row by row.
# The grid is declared with patchwork's plot_layout(): base-graphics layout()
# and par() settings are not used when ggplot/patchwork objects are drawn.
all_plot <- (graph_quart + graph_prim + graph_seg + graph_terc) +
  plot_layout(ncol = 2)
all_plot

Questão 7
library(dplyr)
library(plotly)
# Donut chart of the ten countries with the most titles, counting only titles
# that list a single country.
netflix_titles <- read.csv(
  file = "netflix_titles.csv",
  header = TRUE,
  strip.white = TRUE,
  na.strings = ""
)

# Keep rows with exactly one country: drop missing/empty values and
# comma-separated lists. The filter is idempotent, so it is applied once.
netflix_titles <- netflix_titles %>%
  filter(!is.na(country) & country != "" & !grepl(",", country))

top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# width/height are given to plot_ly(); passing them to layout() is deprecated
# in plotly and triggers a warning.
plot_ly(
  labels = top_countries$country, values = top_countries$count, type = "pie",
  textinfo = "label+percent", insidetextfont = list(color = "#FFFFFF"),
  hoverinfo = "label+percent", hole = 0.6,
  width = 900, height = 800
) %>%
  layout(
    title = "Top 10 Países com Mais Conteúdo na Netflix",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
Questão 8
library(dplyr)
library(plotly)
# Ten most frequent values of `country` in netflix_titles, most frequent first.
top_countries <- netflix_titles %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# Header row: white text on a dark grey fill.
cabecalho <- list(
  values = c("País", "Total de Conteúdos"),
  fill = list(color = "#666666"),
  align = c("center"),
  font = list(color = "white", size = 15)
)

# Body: one column with the country names, one with the counts.
celulas <- list(
  values = list(top_countries$country, top_countries$count),
  align = c("center"),
  font = list(color = c("black", "black"), size = 12)
)

tabela <- plot_ly(type = "table", header = cabecalho, cells = celulas)
tabela
Questão 9
library(dplyr)
library(plotly)
library(stringr)
# Line chart of the number of movies and TV shows per release decade.
netflix <- read.csv("netflix_titles.csv")

# Decade = release year rounded down to a multiple of 10.
netflix <- netflix %>%
  mutate(decade = 10 * (release_year %/% 10))

filmesPorDecada <- netflix %>%
  filter(type == "Movie") %>%
  group_by(decade) %>%
  summarise(qtd_conteúdo = n())

seriesPorDecada <- netflix %>%
  filter(type == "TV Show") %>%
  group_by(decade) %>%
  summarise(num_series = n())

# left_join keeps only the decades that have movies; a decade without TV
# shows gets NA in num_series.
seriesFilmes <- left_join(filmesPorDecada, seriesPorDecada, by = "decade")

# NOTE(review): overwrites the TV-show count of the second row, by position,
# with a literal 1. Confirm this value against the data; a positional patch
# silently hits another decade if the input file changes.
seriesFilmes$num_series[2] <- 1

# The trace type is stated explicitly so plotly does not have to infer it
# from `mode` (which it reports with a "No trace type specified" message).
fig <- plot_ly(seriesFilmes, x = ~decade) %>%
  add_trace(y = ~num_series, name = 'TV Series',
            type = 'scatter', mode = 'lines+markers') %>%
  add_trace(y = ~qtd_conteúdo, name = 'Movies',
            type = 'scatter', mode = 'lines+markers')
fig
Questão 10
library(stringr)
# Grouped bar chart: movies released per year from 2000 to 2010 whose first
# listed category is Comedies, Dramas or Action & Adventure.
netflix <- read.csv("netflix_titles.csv")

df_filtrado <- netflix %>%
  filter(between(release_year, 2000, 2010) & type == "Movie") %>%
  select(release_year, listed_in)

# listed_in may hold several comma-separated categories; only the first one
# is counted. .groups = "drop" returns an ungrouped table, so the later
# select()/join steps do not carry a hidden grouping (and summarise() does
# not emit its regrouping message).
contagem_categorias_por_ano <- df_filtrado %>%
  mutate(primeira_categoria = ifelse(str_detect(listed_in, ","), word(listed_in, 1, sep = ", "), listed_in)) %>%
  group_by(release_year, primeira_categoria) %>%
  summarise(num_filmes = n(), .groups = "drop")

cats <- c("Action & Adventure", "Comedies", "Dramas")
df_final <- contagem_categorias_por_ano %>%
  filter(primeira_categoria %in% cats)

# One count column per category, aligned on every year from 2000 to 2010.
# Years without movies in a category stay NA. Categories are matched exactly
# rather than by regular expression.
comedias <- df_final %>%
  filter(primeira_categoria == "Comedies") %>%
  select(release_year, num_filmes) %>%
  rename(Comedies = num_filmes)

dramas <- df_final %>%
  filter(primeira_categoria == "Dramas") %>%
  select(release_year, num_filmes) %>%
  rename(Dramas = num_filmes)

acao_aventura <- df_final %>%
  filter(primeira_categoria == "Action & Adventure") %>%
  select(release_year, num_filmes) %>%
  rename(ActionAdventure = num_filmes)

df_grafico <- data.frame(release_year = 2000:2010) %>%
  left_join(comedias, by = "release_year") %>%
  left_join(dramas, by = "release_year") %>%
  left_join(acao_aventura, by = "release_year")

fig <- plot_ly(df_grafico, x = ~release_year, y = ~Comedies, type = 'bar', name = 'Comédia') %>%
  add_trace(y = ~Dramas, name = 'Drama') %>%
  add_trace(y = ~ActionAdventure, name = 'Ação e Aventura') %>%
  layout(
    yaxis = list(title = 'Qnt. de Lançamentos'),
    xaxis = list(title = 'Ano de Lançamento')
  )
fig