Análise de Dados
Questão 1
# Response-time series (MRT, presumably "mean response time" - confirm) for
# each fog configuration. Every vector has one value per entry of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Values used as the x axis of the response-time plots.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)
# Line chart: one response-time curve per fog configuration.
# NOTE(review): the layout reserves a second, narrower panel that nothing in
# this block draws into - confirm whether the legend was meant to go there.
layout(matrix(1:2, nrow = 1), widths = c(2, 1))

# Per-series styling, in drawing order. The first series creates the axes.
curvas <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
rotulos <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog")
cores_linha <- c("black", "yellow", "red", "blue", "magenta", "green")
simbolos <- c(4, 11, 1, 2, 5, 4)

# The y range is taken from MRT_1F only.
plot(
  clock, curvas[[1]],
  type = "o", col = cores_linha[1], pch = simbolos[1], lty = 1, lwd = 2,
  ylim = c(0, max(MRT_1F)),
  xlab = "Time between Things requests (seconds)",
  ylab = "Response Time (sec.)",
  main = ""
)
for (i in seq_along(curvas)[-1]) {
  lines(
    clock, curvas[[i]],
    type = "o", col = cores_linha[i], pch = simbolos[i], lty = 1, lwd = 2
  )
}
legend(
  "topright",
  legend = rotulos,
  col = cores_linha,
  pch = simbolos,
  lty = 1, lwd = 2, cex = 0.8
)

# Two fill colours for the paired bars, and a 2 x 3 grid of panels that is
# filled row by row.
cores <- c("#E6E6E6", "#666666")
grade_paineis <- matrix(seq_len(6), nrow = 2, byrow = TRUE)
layout(grade_paineis)
# Draw a grouped bar chart, with a log-scaled y axis, comparing one fog
# configuration against the series without fog.
#
# Args:
#   fog:       numeric vector with the series for the fog configuration.
#   label:     legend label for the `fog` series.
#   baseline:  numeric vector with the series without fog; must have the
#              same length as `fog`. Defaults to the global `MRT_sem_F`.
#   intervals: labels for the bar groups. Defaults to the global `clock`.
#   colors:    fill colours, baseline first and fog second. Defaults to the
#              global `cores`.
#
# Returns: the value of the final legend() call, invisibly.
#
# The inputs are arguments so callers can pass them explicitly instead of
# depending on whatever the globals hold at call time (`cores` is reassigned
# later in this script). The defaults keep existing two-argument calls
# working unchanged.
plot_comparativo <- function(fog, label,
                             baseline = MRT_sem_F,
                             intervals = clock,
                             colors = cores) {
  # rbind() would silently recycle a shorter series; fail loudly instead.
  stopifnot(length(fog) == length(baseline))

  dados <- rbind(baseline, fog)
  ymax <- max(dados)
  barplot(
    dados,
    beside = TRUE,
    log = "y",
    col = colors,
    names.arg = intervals,
    # A log axis cannot start at 0, so the lower limit is fixed at 0.1.
    ylim = c(0.1, ymax),
    xlab = "Time between Things requests",
    ylab = "Response time (s)",
    main = NULL
  )
  legend(
    "topright",
    legend = c("w/o Fog", label),
    fill = colors, border = "black", bty = "n"
  )
}
# One comparison panel per fog configuration, drawn in this order.
configuracoes <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F
)
for (rotulo in names(configuracoes)) {
  plot_comparativo(configuracoes[[rotulo]], rotulo)
}

Questão 2
# Percentages per quality rating (rows) and price range (columns).
dados <- matrix(
  c(
    53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# One colour per rating, in row order. The wide right margin leaves room for
# the legend, which is drawn outside the plot region.
cores <- c("yellow", "blue", "green")
par(mar = c(5, 4, 4, 10))

# Stacked bars; barplot's own legend is disabled in favour of the one below.
barplot(
  dados,
  beside = FALSE,
  legend.text = FALSE,
  col = cores,
  ylim = c(0, 100),
  main = "Qualidade das Refeições por Faixa de Preço",
  xlab = "Faixa de Preço",
  ylab = "Porcentagem (%)"
)

# Ratings are listed in reverse row order, each paired with its row colour.
# xpd = TRUE plus the negative inset pushes the legend into the right margin.
legend(
  "topright",
  title = "Classificação de Qualidade",
  legend = rev(rownames(dados)),
  fill = rev(cores),
  xpd = TRUE,
  inset = c(-0.46, 0)
)

Questão 3
# Load the data, keep the May rows and convert Fahrenheit to Celsius.
data(airquality)
dados_maio <- airquality[airquality$Month == 5, ]
temperaturas_celsius <- (dados_maio$Temp - 32) / 1.8

# Compute the histogram and the kernel density first so the y axis can be
# sized to hold both; otherwise the curve is clipped whenever it rises above
# the tallest bar.
hist_maio <- hist(temperaturas_celsius, plot = FALSE)
densidade_maio <- density(temperaturas_celsius)
limite_y <- max(hist_maio$density, densidade_maio$y)

# Density-scaled histogram (par() is left untouched here). `freq = FALSE` is
# spelled out instead of relying on partial matching of `prob`, and the y
# label says density because that is what the axis shows.
hist(
  temperaturas_celsius,
  freq = FALSE,
  col = "skyblue",
  border = "white",
  main = "Histograma das Temperaturas em Maio",
  xlab = "Temperatura (°C)",
  ylab = "Densidade",
  ylim = c(0, limite_y)
)
lines(densidade_maio, col = "darkred", lwd = 2)
legend(
  "topright",
  legend = c("Densidade"),
  col = c("darkred"),
  lwd = 2,
  bty = "n"
)

Questão 4
# Total sales per country and each country's share of the grand total.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)
vendas_por_pais <- aggregate(
  sales$SALES,
  by = list(Country = sales$COUNTRY),
  FUN = sum
)
colnames(vendas_por_pais) <- c("Pais", "Total_Vendas")
total_geral <- sum(vendas_por_pais$Total_Vendas)
vendas_por_pais$Porcentagem <- round(
  vendas_por_pais$Total_Vendas / total_geral * 100, 1
)

# The same "Country (xx%)" text labels both the slices and the legend.
rotulos_fatia <- paste0(
  vendas_por_pais$Pais, " (", vendas_por_pais$Porcentagem, "%)"
)

# NOTE(review): six colours are hard-coded - confirm the data has exactly six
# countries.
cores <- c("red", "lightgreen", "salmon", "gold", "violet", "grey")

# The wide right margin and xpd = TRUE let the legend sit outside the pie.
par(mar = c(1, 1, 4, 10), xpd = TRUE)
pie(
  vendas_por_pais$Total_Vendas,
  labels = rotulos_fatia,
  col = cores,
  main = "Porcentagem de Vendas por País"
)
legend(
  "right",
  legend = rotulos_fatia,
  fill = cores,
  title = "Países",
  inset = c(-0.3, 0),
  cex = 0.8
)

Questão 5
# Load the InsectSprays dataset and set the plot margins.
data(InsectSprays)
par(mar = c(5, 4, 4, 2))

# One box per spray type; outline = FALSE hides the outlier points.
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  main = "Contagem de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Número de Insetos"
)

Questão 6
# Read a monitoring CSV and derive elapsed time and memory usage in MB.
#
# Args:
#   file_path: path to a CSV file with `currentTime` and `usedMemory`
#              columns.
#
# Returns: the data frame read from `file_path`, with `currentTime`
#   converted to POSIXct and two columns added:
#     time_hours    - hours elapsed since the earliest `currentTime`;
#     usedMemory_mb - `usedMemory` as a plain, unnamed numeric vector of MB.
process_data <- function(file_path) {
  data <- read.csv(file_path)
  data$currentTime <- as.POSIXct(data$currentTime)
  start_time <- min(data$currentTime)
  data$time_hours <- as.numeric(
    difftime(data$currentTime, start_time, units = "hours")
  )

  # Vectorised unit conversion: strip everything except digits and the
  # decimal point, then scale by the unit found in the text. Values with no
  # TB/GB marker are taken to be in MB already.
  convert_to_mb <- function(memory_str) {
    value <- as.numeric(gsub("[^0-9.]", "", memory_str))
    is_tb <- grepl("TB", memory_str, ignore.case = TRUE)
    is_gb <- !is_tb & grepl("GB", memory_str, ignore.case = TRUE)
    # NOTE(review): TB uses a decimal factor (1000000) while GB uses a binary
    # one (1024). Kept as-is to preserve results - confirm which is intended.
    multiplier <- ifelse(is_tb, 1000000, ifelse(is_gb, 1024, 1))
    value * multiplier
  }

  # Calling the vectorised helper on the whole column gives a numeric vector
  # without names, whatever the number of rows.
  data$usedMemory_mb <- convert_to_mb(as.character(data$usedMemory))
  data
}
# Load the four monitoring runs, one per workload setting.
data_none <- process_data("monitoringCloudData_NONE.csv")
data_01 <- process_data("monitoringCloudData_0.1.csv")
data_05 <- process_data("monitoringCloudData_0.5.csv")
data_10 <- process_data("monitoringCloudData_1.csv")

# 2 x 2 grid of panels, filled row by row, with compact margins.
layout(matrix(1:4, nrow = 2, byrow = TRUE))
par(mar = c(4, 4, 2, 1))

# Each panel plots used memory against elapsed time for one run.
paineis <- list(
  list(dados = data_none, titulo = "Memory Analysis (None Workload)"),
  list(dados = data_01, titulo = "Memory Analysis (Workload of 0.1)"),
  list(dados = data_05, titulo = "Memory Analysis (Workload of 0.5)"),
  list(dados = data_10, titulo = "Memory Analysis (Workload of 1.0)")
)
for (painel in paineis) {
  plot(
    painel$dados$time_hours, painel$dados$usedMemory_mb,
    type = "l",
    main = painel$titulo,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)",
    col = "black"
  )
}

Questão 7
# Read the Netflix titles file.
# NOTE(review): read_csv() is not base R and no library() call is visible in
# this part of the file - confirm the providing package is loaded earlier.
netflix_data <- read_csv(
  file = "netflix_titles.csv",
  show_col_types = FALSE
)
# Return TRUE for each element of `country_str` that contains no comma.
# Works element-wise on a character vector.
has_single_country <- function(country_str) {
  contains_comma <- grepl(",", country_str, fixed = TRUE)
  !contains_comma
}
# Keep the rows whose `country` is present and contains no comma.
# has_single_country() already works on a whole vector, so it is called on
# the column directly instead of once per element through sapply().
pais_unico <- netflix_data[
  !is.na(netflix_data$country) & has_single_country(netflix_data$country),
]
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  sem_inicio <- sub("^\\s+", "", x)
  sub("\\s+$", "", sem_inicio)
}
# Count titles per country and keep the ten most frequent.
# trim() already works on a whole vector, so it is called on the column
# directly instead of once per element through sapply().
pais_unico$country_clean <- trim(pais_unico$country)
country_table <- table(pais_unico$country_clean)
country_df <- data.frame(
  country = names(country_table),
  count = as.numeric(country_table)
)

# Sort by descending count and take the first ten rows.
country_df <- country_df[order(-country_df$count), ]
top_10 <- head(country_df, 10)

# Share of each country within the top ten, not within the whole data set.
total_count <- sum(top_10$count)
top_10$percentage <- round(top_10$count / total_count * 100, 2)
# Pie chart of the ten countries in `top_10`.
# width/height are passed to plot_ly() because specifying them in layout()
# is deprecated and raised a warning.
plot_ly(
  top_10,
  labels = ~country,
  values = ~count,
  type = "pie",
  textinfo = "label+percent",
  insidetextorientation = "radial",
  textposition = "outside",
  texttemplate = "%{label}<br>%{percent}",
  width = 900,
  height = 600
) %>%
  layout(
    title = "Top 10 Países com Mais Conteúdo na Netflix",
    showlegend = TRUE,
    # Horizontal legend centred below the chart.
    legend = list(orientation = "h", x = 0.5, y = -0.2, xanchor = "center"),
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    autosize = FALSE
  )
Questão 8
# Plotly table with the country and count columns of `top_10`.
cabecalho <- list(
  values = c("País", "Total de conteúdos"),
  align = "center",
  fill = list(color = "gray"),
  font = list(color = "white", size = 14)
)
celulas <- list(
  values = list(top_10$country, top_10$count),
  align = "center"
)
plot_ly(type = "table", header = cabecalho, cells = celulas)
Questão 9
# Round each release year down to its decade (e.g. 1994 -> 1990).
netflix_data$decade <- floor(netflix_data$release_year / 10) * 10

# Number of rows per decade, computed separately for movies and TV shows.
so_filmes <- netflix_data[netflix_data$type == "Movie", ]
so_series <- netflix_data[netflix_data$type == "TV Show", ]
movies <- aggregate(type ~ decade, data = so_filmes, FUN = length)
series <- aggregate(type ~ decade, data = so_series, FUN = length)

# aggregate() names the result column after `type`; rename it to "count".
names(movies) <- c("decade", "count")
names(series) <- c("decade", "count")
# Line chart with one trace per content type, counts plotted per decade.
eixo_decada <- list(title = "Década", gridcolor = "lightgray")
eixo_quantidade <- list(title = "Qtd. Conteúdo", gridcolor = "lightgray")

grafico_decadas <- plot_ly()
grafico_decadas <- add_trace(
  grafico_decadas,
  x = series$decade,
  y = series$count,
  type = "scatter",
  mode = "lines+markers",
  name = "TV Series",
  line = list(color = "blue")
)
grafico_decadas <- add_trace(
  grafico_decadas,
  x = movies$decade,
  y = movies$count,
  type = "scatter",
  mode = "lines+markers",
  name = "Movies",
  line = list(color = "gold")
)

# The final call is left unassigned so the chart is printed.
layout(
  grafico_decadas,
  title = "Quantidade de Conteúdo por Década na Netflix",
  xaxis = eixo_decada,
  yaxis = eixo_quantidade,
  plot_bgcolor = "white"
)
Questão 10
# Movies with a release year from 2000 to 2010 (inclusive).
lancado_2000_2010 <- netflix_data$release_year >= 2000 &
  netflix_data$release_year <= 2010
filmes <- netflix_data[netflix_data$type == "Movie" & lancado_2000_2010, ]

# Label each movie with the first entry of its comma-separated `listed_in`.
# vapply() guarantees a character vector with one string per row, whereas
# sapply() changes its return type when the input is empty.
filmes$genero <- vapply(
  strsplit(as.character(filmes$listed_in), ","),
  function(x) trimws(x[1]),
  character(1)
)

# Keep three genres and count movies per release year and genre.
filmes_filtrados <- filmes[
  filmes$genero %in% c("Dramas", "Action & Adventure", "Comedies"),
]
contagem <- table(filmes_filtrados$release_year, filmes_filtrados$genero)
df_contagem <- as.data.frame(contagem)
names(df_contagem) <- c("Ano", "Genero", "Quantidade")
# Grouped bar chart: one group per year, one bar per genre.
cores_genero <- c(
  "Dramas" = "blue",
  "Action & Adventure" = "orange",
  "Comedies" = "green"
)
grafico_generos <- plot_ly(
  df_contagem,
  x = ~Ano,
  y = ~Quantidade,
  color = ~Genero,
  type = "bar",
  colors = cores_genero
)

# Tick marks every two years. The final call is left unassigned so the chart
# is printed.
layout(
  grafico_generos,
  title = "",
  xaxis = list(
    title = "Ano de Lançamento",
    tickmode = "array",
    tickvals = seq(2000, 2010, by = 2)
  ),
  yaxis = list(title = "Qtd. de Lançamentos"),
  barmode = "group",
  legend = list(title = "")
)