# Questões
# Questão 1
# MRT series for each configuration. Every vector has one value per entry of
# `clock`, in the same order.
# NOTE(review): only MRT_1F and MRT_3F are plotted below; the other series are
# kept as data but not drawn -- confirm that this is intended.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Draw the two charts side by side. par() returns the previous settings; they
# are restored at the end so that plots drawn later get a full page again
# instead of being squeezed into the leftover two-panel layout.
old_par <- par(mfrow = c(1, 2))

# Left panel: MRT_1F against clock as points joined by lines.
plot(clock, MRT_1F,
     type = "b", col = "blue", pch = 19,
     main = "Gráfico de Linhas",
     xlab = "Clock", ylab = "MRT")

# Right panel: MRT_1F and MRT_3F as grouped bars on a log-scaled y axis.
bar_fill <- c("#E6E6E6", "#666666")
barplot(rbind(MRT_1F, MRT_3F),
        beside = TRUE, col = bar_fill,
        log = "y", names.arg = clock,
        main = "Gráfico de Barras",
        xlab = "Clock", ylab = "MRT")
legend("topright", legend = c("MRT_1F", "MRT_3F"), fill = bar_fill)

par(old_par)

# Questão 2
# Frequency table: one row per quality level, one column per price range.
qualidade <- matrix(
  c(10, 20, 40,
    30, 20, 25,
    15, 10, 5),
  nrow = 3, byrow = TRUE,
  dimnames = list(
    c("Baixa", "Média", "Alta"),
    c("Barato", "Médio", "Caro")
  )
)

# One fill colour per quality level, shared by the bars and the legend.
quality_fill <- c("darkorange", "steelblue", "darkgreen")

# Stacked bars: one bar per price range, segments are the quality levels.
barplot(qualidade,
        beside = FALSE,
        col = quality_fill,
        main = "Qualidade da Refeição por Faixa de Preço",
        xlab = "Categoria de Preço",
        ylab = "Frequência")
legend("topright", legend = rownames(qualidade), fill = quality_fill)

# Questão 3
data("airquality")

# The chart is titled as the May temperatures, so keep only the rows with
# Month == 5 before converting from Fahrenheit to Celsius. (Previously every
# month in the dataset was included, contradicting the title.)
may_temp_f <- airquality$Temp[airquality$Month == 5]
temp_c <- (may_temp_f - 32) / 1.8

# Density-scaled histogram with a kernel density estimate drawn on top.
hist(temp_c,
     freq = FALSE, col = "lightblue",
     main = "Histograma das Temperaturas em Maio (°C)",
     xlab = "Temperatura (°C)", ylab = "Densidade")
lines(density(temp_c, na.rm = TRUE), col = "red", lwd = 2)

# Questão 4
# Link do dataset não disponível
# Questão 5
data("InsectSprays")

# One box per spray type. outline = FALSE suppresses the individual points
# that fall beyond the whiskers.
counts_by_spray <- split(InsectSprays$count, InsectSprays$spray)
boxplot(counts_by_spray,
        outline = FALSE,
        col = "yellow",
        main = "Contagem de Insetos por Tipo de Inseticida",
        xlab = "Tipo de Inseticida",
        ylab = "Contagem de Insetos")

# Questão 6
library(dplyr)
library(plotly)
library(stringr)
# Monitoring CSVs to load, one list element per file, in plotting order.
files <- as.list(c(
  "~/faculdade/monitoringCloudData_0.1.csv",
  "~/faculdade/monitoringCloudData_0.5.csv",
  "~/faculdade/monitoringCloudData_1.csv",
  "~/faculdade/monitoringCloudData_NONE.csv"
))
# Convert memory readings such as "512MB", "1.5GB" or "2TB" to megabytes.
#
# x: character (or coercible) vector of memory readings.
# Returns a numeric vector the same length as `x`. A reading with no
# recognised unit is used as-is when it is a plain number and becomes NA
# otherwise.
#
# The factors are the ones this script has always used: 1 TB = 1e6 MB and
# 1 GB = 1024 MB.
# NOTE(review): these mix decimal and binary units -- confirm against the
# exercise statement before changing them.
memory_to_mb <- function(x) {
  x <- as.character(x)

  # First run of digits/dots in each reading; NA where there is none.
  hit <- regexpr("[0-9.]+", x)
  found <- !is.na(hit) & hit > 0
  magnitude <- rep(NA_real_, length(x))
  magnitude[found] <- as.numeric(regmatches(x, hit))

  # Units are checked in the order TB, GB, MB; the first one present wins.
  is_tb <- grepl("TB", x, fixed = TRUE)
  is_gb <- !is_tb & grepl("GB", x, fixed = TRUE)
  is_mb <- !is_tb & !is_gb & grepl("MB", x, fixed = TRUE)

  multiplier <- rep(NA_real_, length(x))
  multiplier[is_tb] <- 1e6
  multiplier[is_gb] <- 1024
  multiplier[is_mb] <- 1
  mb <- magnitude * multiplier

  # Readings without a unit: only these are coerced directly, so values that
  # do carry a unit no longer trigger "NAs introduced by coercion".
  no_unit <- !(is_tb | is_gb | is_mb)
  mb[no_unit] <- suppressWarnings(as.numeric(x[no_unit]))

  # Report genuine conversion failures once, with a count.
  failed <- is.na(mb) & !is.na(x)
  if (any(failed)) {
    warning(sum(failed), " memory value(s) could not be converted to MB",
            call. = FALSE)
  }
  mb
}

# Read one monitoring CSV and add the columns used for plotting.
#
# file: path to a CSV with a `currentTime` column formatted as
#       "%Y-%m-%d %H:%M:%S" and a `usedMemory` column.
# Returns the data frame with `currentTime` parsed to POSIXct plus two new
# columns: `time_hours` (hours elapsed since the earliest timestamp) and
# `usedMemoryMB` (memory converted to megabytes).
# Stops with an error when either required column is missing.
read_and_process <- function(file) {
  df <- read.csv(file)

  missing_cols <- setdiff(c("currentTime", "usedMemory"), names(df))
  if (length(missing_cols) > 0) {
    stop("Missing column(s) in ", file, ": ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  df$currentTime <- as.POSIXct(df$currentTime, format = "%Y-%m-%d %H:%M:%S")

  # na.rm = TRUE: one unparseable timestamp must not turn every elapsed time
  # into NA.
  start_time <- min(df$currentTime, na.rm = TRUE)
  df$time_hours <- as.numeric(
    difftime(df$currentTime, start_time, units = "hours")
  )
  df$usedMemoryMB <- memory_to_mb(df$usedMemory)
  df
}
# Load every monitoring file; the result is in the same order as `files`.
data_list <- lapply(files, read_and_process)

# Label of each dataset, in the same order as `files`.
cloud_labels <- c("0.1", "0.5", "1", "NONE")

# Build the memory-usage line chart for one dataset.
#
# df:    data frame with `time_hours` and `usedMemoryMB` columns.
# label: text appended to the panel title and used as the trace name.
#
# The panel title is added as a paper-referenced annotation rather than as
# layout(title = ...): subplot() keeps a single layout title for the whole
# figure, so per-plot titles would be lost and only the last one shown.
make_memory_plot <- function(df, label) {
  plot_ly(df,
          x = ~time_hours, y = ~usedMemoryMB,
          type = "scatter", mode = "lines",
          name = paste("Cloud", label)) %>%
    layout(
      xaxis = list(title = "Tempo (h)"),
      yaxis = list(title = "Memória (MB)"),
      annotations = list(
        text = paste0("Uso de memória - Cloud ", label),
        x = 0.5, y = 1,
        xref = "paper", yref = "paper",
        xanchor = "center", yanchor = "bottom",
        showarrow = FALSE
      )
    )
}

plot_list <- Map(make_memory_plot, data_list, cloud_labels)

# 2 x 2 grid with shared axes; the margin is widened slightly so the panel
# titles of the bottom row do not touch the top row.
subplot(plot_list, nrows = 2, shareX = TRUE, shareY = TRUE, margin = 0.05)
# Questão 7
library(dplyr)
library(plotly)
# Load the Netflix catalogue, keeping text columns as character vectors.
nt <- read.csv("~/faculdade/netflix_titles.csv", stringsAsFactors = FALSE)

# Keep only titles attributed to exactly one country: drop missing or blank
# values and any entry that contains a comma.
nt2 <- nt %>%
  filter(!is.na(country)) %>%
  mutate(country_clean = trimws(country)) %>%
  filter(
    country_clean != "",
    !grepl(",", country_clean, fixed = TRUE)
  )

# The ten countries with the most titles, largest first.
top_countries <- nt2 %>%
  count(country_clean, name = "total", sort = TRUE) %>%
  slice_head(n = 10)

# Pie chart: slices show label and percentage, hover shows label and count.
p_pie <- top_countries %>%
  plot_ly(
    labels = ~country_clean,
    values = ~total,
    type = "pie",
    textinfo = "label+percent",
    hoverinfo = "label+value"
  ) %>%
  layout(title = "Top 10 países com mais conteúdos (1 país por conteúdo)")
p_pie
# Questão 8
library(plotly)
library(dplyr)
# Table data: the top-countries summary with display-ready column names.
# Relies on `top_countries` having `country_clean` and `total` columns.
tbl <- top_countries %>%
  filter(!is.na(country_clean) & !is.na(total)) %>%
  rename(`País` = country_clean, `Total de conteúdos` = total)

# Header row: white text on a gray background, centred.
header_spec <- list(
  values = c("País", "Total de conteúdos"),
  fill = list(color = "gray"),
  font = list(color = "white", size = 12),
  align = c("center", "center")
)

# Body: one list element per table column.
cell_spec <- list(
  values = list(tbl[["País"]], tbl[["Total de conteúdos"]]),
  align = "center"
)

table_plot <- plot_ly(type = "table", header = header_spec, cells = cell_spec)
table_plot
# Questão 9
library(dplyr)
library(plotly)
# Load the Netflix catalogue, keeping text columns as character vectors.
nt <- read.csv("~/faculdade/netflix_titles.csv", stringsAsFactors = FALSE)

# Number of titles per decade and content type. `type` is normalised to
# "Series" / "Movie" (case-insensitive, tolerating one trailing space);
# any other type is dropped.
nt_decade <- nt %>%
  filter(!is.na(release_year)) %>%
  mutate(
    decade = floor(release_year / 10) * 10,
    type_clean = case_when(
      tolower(type) %in% c("tv show", "tv show ") ~ "Series",
      tolower(type) %in% c("movie", "movie ") ~ "Movie",
      TRUE ~ type
    )
  ) %>%
  filter(type_clean %in% c("Series", "Movie")) %>%
  group_by(decade, type_clean) %>%
  summarise(n = n(), .groups = "drop")

# One row per decade with a column per content type; a decade/type pair with
# no titles is filled with 0.
decade_wide <- nt_decade %>%
  tidyr::pivot_wider(
    names_from = type_clean,
    values_from = n,
    values_fill = 0
  ) %>%
  arrange(decade)

# Axis labels such as "1990s" and the two series to draw.
x_axis <- paste0(decade_wide$decade, "s")
series_y <- decade_wide$Series
movie_y <- decade_wide$Movie

# Two line traces on a shared axis: series in blue, movies in yellow.
p_line <- plot_ly()
p_line <- add_lines(p_line,
                    x = x_axis, y = series_y,
                    name = "Series", line = list(color = "blue"))
p_line <- add_lines(p_line,
                    x = x_axis, y = movie_y,
                    name = "Movies", line = list(color = "yellow"))
p_line <- layout(p_line,
                 title = "Quantidade de conteúdo por década (Series vs Movies)",
                 xaxis = list(title = "Década"),
                 yaxis = list(title = "Quantidade de conteúdos"))
p_line
# Questão 10
library(dplyr)
library(plotly)
library(stringr)
# Load the Netflix catalogue, keeping text columns as character vectors.
nt <- read.csv("~/faculdade/netflix_titles.csv", stringsAsFactors = FALSE)

# Genres to compare and the bar colour assigned to each one.
genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")
colors <- c(
  "Dramas" = "darkblue",
  "Action & Adventure" = "darkred",
  "Comedies" = "darkgreen"
)

# Movies released from 2000 to 2010, tagged with the first genre listed in
# `listed_in`. vapply() guarantees a character column even when no row
# matches (sapply() would return an empty list in that case).
nt_movies <- nt %>%
  filter(
    tolower(type) == "movie",
    !is.na(release_year),
    release_year >= 2000,
    release_year <= 2010
  ) %>%
  mutate(
    first_genre = vapply(
      strsplit(as.character(listed_in), ","),
      function(x) trimws(x[1]),
      character(1)
    )
  )

nt_sel <- nt_movies %>% filter(first_genre %in% genres_of_interest)

# Movies per year and genre; year/genre pairs without any movie get n = 0 so
# every genre has a bar for every year.
counts <- nt_sel %>%
  group_by(release_year, first_genre) %>%
  summarise(n = n(), .groups = "drop") %>%
  tidyr::complete(
    release_year = 2000:2010,
    first_genre = genres_of_interest,
    fill = list(n = 0)
  ) %>%
  arrange(release_year)

# One bar trace per genre. The colour comes from `colors`, which was defined
# before but never passed to the traces.
p_bar <- plot_ly()
for (g in genres_of_interest) {
  genre_counts <- counts %>%
    filter(first_genre == g) %>%
    arrange(release_year)
  p_bar <- add_trace(p_bar,
                     x = genre_counts$release_year,
                     y = genre_counts$n,
                     name = g,
                     type = "bar",
                     marker = list(color = colors[[g]]))
}
p_bar <- layout(p_bar,
                barmode = "group",
                title = "Filmes por gênero (2000–2010)",
                xaxis = list(title = "Ano"),
                yaxis = list(title = "Quantidade de filmes"))
p_bar