Este relatório apresenta as respostas às 10 questões da lista de exercícios, utilizando R base (questões 1 a 6) e Plotly (questões 7 a 10).
Gráficos com os vetores MRT e uso de layout e barras logarítmicas.
# MRT series, one value per entry of `clock`.
# NOTE(review): the suffixes (1F, 3F, ..., sem_F) presumably identify the
# experiment configuration - confirm against the exercise statement.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
# MRT_10F and MRT_15F are defined but not drawn by the plots in this section.
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Two stacked panels: the line chart on top takes twice the height of the
# bar chart below it.
layout(matrix(c(1, 2), nrow = 2, byrow = TRUE), heights = c(2, 1))

# Panel 1: three MRT series over `clock`; ylim spans all plotted series so
# none of them is clipped.
plot(
  clock, MRT_1F,
  type = "o", pch = 16,
  xlab = "clock (h)", ylab = "MRT",
  main = "MRT ao longo do tempo (exemplo)",
  ylim = range(c(MRT_1F, MRT_3F, MRT_5F))
)
lines(clock, MRT_3F, type = "o", pch = 17)
lines(clock, MRT_5F, type = "o", pch = 15)
legend(
  "topright",
  legend = c("MRT_1F", "MRT_3F", "MRT_5F"),
  lty = 1, pch = c(16, 17, 15), bty = "n"
)

# Panel 2: grouped bars (MRT_1F vs MRT_sem_F) on a logarithmic y axis.
vals1 <- MRT_1F
vals2 <- MRT_sem_F
bar_centers <- barplot(
  rbind(vals1, vals2),
  beside = TRUE, log = "y",
  col = c("#E6E6E6", "#666666"),
  names.arg = clock,
  xlab = "clock", ylab = "MRT (escala log)",
  main = "Barras MRT (escala log)"
)
legend(
  "topright",
  legend = c("MRT_1F", "MRT_sem_F"),
  fill = c("#E6E6E6", "#666666"), bty = "n"
)

# Return to a single-panel layout; otherwise every later base-graphics plot
# in the script would be drawn into this two-panel split.
layout(1)
Gráfico de barras empilhadas (Preço x Qualidade)
** Não consegui fazer! **
Histograma das temperaturas de maio do dataset airquality em Celsius.
# Density histogram of the May temperatures in `airquality`, converted with
# (Temp - 32) / 1.8, overlaid with a kernel density curve and a dashed line
# at the mean.
data(airquality)
may_data <- airquality[which(airquality$Month == 5), ]

# Drop missing readings first, then convert the remaining values.
may_temp <- may_data$Temp
temp_C <- (may_temp[!is.na(may_temp)] - 32) / 1.8

hist(
  temp_C,
  freq = FALSE, # density scale, so the density curve shares the y axis
  breaks = 8,
  col = "lightblue", border = "white",
  main = "Temperaturas de Maio - airquality",
  xlab = expression("Temperatura (" * degree * "C)"),
  ylab = "Densidade"
)
lines(density(temp_C), lwd = 2)
abline(v = mean(temp_C), col = "red", lty = 2, lwd = 2)
legend(
  "topright",
  legend = c("Curva de densidade", "Media"),
  col = c("black", "red"),
  lty = c(1, 2), lwd = 2,
  bty = "n"
)
Gráfico de pizza com percentuais por país (dataset Sales.txt).
** Não consegui ter acesso ao site para responder à questão. **
Boxplot do dataset InsectSprays sem outliers e caixas amarelas.
# Box plot of insect counts per spray from `InsectSprays`: yellow boxes,
# outlier points suppressed.
data(InsectSprays)
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE, # do not draw outlier points
  xlab = "Inseticida",
  ylab = "Contagem",
  main = "Contagem de insetos por inseticida"
)
Gráficos de uso de memória em função do tempo (monitoringCloudData).
# Change the working directory so that relative file names resolve there.
# NOTE(review): absolute, machine-specific Windows path; setwd() raises an
# error when the directory does not exist, so this line only works on the
# author's machine.
setwd("C:\\Users\\carlo\\Downloads")
# Utility: convert memory-size strings to megabytes.
#
# Args:
#   x: character vector; each element is a number optionally followed by a
#      unit, e.g. "1.5GB", "512 MB", "2048KB". Units are case-insensitive.
#
# Returns:
#   Numeric vector of the same length as `x`, in MB. TB, GB and KB are scaled
#   with binary factors (1 TB = 1024 GB = 1024 * 1024 MB, 1 KB = 1/1024 MB).
#   MB values are returned unchanged, and so are values whose unit is missing
#   or not recognised (they are taken to be MB already). Elements whose
#   numeric part cannot be parsed become NA.
convert_to_mb <- function(x) {
  x <- trimws(x)

  # Leading numeric part: keep the first run of digits/dots, drop what follows.
  num <- as.numeric(sub("([0-9.]+).*", "\\1", x))

  # Unit: whatever remains after removing digits, dots and whitespace.
  # "\\s" is not a whitespace class inside a bracket expression in R's
  # default regex engine, so the POSIX class [:space:] is used instead.
  unit <- toupper(gsub("[0-9.[:space:]]+", "", x))

  # Same precedence as an if / else-if chain: TB, then GB, then MB, then KB.
  # grepl() returns FALSE for NA, so NA inputs fall through and stay NA.
  is_tb <- grepl("TB", unit, fixed = TRUE)
  is_gb <- !is_tb & grepl("GB", unit, fixed = TRUE)
  is_mb <- !is_tb & !is_gb & grepl("MB", unit, fixed = TRUE)
  is_kb <- !is_tb & !is_gb & !is_mb & grepl("KB", unit, fixed = TRUE)

  # Default: the value is already in MB (MB, unknown unit, or no unit).
  mb <- num
  mb[is_tb] <- num[is_tb] * 1024 * 1024
  mb[is_gb] <- num[is_gb] * 1024
  mb[is_kb] <- num[is_kb] / 1024
  mb
}
# Read one monitoring CSV and add three derived columns.
#
# Args:
#   path: path to a CSV file that has `currentTime` and `usedMemory` columns.
#
# Returns:
#   The data frame read from `path`, plus:
#     currentTime_parsed - `currentTime` parsed as POSIXct (UTC) using the
#                          format "%Y-%m-%d %H:%M:%S"
#     time_h             - hours elapsed since the first row
#     usedMemory_MB      - `usedMemory` converted by convert_to_mb()
prepare_monitor <- function(path) {
  monitor <- read.csv(path, stringsAsFactors = FALSE)

  stamps <- as.POSIXct(
    monitor$currentTime,
    format = "%Y-%m-%d %H:%M:%S",
    tz = "UTC"
  )
  elapsed <- difftime(stamps, stamps[1], units = "hours")

  monitor$currentTime_parsed <- stamps
  monitor$time_h <- as.numeric(elapsed)
  monitor$usedMemory_MB <- convert_to_mb(monitor$usedMemory)
  monitor
}
# Load the four monitoring CSVs and draw used memory against elapsed time,
# one panel per file, in a 2 x 2 grid.
files <- c(
  "monitoringCloudData_0.1.csv",
  "monitoringCloudData_0.5.csv",
  "monitoringCloudData_1.csv",
  "monitoringCloudData_NONE.csv"
)
list_df <- lapply(files, prepare_monitor)

# par() returns the previous values of the settings it changes; keep them so
# the margins can be put back after plotting.
old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))
for (i in seq_along(list_df)) {
  d <- list_df[[i]]
  plot(
    d$time_h, d$usedMemory_MB,
    type = "l", lwd = 1.5,
    xlab = "Tempo (h)", ylab = "Used Memory (MB)",
    main = paste("Arquivo:", files[i])
  )
}
# Back to a single panel, and restore the margins as well so later
# base-graphics plots do not inherit the tighter `mar` set above.
par(mfrow = c(1, 1), mar = old_par$mar)
Gráfico de pizza (Plotly) com os 10 países que têm mais conteúdos (Netflix).
# Pie chart of the ten countries with the most Netflix titles, counting only
# titles whose `country` field names a single country.
# NOTE(review): setwd() points at a machine-specific directory.
setwd("C:/Users/carlo/Downloads")
options(encoding = "UTF-8")
library(dplyr)
library(plotly)

net <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

# Keep rows with exactly one country: not missing, not empty, and without a
# comma (a comma separates several countries in the same field).
one_country <- net %>%
  filter(
    !is.na(country),
    country != "",
    !grepl(",", country)
  )

top10 <- one_country %>%
  count(country, name = "total") %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

fig <- plot_ly(
  top10,
  type = "pie",
  labels = ~country,
  values = ~total,
  textinfo = "label+percent",
  hoverinfo = "label+value"
)
fig <- fig %>%
  layout(title = "Top 10 paises com mais conteudo (1 pais de origem)")
fig
Tabela Plotly com cabeçalho cinza e texto centralizado.
# Plotly table listing the rows of `top10`: grey header with white bold
# labels, every column centred.
table_fig <- plot_ly(
  type = "table",
  header = list(
    values = c("<b>Pais</b>", "<b>Total de conteudos</b>"),
    align = rep("center", 2),
    fill = list(color = "#666666"),
    font = list(color = "white", size = 12)
  ),
  cells = list(
    values = list(top10$country, top10$total),
    align = rep("center", 2)
  )
)
table_fig
Gráfico de linhas com séries (azul) e filmes (amarelo) por década.
# Line chart of the number of titles per release decade: TV shows in blue,
# movies in yellow.
net2 <- net %>%
  mutate(decade = paste0(floor(release_year / 10) * 10, "s"))

by_decade <- net2 %>%
  group_by(decade, type) %>%
  summarise(total = n(), .groups = "drop")

# Decade labels in ascending order. Sorting the labels as text is
# chronological as long as every release year has four digits.
dec_ordered <- by_decade %>%
  distinct(decade) %>%
  arrange(decade) %>%
  pull(decade)

movies <- by_decade %>% filter(type == "Movie")
tv <- by_decade %>% filter(type == "TV Show")

# The x values are text, so Plotly treats the axis as categorical and orders
# categories by first appearance across traces. A decade present only in the
# second trace would then be placed after the newest decade. Passing
# `dec_ordered` as the category array keeps the axis chronological.
fig <- plot_ly() %>%
  add_trace(
    x = tv$decade, y = tv$total,
    type = "scatter", mode = "lines+markers",
    name = "Series", line = list(color = "blue")
  ) %>%
  add_trace(
    x = movies$decade, y = movies$total,
    type = "scatter", mode = "lines+markers",
    name = "Filmes", line = list(color = "yellow")
  ) %>%
  layout(
    title = "Quantidade de conteudo por decada: Series (azul) vs Filmes (amarelo)",
    xaxis = list(
      title = "Decada",
      categoryorder = "array",
      categoryarray = dec_ordered
    ),
    yaxis = list(title = "Quantidade")
  )
fig
Gráfico de barras lado a lado para gêneros “Dramas”, “Action & Adventure” e “Comedies”.
# Grouped bar chart: movies released from 2000 to 2010, counted per year for
# three genres. A movie is classified by the first genre in `listed_in`.
net_movies <- net %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010)

# vapply() guarantees a character vector whatever the input size, whereas
# sapply() changes its return type on empty input.
net_movies <- net_movies %>%
  mutate(
    first_genre = trimws(
      vapply(strsplit(listed_in, ","), `[`, character(1), 1)
    )
  )

genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")

df_gen <- net_movies %>%
  filter(first_genre %in% genres_of_interest) %>%
  group_by(release_year, first_genre) %>%
  summarise(total = n(), .groups = "drop")

fig <- plot_ly(
  df_gen,
  x = ~release_year, y = ~total,
  color = ~first_genre,
  type = "bar"
) %>%
  layout(
    barmode = "group", # bars side by side rather than stacked
    title = "Filmes por genero (2000-2010)",
    xaxis = list(title = "Ano"),
    yaxis = list(title = "Qtd")
  )
fig