Questões
Questão 1
# MRT measurements for each configuration; every vector holds one value per
# entry of `clock`, in the same order.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Series and colours in drawing order; the list names double as legend labels.
mrt_series <- list(
  MRT_1F = MRT_1F,
  MRT_3F = MRT_3F,
  MRT_5F = MRT_5F,
  MRT_10F = MRT_10F,
  MRT_15F = MRT_15F,
  MRT_sem_F = MRT_sem_F
)
mrt_colors <- c("red", "blue", "green", "purple", "orange", "black")

# Two panels side by side: line chart on the left, bar chart on the right.
layout(matrix(c(1, 2), nrow = 1, ncol = 2, byrow = TRUE))

# Left panel: every MRT series against the clock. The first series opens the
# plot (and therefore fixes the axis ranges); the others are drawn on top.
plot(
  clock, mrt_series[[1]],
  type = "o",
  col = mrt_colors[1],
  main = "MRT vs Clock",
  xlab = "Clock (s)",
  ylab = "MRT"
)
invisible(Map(
  function(series, series_color) {
    lines(clock, series, type = "o", col = series_color)
  },
  mrt_series[-1],
  mrt_colors[-1]
))
legend("topright", legend = names(mrt_series), col = mrt_colors, lty = 1)

# Right panel: MRT_1F only, as bars on a logarithmic y-axis.
barplot(
  MRT_1F,
  names.arg = clock,
  log = "y",
  border = "#666666",
  col = "#E6E6E6",
  main = "MRT_1F - Escala Logarítmica",
  xlab = "Clock (s)",
  ylab = "MRT"
)

Questão 2
# Question 2
# Share (%) of each quality rating within every meal price range.
# The values are listed one price range at a time (Good, Very Good, Excellent),
# each triple adding up to about 100, so the matrix has to be filled column by
# column: rows are the ratings and columns are the price ranges. Filling it by
# row would mix values from different price ranges in the same bar.
meal_quality <- matrix(
  c(
    53.8, 43.6, 2.6,
    33.9, 54.2, 11.9,
    2.6, 60.5, 36.8,
    0, 21.4, 78.6
  ),
  nrow = 3,
  byrow = FALSE
)
colnames(meal_quality) <- c("$10-19", "$20-29", "$30-39", "$40-49")
rownames(meal_quality) <- c("Good", "Very Good", "Excellent")
# Stacked bar chart: one bar per price range, one segment per rating.
# `legend.text` is spelled out instead of relying on partial matching of
# `legend`.
barplot(
  meal_quality,
  beside = FALSE,
  col = c("lightblue", "lightgreen", "lightcoral"),
  legend.text = rownames(meal_quality),
  xlab = "Meal Price",
  ylab = "PERCENTAGE (%)",
  main = "Meal Quality Rating by Price"
)

Questão 3
# Convert the temperatures to Celsius and keep the readings for month 5 (May).
airquality$Temp_C <- (airquality$Temp - 32) / 1.8
temp_may <- airquality$Temp_C[airquality$Month == 5]
# Kernel density estimate and bin densities, computed up front so the y-axis
# can be made tall enough for both the bars and the curve.
may_density <- density(temp_may)
may_bins <- hist(temp_may, breaks = 10, plot = FALSE)
# Histogram on the density scale (freq = FALSE): density() returns densities,
# so overlaying it on a count-scale histogram would squash the curve against
# the x-axis.
hist(
  temp_may,
  breaks = 10,
  freq = FALSE,
  col = "lightblue",
  ylim = c(0, max(may_bins$density, may_density$y)),
  main = "Histogram of May Temperatures (Celsius)",
  xlab = "Temperature (°C)",
  ylab = "Density"
)
# Density curve
lines(may_density, col = "red", lwd = 2)

Questão 4
library(readr)
# Download the whitespace-separated sales table (columns COUNTRY and SALES).
sales <- read_table("https://training-course-material.com/images/8/8f/Sales.txt")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## COUNTRY = col_character(),
## SALES = col_double()
## )
# Total sales per country, plus each country's share of the grand total
# rounded to one decimal place.
sales_summary <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)
total_sales <- sum(sales_summary$SALES)
sales_summary$percentage <- round(100 * sales_summary$SALES / total_sales, 1)
# One colour per country, shared by the pie slices and the legend.
country_colors <- rainbow(nrow(sales_summary))
slice_labels <- paste(sales_summary$COUNTRY, sales_summary$percentage, "%")
# Pie chart of the shares, labelled with country and percentage.
pie(
  sales_summary$percentage,
  labels = slice_labels,
  col = country_colors,
  main = "Total Sales by Country"
)
legend("topright", legend = sales_summary$COUNTRY, fill = country_colors)

Questão 5
# Load the InsectSprays data set into the workspace.
data(InsectSprays)
# One box per spray type; points beyond the whiskers are not drawn
# (outline = FALSE).
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE,
  xlab = "Spray Type",
  ylab = "Insect Count",
  main = "Insect Counts by Spray Type"
)

Questão 6
library(readr)
# Load the four monitoring logs, one CSV per run. The `##` lines after each
# call are the console output recorded when the script was run; they show the
# row count and the column types readr guessed for that file.

# Run plotted further down as "Workload of 1.0".
monitoringCloudData_1 <- read_csv("monitoringCloudData_1.csv")
## Rows: 4303 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm (1): currentTime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Run plotted further down as "Workload of 0.5".
# Unlike the other three files, `currentTime` was read as character here (see
# the `chr (12)` line below), so it still has to be parsed afterwards.
monitoringCloudData_05 <- read_csv("monitoringCloudData_0.5.csv")
## Rows: 4303 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): currentTime, totalMemory, availableMemory, usedMemory, totalSwap, ...
## dbl (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percenta...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Run plotted further down as "Workload of 0.1".
monitoringCloudData_01 <- read_csv("monitoringCloudData_0.1.csv")
## Rows: 4275 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm (1): currentTime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Run plotted further down as "None Workload".
monitoringCloudData_NONE <- read_csv("monitoringCloudData_NONE.csv")
## Rows: 4302 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm (1): currentTime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert `currentTime` to POSIXct and add `time_in_hours`, the time elapsed
# since the earliest valid timestamp of the same data set.
#
# All four data sets go through exactly the same steps:
# * the time zone is fixed, so parsing does not depend on the session's local
#   time zone or its daylight-saving rules;
# * timestamps that do not match the expected format become NA (the 0.5 data
#   set contains such rows), so the starting time is always taken with
#   na.rm = TRUE. Otherwise a single NA would turn the whole `time_in_hours`
#   column into NA.
#
# monitoring_data: data frame with a `currentTime` column (character or
#   POSIXct).
# Returns the same data frame with `currentTime` parsed and `time_in_hours`
#   added; rows whose timestamp could not be parsed keep NA in both columns.
add_elapsed_hours <- function(monitoring_data) {
  monitoring_data$currentTime <- as.POSIXct(
    monitoring_data$currentTime,
    format = "%Y-%m-%d %H:%M:%S",
    tz = "UTC"
  )
  start_time <- min(monitoring_data$currentTime, na.rm = TRUE)
  monitoring_data$time_in_hours <- as.numeric(
    difftime(monitoring_data$currentTime, start_time, units = "hours")
  )
  monitoring_data
}

monitoringCloudData_1 <- add_elapsed_hours(monitoringCloudData_1)
monitoringCloudData_05 <- add_elapsed_hours(monitoringCloudData_05)
monitoringCloudData_01 <- add_elapsed_hours(monitoringCloudData_01)
monitoringCloudData_NONE <- add_elapsed_hours(monitoringCloudData_NONE)
#' Convert memory-size strings to megabytes
#'
#' The numeric part is whatever remains after removing every character that is
#' not a digit or a dot; it is then scaled according to the unit found in the
#' text. Units are matched case-sensitively ("KB", "MB", "GB", "TB") and use
#' binary multiples throughout (1 GB = 1024 MB, 1 TB = 1024 GB), so all units
#' are on the same scale.
#'
#' @param memory_str Character vector (or single string) such as "512MB" or
#'   "1.5GB".
#' @return Numeric vector of the same length as `memory_str`, in MB.
#'   `NA_real_` for missing or empty entries and for entries without a
#'   recognised unit.
convert_to_mb <- function(memory_str) {
  size_text <- as.character(memory_str)
  value <- as.numeric(gsub("[^0-9\\.]", "", size_text))

  # Later assignments overwrite earlier ones, so when a string mentions more
  # than one unit the priority is GB, then TB, then MB, then KB.
  unit_factor <- rep(NA_real_, length(size_text))
  unit_factor[grepl("KB", size_text, fixed = TRUE)] <- 1 / 1024
  unit_factor[grepl("MB", size_text, fixed = TRUE)] <- 1
  unit_factor[grepl("TB", size_text, fixed = TRUE)] <- 1024 * 1024
  unit_factor[grepl("GB", size_text, fixed = TRUE)] <- 1024

  # NA value (missing/empty text) or NA factor (unknown unit) yields NA_real_.
  value * unit_factor
}
# Add `usedMemory_MB`: the `usedMemory` text of every row converted to MB.
# vapply() with numeric(1) always yields a numeric vector, whereas the type
# returned by sapply() depends on its input (e.g. a logical vector if every
# entry were NA). USE.NAMES = FALSE keeps the original strings from being
# attached to the new column as element names.
monitoringCloudData_1$usedMemory_MB <- vapply(
  monitoringCloudData_1$usedMemory, convert_to_mb, numeric(1),
  USE.NAMES = FALSE
)
monitoringCloudData_05$usedMemory_MB <- vapply(
  monitoringCloudData_05$usedMemory, convert_to_mb, numeric(1),
  USE.NAMES = FALSE
)
monitoringCloudData_01$usedMemory_MB <- vapply(
  monitoringCloudData_01$usedMemory, convert_to_mb, numeric(1),
  USE.NAMES = FALSE
)
monitoringCloudData_NONE$usedMemory_MB <- vapply(
  monitoringCloudData_NONE$usedMemory, convert_to_mb, numeric(1),
  USE.NAMES = FALSE
)
# Sanity checks: first elapsed-time values of each data set, followed by the
# number of timestamps that could not be parsed. The `##` lines are the output
# recorded when the script was run.
head(monitoringCloudData_05$time_in_hours)
## [1] 0.00000000 0.01694444 0.03361111 0.05027778 0.06694444 0.08388889
head(monitoringCloudData_1$time_in_hours)
## [1] 0.00000000 0.01673960 0.03346666 0.05020650 0.06694222 0.08368178
head(monitoringCloudData_01$time_in_hours)
## [1] 0.00000000 0.01674010 0.03346715 0.05020156 0.06694109 0.08368067
head(monitoringCloudData_NONE$time_in_hours)
## [1] 0.00000000 0.01674055 0.03348025 0.05022017 0.06696019 0.08370020
sum(is.na(monitoringCloudData_05$currentTime))
## [1] 194
sum(is.na(monitoringCloudData_1$currentTime))
## [1] 0
# Draw one "used memory over time" line chart for a monitoring data set.
# samples: data frame with `time_in_hours` and `usedMemory_MB` columns.
# line_color: colour of the line.
# panel_title: main title of the panel.
plot_used_memory <- function(samples, line_color, panel_title) {
  plot(
    samples$time_in_hours,
    samples$usedMemory_MB,
    type = "l",
    col = line_color,
    main = panel_title,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )
}

# 2 x 2 grid of panels, filled row by row: none, 0.1, 0.5 and 1.0 workloads.
par(mfrow = c(2, 2))
plot_used_memory(
  monitoringCloudData_NONE, "black", "Memory Analysis (None Workload)"
)
plot_used_memory(
  monitoringCloudData_01, "red", "Memory Analysis (Workload of 0.1)"
)
plot_used_memory(
  monitoringCloudData_05, "blue", "Memory Analysis (Workload of 0.5)"
)
plot_used_memory(
  monitoringCloudData_1, "green", "Memory Analysis (Workload of 1.0)"
)

Questão 7
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Carregando pacotes exigidos: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the Netflix catalogue; the `##` lines are the recorded console output.
netflix_titles <- read_csv("netflix_titles.csv")
## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Ten countries with the most titles, counting only titles whose `country`
# field is present and names a single country (no comma-separated list).
netflix_single_country <- netflix_titles %>%
  filter(!is.na(country), !grepl(",", country)) %>%
  count(country, name = "count", sort = TRUE) %>%
  slice_head(n = 10)
# Pie chart of those ten countries.
# NOTE(review): the title mentions 2019, but no year filter is applied in this
# block; confirm which one is intended.
layout(
  plot_ly(
    netflix_single_country,
    labels = ~country,
    values = ~count,
    type = 'pie'
  ),
  title = 'Top 10 Países com Mais Conteúdos na Netflix (2019)'
)
Questão 8
library(dplyr)
library(plotly)
library(readr)
# Load the Netflix catalogue; the `##` lines are the recorded console output.
netflix_titles <- read_csv("netflix_titles.csv")
## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep titles whose `country` is present and names a single country.
filtered_data <- netflix_titles %>%
  filter(!is.na(country), !grepl(",", country))
# Number of titles per country, largest first, limited to the first ten rows.
content_by_country <- filtered_data %>%
  count(country, name = "Total de conteúdos", sort = TRUE) %>%
  slice_head(n = 10)
# Header and body of the Plotly table. The counts are transposed so that each
# row of the resulting matrix supplies one table column.
table_header <- list(
  values = c('País', 'Total de conteúdos'),
  align = c('center', 'center'),
  fill = list(color = 'gray'),
  font = list(color = 'white', size = 12)
)
table_cells <- list(
  values = t(content_by_country),
  align = c('center', 'center')
)
fig <- plot_ly(type = 'table', header = table_header, cells = table_cells)
fig
Questão 9
library(readr)
library(plotly)
library(dplyr)
# Load the Netflix catalogue; the `##` lines are the recorded console output.
netflix_titles <- read_csv("netflix_titles.csv")
## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add the decade of each title: the release year rounded down to a multiple
# of ten.
netflix_titles <- mutate(
  netflix_titles,
  decade = 10 * floor(release_year / 10)
)
# Number of titles for every (decade, type) pair, ignoring titles without a
# type.
titles_with_type <- filter(netflix_titles, !is.na(type))
decade_data <- titles_with_type %>%
  group_by(decade, type) %>%
  summarise(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'decade'. You can override using the
## `.groups` argument.
# Line chart with markers: one line per content type.
fig <- plot_ly(
  decade_data,
  x = ~decade,
  y = ~count,
  color = ~type,
  colors = c("blue", "yellow"),
  type = 'scatter',
  mode = 'lines+markers'
)
fig <- layout(
  fig,
  title = "Quantidade de Conteúdo por Década",
  xaxis = list(title = "Década"),
  yaxis = list(title = "Qtd. de Conteúdo"),
  legend = list(title = list(text = '<b>Tipo de Conteúdo</b>'))
)
fig
Questão 10
library(readr)
library(plotly)
library(dplyr)
# Load the Netflix catalogue; the `##` lines are the recorded console output.
netflix_titles <- read_csv("netflix_titles.csv")
## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep titles released from 2000 to 2010 and reduce `listed_in` to its first
# entry (the text before the first comma). vapply() with character(1) always
# returns a character vector, whereas sapply() would return an empty list if
# no row fell in that range.
filtered_data <- netflix_titles %>%
  filter(release_year >= 2000, release_year <= 2010) %>%
  mutate(
    listed_in = vapply(
      strsplit(as.character(listed_in), ","),
      `[`,
      character(1),
      1
    )
  )
# Keep only the three genres of interest.
filtered_data <- filtered_data %>%
  filter(listed_in %in% c("Dramas", "Action & Adventure", "Comedies"))
# Number of titles for every (release year, genre) pair.
yearly_genre_count <- filtered_data %>%
  group_by(release_year, listed_in) %>%
  summarise(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'release_year'. You can override using the
## `.groups` argument.
# Side-by-side bar chart: one group of bars per year, one bar per genre.
fig <- plot_ly(
  yearly_genre_count,
  x = ~release_year,
  y = ~count,
  color = ~listed_in,
  colors = c("blue", "orange", "green"),
  type = 'bar'
) %>%
  layout(
    title = "Quantidade de Filmes por Gênero (2000-2010)",
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Qtd. de Lançamentos"),
    barmode = 'group'
  )
fig