Questões

Questão 1

# Clock values (x axis of the plots below; labelled in seconds there)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# MRT measurements, one value per clock entry, for each configuration.
# NOTE(review): the suffixes (1F, 3F, ..., sem_F = "without F") presumably
# identify the experimental setup -- confirm against the assignment text.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)
MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)
MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)
MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)
MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Two panels side by side. The call is namespaced because plotly, attached
# further down in this document, exports its own layout() that masks
# graphics::layout(); a bare layout() would then fail when this code is
# re-run in the same session.
graphics::layout(matrix(c(1, 2), nrow = 1, ncol = 2, byrow = TRUE))

# Left panel: every MRT series against the clock.
# MRT_1F is plotted first; its range sets the y axis for the other series.
plot(clock, MRT_1F, type = "o", col = "red",
     xlab = "Clock (s)", ylab = "MRT", main = "MRT vs Clock")
lines(clock, MRT_3F, type = "o", col = "blue")
lines(clock, MRT_5F, type = "o", col = "green")
lines(clock, MRT_10F, type = "o", col = "purple")
lines(clock, MRT_15F, type = "o", col = "orange")
lines(clock, MRT_sem_F, type = "o", col = "black")
# pch = 1 so the legend keys show the same line-plus-point symbol that
# type = "o" draws.
legend("topright",
       legend = c("MRT_1F", "MRT_3F", "MRT_5F", "MRT_10F", "MRT_15F", "MRT_sem_F"),
       col = c("red", "blue", "green", "purple", "orange", "black"),
       lty = 1, pch = 1)

# Right panel: MRT_1F as bars on a logarithmic y axis
barplot(MRT_1F, names.arg = clock, log = "y", col = "#E6E6E6", border = "#666666",
        xlab = "Clock (s)", ylab = "MRT", main = "MRT_1F - Escala Logarítmica")

# Back to a single-panel layout so later plots on the same device are not
# squeezed into half of it.
graphics::layout(1)

Questão 2

# Questão 2

# Percentage of each quality rating within every meal-price band.
# The values are listed band by band (Good, Very Good, Excellent for $10-19,
# then the same for $20-29, ...): each consecutive triple adds up to ~100.
# The matrix therefore has to be filled column by column; filling it row-wise
# spread the figures of one band over several bands.
meal_quality <- matrix(
  c(
    53.8, 43.6, 2.6,
    33.9, 54.2, 11.9,
    2.6, 60.5, 36.8,
    0, 21.4, 78.6
  ),
  nrow = 3, byrow = FALSE,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10-19", "$20-29", "$30-39", "$40-49")
  )
)

# Stacked bars: one bar per price band, one segment per rating.
# legend.text is spelled out instead of relying on partial matching of `legend`.
barplot(meal_quality, beside = FALSE, col = c("lightblue", "lightgreen", "lightcoral"),
        legend.text = rownames(meal_quality), xlab = "Meal Price", ylab = "PERCENTAGE (%)",
        main = "Meal Quality Rating by Price")

Questão 3

# Add a Celsius column derived from Temp with the Fahrenheit -> Celsius formula
airquality$Temp_C <- (airquality$Temp - 32) / 1.8

# Temperatures of the rows where Month == 5 (May)
temp_may <- airquality$Temp_C[airquality$Month == 5]

# Kernel density estimate and histogram bins, computed up front so the y axis
# can be sized to hold both the tallest bar and the peak of the curve.
temp_may_density <- density(temp_may)
temp_may_bins <- hist(temp_may, breaks = 10, plot = FALSE)

# Histogram on the density scale (freq = FALSE). On the default count scale
# the density curve, whose values are far below 1, is flattened against the
# x axis.
hist(temp_may, breaks = 10, freq = FALSE, col = "lightblue",
     ylim = c(0, max(temp_may_bins$density, temp_may_density$y)),
     main = "Histogram of May Temperatures (Celsius)",
     xlab = "Temperature (°C)", ylab = "Density")

# Density curve on top of the histogram
lines(temp_may_density, col = "red", lwd = 2)

Questão 4

library(readr)
# Read the whitespace-delimited sales file straight from its URL
sales <- read_table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt"
)

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   COUNTRY = col_character(),
##   SALES = col_double()
## )

# Total SALES per COUNTRY
sales_summary <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)

# Share of each country in the overall total, rounded to one decimal place
overall_sales <- sum(sales_summary$SALES)
sales_summary$percentage <- round(100 * sales_summary$SALES / overall_sales, 1)

# One colour per country, shared by the pie slices and the legend
country_colours <- rainbow(nrow(sales_summary))
slice_labels <- paste(sales_summary$COUNTRY, sales_summary$percentage, "%")

# Pie chart
pie(sales_summary$percentage,
    labels = slice_labels,
    col = country_colours,
    main = "Total Sales by Country")

legend("topright", legend = sales_summary$COUNTRY, fill = country_colours)

Questão 5

# Load the built-in InsectSprays data set into the workspace
data("InsectSprays")

# One yellow box of counts per spray; outline = FALSE leaves the outlier
# points undrawn.
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  main = "Insect Counts by Spray Type",
  xlab = "Spray Type",
  ylab = "Insect Count"
)

Questão 6

library(readr)
# Monitoring samples used for the "Workload of 1.0" panel
monitoringCloudData_1 <- read_csv(file = "monitoringCloudData_1.csv")

## Rows: 4303 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl   (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm  (1): currentTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Monitoring samples used for the "Workload of 0.5" panel
monitoringCloudData_05 <- read_csv(file = "monitoringCloudData_0.5.csv")

## Rows: 4303 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): currentTime, totalMemory, availableMemory, usedMemory, totalSwap, ...
## dbl  (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percenta...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Monitoring samples used for the "Workload of 0.1" panel
monitoringCloudData_01 <- read_csv(file = "monitoringCloudData_0.1.csv")

## Rows: 4275 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl   (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm  (1): currentTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Monitoring samples used for the "None Workload" panel
monitoringCloudData_NONE <- read_csv(file = "monitoringCloudData_NONE.csv")

## Rows: 4302 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): totalMemory, availableMemory, usedMemory, totalSwap, freeSwap, us...
## dbl   (4): totalCpuUsage..., percentageMemory..., percentageSwap..., percent...
## dttm  (1): currentTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Convert each currentTime column to POSIXct. Character values that do not
# match the format become NA.
timestamp_format <- "%Y-%m-%d %H:%M:%S"
monitoringCloudData_1$currentTime <- as.POSIXct(monitoringCloudData_1$currentTime, format = timestamp_format)
monitoringCloudData_05$currentTime <- as.POSIXct(monitoringCloudData_05$currentTime, format = timestamp_format)
monitoringCloudData_01$currentTime <- as.POSIXct(monitoringCloudData_01$currentTime, format = timestamp_format)
monitoringCloudData_NONE$currentTime <- as.POSIXct(monitoringCloudData_NONE$currentTime, format = timestamp_format)

# Hours elapsed since the earliest timestamp of a series.
# NA timestamps are skipped when looking for the start; without na.rm a single
# unparseable entry makes min() return NA and the whole column becomes NA.
# The same rule is now applied to all four data sets.
hours_since_start <- function(timestamps) {
  series_start <- min(timestamps, na.rm = TRUE)
  as.numeric(difftime(timestamps, series_start, units = "hours"))
}

monitoringCloudData_1$time_in_hours <- hours_since_start(monitoringCloudData_1$currentTime)
monitoringCloudData_05$time_in_hours <- hours_since_start(monitoringCloudData_05$currentTime)
monitoringCloudData_01$time_in_hours <- hours_since_start(monitoringCloudData_01$currentTime)
monitoringCloudData_NONE$time_in_hours <- hours_since_start(monitoringCloudData_NONE$currentTime)

# Convert memory-size strings such as "1.5GB" or "512MB" to a number of MB.
#
# memory_str: a character value, or a vector of them; each element holds a
#             number and a unit (GB, TB or MB).
# Returns a numeric vector with one entry per element of memory_str:
#   GB values are multiplied by 1024, TB values by 1024 * 1024 (the same
#   binary factor as GB, instead of the previous 1000000), MB values are
#   returned unchanged. NA, empty strings and strings without a recognised
#   unit give NA_real_.
convert_to_mb <- function(memory_str) {
  memory_str <- as.character(memory_str)

  # Keep digits and dots only, then read what is left as a number
  value <- as.numeric(gsub("[^0-9\\.]", "", memory_str))

  # Multiplier per element. Assigned from lowest to highest precedence so a
  # string naming several units resolves as GB first, then TB, then MB.
  to_mb <- rep(NA_real_, length(memory_str))
  to_mb[grepl("MB", memory_str, fixed = TRUE)] <- 1
  to_mb[grepl("TB", memory_str, fixed = TRUE)] <- 1024 * 1024
  to_mb[grepl("GB", memory_str, fixed = TRUE)] <- 1024

  value * to_mb
}

 # Convert each data set's usedMemory strings to numeric MB.
 # vapply() pins the result to exactly one double per element (a logical NA
 # coming back from convert_to_mb is coerced to NA_real_), and
 # USE.NAMES = FALSE keeps the input strings from being attached as names.
 used_memory_mb <- function(memory_strings) {
   vapply(
     memory_strings,
     function(one_string) as.numeric(convert_to_mb(one_string)),
     numeric(1),
     USE.NAMES = FALSE
   )
 }

 monitoringCloudData_1$usedMemory_MB <- used_memory_mb(monitoringCloudData_1$usedMemory)
 monitoringCloudData_05$usedMemory_MB <- used_memory_mb(monitoringCloudData_05$usedMemory)
 monitoringCloudData_01$usedMemory_MB <- used_memory_mb(monitoringCloudData_01$usedMemory)
 monitoringCloudData_NONE$usedMemory_MB <- used_memory_mb(monitoringCloudData_NONE$usedMemory)

# Sanity checks on the derived columns. Each call is followed by the output
# captured when the document was rendered.

# First elapsed-time values of every data set: each series starts at 0
head(monitoringCloudData_05$time_in_hours)

## [1] 0.00000000 0.01694444 0.03361111 0.05027778 0.06694444 0.08388889

head(monitoringCloudData_1$time_in_hours)

## [1] 0.00000000 0.01673960 0.03346666 0.05020650 0.06694222 0.08368178

head(monitoringCloudData_01$time_in_hours)

## [1] 0.00000000 0.01674010 0.03346715 0.05020156 0.06694109 0.08368067

head(monitoringCloudData_NONE$time_in_hours)

## [1] 0.00000000 0.01674055 0.03348025 0.05022017 0.06696019 0.08370020

# Number of timestamps that are NA after the POSIXct conversion:
# 194 in the 0.5 data set, none in the 1.0 data set
sum(is.na(monitoringCloudData_05$currentTime))

## [1] 194

sum(is.na(monitoringCloudData_1$currentTime))

## [1] 0

# 2 x 2 grid, filled row by row: one used-memory line chart per workload
par(mfrow = c(2, 2))

# The four panels differ only in data set, line colour and title, so they are
# drawn by one plotting function applied to the three parallel inputs.
invisible(Map(
  function(workload_data, line_colour, panel_title) {
    plot(workload_data$time_in_hours, workload_data$usedMemory_MB,
         type = "l", col = line_colour,
         xlab = "Time (hour)", ylab = "Used Memory (MB)", main = panel_title)
  },
  list(
    monitoringCloudData_NONE,
    monitoringCloudData_01,
    monitoringCloudData_05,
    monitoringCloudData_1
  ),
  c("black", "red", "blue", "green"),
  c(
    "Memory Analysis (None Workload)",
    "Memory Analysis (Workload of 0.1)",
    "Memory Analysis (Workload of 0.5)",
    "Memory Analysis (Workload of 1.0)"
  )
))

Questão 7

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plotly)

## Carregando pacotes exigidos: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Netflix catalogue used by the questions that follow
netflix_titles <- read_csv(file = "netflix_titles.csv")

## Rows: 7787 Columns: 12

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Titles whose country field is present and has no comma (a single country),
# tallied per country in descending order, ten largest kept
netflix_single_country <- netflix_titles %>%
  filter(!is.na(country), !grepl(",", country)) %>%
  count(country, name = "count", sort = TRUE) %>%
  slice_head(n = 10)

# Pie chart with one slice per country
plot_ly(
  netflix_single_country,
  labels = ~country,
  values = ~count,
  type = "pie"
) %>%
  layout(title = "Top 10 Países com Mais Conteúdos na Netflix (2019)")

Questão 8

library(dplyr)
library(plotly)
library(readr)
# Reload the Netflix catalogue for this question
netflix_titles <- read_csv(file = "netflix_titles.csv")

## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Keep only titles whose country field is present and has no comma
# (a single country)
filtered_data <- netflix_titles %>%
  filter(!is.na(country) & !grepl(",", country))

# Count titles per country and keep the ten largest
content_by_country <- filtered_data %>%
  count(country, name = "Total de conteúdos") %>%
  arrange(desc(`Total de conteúdos`)) %>%
  slice_head(n = 10)

# Build the table with plotly. Every table column is passed as its own
# vector: transposing the tibble with t() goes through a character matrix,
# which turns the counts into strings.
fig <- plot_ly(
  type = 'table',
  header = list(
    values = c('País', 'Total de conteúdos'),
    align = c('center', 'center'),
    fill = list(color = 'gray'),
    font = list(color = 'white', size = 12)
  ),
  cells = list(
    values = list(
      content_by_country$country,
      content_by_country[["Total de conteúdos"]]
    ),
    align = c('center', 'center')
  )
)

fig

Questão 9

library(readr)
library(plotly)
library(dplyr)

# Reload the Netflix catalogue for this question
netflix_titles <- read_csv(file = "netflix_titles.csv")

## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Add a decade column: release_year rounded down to a multiple of 10
netflix_titles <- netflix_titles %>%
  mutate(decade = floor(release_year / 10) * 10)

# Count titles per decade and type. count() returns an ungrouped tibble, so
# no trailing ungroup() is needed and summarise()'s "grouped output" message
# is no longer emitted.
decade_data <- netflix_titles %>%
  filter(!is.na(type)) %>%
  count(decade, type, name = "count")

# Line chart: one line per content type
fig <- plot_ly(decade_data, x = ~decade, y = ~count, color = ~type, colors = c("blue", "yellow"),
               type = 'scatter', mode = 'lines+markers') %>%
  layout(
    title = "Quantidade de Conteúdo por Década",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Qtd. de Conteúdo"),
    legend = list(title = list(text = '<b>Tipo de Conteúdo</b>'))
  )

fig

Questão 10

library(readr)
library(plotly)
library(dplyr)

# Reload the Netflix catalogue for this question
netflix_titles <- read_csv(file = "netflix_titles.csv")

## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Genres shown in the chart
genres_of_interest <- c("Dramas", "Action & Adventure", "Comedies")

# Titles released from 2000 to 2010, reduced to their first listed genre
# (the text before the first comma) and restricted to the genres above.
# vapply() guarantees exactly one character value per title.
filtered_data <- netflix_titles %>%
  filter(release_year >= 2000, release_year <= 2010) %>%
  mutate(
    listed_in = vapply(
      strsplit(as.character(listed_in), ","),
      `[`,
      character(1),
      1
    )
  ) %>%
  filter(listed_in %in% genres_of_interest)

# Count titles per year and genre. count() returns an ungrouped tibble, so
# summarise()'s "grouped output" message is no longer emitted.
yearly_genre_count <- filtered_data %>%
  count(release_year, listed_in, name = "count")

# Side-by-side bar chart: one bar per genre within each year
fig <- plot_ly(yearly_genre_count, x = ~release_year, y = ~count, color = ~listed_in, colors = c("blue", "orange", "green"),
               type = 'bar') %>%
  layout(
    title = "Quantidade de Filmes por Gênero (2000-2010)",
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Qtd. de Lançamentos"),
    barmode = 'group'
  )

fig

Atividade 12

Christian Oliveira

2024-08-25

Questões

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10