analise.knit

Questão 1

# Clock values used as the x axis of the line chart.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# MRT series: each vector holds one value per entry of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# Two panels side by side: line chart on the left, bar chart on the right.
layout(matrix(seq_len(2), ncol = 2))

# Left panel: MRT_1F opens the plot (and therefore fixes the axis ranges);
# MRT_3F and MRT_5F are then drawn on top of it.
plot(
  clock, MRT_1F,
  type = "l", lwd = 2, col = "blue",
  main = "Gráfico de Linhas MRT",
  xlab = "Clock", ylab = "Valores MRT"
)
invisible(Map(
  function(serie, cor) lines(clock, serie, col = cor, lwd = 2),
  list(MRT_3F, MRT_5F),
  c("red", "green")
))
legend(
  "topright",
  lwd = 2,
  col = c("blue", "red", "green"),
  legend = c("MRT_1F", "MRT_3F", "MRT_5F")
)

# Right panel: MRT_1F as bars on a logarithmic y axis; the two greys are
# recycled across the bars.
barplot(
  MRT_1F,
  col = c("#E6E6E6", "#666666"),
  log = "y",
  xlab = "Índice", ylab = "MRT_1F (log)",
  main = "Gráfico de Barras (Escala Log)"
)

Questão 2

# Percentages by meal quality (rows) and price range (columns); the values
# are entered row by row.
dados <- matrix(
  c(
    53.8, 33.9, 2.6, 0.0,
    43.6, 54.2, 60.5, 21.4,
    2.6, 11.9, 36.8, 78.6
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("Good", "Very Good", "Excellent"),
    c("$10–19", "$20–29", "$30–39", "$40–49")
  )
)

# One fill colour per quality level (matrix row).
cores <- c("lightblue", "orange", "seagreen")

# Stacked bars (beside = FALSE): one bar per price range, one segment per
# quality level, with a borderless legend in the top-right corner.
barplot(
  height = dados,
  col = cores,
  beside = FALSE,
  legend.text = rownames(dados),
  args.legend = list(x = "topright", bty = "n"),
  xlab = "Faixa de Preço da Refeição",
  ylab = "Percentual (%)",
  main = "Qualidade da Refeição por Faixa de Preço"
)

Questão 3

# Load the built-in airquality dataset into the workspace.
data("airquality")

# Keep only the observations recorded in May (Month == 5).
maio <- subset(airquality, Month == 5)

# Convert the temperatures from Fahrenheit to Celsius.
tempC <- (maio$Temp - 32) / 1.8

# probability = TRUE draws the bars on the density scale (total area 1), so
# the y axis is labelled as a density rather than a frequency; this is also
# the scale of the density curve overlaid below.
hist(tempC,
     col = "lightblue",
     main = "Temperaturas de Maio (°C)",
     xlab = "Temperatura (°C)",
     ylab = "Densidade",
     probability = TRUE)

# Kernel density estimate drawn on top of the histogram.
lines(density(tempC, na.rm = TRUE),
      col = "red",
      lwd = 2)

Questão 4

# Total sales per country.
sales <- data.frame(
  country = c("US", "UK", "France", "Poland", "Japan", "China"),
  total = c(340, 290, 510, 820, 120, 780)
)

# Share of each country in the overall total, as a percentage with one
# decimal place.
porcentagem <- with(sales, round(100 * total / sum(total), 1))

# Slice labels in the form "<country> - <percentage> %".
rotulos <- paste0(sales$country, " - ", porcentagem, " %")

# One rainbow colour per country; the same palette is regenerated for the
# legend so slice and legend colours line up.
pie(
  x = sales$total,
  labels = rotulos,
  col = rainbow(nrow(sales)),
  main = "Porcentagem de Vendas por País"
)

legend(
  "topright",
  title = "Países",
  legend = sales$country,
  fill = rainbow(nrow(sales))
)

Questão 5

# Load the built-in InsectSprays dataset into the workspace.
data("InsectSprays")

# One box per spray type, built from the `count` column; outline = FALSE
# leaves the outlier points undrawn.
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  xlab = "Tipo de Inseticida",
  ylab = "Numero de Insetos",
  main = "Contagem de Insetos por Tipo de Inseticida"
)

Questão 6

library(stringr)

#' Read a monitoring CSV and derive elapsed time and memory usage columns.
#'
#' Converts `currentTime` to POSIXct, adds `time_hour` (hours elapsed since
#' the timestamp in the first row) and `usedMemoryMB` (the numeric part of
#' `usedMemory`, scaled according to the unit letters that follow it).
#'
#' @param arquivo Path of a CSV file containing `currentTime` and
#'   `usedMemory` columns.
#' @return The data frame read from `arquivo`, with `currentTime` converted,
#'   `usedMemory` trimmed, and the `time_hour` and `usedMemoryMB` columns
#'   added.
processar_dados <- function(arquivo) {
  registros <- read.csv(arquivo, stringsAsFactors = FALSE)

  # Elapsed time is measured from the first row's timestamp.
  registros$currentTime <- as.POSIXct(registros$currentTime)
  inicio <- registros$currentTime[1]
  decorrido <- difftime(registros$currentTime, inicio, units = "hours")
  registros$time_hour <- as.numeric(decorrido)

  # Split each trimmed entry into its unit letters and its numeric part.
  registros$usedMemory <- str_trim(registros$usedMemory)
  unidade <- str_extract(registros$usedMemory, "[A-Za-z]+")
  valores <- as.numeric(str_extract(registros$usedMemory, "[0-9.]+"))

  # "TB" values are multiplied by 1000000, "GB" values by 1024, and any other
  # unit is kept as is.
  # NOTE(review): the TB factor is decimal while the GB factor is binary --
  # confirm that this mix is intended.
  gb_ou_outros <- ifelse(unidade == "GB", valores * 1024, valores)
  registros$usedMemoryMB <- ifelse(
    unidade == "TB",
    valores * 1000000,
    gb_ou_outros
  )

  registros
}

# Load one monitoring log per workload level.
none <- processar_dados("monitoringCloudData_NONE.csv")
w01 <- processar_dados("monitoringCloudData_0.1.csv")
w05 <- processar_dados("monitoringCloudData_0.5.csv")
w1 <- processar_dados("monitoringCloudData_1.csv")

# 2 x 2 grid of panels, filled row by row.
layout(matrix(1:4, nrow = 2, byrow = TRUE))

# One line chart of used memory against elapsed time per workload, drawn in
# the order none, 0.1, 0.5, 1.0.
invisible(Map(
  function(dados, titulo) {
    plot(
      dados$time_hour, dados$usedMemoryMB,
      type = "l",
      xlab = "Time (hour)", ylab = "Used Memory (MB)",
      main = titulo
    )
  },
  list(none, w01, w05, w1),
  c(
    "Memory Analysis (None Workload)",
    "Memory Analysis (Workload of 0.1)",
    "Memory Analysis (Workload of 0.5)",
    "Memory Analysis (Workload of 1.0)"
  )
))

Questão 7

library(dplyr)

## 
## Anexando pacote: 'dplyr'

## Os seguintes objetos são mascarados por 'package:stats':
## 
##     filter, lag

## Os seguintes objetos são mascarados por 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plotly)

## Warning: pacote 'plotly' foi compilado no R versão 4.5.2

## Carregando pacotes exigidos: ggplot2

## 
## Anexando pacote: 'plotly'

## O seguinte objeto é mascarado por 'package:ggplot2':
## 
##     last_plot

## O seguinte objeto é mascarado por 'package:stats':
## 
##     filter

## O seguinte objeto é mascarado por 'package:graphics':
## 
##     layout

# Load the Netflix catalogue; empty strings, "NA" and "NULL" are read as NA
# and surrounding whitespace is stripped from the fields.
netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA", "NULL"),
  fileEncoding = "UTF-8",
  strip.white = TRUE
)

# Keep only titles whose `country` is present and contains no comma, i.e. is
# not a comma-separated list of several countries.
dados_filtrados <- netflix %>%
  filter(!is.na(country), !grepl(",", country))

# Ten countries with the most titles. slice_max() with with_ties = FALSE
# returns at most 10 rows, whereas the superseded top_n() returns extra rows
# whenever several countries tie at the cut-off.
top10 <- dados_filtrados %>%
  count(country, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

# Pie chart with one slice per country, labelled with name and percentage.
fig <- plot_ly(
  top10,
  labels = ~country,
  values = ~n,
  type = 'pie',
  textinfo = 'label+percent',
  insidetextorientation = 'radial'
) %>%
  layout(title = "Top 10 Países com Mais Conteúdos na Netflix")

fig

Questão 8

library(plotly)
library(dplyr)

# Load the Netflix catalogue; empty strings, "NA" and "NULL" are read as NA
# and surrounding whitespace is stripped from the fields.
netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA", "NULL"),
  fileEncoding = "UTF-8",
  strip.white = TRUE
)

# Clean every character column: re-encode to UTF-8 (sub = "" drops bytes that
# cannot be converted) and replace embedded newlines with spaces. Columns of
# other types are returned untouched.
netflix[] <- lapply(netflix, function(x) {
  if (is.character(x)) {
    x <- iconv(x, from = "", to = "UTF-8", sub = "")
    gsub("\n", " ", x, fixed = TRUE)
  } else {
    x
  }
})

# Ten countries with the most titles, counting only rows whose `country` is
# present and contains no comma. slice_max() with with_ties = FALSE returns
# at most 10 rows, whereas the superseded top_n() returns extra rows whenever
# several countries tie at the cut-off.
top10 <- netflix %>%
  filter(!is.na(country), !grepl(",", country)) %>%
  count(country, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

# Two-column table: country name and number of titles.
fig <- plot_ly(
  type = 'table',
  header = list(
    values = c("País", "Total de Conteúdos"),
    fill = list(color = "#666666"),
    font = list(color = "white", size = 14),
    align = "center"
  ),
  cells = list(
    values = list(top10$country, top10$n),
    align = "center"
  )
)

fig

Questão 9

library(dplyr)
library(plotly)

# Load the Netflix catalogue; empty strings, "NA" and "NULL" are read as NA
# and surrounding whitespace is stripped from the fields.
netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA", "NULL"),
  fileEncoding = "UTF-8",
  strip.white = TRUE
)

# Clean every character column: re-encode to UTF-8 (sub = "" drops bytes that
# cannot be converted) and replace embedded newlines with spaces. Columns of
# other types pass through unchanged.
netflix[] <- lapply(netflix, function(coluna) {
  if (!is.character(coluna)) {
    return(coluna)
  }
  convertida <- iconv(coluna, from = "", to = "UTF-8", sub = "")
  gsub("\n", " ", convertida, fixed = TRUE)
})

# Drop rows without a release year and tag each title with its decade
# (release year rounded down to a multiple of 10).
netflix <- netflix %>%
  filter(!is.na(release_year)) %>%
  mutate(decada = 10 * floor(release_year / 10))

# Number of titles per decade and content type, ordered by decade.
dados_decada <- netflix %>%
  group_by(decada, type) %>%
  summarise(total = n(), .groups = "drop") %>%
  arrange(decada)

# Separate series from films so that each becomes its own trace.
series <- dados_decada %>% filter(type == "TV Show")
filmes <- dados_decada %>% filter(type == "Movie")

# One line-with-markers trace per content type.
fig <- plot_ly() %>%
  add_trace(
    x = series$decada, y = series$total,
    type = 'scatter', mode = 'lines+markers',
    name = "TV Series",
    line = list(color = "blue", width = 2)
  ) %>%
  add_trace(
    x = filmes$decada, y = filmes$total,
    type = 'scatter', mode = 'lines+markers',
    name = "Movies",
    line = list(color = "yellow", width = 2)
  ) %>%
  layout(
    title = "Quantidade de Conteúdos por Década na Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Qtd. de Conteúdos"),
    legend = list(title = list(text = "Tipo de Conteúdo"))
  )

fig

Questão 10

library(dplyr)
library(plotly)

# Load the Netflix catalogue; empty strings, "NA" and "NULL" are read as NA
# and surrounding whitespace is stripped from the fields.
netflix <- read.csv(
  "netflix_titles.csv",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA", "NULL"),
  fileEncoding = "UTF-8",
  strip.white = TRUE
)

# Clean every character column: re-encode to UTF-8 (sub = "" drops bytes that
# cannot be converted) and replace embedded newlines with spaces. Columns of
# other types are returned untouched.
netflix[] <- lapply(netflix, function(x) {
  if (is.character(x)) {
    x <- iconv(x, from = "", to = "UTF-8", sub = "")
    gsub("\n", " ", x, fixed = TRUE)
  } else {
    x
  }
})

# Films released from 2000 to 2010 (inclusive) that have a genre listing.
dados_filmes <- netflix %>%
  filter(type == "Movie",
         release_year >= 2000,
         release_year <= 2010,
         !is.na(listed_in))

# Main genre = first entry of the comma-separated `listed_in` field, trimmed.
# vapply() always returns a character vector (even for zero rows), unlike
# sapply(), whose return type depends on its input.
dados_filmes$genero_principal <- trimws(vapply(
  strsplit(dados_filmes$listed_in, ","),
  function(generos) generos[1],
  character(1)
))

# Restrict the comparison to the three genres of interest.
dados_filmes <- dados_filmes %>%
  filter(genero_principal %in% c("Dramas", "Action & Adventure", "Comedies"))

# Number of films per release year and main genre, ordered by year.
dados_contagem <- dados_filmes %>%
  group_by(release_year, genero_principal) %>%
  summarise(qtd = n(), .groups = "drop") %>%
  arrange(release_year)

# Grouped bar chart: one group of bars per year, one bar per genre.
fig <- plot_ly(dados_contagem,
               x = ~release_year,
               y = ~qtd,
               color = ~genero_principal,
               type = "bar") %>%
  layout(title = "Filmes por Gênero (2000–2010)",
         xaxis = list(title = "Ano de Lançamento"),
         yaxis = list(title = "Quantidade de Filmes"),
         barmode = "group",
         legend = list(title = list(text = "Gênero")))

fig