Lista12

Questões

Questão 1

# Data for Question 1: six MRT series of seven values each, aligned
# element-by-element with `clock` (defined last).
# NOTE(review): "MRT" presumably means mean response time, and the suffixes
# (1F, 3F, 5F, 10F, 15F, sem_F = "without F") presumably identify the
# configuration measured -- confirm against the assignment statement.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937, 3.3041601673945418, 1.1823111717498882, 1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453, 0.4502068233039181, 0.4496185276300172, 0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928, 0.30655265997285697, 0.3055812715727718, 0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516, 0.19615594370806338, 0.2034569237883263, 0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515, 0.1598052657153692, 0.1645934043532696, 0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899, 0.6146761002685637, 0.6096747087200697, 0.6125810476877268)
# x-axis values shared by every series; the plots label this axis "Clock (GHz)".
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Layout
# Use R's default margins. The previous value c(1, 3, 1, 1) left too little
# room for the axis titles (drawn on margin line 3 by default) and for the
# `main` titles of the bar charts, so they were clipped.
par(mar = c(5, 4, 4, 2) + 0.1)
# Both cells of this 1 x 2 matrix map to figure 1, so each plot fills the
# whole device. `graphics::` is explicit because plotly, attached further down
# in this file, masks layout() and would break this call on a re-run.
graphics::layout(matrix(c(1, 1), nrow = 1, ncol = 2))

# Line chart: the first series sets up the axes, with a y range from 0 to the
# largest value found in any of the six series.
plot(
  x = clock,
  y = MRT_1F,
  type = "l",
  col = "red",
  ylim = c(0, max(c(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F))),
  xlab = "Clock (GHz)",
  ylab = "MRT"
)

# Overlay the remaining series, each in its own colour.
invisible(Map(
  function(series, colour) lines(clock, series, type = "l", col = colour),
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c("blue", "green", "purple", "orange", "brown")
))

# Legend: labels and colours listed in the same order as the series above.
legend(
  "topright",
  legend = c("MRT_1F", "MRT_3F", "MRT_5F", "MRT_10F", "MRT_15F", "MRT_sem_F"),
  col = c("red", "blue", "green", "purple", "orange", "brown"),
  lty = 1,
  cex = 0.8
)

# Arrange the data: every odd row holds one series and the even row right
# after it holds MRT_sem_F, so rows (1, 2), (3, 4), ... form the plotted pairs.
data_matrix <- rbind(
  MRT_1F, MRT_sem_F,
  MRT_3F, MRT_sem_F,
  MRT_5F, MRT_sem_F,
  MRT_10F, MRT_sem_F,
  MRT_15F, MRT_sem_F
)

# Bar charts and legends: one log-scale, side-by-side chart per pair of rows.
# rbind() names the rows after the variables, so the row name of the first
# row of each pair doubles as axis label, title and legend entry.
invisible(lapply(seq(1, 9, by = 2), function(first_row) {
  series_name <- rownames(data_matrix)[first_row]
  barplot(
    data_matrix[c(first_row, first_row + 1), ],
    beside = TRUE,
    col = c("#666666", "#E6E6E6"),
    log = "y",
    names.arg = clock,
    xlab = "Clock (GHz)",
    ylab = series_name,
    main = series_name
  )
  legend(
    "topright",
    legend = c(series_name, "MRT_sem_F"),
    fill = c("#666666", "#E6E6E6")
  )
}))

Questão 2

# Data: one row per quality rating, one column per meal-price range. The
# values are filled column by column, i.e. three ratings per price range.
Quality_Rating <- c("Good", "Very Good", "Excellent")
Meal_Price <- matrix(
  c(
    53.8, 43.6, 2.6,
    33.9, 54.2, 11.9,
    2.6, 60.5, 36.8,
    0.0, 21.4, 78.6
  ),
  nrow = 3,
  ncol = 4,
  dimnames = list(Quality_Rating, c("$10-19", "$20-29", "$30-39", "$40-49"))
)

# Stacked bar chart with its legend. The negative vertical inset moves the
# legend above the plotting region.
barplot(
  Meal_Price,
  beside = FALSE,
  col = c("coral", "palegreen", "dodgerblue"),
  main = "Meal Price by Quality Rating",
  xlab = "Meal Price",
  ylab = "Percentage",
  legend.text = Quality_Rating,
  args.legend = list(x = "topright", bty = "n", inset = c(-0.0, -0.3))
)

Questão 3

# Load the airquality dataset.
data("airquality")

# Keep only the rows recorded in May (Month == 5).
may_data <- airquality[airquality$Month == 5, ]

# Convert the temperatures from Fahrenheit to Celsius.
may_data[["Temp_Celsius"]] <- (may_data[["Temp"]] - 32) / 1.8

# Density-scaled histogram of the Celsius temperatures, with the kernel
# density estimate drawn on top of it.
with(may_data, {
  hist(
    Temp_Celsius,
    probability = TRUE,
    col = "lightblue",
    border = "black",
    main = "Histograma das Temperaturas em Maio (°C)",
    xlab = "Temperatura (°C)",
    ylab = "Frequência"
  )
  lines(density(Temp_Celsius), col = "magenta", lwd = 2)
})

Questão 4

# Dataset: sales table downloaded from the course site.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Inspect the columns.
str(sales)

# Output recorded from a previous run:
## 'data.frame':    6 obs. of  2 variables:
##  $ COUNTRY: chr  "US" "UK" "France" "Poland" ...
##  $ SALES  : int  340 290 510 820 120 780

# Abort when the SALES column is missing; otherwise total the sales by country.
if (!("SALES" %in% colnames(sales))) {
  stop("Nenhuma coluna encontrada.")
}
sales_sum <- aggregate(sales$SALES, by = list(sales$COUNTRY), FUN = sum)

# Rename the columns and add each country's share of the total, in percent
# rounded to one decimal place.
sales_sum <- setNames(sales_sum, c("Country", "TotalSales"))
sales_sum[["Percentage"]] <- round(
  100 * sales_sum[["TotalSales"]] / sum(sales_sum[["TotalSales"]]),
  1
)

# Pie chart and legend, sharing one rainbow palette with a colour per country.
with(sales_sum, {
  slice_cols <- rainbow(length(Country))
  pie(
    TotalSales,
    labels = paste(Country, Percentage, "%"),
    col = slice_cols,
    main = "Porcentagem de Vendas por País"
  )
  legend("topright", legend = Country, fill = slice_cols)
})

Questão 5

# Dataset
data("InsectSprays")

# Yellow boxplot of the insect counts, one box per spray; outlier points are
# not drawn (outline = FALSE).
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "yellow",
  outline = FALSE,
  main = "Boxplot das Contagens de Insetos por Tipo de Inseticida",
  xlab = "Tipo de Inseticida",
  ylab = "Contagem de Insetos"
)

Questão 6

# The four CSV files below are read relative to this directory.
setwd("/Users/alison/Downloads/monitoringCloudData")

# Files: one monitoring log per workload level (0.1, 0.5, 1) plus the run
# labelled NONE.
x1 <- read.csv(file = "monitoringCloudData_0.1.csv")
x2 <- read.csv(file = "monitoringCloudData_0.5.csv")
x3 <- read.csv(file = "monitoringCloudData_1.csv")
x4 <- read.csv(file = "monitoringCloudData_NONE.csv")

# Turn a vector of timestamps into continuous elapsed time.
#
# Args:
#   Datas: timestamps in any form difftime() accepts (e.g. character strings
#          such as "2020-01-01 00:00:30"), in chronological order.
#
# Returns:
#   A numeric vector of the same length as `Datas`: 0 for the first element
#   and, for every later one, the hours elapsed since the first timestamp.
#   Empty input yields numeric(0).
Tempo <- function(Datas) {
  TT <- length(Datas)
  # Nothing to accumulate for zero or one timestamp.
  if (TT < 2) {
    return(numeric(TT))
  }

  # Fix the unit explicitly: by default difftime() chooses secs/mins/hours/days
  # from the size of each gap, so summing its raw values mixes units.
  passos <- difftime(Datas[-1], Datas[-TT], units = "hours")
  c(0, cumsum(as.numeric(passos)))
}

# Add a continuous-time column (Tempo) to each data set, computed from its
# currentTime column.
x1[["Tempo"]] <- Tempo(Datas = x1$currentTime)
x2[["Tempo"]] <- Tempo(Datas = x2$currentTime)
x3[["Tempo"]] <- Tempo(Datas = x3$currentTime)
x4[["Tempo"]] <- Tempo(Datas = x4$currentTime)


# Split the measurement unit off the usedMemory values.
#
# Args:
#   Datas: data frame with a `usedMemory` column.
#
# Returns:
#   `Datas` with `usedMemory` replaced by `usedMemory_P1` (converted to
#   numeric) and `usedMemory_P2`. `sep = -2` is a position counted from the
#   right end of the string, so the last two characters go to `usedMemory_P2`.
Separar <- function(Datas) {
  partes <- tidyr::separate(
    data = Datas,
    col = "usedMemory",
    into = c("usedMemory_P1", "usedMemory_P2"),
    sep = -2
  )
  partes[["usedMemory_P1"]] <- as.numeric(partes[["usedMemory_P1"]])
  partes
}

# Split value and unit of measurement in each of the four data sets.
x1 <- Separar(Datas = x1)
x2 <- Separar(Datas = x2)
x3 <- Separar(Datas = x3)
x4 <- Separar(Datas = x4)

# Map unit-of-measure labels to their multiplier relative to one megabyte.
#
# Args:
#   Datas: vector of unit labels ("KB", "MB", "GB" or "TB"); surrounding
#          whitespace is ignored.
#
# Returns:
#   A numeric vector of the same length as `Datas` holding the factor that
#   converts a value in that unit to MB, using binary multiples throughout
#   (GB = 1024, TB = 1024^2; TB was previously 1000000, which did not agree
#   with the GB factor). NA or unrecognised labels give NA; unrecognised
#   labels also raise a warning instead of silently counting as 0.
Contabilizar <- function(Datas) {
  fatores <- c(KB = 1 / 1024, MB = 1, GB = 1024, TB = 1024^2)

  unidades <- trimws(as.character(Datas))
  # Name-based lookup is vectorised and returns NA for labels it cannot find.
  v <- unname(fatores[unidades])

  desconhecidas <- is.na(v) & !is.na(unidades)
  if (any(desconhecidas)) {
    warning(
      "Unrecognised memory unit(s): ",
      paste(unique(unidades[desconhecidas]), collapse = ", "),
      call. = FALSE
    )
  }
  v
}

# Total used memory per row: the multiplier for the unit in usedMemory_P2
# times the numeric part in usedMemory_P1, stored in the Fator column.
x1$Fator <- Contabilizar(x1$usedMemory_P2) * x1$usedMemory_P1
x2$Fator <- Contabilizar(x2$usedMemory_P2) * x2$usedMemory_P1
x3$Fator <- Contabilizar(x3$usedMemory_P2) * x3$usedMemory_P1
x4$Fator <- Contabilizar(x4$usedMemory_P2) * x4$usedMemory_P1


# Plots
# Draw used memory against time as a line chart.
#
# Args:
#   Tempo:  x values, plotted on the axis labelled "Time (hour)".
#   Fator:  y values, plotted on the axis labelled "Used Memory (MB)".
#   numero: workload shown in the title; 0 selects the "None Workload" title.
Graf <- function(Tempo, Fator, numero) {
  titulo <- if (numero == 0) {
    "Memory Analisys (None Workload)"
  } else {
    paste("Memory Analisys (Workload of", numero, ")")
  }

  plot(
    x = Tempo,
    y = Fator,
    type = "l",
    main = titulo,
    xlab = "Time (hour)",
    ylab = "Used Memory (MB)"
  )
}

# 2 x 2 grid of panels: each figure number fills two adjacent cells of a
# 2 x 4 matrix. `graphics::` is explicit because plotly, attached further down
# in this file, masks layout(); `TRUE` replaces the reassignable `T`.
graphics::layout(matrix(c(1, 1, 2, 2, 3, 3, 4, 4), nrow = 2, byrow = TRUE))
# Panel order: no workload first, then workloads 0.1, 0.5 and 1.
Graf(x4$Tempo, x4$Fator, 0)
Graf(x1$Tempo, x1$Fator, 0.1)
Graf(x2$Tempo, x2$Fator, 0.5)
Graf(x3$Tempo, x3$Fator, 1)

Questão 7

# Packages
# Install plotly and dplyr only when they are missing, rather than
# re-installing on every run, then attach them with library() so a failure to
# load is an error.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.rstudio.com/")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.rstudio.com/")
}

# Attaching plotly also attaches ggplot2 and masks ggplot2::last_plot,
# stats::filter and graphics::layout.
library(plotly)
# Attaching dplyr masks stats::filter and stats::lag, and base::intersect,
# base::setdiff, base::setequal and base::union.
library(dplyr)

# Dataset
netflix_data <- read.csv("/Users/alison/Downloads/netflix_titles.csv")

# Keep titles with exactly one country of origin: drop missing values, entries
# containing a comma, and anything that is not purely letters and spaces
# (which also drops empty strings).
single_country_data <- netflix_data %>%
  filter(
    !is.na(country),
    !grepl(",", country),
    grepl("^[A-Za-z ]+$", country)
  )

# Number of titles per country, largest first.
country_counts <- single_country_data %>%
  count(country, sort = TRUE)

# Keep the first ten rows of the ranking, plus Mexico when it falls outside
# them. The former `if (nrow(top_countries) < 10)` branch was removed: fewer
# than ten rows means every country is already selected, so it could only
# append a duplicate Mexico row or do nothing.
top_countries <- country_counts %>%
  arrange(desc(n)) %>%
  mutate(rank = row_number()) %>%
  filter(rank <= 10 | country == "Mexico")

# Pie chart with its legend placed to the right, vertically centred.
fig <- plot_ly(top_countries, labels = ~country, values = ~n, type = "pie") %>%
  plotly::layout(
    title = "Top 10 Países com mais Conteúdo na Netflix",
    legend = list(x = 1, y = 0.5)
  )

fig

Questão 8

# Packages
# Install only when missing. The explicit `repos` lets the install work in a
# non-interactive run with no CRAN mirror configured. library() is used for
# attaching so a package that fails to load raises an error.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.rstudio.com/")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.rstudio.com/")
}
library(plotly)
library(dplyr)

# Dataset
netflix_data <- read.csv("/Users/alison/Downloads/netflix_titles.csv")

# Keep titles with exactly one country of origin: drop missing values, entries
# containing a comma, and anything that is not purely letters and spaces.
single_country_data <- netflix_data %>%
  filter(
    !is.na(country),
    !grepl(",", country),
    grepl("^[A-Za-z ]+$", country)
  )

# The ten countries with the most titles. slice_max() replaces the superseded
# top_n(10), which guessed the ranking column and printed "Selecting by n".
# Ties are kept, as before, so more than ten rows can come back.
top_countries <- single_country_data %>%
  count(country, sort = TRUE) %>%
  slice_max(order_by = n, n = 10, with_ties = TRUE)

# When a tie pushes the result past ten rows, drop Turkey.
if (nrow(top_countries) > 10) {
  top_countries <- top_countries %>%
    filter(country != "Turkey")
}

# Build the table: country in the first column, title count in the second.
fig_table <- plot_ly(
  type = "table",
  header = list(
    values = c("País", "Total de conteúdos"),
    align = c("center", "center"),
    font = list(size = 12, color = "white"),
    fill = list(color = "gray")
  ),
  cells = list(
    values = list(top_countries$country, top_countries$n),
    align = c("center", "center"),
    font = list(size = 11),
    fill = list(color = c("white", "lightgray"))
  )
)

fig_table

Questão 9

# Packages
# Install only when missing. The explicit `repos` lets the install work in a
# non-interactive run with no CRAN mirror configured. library() is used for
# attaching so a package that fails to load raises an error.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.rstudio.com/")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.rstudio.com/")
}
library(plotly)
library(dplyr)

# Dataset
netflix_data <- read.csv("/Users/alison/Downloads/netflix_titles.csv")

# New column with the decade of release (e.g. 1997 -> 1990).
netflix_data <- netflix_data %>%
  mutate(decade = floor(release_year / 10) * 10)

# Titles per decade, separately for TV shows and movies.
series_data <- netflix_data %>%
  filter(type == "TV Show") %>%
  count(decade)

movies_data <- netflix_data %>%
  filter(type == "Movie") %>%
  count(decade)

# Line chart. Each type is drawn as one "lines+markers" trace, so the legend
# shows one entry per type; separate line and marker traces with the same
# name listed every type twice.
fig <- plot_ly() %>%
  add_trace(
    data = series_data, x = ~decade, y = ~n,
    type = "scatter", mode = "lines+markers", name = "TV Shows",
    line = list(color = "blue"), marker = list(color = "blue")
  ) %>%
  add_trace(
    data = movies_data, x = ~decade, y = ~n,
    type = "scatter", mode = "lines+markers", name = "Movies",
    line = list(color = "yellow"), marker = list(color = "yellow")
  ) %>%
  plotly::layout(
    title = "Quantidade de Conteúdo por Década no Netflix",
    xaxis = list(title = "Década"),
    yaxis = list(title = "Quantidade de Conteúdo")
  )

fig

Questão 10

# Packages
# Install only when missing. The explicit `repos` lets the install work in a
# non-interactive run with no CRAN mirror configured. library() is used for
# attaching so a package that fails to load raises an error.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.rstudio.com/")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.rstudio.com/")
}
library(plotly)
library(dplyr)

# Dataset
netflix_data <- read.csv("/Users/alison/Downloads/netflix_titles.csv")

# Movies released from 2000 to 2010 (inclusive), tagged with the first genre
# listed in `listed_in`, restricted to the three genres of interest.
# vapply() guarantees a character vector even when no rows are left, which
# sapply() does not.
filtered_data <- netflix_data %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010) %>%
  mutate(
    genre = vapply(
      strsplit(as.character(listed_in), ", "),
      function(x) x[1],
      character(1)
    )
  ) %>%
  filter(genre %in% c("Dramas", "Action & Adventure", "Comedies"))

# Number of movies per year and genre. `.groups = "drop"` returns an ungrouped
# result directly and silences the summarise() regrouping message.
genre_counts <- filtered_data %>%
  group_by(release_year, genre) %>%
  summarise(count = n(), .groups = "drop")

# Side-by-side bar chart: one group of bars per year, one bar per genre.
fig <- plot_ly(
  genre_counts,
  x = ~release_year, y = ~count, color = ~genre, type = "bar"
) %>%
  plotly::layout(
    title = "Quantidade de Filmes por Gênero (2000-2010)",
    xaxis = list(title = "Ano"),
    yaxis = list(title = "Quantidade de Filmes"),
    barmode = "group"
  )
fig

Lista12

Alison Ferreira

2025-02-10

Questões

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10