Respostas

Questão 1

Carregando os dados que serão representados no gráfico:

# Response-time series plotted in the charts below (y axis labelled in
# seconds there). One vector per legend entry; each has one value per entry
# of `clock`.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

# Series labelled "w/o Frogs" in the legends below.
MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# x-axis positions shared by every series (axis label below: time between
# requests, in seconds).
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

Criando uma matriz para organizar os gráficos de linha e de barras.

 # Plot grid of 4 rows x 2 columns: slot 1 spans the whole first row, slots
 # 2-6 fill the following cells, and the last cell (0) stays empty.
 layout.matrix <- cbind(
   c(1, 2, 4, 6),
   c(1, 3, 5, 0)
 )
 layout(
   mat = layout.matrix,
   heights = rep(30, 4), # heights of the four rows
   widths = rep(5, 2)    # widths of the two columns
 )

# Line plot (slot 1): one line per series, all against `clock`.
# Margins are given as (bottom, left, top, right).
# NOTE(review): a bottom margin of 0 leaves no room for the x-axis label
# requested below -- confirm this is intended.
par(mar = c(0, 14, 0, 14))
plot(
  clock, MRT_1F,
  type = "o", pch = 4, col = "black",
  xlab = "Time between Things requests (seconds)",
  ylab = "Response Time (sec.)"
)

# Overlay the remaining series in the same order as the legend entries.
invisible(Map(
  function(series, symbol, colour) {
    lines(clock, series, type = "o", pch = symbol, col = colour)
  },
  list(MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F),
  c(11, 1, 2, 5, 4),
  c("yellow", "red", "blue", "purple", "green")
))

legend(
  "topright",
  legend = c("1 Frog", "3 Frogs", "5 Frogs", "10 Frogs", "15 Frogs", "w/o Frogs"),
  pch = c(4, 11, 1, 2, 5, 4),
  col = c("black", "yellow", "red", "blue", "purple", "green")
)

# Barplots 1-5 (layout slots 2-6): each panel compares the "w/o Frogs" series
# with one Frog configuration, side by side, on a logarithmic y axis.
# The five panels differ only in the second series and its legend label, so
# they are drawn from a single loop instead of five copy-pasted stanzas.
# NOTE(review): a bottom margin of 0 leaves no room for `names.arg`/`xlab`
# below the bars -- confirm this is intended.
par(mar = c(0, 5, 2, 1))
colors <- c("#E6E6E6", "#666666")

# Legend label -> series, in panel order.
frog_series <- list(
  "1 Frog" = MRT_1F,
  "3 Frogs" = MRT_3F,
  "5 Frogs" = MRT_5F,
  "10 Frogs" = MRT_10F,
  "15 Frogs" = MRT_15F
)

for (frog_label in names(frog_series)) {
  # Row 1 = without Frogs, row 2 = current Frog configuration; one column per
  # entry of `clock`.
  values <- matrix(
    c(MRT_sem_F, frog_series[[frog_label]]),
    nrow = 2, ncol = length(clock), byrow = TRUE
  )
  barplot(
    values,
    log = "y", names.arg = clock,
    xlab = "Time between Things requests", ylab = "Response time (s)",
    col = colors, beside = TRUE
  )
  legend(
    "topright",
    pch = c(15, 15), col = colors,
    legend = c("w/o Frogs", frog_label)
  )
}

Questão 2

Construindo o gráfico de barras empilhadas:

# Stacked bar chart: one bar per price range, one coloured segment per
# quality rating.
colors <- c("blue", "pink", "yellow")
price_range <- c("$10-19", "$20-29", "$30-39", "$40-49")
quality <- c("Good", "Very Good", "Excellent")

# Rows follow `quality`, columns follow `price_range`. The values are stored
# as numbers (they were previously character strings) so that `barplot()`
# receives a numeric height matrix.
# NOTE(review): each column sums to ~100, so the values look like percentages
# rather than prices -- confirm whether the y-axis label "Price" is intended.
prices <- matrix(
  c(
    53.8, 33.9,  2.6,  0.0,
    43.6, 54.2, 60.5, 21.4,
     2.6, 11.9, 36.8, 78.6
  ),
  nrow = 3, ncol = 4, byrow = TRUE
)

barplot(
  prices,
  main = "Meal Quality by Pricing",
  names.arg = price_range,
  xlab = "Price Range", ylab = "Price",
  col = colors
)
legend("topright", pch = c(15, 15, 15), col = colors, cex = 0.8, legend = quality)

Questão 3

Tratamento inicial: filtragem dos meses para obter apenas o mês de Maio (5) e, então, realizar a conversão das temperaturas.

library("dplyr")

# Keep only the rows of `airquality` recorded in month 5 (May).
airquality2 <- filter(airquality, Month == 5)

# Convert the temperatures with the Fahrenheit -> Celsius formula.
CelsiusTemp <- with(airquality2, (Temp - 32) / 1.8)

Construindo o gráfico de histograma:

# Histogram on the density scale (freq = FALSE), so that the kernel density
# curve drawn afterwards shares the same y axis.
hist(
  CelsiusTemp,
  freq = FALSE,
  col = "grey",
  main = "Histograma das Temperaturas em Celsius",
  xlab = "Temperaturas em Celsius",
  ylab = "Densidade"
)

# Density curve overlaid on the histogram.
densityTemp <- density(CelsiusTemp)
lines(densityTemp)

Questão 4

Tratamento inicial: determinar as porcentagens das fatias

# Data set: read from the course-material URL; the first line holds the
# column names.
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of each row in the total sales, rounded to a whole percentage.
pct <- round(sales$SALES / sum(sales$SALES) * 100)

# Slice labels such as "25%".
lbls <- paste0(pct, "%")

Construindo o gráfico de pizza:

# One colour per row of `sales`, shared by the pie slices and the legend.
# (The slices previously used a hard-coded rainbow(6) while the legend used
# rainbow(length(sales$COUNTRY)); the two palettes only agree when the data
# has exactly six rows.)
slice_colors <- rainbow(nrow(sales))

pie(
  sales$SALES,
  labels = lbls,
  main = "Grafico de vendas por paises",
  col = slice_colors
)

# Legend: maps each colour to its country.
legend("topright", legend = sales$COUNTRY, fill = slice_colors)

Questão 5

Construindo o diagrama de caixa:

# Box plot of `count` for each level of `spray` in InsectSprays.
# outline = FALSE: outlier points are not drawn.
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  main = "Dados sobre os Sprays",
  xlab = "Tipo de spray",
  ylab = "Contagem"
)

Questão 6

Tratamento inicial: ajuste nas colunas currentTime e usedMemory. É necessário deixar o tempo contínuo e a memória em MB.

# Convert memory strings such as "512MB" or "1.5GB" to numeric megabytes.
#
# x: character vector of memory readings carrying an "MB" or "GB" suffix.
# Returns a numeric vector of the same length, with GB readings multiplied
# by 1024. Missing or unparseable entries yield NA.
#
# The unit is stripped BEFORE the numeric conversion. Converting inside the
# branches of ifelse() coerces every element in both branches and emits
# "NAs introduced by coercion" warnings for values of the other unit.
parse_used_memory_mb <- function(x) {
  x <- as.character(x)
  unit_factor <- ifelse(grepl("GB", x), 1024, 1)
  as.numeric(gsub("GB|MB", "", x)) * unit_factor
}

# Read one monitoring CSV and prepare it for plotting.
#
# path:            CSV file with (at least) the columns `currentTime` and
#                  `usedMemory`.
# drop_incomplete: if TRUE, rows containing any NA (after the timestamp has
#                  been parsed) are removed before the elapsed time is
#                  computed.
#
# Returns the data frame with `currentTime` as POSIXct, a new column `hours`
# (elapsed hours since the first row) and `usedMemory` as numeric megabytes.
load_cloud_data <- function(path, drop_incomplete = FALSE) {
  cloud <- read.csv(
    file = path, header = TRUE, strip.white = TRUE, na.strings = ""
  )

  # Parse the timestamps so that time differences can be computed.
  cloud$currentTime <- as.POSIXct(
    cloud$currentTime,
    format = "%Y-%m-%d %H:%M:%S"
  )

  if (drop_incomplete) {
    cloud <- cloud[complete.cases(cloud), ]
  }

  # Continuous time axis: hours elapsed since the first observation.
  cloud$hours <- as.numeric(
    difftime(cloud$currentTime, cloud$currentTime[1], units = "hours")
  )

  cloud$usedMemory <- parse_used_memory_mb(cloud$usedMemory)
  cloud
}

cloud_0 <- load_cloud_data("monitoringCloudData_NONE.csv")
cloud_01 <- load_cloud_data("monitoringCloudData_0.1.csv")
# Only this file has its incomplete rows removed before computing `hours`.
cloud_05 <- load_cloud_data("monitoringCloudData_0.5.csv", drop_incomplete = TRUE)
cloud_1 <- load_cloud_data("monitoringCloudData_1.csv")

Construindo os gráficos de linha:

# 2 x 2 grid of equally sized panels, filled row by row.
layout(
  rbind(
    c(1, 2),
    c(3, 4)
  ),
  heights = c(2, 2),
  widths = c(2, 2)
)

# Panel 1: cloud_0
plot(
  x = cloud_0$hours, y = cloud_0$usedMemory,
  type = "l", col = "black",
  xlim = c(0, 72), ylim = c(96, 106),
  main = "Memory Analysis (None Workload)",
  xlab = "Time (hour)", ylab = "Used Memory (MB)"
)

# Panel 2: cloud_01
plot(
  x = cloud_01$hours, y = cloud_01$usedMemory,
  type = "l", col = "black",
  xlim = c(0, 72), ylim = c(243, 3500),
  main = "Memory Analysis (Workload of 0.1)",
  xlab = "Time (hour)", ylab = "Used Memory (MB)"
)

# Panel 3: cloud_05
plot(
  x = cloud_05$hours, y = cloud_05$usedMemory,
  type = "l", col = "black",
  xlim = c(0, 72), ylim = c(256, 1200),
  main = "Memory Analysis (Workload of 0.5)",
  xlab = "Time (hour)", ylab = "Used Memory (MB)"
)

# Panel 4: cloud_1
plot(
  x = cloud_1$hours, y = cloud_1$usedMemory,
  type = "l", col = "black",
  xlim = c(0, 72), ylim = c(241, 254),
  main = "Memory Analysis (Workload of 1)",
  xlab = "Time (hour)", ylab = "Used Memory (MB)"
)

Questão 7

A partir deste exercício, utilizaremos o pacote plotly para as resoluções. Começamos carregando a biblioteca.

#install.packages("plotly")
library("plotly")

Com a biblioteca já pronta para uso, carregamos o dataset da Netflix.

# Read the Netflix titles CSV; surrounding whitespace is stripped from the
# fields and empty fields are read as NA.
netflix <- read.csv(
  "netflix_titles.csv",
  header = TRUE,
  strip.white = TRUE,
  na.strings = ""
)

Agora faremos a filtragem dos conteúdos com apenas UM país de origem e em seguida iremos verificar os 10 países que possuem mais conteúdos.

# Keep only the titles whose `country` field names exactly ONE country.
# (Previously a regex kept the first country of every title, so
# multi-country titles were still counted under their first country.)
# Each field is split on commas, and blank pieces (e.g. from a leading or
# trailing comma) are ignored when counting the countries of a title.
countries_per_title <- lapply(
  strsplit(as.character(netflix$country), ",", fixed = TRUE),
  function(parts) {
    parts <- trimws(parts)
    parts[!is.na(parts) & nzchar(parts)]
  }
)
has_single_country <- lengths(countries_per_title) == 1L
apenas_um_pais <- unlist(countries_per_title[has_single_country])

# Count how many titles each country has.
frequencia_nomes <- aggregate(
  x = list(freq = apenas_um_pais),
  by = list(nome = apenas_um_pais),
  FUN = length
)

# Sort by decreasing number of titles.
frequencia_nomes <- frequencia_nomes[order(-frequencia_nomes$freq), ]

Criação do gráfico de pizza usando plotly:

# Pie chart of the first 10 rows of `frequencia_nomes` (already sorted by
# decreasing frequency).
plot_ly(
  data = frequencia_nomes,
  type = "pie",
  labels = ~nome[1:10],
  values = ~freq[1:10]
)

Questão 8

Criando uma tabela com a resposta da questão anterior: os 10 países que possuem mais conteúdos e seus valores.

# Two-column table (country, frequency) with the first 10 rows of
# `frequencia_nomes`: white text on a black header, black text in the cells.
plot_ly(
  type = "table",
  columnwidth = c(50, 50),
  columnorder = c(0, 1),
  header = list(
    values = c("País", "Frequência"),
    align = rep("center", 2),
    line = list(width = 1.5, color = "black"),
    fill = list(color = "black"),
    font = list(family = "Arial", size = 12, color = "white")
  ),
  cells = list(
    # One matrix row per table column.
    values = rbind(
      frequencia_nomes$nome[1:10],
      frequencia_nomes$freq[1:10]
    ),
    align = rep("center", 2),
    line = list(width = 1.5, color = "black"),
    font = list(family = "Arial", size = 12, color = "black")
  )
)

Questão 9

Primeiramente, selecionaremos do dataset apenas as colunas release_year e type. Em seguida, calcularemos a década de cada lançamento e contaremos a quantidade de conteúdos por tipo e por década.

# Keep the two columns of interest and derive the decade of each release
# (e.g. 1997 -> 1990).
new_netflix <- netflix %>%
  select(type, release_year) %>%
  mutate(decade = floor(release_year / 10) * 10)

# Number of titles for each (type, decade) pair.
content_by_dec_type <- new_netflix %>%
  group_by(type, decade) %>%
  summarise(qtd = n()) %>%
  ungroup()

Construindo o gráfico de linhas:

# Line chart with one trace per content type: titles per decade.
fig <- plot_ly(
  filter(content_by_dec_type, type == "TV Show"),
  x = ~decade,
  y = ~qtd,
  type = "scatter",
  mode = "lines+markers",
  name = "TV Series"
) %>%
  add_trace(
    data = filter(content_by_dec_type, type == "Movie"),
    x = ~decade,
    y = ~qtd,
    mode = "lines+markers",
    name = "Movies"
  ) %>%
  layout(
    xaxis = list(title = "Decada"),
    yaxis = list(title = "Qnd. Conteudo"),
    legend = list(title = list(text = "Tipo de Conteúdo"))
  )

fig

Questão 10

# Titles released from 2000 to 2010 (inclusive) whose first listed genre is
# one of the three genres of interest.
filtered_data <- netflix %>%
  filter(between(release_year, 2000, 2010)) %>%
  # Keep only the text before the first comma of `listed_in`.
  mutate(main_gender = sub(",.*", "", listed_in)) %>%
  filter(main_gender %in% c("Dramas", "Action & Adventure", "Comedies"))

# Number of titles for each (release year, genre) pair.
content_by_year_gender <- filtered_data %>%
  group_by(release_year, main_gender) %>%
  summarise(quantidade = n()) %>%
  ungroup()

# Show the result.
print(content_by_year_gender)

## # A tibble: 33 × 3
##    release_year main_gender        quantidade
##           <int> <chr>                   <int>
##  1         2000 Action & Adventure          7
##  2         2000 Comedies                    8
##  3         2000 Dramas                      5
##  4         2001 Action & Adventure          8
##  5         2001 Comedies                   11
##  6         2001 Dramas                      7
##  7         2002 Action & Adventure         12
##  8         2002 Comedies                    3
##  9         2002 Dramas                     14
## 10         2003 Action & Adventure         12
## # ℹ 23 more rows

Criando o gráfico de barras lado a lado:

# Grouped bar chart: releases per year, one bar colour per genre.
# `barmode` is a layout attribute, not a bar-trace attribute, so it is set
# only in layout(). Passing it to plot_ly() as well produced the
# "'bar' objects don't have these attributes: 'barmode'" warnings.
fig <- plot_ly(
  content_by_year_gender,
  x = ~release_year,
  y = ~quantidade,
  color = ~main_gender,
  type = "bar"
) %>%
  layout(
    xaxis = list(title = "Ano de Lançamento"),
    yaxis = list(title = "Qnt. de Lançamentos"),
    barmode = "group" # bars side by side
  )

fig

CPAD | Exercício 12

Julyanne Maria dos Santos Correia

17/08/2024