Questão 1

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')
#library(dplyr)
#library(shiny)
#library(DT)
# Response-time series for each scenario (1, 3, 5, 10, 15 fogs and no fog).
# Every vector has seven entries, one per value of `clock` defined below.
MRT_1F <- c(
  517.1468515630205, 85.13094142168089, 30.333207896694553,
  12.694776264558937, 3.3041601673945418, 1.1823111717498882,
  1.1892293502386786
)

MRT_3F <- c(
  156.68929936163462, 11.540837783562276, 0.4512835621696538,
  0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
  0.4543157082191288
)

MRT_5F <- c(
  83.90319666471157, 0.3068151086494968, 0.30522314133037304,
  0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
  0.3053297166713006
)

MRT_10F <- c(
  29.55430642951759, 0.19832832665772515, 0.1971923924717474,
  0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
  0.19617420889447737
)

MRT_15F <- c(
  11.317736530583566, 0.167364215666193, 0.16172168266811013,
  0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
  0.16216563797118075
)

MRT_sem_F <- c(
  11.93430909937736, 0.6095414637034009, 0.6060645101029295,
  0.612167181646899, 0.6146761002685637, 0.6096747087200697,
  0.6125810476877268
)

# X positions shared by every series above.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Line chart with one series per scenario. Colours, symbols and labels are
# declared once so the curves and the legend cannot drift apart.
fog_series <- list(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)
fog_labels <- c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fogs")
fog_colours <- c("black", "yellow", "red", "blue", "purple", "green")
# Symbol 1 is the default plotting symbol, which the "5 Fogs" curve relies on.
fog_symbols <- c(4, 11, 1, 2, 5, 4)

# The first series opens the plot and therefore fixes the axis ranges.
plot(
  clock, fog_series[[1]],
  type = "o",
  pch = fog_symbols[1],
  xlab = "EixoX",
  ylab = "EixoY",
  main = "Gráfico simples"
)

# Overlay the remaining series on the same axes.
for (i in seq_along(fog_series)[-1]) {
  lines(
    clock, fog_series[[i]],
    type = "o",
    col = fog_colours[i],
    pch = fog_symbols[i]
  )
}

legend("topright", pch = fog_symbols, col = fog_colours, legend = fog_labels)

# Draw one grouped bar chart comparing the no-fog baseline with one fog
# scenario, and add the matching legend.
#
# baseline:  numeric vector for the "w/o Fogs" bars.
# fog_times: numeric vector for the fog scenario, same length as `baseline`.
# fog_label: legend text for the fog scenario.
# intervals: labels drawn under each pair of bars, one per element of
#            `baseline`.
#
# Returns, invisibly, the bar midpoints computed by barplot().
plot_fog_comparison <- function(baseline, fog_times, fog_label, intervals) {
  bar_colours <- c("#E6E6E6", "#666666")

  # Row 1 = baseline, row 2 = fog scenario; beside = TRUE pairs them per column.
  midpoints <- barplot(
    rbind(baseline, fog_times),
    beside = TRUE,
    col = bar_colours,
    # Without names.arg the x axis has a title but no per-group labels.
    names.arg = intervals,
    xlab = "Time between Things requests",
    ylab = "Response time(s)"
  )
  legend(
    "topright",
    col = bar_colours,
    pch = c(15, 15),
    legend = c("w/o Fogs", fog_label)
  )

  invisible(midpoints)
}

plot_fog_comparison(MRT_sem_F, MRT_1F, "1 Fog", clock)

plot_fog_comparison(MRT_sem_F, MRT_3F, "3 Fogs", clock)

plot_fog_comparison(MRT_sem_F, MRT_5F, "5 Fogs", clock)

plot_fog_comparison(MRT_sem_F, MRT_10F, "10 Fogs", clock)

plot_fog_comparison(MRT_sem_F, MRT_15F, "15 Fogs", clock)

Questão 2

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')
#library(dplyr)
#library(shiny)
#library(DT)

# Stacked bar chart of quality rating per meal-price group.
# The matrix is filled row by row: 3 rows x 4 columns. Rows follow the legend
# order below (Good, Very Good, Excellent); each column is one meal-price
# group. NOTE(review): the columns sum to roughly 100, so the values look like
# percentages, and the price bands themselves are not given here -- confirm.
rating_colours <- c("#E6E6E6", "#666666", "#B3B3B3")

data <- matrix(
  c(53.8, 33.9, 2.6, 0, 43.6, 54.2, 60.5, 21.4, 2.6, 11.9, 36.8, 78.6),
  nrow = 3,
  byrow = TRUE
)

# The matrix has no row names, so the previous `legend.text = rownames(data)`
# was NULL and `args.legend` was never used; both are removed. The trailing
# empty argument that closed the call is removed as well.
barplot(
  data,
  col = rating_colours,
  xlab = "Meal Price",
  ylab = "Quality Rating"
)

# The legend is drawn explicitly because the matrix carries no labels.
legend(
  "topright",
  col = rating_colours,
  pch = c(15, 15, 15),
  legend = c("Good", "Very Good", "Excellent")
)

Questão 3

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')
#library(dplyr)
#library(shiny)
#library(DT)

# Histogram of the May temperatures from `airquality`, converted from
# Fahrenheit to Celsius, with a kernel density estimate drawn on top.
data(airquality)

# Keep only the Temp readings whose Month is 5.
temp_maio <- airquality[airquality$Month == 5, "Temp"]

# Fahrenheit -> Celsius.
temp_maio_C <- (temp_maio - 32) / 1.8

hist(
  temp_maio_C,
  col = "grey",
  # Density scale (not counts) so the density curve shares the y axis.
  # Spelled TRUE because `T` is an ordinary variable that can be reassigned.
  probability = TRUE,
  main = "Histogram of Temperature",
  xlab = "Temperature (°C)",
  ylab = "Density"
)

lines(density(temp_maio_C), col = "red")

Questão 4

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')
#library(dplyr)
#library(shiny)
#library(DT)

# Pie chart of sales per country, each slice labelled "<country> <share>%".
sales <- read.table(
  "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Share of total sales per row, rounded to a whole percentage.
pct <- round(sales$SALES / sum(sales$SALES) * 100)
lbls <- paste0(sales$COUNTRY, " ", pct, "%")

# One colour per row, shared by the slices and the legend.
slice_colours <- rainbow(nrow(sales))

pie(
  sales$SALES,
  labels = lbls,
  col = slice_colours,
  main = "Sales by Country"
)

legend(
  "topright",
  legend = sales$COUNTRY,
  fill = slice_colours,
  bty = "n"
)

Questão 5

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')
#library(dplyr)
#library(shiny)
#library(DT)

# Box plot of insect counts per spray; outlier points are not drawn.
data(InsectSprays)

boxplot(
  count ~ spray,
  data = InsectSprays,
  xlab = "Spray",
  ylab = "Count",
  outline = FALSE,
  col = "yellow"
)

Questão 6

# Carregar bibliotecas
library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.2

# Load the four monitoring CSV files, one data frame per file.
data_0.1 <- read.csv(
  file = "C:/Users/Lemos/Desktop/monitoringCloudData_0.1.csv"
)
data_0.5 <- read.csv(
  file = "C:/Users/Lemos/Desktop/monitoringCloudData_0.5.csv"
)
data_1 <- read.csv(
  file = "C:/Users/Lemos/Desktop/monitoringCloudData_1.csv"
)
data_NONE <- read.csv(
  file = "C:/Users/Lemos/Desktop/monitoringCloudData_NONE.csv"
)

# Convert memory strings such as "512MB", "1.5GB" or "2 TB" to megabytes.
#
# memory_str: character vector (or something coercible to one) holding a
#             number immediately followed or preceded by a unit.
#
# Returns a numeric vector of the same length as `memory_str`, in megabytes.
# Units are matched case-insensitively after trimming whitespace. Values with
# an unrecognised or missing unit are returned unchanged, i.e. treated as
# already being in megabytes. Strings with no numeric part yield NA.
convert_memory <- function(memory_str) {
  memory_str <- as.character(memory_str)

  # Numeric part: everything that is a digit or a decimal point.
  value <- as.numeric(gsub("[^0-9.]", "", memory_str))

  # Unit part: what is left after removing digits and decimal points.
  # Trimmed and upper-cased so " GB" and "gb" match as well.
  unit <- toupper(trimws(gsub("[0-9.]", "", memory_str)))

  # Megabytes per unit. Binary factors throughout: the TB factor used to be
  # 1000000, which was inconsistent with the 1024 used for GB.
  mb_per_unit <- c(TB = 1024 * 1024, GB = 1024, MB = 1)

  multiplier <- unname(mb_per_unit[unit])
  # Unknown units keep the value as it is.
  multiplier[is.na(multiplier)] <- 1

  value * multiplier
}

# Normalise the two columns used for plotting in one monitoring data frame.
#
# df: data frame with a `usedMemory` column (memory strings understood by
#     convert_memory) and a `currentTime` column (timestamps that
#     as.POSIXct() can parse).
#
# Returns `df` with `usedMemory` as numeric megabytes and `currentTime` as the
# number of hours elapsed since the first row.
prepare_monitoring_data <- function(df) {
  # vapply() always returns an unnamed double vector; sapply() would name the
  # result by the input strings and return a list for zero rows.
  df$usedMemory <- vapply(
    df$usedMemory, convert_memory, numeric(1),
    USE.NAMES = FALSE
  )

  # Elapsed time relative to the first timestamp in the file.
  df$currentTime <- as.numeric(difftime(
    as.POSIXct(df$currentTime),
    as.POSIXct(df$currentTime[1]),
    units = "hours"
  ))

  df
}

# Apply the same preparation to every data set.
data_0.1 <- prepare_monitoring_data(data_0.1)
data_0.5 <- prepare_monitoring_data(data_0.5)
data_1 <- prepare_monitoring_data(data_1)
data_NONE <- prepare_monitoring_data(data_NONE)

# Build the time-series line plots, one per data set.

# Line plot of usedMemory against currentTime for one data frame.
# df:         data frame with `currentTime` and `usedMemory` columns.
# plot_title: title shown above the plot.
build_memory_plot <- function(df, plot_title) {
  ggplot(df, aes(x = currentTime, y = usedMemory)) +
    geom_line() +
    labs(
      title = plot_title,
      x = "Time (hours)",
      y = "Used Memory (MB)"
    )
}

plot_0.1 <- build_memory_plot(data_0.1, "Monitoring Cloud Data - 0.1")

plot_0.5 <- build_memory_plot(data_0.5, "Monitoring Cloud Data - 0.5")

plot_1 <- build_memory_plot(data_1, "Monitoring Cloud Data - 1")

plot_NONE <- build_memory_plot(data_NONE, "Monitoring Cloud Data - NONE")

# Arrange the four plots in a 2 x 2 grid on a single page.
# ggplot2 draws with grid graphics, so base graphics layout() has no effect on
# it: each print() would start a new page. A grid layout viewport is used
# instead and every plot is printed into its own cell, in the same row-wise
# order as the previous layout matrix (1 2 / 3 4).
grid::grid.newpage()
grid::pushViewport(
  grid::viewport(layout = grid::grid.layout(nrow = 2, ncol = 2))
)

print(plot_0.1, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1))

print(plot_0.5, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2))

print(plot_1, vp = grid::viewport(layout.pos.row = 2, layout.pos.col = 1))

print(plot_NONE, vp = grid::viewport(layout.pos.row = 2, layout.pos.col = 2))

# Leave the layout viewport so later drawing starts from the top level.
grid::popViewport()

Questão 7

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')

# Carregue as bibliotecas necessárias
library(dplyr)
library(plotly)

## Warning: package 'plotly' was built under R version 4.3.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Load the data set
netflix_data <- read.csv("C:/Users/Lemos/Desktop/netflix_titles.csv")  # Replace with the actual path to your file

# Keep only titles with exactly ONE country of origin.
# read.csv() reads a blank text cell as "" rather than NA, so blank countries
# must be excluded explicitly; otherwise "" is counted as a country.
netflix_data_single_country <- netflix_data %>%
  filter(!is.na(country), nzchar(trimws(country))) %>%
  filter(!grepl(",", country))

# Count titles per country and keep the ten largest.
content_count_by_country <- netflix_data_single_country %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# Build the pie chart with Plotly (hole = 0.6 renders it as a donut).
fig <- plot_ly(
  content_count_by_country,
  labels = ~country,
  values = ~count,
  type = "pie",
  hole = 0.6
)

# Add the chart title
fig <- fig %>% layout(title = "Top 10 Países com Mais Conteúdo na Netflix")

# Show the chart
fig

Questão 8

#knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.show = 'hold')

# Carregue as bibliotecas necessárias
library(dplyr)
library(DT)

## Warning: package 'DT' was built under R version 4.3.2

library(htmltools)

## Warning: package 'htmltools' was built under R version 4.3.2

# Load the data set
netflix_data <- read.csv("C:/Users/Lemos/Desktop/netflix_titles.csv")  # Replace with the actual path to your file

# Keep only titles with exactly ONE country of origin.
# read.csv() reads a blank text cell as "" rather than NA, so blank countries
# must be excluded explicitly; otherwise "" is counted as a country.
netflix_data_single_country <- netflix_data %>%
  filter(!is.na(country), nzchar(trimws(country))) %>%
  filter(!grepl(",", country))

# Count titles per country and keep the ten largest.
content_count_by_country <- netflix_data_single_country %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# Display copy of the table with the intended header labels.
table_data <- setNames(
  content_count_by_country,
  c("País", "Total de conteúdos")
)

# Table caption
title <- htmltools::tags$caption(HTML("<b>Top 10 Países com Mais Conteúdo na Netflix</b>"))

# The previous styleEqual() rules compared cell values against the header
# labels, so they never matched and the header was left unstyled. The header
# row is now coloured once the table has been initialised.
datatable(
  table_data,
  caption = title,
  options = list(
    dom = "t",
    paging = FALSE,
    ordering = FALSE,
    searching = FALSE,
    initComplete = JS(
      "function(settings, json) {",
      "  $(this.api().table().header()).css({'background-color': '#504e4e', 'color': 'white'});",
      "}"
    )
  ),
  rownames = FALSE
) %>%
  formatStyle(names(table_data), textAlign = "center")

Questão 9

# Number of movies and TV shows released per decade, as two line series.
netflix <- read.csv("C:/Users/Lemos/Desktop/netflix_titles.csv")
library(dplyr)
library(plotly)
library(stringr)

# Decade of release, e.g. 1997 -> 1990.
netflix <- netflix %>%
  mutate(decade = 10 * (release_year %/% 10))

# Movies per decade.
filmesPorDecada <- netflix %>%
  filter(type == "Movie") %>%
  count(decade, name = "qtd_conteúdo")

# TV shows per decade.
seriesPorDecada <- netflix %>%
  filter(type == "TV Show") %>%
  count(decade, name = "num_series")

# A full join keeps decades that appear in only one of the two tables (a left
# join from movies dropped decades that only have TV shows). A missing count
# means no titles in that decade, so it becomes 0. This replaces the previous
# positional patch `num_series[2] <- 1`, which wrote an arbitrary value into
# whichever row happened to be second.
seriesFilmes <- full_join(filmesPorDecada, seriesPorDecada, by = "decade") %>%
  mutate(
    qtd_conteúdo = coalesce(qtd_conteúdo, 0L),
    num_series = coalesce(num_series, 0L)
  ) %>%
  arrange(decade)

# The trace type is given on each add_trace() call: this avoids plotly's
# "No trace type specified" message. It is deliberately not set in plot_ly(),
# which has no y values and would otherwise contribute a trace of its own.
fig <- plot_ly(
  seriesFilmes,
  x = ~decade
) %>%
  add_trace(
    y = ~qtd_conteúdo,
    name = 'Filmes',
    type = 'scatter',
    mode = 'lines+markers'
  ) %>%
  add_trace(
    y = ~num_series,
    name = 'Séries',
    type = 'scatter',
    mode = 'lines+markers'
  )
fig

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

Questão 10

library(stringr)
# Grouped bar chart: movies released per year (2000-2010) whose first listed
# category is Comedies, Dramas or Action & Adventure.
netflix <- read.csv("C:/Users/Lemos/Desktop/netflix_titles.csv")

# Movies released from 2000 to 2010 (inclusive), with only the needed columns.
df_filtrado <- netflix %>%
  filter(between(release_year, 2000, 2010) & type == "Movie") %>%
  select(release_year, listed_in)

# Count movies per year and first listed category.
# `.groups = "drop"` returns an ungrouped result, which also silences the
# "summarise() has grouped output" message.
contagem_categorias_por_ano <- df_filtrado %>%
  mutate(primeira_categoria = ifelse(str_detect(listed_in, ","), word(listed_in, 1, sep = ", "), listed_in)) %>%
  group_by(release_year, primeira_categoria) %>%
  summarise(num_filmes = n(), .groups = "drop")

categorias <- c("Action & Adventure", "Comedies", "Dramas")
df_final <- contagem_categorias_por_ano %>%
  filter(primeira_categoria %in% categorias)

# One row per year, so a year with no movies in a category still appears
# (with NA in that column).
df_grafico <- data.frame(release_year = 2000:2010)

# Attach one count column per category. Categories are compared with `==`
# because the names are exact labels, not regular expressions.
df_grafico <- df_grafico %>%
  left_join(
    df_final %>%
      filter(primeira_categoria == "Comedies") %>%
      select(release_year, Comedies = num_filmes),
    by = "release_year"
  ) %>%
  left_join(
    df_final %>%
      filter(primeira_categoria == "Dramas") %>%
      select(release_year, Dramas = num_filmes),
    by = "release_year"
  ) %>%
  left_join(
    df_final %>%
      filter(primeira_categoria == "Action & Adventure") %>%
      select(release_year, ActionAdventure = num_filmes),
    by = "release_year"
  )

# The first trace comes from plot_ly(); the other two inherit type = 'bar'.
fig <- plot_ly(
  df_grafico,
  x = ~release_year,
  y = ~Comedies,
  type = 'bar',
  name = 'Comédia'
) %>%
  add_trace(
    y = ~Dramas,
    name = 'Drama'
  ) %>%
  add_trace(
    y = ~ActionAdventure,
    name = 'Ação e Aventura'
  ) %>%
  layout(
    yaxis = list(title = 'Qnt. de Lançamentos'),
    xaxis = list(title = 'Ano de Lançamento')
  )

# Show the chart
fig

Atividade de Análise de Dados - Exercício 12

Marcelo Lemos

2023-12-14

Questão 1

Questão 2

Questão 3

Questão 4

Questão 5

Questão 6

Questão 7

Questão 8

Questão 9

Questão 10