# Questão 1 ----

# MRT measurements, one vector per "F" configuration (1F, 3F, 5F, 10F, 15F)
# plus the run without F ("sem F"). Element i of every vector is plotted
# against element i of `clock`.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553, 12.694776264558937, 3.3041601673945418, 1.1823111717498882, 1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538, 0.4509797929766453, 0.4502068233039181, 0.4496185276300172, 0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304, 0.3072588968084928, 0.30655265997285697, 0.3055812715727718, 0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474, 0.19796648905716516, 0.19615594370806338, 0.2034569237883263, 0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013, 0.16701085329580515, 0.1598052657153692, 0.1645934043532696, 0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295, 0.612167181646899, 0.6146761002685637, 0.6096747087200697, 0.6125810476877268)

# Clock settings used as the x-axis labels/values of every panel.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Draw one bar chart of an MRT series with a logarithmic y axis.
#
# Args:
#   data:      numeric vector of MRT values (must be > 0 because of log = "y").
#   title:     main title of the panel.
#   names_arg: labels written under the bars; defaults to the global `clock`,
#              so existing two-argument calls behave as before.
generate_barplot <- function(data, title, names_arg = clock) {
  barplot(
    data,
    names.arg = names_arg,
    log = "y",
    col = c("#E6E6E6", "#666666"), # two colours recycled alternately over the bars
    xlab = "Clock",
    ylab = "MRT (log scale)",
    main = title
  )
}

# 2 x 3 grid of panels, filled row by row. The graphics:: prefix is deliberate:
# plotly (attached further down in this file) exports its own layout() that
# masks this one, which would break a re-run of this section.
graphics::layout(matrix(1:6, nrow = 2, byrow = TRUE))

generate_barplot(MRT_1F, "MRT 1F")
generate_barplot(MRT_3F, "MRT 3F")
generate_barplot(MRT_5F, "MRT 5F")
generate_barplot(MRT_10F, "MRT 10F")
generate_barplot(MRT_15F, "MRT 15F")

# Sixth panel: the series without F as a line plot on a linear scale.
plot(clock, MRT_sem_F, type = "o", col = "blue", xlab = "Clock", ylab = "MRT", main = "MRT sem F")

# Return to one plot per page so later figures are not squeezed into this grid.
graphics::layout(1)

# Questão 2 ----

library(ggplot2)
library(tidyr)

# Share (in percent) of each quality rating within every meal price range;
# each price-range column adds up to roughly 100.
# check.names = FALSE keeps the price-range column names exactly as written.
# Without it data.frame() rewrites `10-19` to `X10.19`, and that mangled name
# is what would be shown on the chart axis.
data <- data.frame(
  QualityRating = c("good", "very good", "excelent"),
  `10-19` = c(53.8, 43.6, 2.6),
  `20-29` = c(33.9, 54.2, 11.9),
  `30-39` = c(2.6, 60.5, 36.8),
  `40-49` = c(0.0, 21.4, 78.6),
  check.names = FALSE
)

# Reshape to long format: one row per (quality rating, price range) pair, with
# the former column names in `PriceRange` and the cell values in `Percentage`.
data_long <- pivot_longer(
  data,
  cols = -QualityRating,
  names_to = "PriceRange",
  values_to = "Percentage"
)

# Stacked bar chart: one bar per price range, split by quality rating.
ggplot(
  data = data_long,
  mapping = aes(x = PriceRange, y = Percentage, fill = QualityRating)
) +
  geom_col() +
  theme_minimal() +
  # Tilt the x tick labels; must come after theme_minimal() to take effect.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "qualidade de refeicao de acordo com categorias de precos",
    x = "Faixa de Preco",
    y = "Porcentagem",
    fill = "Avaliacao da Qualidade"
  )

# Questão 3 ----

library(ggplot2)

# Load the built-in air quality data set and keep only the May rows (Month 5).
data(airquality)
may_data <- airquality[airquality$Month == 5, ]

# Temp is converted from Fahrenheit to Celsius.
may_data[["Temp_Celsius"]] <- (may_data[["Temp"]] - 32) / 1.8

# Histogram on the density scale so the kernel density curve drawn on top of
# it shares the same y axis.
ggplot(data = may_data, mapping = aes(x = Temp_Celsius)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 20,
    alpha = 0.7,
    color = "black",
    fill = "skyblue"
  ) +
  geom_density(fill = "red", alpha = 0.2) +
  theme_minimal() +
  labs(
    title = "Histograma das Temperaturas de Maio (Celsius)",
    x = "Temperatura (C)",
    y = "Densidade"
  )

# Questão 4 ----

# Download the sales table; the code below reads its COUNTRY and SALES columns.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Each row's share of the overall sales, in percent.
sales$Percentage <- with(sales, SALES / sum(SALES)) * 100

# One fill colour per slice (recycled by pie()/legend() if there are more rows).
colors <- c("lightblue", "lightgreen", "lightcoral", "lightyellow", "lightpink", "lightgray")

# Slice labels: country on the first line, rounded percentage on the second.
pie(
  x = sales$SALES,
  labels = paste(sales$COUNTRY, "\n", round(sales$Percentage, 1), "%"),
  main = "Porcentagem total de Vendas por Pais",
  col = colors
)

# Borderless legend (bty = "n") mapping colours to countries.
legend(
  "topright",
  title = "Paises",
  legend = sales$COUNTRY,
  fill = colors,
  bty = "n"
)

# Questão 5 ----

# Built-in data set; the formula below uses its `count` and `spray` columns.
data(InsectSprays)

# One box per spray type. outline = FALSE hides the outlier points.
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagem de insetos por tipo de inseticida",
  xlab = "Tipo de inseticida",
  ylab = "Contagem de insetos",
  col = "purple",
  border = "black",
  outline = FALSE
)

# Questão 6 ----

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read one monitoring CSV and normalise the two columns used for plotting.
#
# Args:
#   file: path to a CSV file with a `currentTime` column holding
#         "%Y-%m-%d %H:%M:%S" timestamps and a `usedMemory` column holding
#         values such as "512MB", "2GB" or "1TB" (a bare number is taken as MB).
#
# Returns:
#   The data frame as read, with
#     - `currentTime` replaced by the hours elapsed since the first row, and
#     - `usedMemory` replaced by a plain numeric vector in MB.
#   Values that cannot be parsed become NA (as.numeric() emits a warning).
process_data <- function(file) {
  data <- read.csv(file, stringsAsFactors = FALSE)

  # Elapsed time relative to the first sample, in hours.
  timestamps <- strptime(data$currentTime, format = "%Y-%m-%d %H:%M:%S")
  data$currentTime <- as.numeric(
    difftime(timestamps, timestamps[1], units = "hours")
  )

  # Convert every reading to MB in one vectorized pass. Binary factors are
  # used for both GB (1024) and TB (1024^2) so the units stay consistent.
  # TB is assigned last so that it takes precedence, matching the order in
  # which the units were originally tested.
  raw_memory <- as.character(data$usedMemory)
  mb_per_unit <- rep(1, length(raw_memory))
  mb_per_unit[grepl("GB", raw_memory, fixed = TRUE)] <- 1024
  mb_per_unit[grepl("TB", raw_memory, fixed = TRUE)] <- 1024^2
  data$usedMemory <- as.numeric(gsub("TB|GB|MB", "", raw_memory)) * mb_per_unit

  data
}

# Monitoring CSVs to plot. Each name is the label shown in the panel title;
# declaring it next to its path keeps the two from drifting apart.
files <- c(
  "0.1" = "C:/Users/TemisWin10/Downloads/monitoringCloudData/monitoringCloudData_0.1.csv",
  "0.5" = "C:/Users/TemisWin10/Downloads/monitoringCloudData/monitoringCloudData_0.5.csv",
  "1" = "C:/Users/TemisWin10/Downloads/monitoringCloudData/monitoringCloudData_1.csv",
  "NONE" = "C:/Users/TemisWin10/Downloads/monitoringCloudData/monitoringCloudData_NONE.csv"
)

# lapply() keeps the names, so data_list is indexed by the same labels.
data_list <- lapply(files, process_data)

# 2 x 2 grid of panels. par() returns the previous setting, which is restored
# after the loop so later plots get the full device again.
old_par <- par(mfrow = c(2, 2))

for (name in names(data_list)) {
  data <- data_list[[name]]
  plot(data$currentTime, data$usedMemory, type = "l", col = "blue",
       xlab = "Tempo (horas)", ylab = "Memória Usada (MB)",
       main = paste("Uso de Memória -", name))
}

par(old_par)

# Questão 7 ----

library(dplyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Netflix catalogue; this section uses its `country` column.
netflix_data <- read.csv("C:/Users/TemisWin10/Downloads/netflix_titles.csv", stringsAsFactors = FALSE)

# Ten countries with the most titles, counting single-country titles only.
# read.csv() leaves blank character fields as "" rather than NA, so blank
# countries have to be excluded explicitly or they are counted as a "country".
# Entries containing a comma list several countries and are skipped.
filtered_data <- netflix_data %>%
  filter(
    !is.na(country),
    nzchar(trimws(country)),
    !grepl(",", country, fixed = TRUE)
  ) %>%
  count(country, sort = TRUE) %>%
  # slice_max() keeps ties by default, like the superseded top_n().
  slice_max(n, n = 10)

# Pie chart labelled with the country name and its share of the top ten.
fig7 <- plot_ly(filtered_data, labels = ~country, values = ~n, type = 'pie', textinfo = 'label+percent') %>%
  layout(title = "Top 10 Paises com mais conteudo na netflix")
fig7

# Questão 8 ----

# Header row: two column titles, white text on a grey background.
table_header <- list(
  values = c("Pais", "Total de conteudos"),
  align = "center",
  fill = list(color = "grey"),
  font = list(color = "white", size = 14)
)

# Body: rbind() stacks the country names and their counts as two matrix rows,
# one per table column (NOTE(review): relies on plotly's table trace taking
# cell values column-wise; confirm against the plotly docs).
table_cells <- list(
  values = rbind(filtered_data$country, filtered_data$n),
  align = "center",
  font = list(size = 12)
)

# Same top-10 data as a table instead of a chart.
fig8 <- plot_ly(type = "table", header = table_header, cells = table_cells)
fig8

# Questão 9 ----

# Decade of release: integer division drops the last digit of the year
# (e.g. 1997 -> 1990).
netflix_data$decade <- (netflix_data$release_year %/% 10) * 10

# Titles per decade and type, reshaped to one row per decade with one column
# per type (0 where a decade has no title of that type).
# The column is renamed by name instead of overwriting all column names by
# position, so the result does not depend on the order of the type columns.
# NOTE(review): assumes the `type` values are "Movie" and "TV Show"; rename()
# fails loudly if "TV Show" is absent.
decade_data <- netflix_data %>%
  filter(!is.na(decade)) %>%
  count(decade, type) %>%
  pivot_wider(names_from = type, values_from = n, values_fill = 0) %>%
  rename(TV_Show = `TV Show`)

# One line per content type across the decades.
fig9 <- plot_ly(decade_data, x = ~decade) %>%
  add_trace(y = ~Movie, type = 'scatter', mode = 'lines+markers', name = 'Filmes', line = list(color = 'yellow')) %>%
  add_trace(y = ~TV_Show, type = 'scatter', mode = 'lines+markers', name = 'Series', line = list(color = 'blue')) %>%
  layout(title = "Quantidade de Conteudo por Decada", xaxis = list(title = "Decada"), yaxis = list(title = "Quantidade"))
fig9

# Questão 10 ----

# Movies released from 2000 to 2010 (inclusive), counted per year and primary
# genre. The primary genre is the first entry of the comma-separated
# `listed_in` field; only the three genres listed below are kept.
filtered_movies <- netflix_data %>%
  filter(type == "Movie", between(release_year, 2000, 2010)) %>%
  mutate(
    # vapply() always returns a character vector (NA when `listed_in` is
    # empty), whereas sapply() returns a list when there are no rows.
    primary_genre = vapply(
      strsplit(listed_in, ","),
      function(genres) genres[1],
      character(1)
    )
  ) %>%
  filter(primary_genre %in% c("Dramas", "Action & Adventure", "Comedies")) %>%
  count(release_year, primary_genre)

# Grouped bars: one group per year, one bar per genre.
fig10 <- plot_ly(filtered_movies, x = ~release_year, y = ~n, color = ~primary_genre, type = 'bar') %>%
  layout(title = "Quantidade de Filmes por Genero (2000-2010)", xaxis = list(title = "Ano"), yaxis = list(title = "Quantidade"), barmode = 'group')
fig10