Questões

Questão 1

Não fiz

Questão 2

# Question 2: stacked bar chart of meal quality (percent) per price range.

# One row per quality rating, one column per price range. data.frame() rewrites
# these non-syntactic column names (`10_19` becomes X10_19), which is why the
# bar labels are supplied explicitly through names.arg below.
data <- data.frame(
  Quality = c("Good", "Very Good", "Excellent"),
  `10_19` = c(53.8, 43.6, 2.6),
  `20_29` = c(33.9, 54.2, 11.9),
  `30_39` = c(2.6, 60.5, 36.8),
  `40_49` = c(0.0, 21.4, 78.6)
)

# barplot() needs a numeric matrix: drop the label column and keep the quality
# ratings as row names (they are reused for the legend).
mat <- as.matrix(data[, -1])
rownames(mat) <- data$Quality

colors <- c("lightblue", "lightgreen", "lightcoral")

# Widen the right margin so the legend fits outside the plot region. The
# previous settings are kept so they can be restored afterwards instead of
# leaking into every plot drawn later.
old_par <- par(mar = c(5, 4, 4, 8))

# beside = FALSE stacks the three ratings within each price range; bp holds the
# x positions of the bars.
bp <- barplot(
  mat,
  beside = FALSE,
  col = colors,
  names.arg = c("$10–19", "$20–29", "$30–39", "$40–49"),
  main = "Qualidade da refeição por faixa de preço",
  xlab = "Faixa de preço", ylab = "Percentual",
  legend.text = FALSE
)

# Place the legend to the right of the last bar; xpd = TRUE allows drawing in
# the margin area.
legend(
  x = max(bp) + 0.8,
  y = 100,
  legend = rownames(mat),
  fill = colors,
  bty = "n",
  cex = 0.8,
  xpd = TRUE
)

# Restore the graphical parameters that were in effect before this plot.
par(old_par)

Questão 3

# Question 3: histogram of the May temperatures from the built-in airquality
# data set, converted to Celsius, with a kernel density curve drawn on top.
data("airquality")
may <- subset(airquality, Month == 5)

# Fahrenheit -> Celsius. The variable keeps its original (misspelled) name so
# that any other code referring to it keeps working.
celcius_temp <- (may$Temp - 32) / 1.8

# probability = TRUE puts the bars on the density scale, which makes them
# comparable with the curve added by lines().
hist(
  celcius_temp,
  probability = TRUE,
  main = "Histograma das temperaturas em Maio em Celsius",
  xlab = "Temperatura (Celsius)",
  ylab = "Densidade",
  col = "lightblue"
)
lines(density(celcius_temp, na.rm = TRUE), lwd = 2)

Questão 4

# Question 4: pie chart showing each country's share of total sales.
sales <- read.table(
  file = "https://training-course-material.com/images/8/8f/Sales.txt",
  header = TRUE
)

# Total sales per country.
sales_sum <- aggregate(SALES ~ COUNTRY, data = sales, FUN = sum)
country <- sales_sum[["COUNTRY"]]
values <- sales_sum[["SALES"]]

# Percentage share of each country, rounded to one decimal place, and the
# matching slice labels ("<country> <share> %").
p <- round(100 * values / sum(values), digits = 1)
l <- paste(country, p, "%")

# One distinct colour per country, shared by the slices and the legend.
pie_colors <- rainbow(n = length(country))

pie(
  x = values,
  labels = l,
  col = pie_colors,
  main = "Percentual de vendas por país"
)
legend(
  "topright",
  legend = country,
  fill = pie_colors,
  cex = 0.8
)

Questão 5

# Question 5: insect counts per spray from the built-in InsectSprays data set.
# outline = FALSE suppresses the drawing of outlier points.
data("InsectSprays")
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagem de insetos por inseticida",
  xlab = "Inseticida",
  ylab = "Contagem",
  col = "yellow",
  outline = FALSE
)

Questão 6

#' Read one monitoring CSV file and prepare it for plotting.
#'
#' Parses `currentTime` as POSIXct (UTC), sorts the rows by that timestamp and
#' adds two columns: `hours_from_start`, the hours elapsed since the first row
#' after sorting, and `usedMemoryMB`, the `usedMemory` column converted by
#' `muda_to_mb()`.
#'
#' @param path Path to a CSV file that has `currentTime` and `usedMemory`
#'   columns.
#' @return The data frame, sorted by time, with the two extra columns.
treat_file <- function(path) {
  df <- read.csv(path, stringsAsFactors = FALSE)

  # Parse the timestamps once and reuse them for both the column and the sort.
  stamps <- as.POSIXct(
    df$currentTime,
    format = "%Y-%m-%d %H:%M:%S",
    tz = "UTC"
  )
  df$currentTime <- stamps
  df <- df[order(stamps), ]

  # Elapsed time relative to the first row of the sorted data.
  elapsed <- difftime(df$currentTime, df$currentTime[1], units = "hours")
  df$hours_from_start <- as.numeric(elapsed)

  df$usedMemoryMB <- muda_to_mb(df$usedMemory)
  df
}

#' Convert memory-size strings such as "512MB" or "1.5GB" to megabytes.
#'
#' The first number found in each string is multiplied by the factor of the
#' first unit found (MB, GB or TB). Units are matched case-insensitively.
#' Only base R is used, so stringr does not have to be attached.
#'
#' @param n Character vector of sizes; surrounding whitespace is ignored.
#' @return Numeric vector of the same length as `n`, in MB. Elements with no
#'   number or no recognised unit are `NA`.
muda_to_mb <- function(n) {
  # Multipliers to MB. NOTE(review): TB uses a decimal factor while GB uses the
  # binary 1024; both are kept exactly as before -- confirm this is intended.
  mb_per_unit <- c(MB = 1, GB = 1024, TB = 1000000)

  # First match of `pattern` in each element of `x`; NA where there is none.
  first_match <- function(x, pattern) {
    pos <- regexpr(pattern, x)
    found <- !is.na(pos) & pos > -1L
    out <- rep(NA_character_, length(x))
    out[found] <- regmatches(x, pos)
    out
  }

  n <- trimws(as.character(n))
  num <- as.numeric(first_match(n, "[0-9]+\\.?[0-9]*"))

  # Upper-case BEFORE matching so that "gb" and "GB" are treated alike; doing
  # it after a case-sensitive match would never see lower-case units.
  v <- first_match(toupper(n), "MB|GB|TB")

  # Indexing by an NA unit yields NA, so unknown units propagate as NA.
  num * unname(mb_per_unit[v])
}

# Question 6: used memory over time, one panel per monitoring file.
files <- c(
  "monitoringCloudData_NONE.csv", "monitoringCloudData_0.1.csv",
  "monitoringCloudData_0.5.csv", "monitoringCloudData_1.csv"
)

# Each file is read and prepared by treat_file().
lista_arquivos <- lapply(files, treat_file)

# Panel titles, in the same order as `files`.
titles <- c(
  "Memory Analysis (None workload)", "Memory Analysis (Workload 0.1)",
  "Memory Analysis (Workload 0.5)", "Memory Analysis (Workload 1.0)"
)

# 2 x 2 grid of panels. The previous settings are kept so the grid layout and
# margins can be restored and do not leak into plots drawn afterwards.
old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 2))
for (i in seq_along(lista_arquivos)) {
  d <- lista_arquivos[[i]]
  plot(
    d$hours_from_start, d$usedMemoryMB,
    type = "l",
    main = titles[i],
    xlab = "Time hour", ylab = "UsedMemory (MB)"
  )
}
par(old_par)

Questão 7

# Question 7: pie chart of the ten countries with the most Netflix titles,
# counting only titles whose country field names a single country.
netflix_data <- read.csv("netflix_titles.csv", stringsAsFactors = FALSE)

top10 <- netflix_data %>%
  # Drop rows with a missing or empty country, and rows whose country field
  # contains a comma (more than one country listed).
  filter(!is.na(country) & country != "" & !grepl(",", country)) %>%
  count(country, name = "total") %>%
  arrange(desc(total)) %>%
  head(10)

img <- plot_ly(
  top10,
  labels = ~country, values = ~total,
  type = "pie", textinfo = "label+percent"
)
img <- img %>%
  layout(title = "Top 10 países com mais conteúdos (apenas 1 país origem)")
img

Questão 8

# Question 8: the top-10 ranking rendered as a plotly table.
table_with_headers <- rename(
  top10,
  País = country,
  `Total de conteúdos` = total
)

tabela <- plot_ly(
  type = "table",
  header = list(
    values = c("<b>País</b>", "<b>Total de conteúdos</b>"),
    align = rep("center", 2),
    fill = list(color = rep("grey", 2)),
    font = list(color = "white", size = 12)
  ),
  cells = list(
    # rbind() stacks the two columns as the rows of a matrix: countries first,
    # totals second.
    values = rbind(
      table_with_headers[["País"]],
      table_with_headers[["Total de conteúdos"]]
    ),
    align = rep("center", 2)
  )
)
tabela

Questão 9

# Question 9: number of titles released per decade, one line per content type.
net_by_decade <- netflix_data %>%
  filter(!is.na(release_year)) %>%
  mutate(decade = floor(release_year / 10) * 10) %>%
  group_by(decade, type) %>%
  summarise(total = n(), .groups = "drop")

# Turn the decade into a factor whose levels are in ascending order.
dec_order <- sort(unique(net_by_decade$decade))
net_by_decade$decade <- factor(net_by_decade$decade, levels = dec_order)

# One column per content type; decade/type combinations with no rows become 0.
pw <- pivot_wider(
  net_by_decade,
  names_from = type,
  values_from = total,
  values_fill = 0
)

# Add a trace only for the content types that are actually present as columns.
image <- plot_ly(pw, x = ~decade)
if ("TV Show" %in% names(pw)) {
  image <- add_lines(
    image,
    y = ~`TV Show`,
    name = "TV Series",
    line = list(color = "blue")
  )
}
if ("Movie" %in% names(pw)) {
  image <- add_lines(
    image,
    y = ~Movie,
    name = "Movies",
    line = list(color = "yellow")
  )
}
image <- layout(
  image,
  xaxis = list(title = "Década"),
  yaxis = list(title = "Quantidade")
)
image

Questão 10

Não fiz