1 Questões

1.1 Questão 1

# Question 1: MRT series for six configurations, measured at the seven
# values stored in `clock` (the plots label that axis in hours).
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)
MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)
MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)
MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)
MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)
MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295,
               0.612167181646899, 0.6146761002685637, 0.6096747087200697,
               0.6125810476877268)
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# One row per configuration, one column per clock value.
dados_mat <- rbind(
  "1F" = MRT_1F,
  "3F" = MRT_3F,
  "5F" = MRT_5F,
  "10F" = MRT_10F,
  "15F" = MRT_15F,
  "Sem F" = MRT_sem_F
)
colnames(dados_mat) <- paste0(clock, "h")

# Two stacked panels. The namespace is explicit because plotly, attached
# later in this file, masks graphics::layout().
graphics::layout(matrix(c(1, 2), nrow = 2, byrow = TRUE))

# Line colours are named explicitly (same values matplot uses by default) so
# the legend can reuse them; without `col` the legend drew every key in black.
cores_linhas <- seq_len(nrow(dados_mat))
matplot(
  clock, t(dados_mat),
  type = "l", lwd = 2, lty = 1, col = cores_linhas,
  xlab = "Tempo (horas)", ylab = "MRT", main = "MRT vs Tempo"
)
legend(
  "topright",
  legend = rownames(dados_mat),
  col = cores_linhas, lty = 1, lwd = 2, bty = "n"
)

# One distinct grey per series, interpolated between the two original shades,
# so each legend key identifies exactly one group of bars.
cores <- colorRampPalette(c("#E6E6E6", "#666666"))(nrow(dados_mat))
barplot(
  dados_mat,
  beside = TRUE, log = "y", col = cores,
  main = "Barras – Escala log", xlab = "Tempo", ylab = "MRT"
)
legend("topright", legend = rownames(dados_mat), fill = cores, bty = "n")

# Restore the single-panel layout so later base plots are not squeezed
# into this two-row grid.
graphics::layout(1)

1.2 Questão 2

# Question 2: contingency table with price level in rows and quality rating
# in columns, filled row by row.
tab <- matrix(
  c(12, 18, 25, 10,
    5, 22, 30, 15,
    2, 10, 20, 18),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("Baixo", "Médio", "Alto"),
    c("Ruim", "Regular", "Boa", "Excelente")
  )
)

# Transposing puts one stacked bar per price level, with one segment per
# quality rating; the legend entries come from the row names of t(tab).
barplot(
  t(tab),
  col = c("tomato", "gold", "steelblue", "seagreen"),
  legend.text = TRUE,
  args.legend = list(x = "topright"),
  main = "Qualidade por Preço",
  xlab = "Preço",
  ylab = "Frequência"
)

1.3 Questão 3

# Question 3: histogram of the May temperatures from `airquality`, converted
# from Fahrenheit to Celsius, with a kernel density curve on top.
# Only Month == 5 is kept so the data match the plot title.
temps_c <- (airquality$Temp[airquality$Month == 5] - 32) / 1.8

# freq = FALSE puts the bars on the density scale; on the default count
# scale the density curve would be squashed flat against the x-axis.
hist(
  temps_c,
  freq = FALSE,
  col = "lightblue", border = "white",
  main = "Temperaturas em Maio (°C)", xlab = "°C", ylab = "Densidade"
)
lines(density(temps_c, na.rm = TRUE), lwd = 2)

1.4 Questão 4

# Question 4: pie chart with each country's share of total sales.
url_sales <- "https://training-course-material.com/images/8/8f/Sales.txt"

# Built-in sample used whenever the remote file cannot be read or contains
# no usable rows.
sales_exemplo <- function() {
  data.frame(
    Country = c("USA", "Brazil", "Japan", "Germany"),
    Total = c(1200, 850, 900, 700)
  )
}

# A failed download yields an empty data frame (handled below), but the
# reason is reported instead of being silently discarded.
sales <- tryCatch(
  read.table(url_sales, header = TRUE),
  error = function(e) {
    message("Falha ao ler ", url_sales, ": ", conditionMessage(e))
    data.frame()
  }
)

if (nrow(sales) == 0 || !"Country" %in% names(sales)) {
  sales <- sales_exemplo()
} else {
  # The first numeric column is taken as the sales amount.
  num_cols <- vapply(sales, is.numeric, logical(1))
  if (!any(num_cols)) {
    stop("Nenhuma coluna numérica encontrada no dataset original e falhou ao gerar dados alternativos.")
  }
  sales$Total <- sales[[which(num_cols)[1]]]

  # pie() needs strictly positive, non-missing values.
  sales <- sales[!is.na(sales$Total) & sales$Total > 0, ]
  if (nrow(sales) == 0) {
    sales <- sales_exemplo()
  }
}

pct <- round(100 * sales$Total / sum(sales$Total), 1)
lbl <- paste0(sales$Country, " — ", pct, "%")

# Computed once so the slices and the legend keys are guaranteed to match.
cores_paises <- rainbow(nrow(sales))

pie(
  sales$Total,
  labels = lbl,
  col = cores_paises,
  main = "Vendas por País"
)

legend("topright", legend = sales$Country, fill = cores_paises)

1.5 Questão 5

# Question 5: insect counts per spray type, with outlier points hidden
# (outline = FALSE) and yellow boxes.
data("InsectSprays")
boxplot(
  count ~ spray,
  data = InsectSprays,
  outline = FALSE,
  col = "yellow",
  main = "Insetos por Inseticida",
  xlab = "Spray",
  ylab = "Contagem"
)

1.6 Questão 6

# Convert memory-size strings such as "512MB", "1.5 gb" or "2048KB" to a
# numeric amount in megabytes.
#
# x: character vector; each element is a number optionally followed by a
#    unit. Case and surrounding whitespace are ignored. A value without a
#    unit is taken to be in MB already.
# Returns an unnamed numeric vector the same length as `x`; elements with an
# unrecognised unit (or no digits) are NA.
to_mb <- function(x) {
  x <- toupper(trimws(x))
  num <- as.numeric(gsub("[^0-9\\.]", "", x))
  unit <- gsub("[^A-Z]", "", x)
  unit[unit == ""] <- "MB"

  # Binary multiples throughout: the TB factor is 1024^2, consistent with
  # 1024 for GB and 1/1024 for KB (it was previously the decimal 1e6).
  fatores <- c(KB = 1 / 1024, MB = 1, GB = 1024, TB = 1024^2)

  # Indexing by name yields NA for any unit that is not in the table.
  num * unname(fatores[unit])
}

# Convert a vector of timestamps to hours elapsed since the earliest one.
#
# t: either date-time strings that as.POSIXct() can parse, or numbers
#    (numeric, or numeric-looking strings), which are treated as seconds.
# Returns a numeric vector the same length as `t`. Missing inputs stay NA
# and no longer turn every other element into NA.
to_hours <- function(t) {
  if (all(is.na(t))) {
    return(rep(NA_real_, length(t)))
  }

  # Numeric input goes straight to the seconds branch; anything else is
  # tried as a date-time first.
  parsed <- if (is.numeric(t)) {
    NULL
  } else {
    tryCatch(as.POSIXct(t), error = function(e) NULL)
  }

  # Parsing succeeded only if every non-missing input produced a timestamp.
  if (!is.null(parsed) && !anyNA(parsed[!is.na(t)])) {
    inicio <- min(parsed, na.rm = TRUE)
    return(as.numeric(difftime(parsed, inicio, units = "hours")))
  }

  segundos <- as.numeric(t)
  (segundos - min(segundos, na.rm = TRUE)) / 3600
}

# Read one monitoring CSV and append two derived columns.
#
# f: path to a CSV file that has `currentTime` and `usedMemory` columns.
# Returns the data frame from read.csv() plus:
#   hours  - to_hours() applied to `currentTime`
#   usedMB - to_mb() applied to `usedMemory`
read_monitor <- function(f) {
  monitor <- read.csv(f)
  horas <- to_hours(monitor$currentTime)
  memoria_mb <- to_mb(monitor$usedMemory)
  monitor$hours <- horas
  monitor$usedMB <- memoria_mb
  monitor
}

# Question 6: memory usage over time for the four monitoring runs.
d01 <- read_monitor("monitoringCloudData_0.1.csv")
d05 <- read_monitor("monitoringCloudData_0.5.csv")
d1 <- read_monitor("monitoringCloudData_1.csv")
dN <- read_monitor("monitoringCloudData_NONE.csv")

# 2 x 2 grid filled row by row. The namespace is explicit because plotly,
# attached further down in this file, masks graphics::layout(); a bare
# layout() call fails when this chunk is re-run in the same session.
graphics::layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

# One line plot per run, all with the same axes labels.
invisible(Map(
  function(dados, titulo) {
    plot(
      dados$hours, dados$usedMB,
      type = "l", main = titulo, xlab = "Horas", ylab = "MB"
    )
  },
  list(d01, d05, d1, dN),
  c("0.1", "0.5", "1.0", "NONE")
))

# Back to a single panel so later base plots are not drawn into the grid.
graphics::layout(1)

1.7 Questão 7

library(plotly);library(dplyr);library(stringr);library(readr)
## Carregando pacotes exigidos: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the Netflix catalogue when the CSV is in the working directory;
# otherwise fall back to a one-row placeholder with the columns used below.
netflix <- if (file.exists("netflix_titles.csv")) {
  read_csv("netflix_titles.csv")
} else {
  tibble(
    type = "Movie",
    country = "Brazil",
    release_year = 2010,
    listed_in = "Dramas"
  )
}
## Rows: 7787 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Question 7: keep titles that list exactly one country (no comma in the
# field), then chart the ten countries with the most titles.
one_country <- netflix %>%
  filter(!is.na(country), !str_detect(country, ",")) %>%
  mutate(country = str_trim(country))

top10 <- one_country %>%
  count(country, sort = TRUE) %>%
  slice_head(n = 10)

plot_ly(
  top10,
  labels = ~country,
  values = ~n,
  type = "pie",
  textinfo = "label+percent"
) %>%
  layout(title = "Top 10 países")

1.8 Questão 8

# Question 8: the same top-10 counts shown as a plotly table with a grey
# header and centred text.
tab8 <- rename(top10, `País` = country, `Total de conteúdos` = n)

plot_ly(
  type = "table",
  header = list(
    values = colnames(tab8),
    fill = list(color = "gray"),
    font = list(color = "white"),
    align = "center"
  ),
  # Transposed so that each row of the matrix holds one table column.
  cells = list(
    values = as.matrix(t(tab8)),
    align = "center"
  )
)

1.9 Questão 9

# Question 9: number of titles per release decade, one line for TV shows
# and one for movies.
df9 <- netflix %>%
  mutate(decade = floor(release_year / 10) * 10) %>%
  count(type, decade)

fig <- plot_ly() %>%
  add_lines(
    data = filter(df9, type == "TV Show"),
    x = ~decade, y = ~n,
    name = "Séries", line = list(color = "blue")
  ) %>%
  add_lines(
    data = filter(df9, type == "Movie"),
    x = ~decade, y = ~n,
    name = "Filmes", line = list(color = "yellow")
  )
fig

1.10 Questão 10

# Question 10: movies released from 2000 to 2010, counted per year by the
# first genre listed in `listed_in`, restricted to three genres.
genres <- c("Dramas", "Action & Adventure", "Comedies")

df10 <- netflix %>%
  filter(type == "Movie", release_year >= 2000, release_year <= 2010) %>%
  # Text before the first comma is the title's first listed genre.
  mutate(first = str_trim(str_split_fixed(listed_in, ",", 2)[, 1])) %>%
  filter(first %in% genres) %>%
  count(release_year, first)

plot_ly(
  df10,
  x = ~release_year,
  y = ~n,
  color = ~first,
  type = "bar"
) %>%
  layout(barmode = "group")