Exercício 12 [Visualização de Dados]

Questão 01

# Mean response times (seconds) for each fog configuration; each vector has
# one value per entry of `clock`.
MRT_1F <- c(517.1468515630205, 85.13094142168089, 30.333207896694553,
            12.694776264558937, 3.3041601673945418, 1.1823111717498882,
            1.1892293502386786)

MRT_3F <- c(156.68929936163462, 11.540837783562276, 0.4512835621696538,
            0.4509797929766453, 0.4502068233039181, 0.4496185276300172,
            0.4543157082191288)

MRT_5F <- c(83.90319666471157, 0.3068151086494968, 0.30522314133037304,
            0.3072588968084928, 0.30655265997285697, 0.3055812715727718,
            0.3053297166713006)

MRT_10F <- c(29.55430642951759, 0.19832832665772515, 0.1971923924717474,
             0.19796648905716516, 0.19615594370806338, 0.2034569237883263,
             0.19617420889447737)

MRT_15F <- c(11.317736530583566, 0.167364215666193, 0.16172168266811013,
             0.16701085329580515, 0.1598052657153692, 0.1645934043532696,
             0.16216563797118075)

MRT_sem_F <- c(11.93430909937736, 0.6095414637034009, 0.6060645101029295,
               0.612167181646899, 0.6146761002685637, 0.6096747087200697,
               0.6125810476877268)

# Time between requests, used as the x axis for every series.
clock <- c(0.1, 0.5, 1, 1.5, 2, 2.5, 3)

# Size the y axis from all series, so no curve depends on the first one
# happening to hold the extreme values.
y_range <- range(MRT_1F, MRT_3F, MRT_5F, MRT_10F, MRT_15F, MRT_sem_F)

# The axis label is a single line: the original string embedded a raw newline
# followed by indentation spaces, and the call ended with an empty argument.
plot(clock, MRT_1F, type = "o", pch = 4, col = "black", ylim = y_range,
     xlab = "Time between Things requests (seconds)",
     ylab = "Response Time (sec.)")

lines(clock, MRT_3F, type = "o", pch = 11, col = "yellow")

lines(clock, MRT_5F, type = "o", pch = 1, col = "red")

lines(clock, MRT_10F, type = "o", pch = 2, col = "blue")

lines(clock, MRT_15F, type = "o", pch = 5, col = "purple")

lines(clock, MRT_sem_F, type = "o", pch = 4, col = "green")

# `lty = 1` makes the legend keys show the connecting line as well as the
# marker, matching how the series are drawn (type = "o").
legend("topright", lty = 1, pch = c(4, 11, 1, 2, 5, 4),
       col = c("black", "yellow", "red", "blue", "purple", "green"),
       legend = c("1 Fog", "3 Fogs", "5 Fogs", "10 Fogs", "15 Fogs", "w/o Fog"))

# One grouped bar chart per fog configuration, each comparing it with the
# no-fog baseline on a logarithmic y axis. Panels fill a 3 x 2 grid.
# par() returns the previous settings, which are restored at the end so that
# later plots are not drawn into the leftover grid cell.
old_par <- par(mfrow = c(3, 2))

fog_series <- list(
  "1 Fog" = MRT_1F,
  "3 Fogs" = MRT_3F,
  "5 Fogs" = MRT_5F,
  "10 Fogs" = MRT_10F,
  "15 Fogs" = MRT_15F
)
bar_colours <- c("gray", "azure4")

for (fog_label in names(fog_series)) {
  # Row 1 = baseline without fog, row 2 = the fog configuration.
  barplot(matrix(c(MRT_sem_F, fog_series[[fog_label]]),
                 nrow = 2, ncol = 7, byrow = TRUE),
          log = "y",
          ylab = "Response time (s)", xlab = "Time between Things requests",
          names.arg = clock, col = bar_colours, beside = TRUE)

  legend("topright", pch = c(15, 15), col = bar_colours,
         legend = c("w/o Fog", fog_label))
}

par(old_par)

Questão 02

# Stacked bar chart: share of each quality rating within every price band.
cores <- c("gray", "yellow", "green")
metal_price <- c("$10-19", "$20-29", "$30-39", "$40-49")
quality_rating <- c("BOM", "MUITO BOM", "EXCELENTE")

# One row per quality rating (same order as `quality_rating`), one column per
# price band (same order as `metal_price`).
Porcentagens <- rbind(
  c(53.8, 33.9, 2.6, 0.0),
  c(43.6, 54.2, 60.5, 21.4),
  c(2.6, 11.9, 36.8, 78.6)
)

barplot(
  Porcentagens,
  main = "Qualidade da Refeição",
  names.arg = metal_price,
  ylab = "Total (%)",
  xlab = "Preço da refeição",
  col = cores
)
legend(
  "topright",
  pch = rep(15, length(quality_rating)),
  col = cores,
  legend = quality_rating
)

QUESTÃO 3

# Histogram of the May temperatures from `airquality`, converted from
# Fahrenheit to Celsius.
airquality_may <- airquality[airquality$Month == 5, ]
Temperature <- airquality_may[["Temp"]]
Temperature_C <- (Temperature - 32) / 1.8

hist(
  Temperature_C,
  main = "Histograma da Temperatura",
  xlab = "Temperatura de Maio (°C)",
  ylab = "Frequência",
  col = "blue",
  density = 10
)

QUESTÃO 4

# Pie chart of each country's share of total sales.
sales <- read.table("https://training-course-material.com/images/8/8f/Sales.txt",
                    header = TRUE)

# Slice labels: country name followed by its rounded percentage of the total.
sales_pct <- round(sales$SALES / sum(sales$SALES) * 100)
lbls <- paste0(sales$COUNTRY, " ", sales_pct, "%")

# A single palette feeds both the slices and the legend. Previously the
# legend listed "blue" before "red" while the pie used "red" before "blue",
# so two legend keys pointed at the wrong slices.
slice_colours <- rep_len(
  c("green", "red", "blue", "yellow", "magenta", "cyan"),
  nrow(sales)
)

pie(sales$SALES, labels = lbls, main = "Vendas por país", col = slice_colours)
legend("topright", pch = 15, col = slice_colours, legend = sales$COUNTRY)

QUESTÃO 5

# Box plot of insect counts per spray, outliers hidden, every box in yellow.
cores <- rep("yellow", nlevels(factor(InsectSprays$spray)))
boxplot(
  count ~ spray,
  data = InsectSprays,
  main = "Contagens de insetos tratados com diferentes inseticidas",
  xlab = "Inseticida",
  ylab = "Contagem de Insetos",
  col = cores,
  outline = FALSE
)

QUESTÃO 6

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Load the four monitoring captures (one CSV per workload setting).
data_0.1 <- read.csv(file = "monitoringCloudData_0.1.csv")
data_0.5 <- read.csv(file = "monitoringCloudData_0.5.csv")
data_1 <- read.csv(file = "monitoringCloudData_1.csv")
data_NONE <- read.csv(file = "monitoringCloudData_NONE.csv")

# Parse the timestamp column of each capture into date-times.
data_0.1[["currentTime"]] <- ymd_hms(data_0.1[["currentTime"]])
data_0.5[["currentTime"]] <- ymd_hms(data_0.5[["currentTime"]])

## Warning: 194 failed to parse.
# NOTE(review): the recorded run reported the warning above for the 0.5
# capture; values that fail to parse become NA — confirm the timestamp format.

data_1[["currentTime"]] <- ymd_hms(data_1[["currentTime"]])
data_NONE[["currentTime"]] <- ymd_hms(data_NONE[["currentTime"]])

#' Convert memory size strings to megabytes.
#'
#' Recognises the unit suffixes "MB", "GB" and "TB" and uses binary multiples
#' throughout (1 GB = 1024 MB, 1 TB = 1024 * 1024 MB). The TB factor used to
#' be the decimal 1000000, inconsistent with the 1024 used for GB.
#'
#' @param memory_str Character vector such as "512MB", "1.5GB" or "2TB".
#'   A single string works as before; longer vectors are converted
#'   element-wise.
#' @return Numeric vector of the same length, in MB. Elements that are `NA`
#'   or carry no recognised unit yield `NA` (with a warning for the latter)
#'   instead of failing with "object 'memory' not found".
convert_memory <- function(memory_str) {
  memory_str <- as.character(memory_str)

  # Ordered so that later units overwrite earlier ones, which keeps the
  # original precedence when a string matches several: TB, then GB, then MB.
  mb_per_unit <- c(MB = 1, GB = 1024, TB = 1024^2)

  memory <- rep(NA_real_, length(memory_str))
  matched <- rep(FALSE, length(memory_str))
  for (unit in names(mb_per_unit)) {
    hit <- grepl(unit, memory_str, fixed = TRUE)
    memory[hit] <- as.numeric(gsub(unit, "", memory_str[hit], fixed = TRUE)) *
      mb_per_unit[[unit]]
    matched <- matched | hit
  }

  unknown <- !matched & !is.na(memory_str)
  if (any(unknown)) {
    warning(
      "Unrecognised memory unit in: ",
      paste(unique(memory_str[unknown]), collapse = ", "),
      call. = FALSE
    )
  }

  memory
}

# Convert every usedMemory entry to a number of megabytes. vapply() always
# returns a plain numeric vector (also for an empty column) and, with
# USE.NAMES = FALSE, does not attach the input strings as names the way
# sapply() does for character input.
data_0.1$usedMemory <- vapply(data_0.1$usedMemory, convert_memory,
                              numeric(1), USE.NAMES = FALSE)
data_0.5$usedMemory <- vapply(data_0.5$usedMemory, convert_memory,
                              numeric(1), USE.NAMES = FALSE)
data_1$usedMemory <- vapply(data_1$usedMemory, convert_memory,
                            numeric(1), USE.NAMES = FALSE)
data_NONE$usedMemory <- vapply(data_NONE$usedMemory, convert_memory,
                               numeric(1), USE.NAMES = FALSE)


# Build one used-memory-over-time line chart; the four panels differ only in
# data, line colour and title.
build_memory_plot <- function(monitoring_df, line_colour, heading) {
  ggplot(monitoring_df, aes(x = currentTime, y = usedMemory)) +
    geom_line(color = line_colour) +
    labs(title = heading, x = "Time (hour)", y = "Used Memory (MB)")
}

plot_NONE <- build_memory_plot(
  data_NONE, "purple", "Memory Analysis (None Workload)"
)
plot_0.1 <- build_memory_plot(
  data_0.1, "blue", "Memory Analysis (Workload of 0.1)"
)
plot_0.5 <- build_memory_plot(
  data_0.5, "red", "Memory Analysis (Workload of 0.5)"
)
plot_1 <- build_memory_plot(
  data_1, "green", "Memory Analysis (Workload of 1.0)"
)

# Lay the four panels out in two rows.
grid.arrange(plot_NONE, plot_0.1, plot_0.5, plot_1, nrow = 2)

## Warning: Removed 194 rows containing missing values or values outside the scale range
## (`geom_line()`).

QUESTÃO 7

library(dplyr)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(readr)
library(stringr)

# Load the Netflix catalogue from the working directory.
netflix_df <- read_csv(file = "netflix_titles.csv")

## Rows: 7787 Columns: 12

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Keep only titles attributed to exactly one country (the country field is
# present and contains no comma).
single_country_df <- netflix_df %>%
  filter(!is.na(country) & !str_detect(country, ","))

# Ten countries with the most titles, largest first. `with_ties = FALSE`
# caps the result at ten rows even when counts tie at the cut-off, which
# replaces the extra arrange() and the manual row trimming.
country_counts <- single_country_df %>%
  count(country, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

country_counts

## # A tibble: 10 × 2
##    country            n
##    <chr>          <int>
##  1 United States   2555
##  2 India            923
##  3 United Kingdom   397
##  4 Japan            226
##  5 South Korea      183
##  6 Canada           177
##  7 Spain            134
##  8 France           115
##  9 Egypt            101
## 10 Mexico           100

# Pie chart of the top-10 country counts; both axes are configured with no
# grid, zero line or tick labels.
hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)

fig <- plot_ly(
  data = country_counts,
  labels = ~country,
  values = ~n,
  type = "pie"
)
fig <- layout(
  fig,
  title = "Top 10 Países com Mais Conteúdos na Netflix",
  xaxis = hidden_axis,
  yaxis = hidden_axis
)

fig

QUESTÃO 8

# Render the top-10 country counts as a plotly table.

# Header row: white Arial text on a grey fill with a thin black border.
table_header <- list(
  values = c("País", "Total de conteúdos"),
  align = c("center", "center"),
  line = list(width = 1, color = "black"),
  fill = list(color = "grey"),
  font = list(family = "Arial", size = 12, color = "white")
)

# Body: one rbind() row per table column (country names, then counts).
table_cells <- list(
  values = rbind(country_counts$country, country_counts$n),
  align = c("center", "center"),
  line = list(color = "black", width = 1),
  fill = list(color = c("white", "white")),
  font = list(family = "Arial", size = 11, color = "black")
)

fig <- plot_ly(type = "table", header = table_header, cells = table_cells)

fig

QUESTÃO 9

# Tag every title with the decade of its release year (e.g. 1997 -> 1990).
netflix_df <- netflix_df %>%
  mutate(decade = (release_year %/% 10) * 10)

# Number of titles per decade and content type. count() returns an ungrouped
# tibble directly, so no ungroup() is needed and summarise() no longer prints
# its "grouped output" message.
content_by_decade <- netflix_df %>%
  count(decade, type, name = "count")

## `summarise()` has grouped output by 'decade'. You can override using the
## `.groups` argument.

# Auto-print the per-decade, per-type counts.
content_by_decade

## # A tibble: 18 × 3
##    decade type    count
##     <dbl> <chr>   <int>
##  1   1920 TV Show     1
##  2   1940 Movie      13
##  3   1940 TV Show     1
##  4   1950 Movie      11
##  5   1960 Movie      22
##  6   1960 TV Show     3
##  7   1970 Movie      63
##  8   1970 TV Show     4
##  9   1980 Movie      99
## 10   1980 TV Show     7
## 11   1990 Movie     194
## 12   1990 TV Show    31
## 13   2000 Movie     601
## 14   2000 TV Show   127
## 15   2010 Movie    3951
## 16   2010 TV Show  1760
## 17   2020 Movie     423
## 18   2020 TV Show   476

# Line chart with markers: titles per decade, one coloured trace per type.
type_colours <- c(Movie = "orange", `TV Show` = "blue")

fig <- plot_ly(
  data = content_by_decade,
  x = ~decade,
  y = ~count,
  color = ~type,
  colors = type_colours,
  type = "scatter",
  mode = "lines+markers",
  line = list(shape = "linear")
)
fig <- layout(
  fig,
  title = "Quantidade de Conteúdos por Década na Netflix",
  xaxis = list(title = "Década"),
  yaxis = list(title = "Qnd. Conteúdo")
)

fig

Exercício 12 [Visualização de Dados]

Yuri Chaves de Lima

2024-06-04

Questão 01

Questão 02

QUESTÃO 3

QUESTÃO 4

QUESTÃO 5

QUESTÃO 6

QUESTÃO 7

QUESTÃO 8

QUESTÃO 9