Visualizing data and Boxplots
Importing libraries
library(dplyr)
library(tidyverse)
library(reshape2)
library(plotly)
library(kableExtra)
library(knitr)
Reading the dataset
# Point the session at the folder that holds the course databases and echo
# the resulting working directory.
# NOTE(review): this absolute path is machine-specific; the script only runs
# unchanged on a machine where this directory exists.
setwd("C:/Users/arthu/OneDrive/Área de Trabalho/R_projects/trabalhos de VED/Databases")
getwd()

# Load the hotel price workbook, resolved relative to the working directory.
data <- readxl::read_xlsx("preço_hotel.xlsx")

# Keep the city column and the three price columns, renamed to syntactic
# snake_case identifiers so they can be used without backticks below.
df <- data.frame(
  cidade = data$Cidade,
  duas_estrelas = data$`Duas-Estrelas`,
  tres_estrelas = data$`Três-Estrelas`,
  quatro_estrelas = data$`Quatro-Estrelas`
)
Two-star hotels
# Two-star hotels: keep city and price (o1), then sort ascending by price (o2).
o1 <- df %>% select(cidade, duas_estrelas)
o2 <- o1 %>% arrange(duas_estrelas)
| City | Price (£) |
|---|---|
| Shanghai | 22 |
| Bangkok | 23 |
| Nova Delhi | 29 |
| Benidorm | 32 |
| Budapeste | 33 |
| Las Vegas | 33 |
| Cancun | 38 |
| Orlando | 38 |
| Taline | 40 |
| Hong Kong | 43 |
| Mumbai | 43 |
| Viena | 43 |
| Dublin | 44 |
| Pequim | 45 |
| Istanbul | 47 |
| Lisboa | 48 |
| Dubai | 50 |
| Tóquio | 52 |
| Berlim | 54 |
| Sydney | 55 |
| Madri | 56 |
| Singapura | 56 |
| Oslo | 60 |
| Helsinki | 62 |
| Los Angeles | 63 |
| Barcelona | 65 |
| Nice | 66 |
| Londres | 67 |
| Munique | 67 |
| Edinburgo | 68 |
| Miami | 68 |
| Estocolmo | 69 |
| Jerusalém | 70 |
| Frankfurt | 71 |
| Montreal | 71 |
| Toronto | 71 |
| Bruxelas | 73 |
| Chicago | 73 |
| Copenhagen | 73 |
| São Francisco | 73 |
| Vancouver | 73 |
| Roma | 75 |
| Seattle | 75 |
| Amsterdam | 77 |
| Cidade do Cabo | 77 |
| Paris | 78 |
| Washington | 80 |
| Veneza | 82 |
| Zurique | 84 |
| Boston | 90 |
| Geneva | 106 |
| Nova York | 110 |
Three-star hotels
# Three-star hotels: keep city and price (o3), then sort ascending by price (o4).
o3 <- df %>% select(cidade, tres_estrelas)
o4 <- o3 %>% arrange(tres_estrelas)
Four-star hotels
# Four-star hotels: keep city and price (o5), then sort ascending by price (o6).
o5 <- df %>% select(cidade, quatro_estrelas)
o6 <- o5 %>% arrange(quatro_estrelas)
Base R boxplot
# Base-graphics boxplots of the three price columns, drawn side by side.
boxplot(
  # One numeric vector per box, in the same order as `names` and `col`.
  df[["duas_estrelas"]],
  df[["tres_estrelas"]],
  df[["quatro_estrelas"]],
  # Titles and axis labels.
  main = "Boxplots",
  xlab = "Hotels",
  ylab = "Price (£)",
  names = c("2-stars", "3-stars", "4-stars"),
  # One fill colour per box.
  col = c("yellow", "orange", "red"),
  # Whisker line type/width and staple (end-cap) line width.
  whisklty = 5,
  whisklwd = 1.5,
  staplelwd = 1.75
)
ggplot2 Boxplot
# ggplot2 draws one box per level of a grouping column, so the wide price
# table is first reshaped to long format with reshape2::melt(): one row per
# city/category pair, with the category in `variable` and the price in `value`.
# `id.vars` is given explicitly so melt() does not have to guess the id column
# (and does not print "Using cidade as id variables").
df_long <- melt(df, id.vars = "cidade")

# One boxplot per hotel category, with the category mean overlaid as a point.
p <- ggplot(df_long, aes(x = variable, y = value)) +
  geom_boxplot(
    color = "black", fill = "red", alpha = 0.4,
    outlier.color = "red", outlier.alpha = 1
  ) +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
  labs(title = "Boxplots", x = "Hotels", y = "Price (£)") +
  # Labels are positional: they follow the order of the price columns in `df`.
  scale_x_discrete(labels = c("two stars", "three stars", "four stars")) +
  theme(plot.title = element_text(hjust = 0.5), plot.tag.position = "topright")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)