R project

Arthur Martins Ferreira de Sousa

Visualizing data and Boxplots

Importing libraries

library(dplyr)
library(tidyverse)
library(reshape2)
library(plotly)
library(kableExtra)
library(knitr)

Reading data-set

# Directory holding the input spreadsheet. It is kept in a variable and joined
# with file.path() instead of calling setwd(), so running this chunk does not
# change the session's working directory for any code that runs afterwards.
data_dir <- "C:/Users/arthu/OneDrive/Área de Trabalho/R_projects/trabalhos de VED/Databases"

# Raw sheet as read by readxl.
data <- readxl::read_xlsx(file.path(data_dir, "preço_hotel.xlsx"))

# Working data frame: the city column plus the three hotel-category columns,
# renamed to syntactic snake_case names so they can be used without backticks.
df <- data.frame(
  cidade = data$Cidade,
  duas_estrelas = data$`Duas-Estrelas`,
  tres_estrelas = data$`Três-Estrelas`,
  quatro_estrelas = data$`Quatro-Estrelas`
)

Selecting specific columns from the data set and sorting them in ascending order

Two-star hotels

# Two-star hotels: keep the city and its two-star column, then sort the rows
# in ascending order of that column.
o1 <- df %>% select(cidade, duas_estrelas)
o2 <- o1 %>% arrange(duas_estrelas)
Two-star Hotels
City Price (£)
Shanghai 22
Bangkok 23
Nova Delhi 29
Benidorm 32
Budapeste 33
Las Vegas 33
Cancun 38
Orlando 38
Taline 40
Hong Kong 43
Mumbai 43
Viena 43
Dublin 44
Pequim 45
Istanbul 47
Lisboa 48
Dubai 50
Tóquio 52
Berlim 54
Sydney 55
Madri 56
Singapura 56
Oslo 60
Helsinki 62
Los Angeles 63
Barcelona 65
Nice 66
Londres 67
Munique 67
Edinburgo 68
Miami 68
Estocolmo 69
Jerusalém 70
Frankfurt 71
Montreal 71
Toronto 71
Bruxelas 73
Chicago 73
Copenhagen 73
São Francisco 73
Vancouver 73
Roma 75
Seattle 75
Amsterdam 77
Cidade do Cabo 77
Paris 78
Washington 80
Veneza 82
Zurique 84
Boston 90
Geneva 106
Nova York 110

Three-star hotels

# Three-star hotels: keep the city and its three-star column, then sort the
# rows in ascending order of that column.
o3 <- df %>% select(cidade, tres_estrelas)
o4 <- o3 %>% arrange(tres_estrelas)

Four-star hotels

# Four-star hotels: keep the city and its four-star column, then sort the rows
# in ascending order of that column.
o5 <- df %>% select(cidade, quatro_estrelas)
o6 <- o5 %>% arrange(quatro_estrelas)

R base Boxplot

# Base-graphics boxplots, one box per hotel category, drawn side by side.
boxplot(
  df[["duas_estrelas"]],
  df[["tres_estrelas"]],
  df[["quatro_estrelas"]],
  names = c("2-stars", "3-stars", "4-stars"),
  col = c("yellow", "orange", "red"),
  main = "Boxplots",
  xlab = "Hotels",
  ylab = "Price (£)",
  # Whisker line type/width and staple (end-cap) line width.
  whisklty = 5,
  whisklwd = 1.5,
  staplelwd = 1.75
)

ggplot2 Boxplot

# ggplot2 draws one box per level of a grouping column, so the three category
# columns are stacked into long format with reshape2::melt(): `variable` holds
# the source column name and `value` holds the number from that column.
# id.vars / measure.vars are given explicitly instead of letting melt() guess
# them; the order of measure.vars fixes the left-to-right order of the boxes.
df_long <- melt(
  df,
  id.vars = "cidade",
  measure.vars = c("duas_estrelas", "tres_estrelas", "quatro_estrelas")
)

p <- ggplot(df_long, aes(x = variable, y = value)) +
  geom_boxplot(
    color = "black", fill = "red", alpha = 0.4,
    outlier.color = "red", outlier.alpha = 1
  ) +
  # Overlay the mean of each group as a point. `fun` replaces the `fun.y`
  # argument, which has been deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
  labs(title = "Boxplots", x = "Hotels", y = "Price (£)") +
  # Labels are keyed by factor level so each one stays attached to the right
  # column even if the level order changes.
  scale_x_discrete(
    labels = c(
      duas_estrelas = "two stars",
      tres_estrelas = "three stars",
      quatro_estrelas = "four stars"
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5), plot.tag.position = "topright")

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(p)