library(socviz)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(tidyr)
library(ggplot2)
library(countrycode)
library(readxl)
# Read the tallest-buildings workbook (path is relative to the working
# directory) and print the resulting tibble for a quick look at its columns.
tallestbuildings <- read_excel(path = "tallestbuildings.xlsx")
tallestbuildings
## # A tibble: 100 x 9
## Rank `Building Name` City `Height (m)` `Height (ft)` Floors Completed
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Burj Khalifa Duba~ 828 2717 163 2010
## 2 2 Shanghai Tower Shan~ 632 2073 128 2015
## 3 3 Makkah Royal C~ Mecc~ 601 1972 120 2012
## 4 4 One World Trad~ New ~ 541. 1776 94 2014
## 5 5 TAIPEI 101 Taip~ 508 1667 101 2004
## 6 6 Shanghai World~ Shan~ 492 1614 101 2008
## 7 7 International ~ Hong~ 484 1588 108 2010
## 8 8 Petronas Twin ~ Kual~ 452. 1483 88 1998
## 9 8 Petronas Twin ~ Kual~ 452. 1483 88 1998
## 10 10 Zifeng Tower Nanj~ 450 1476 66 2010
## # ... with 90 more rows, and 2 more variables: Material <chr>, Use <chr>
# Number of buildings per city, largest count first.
# count() groups by City, tallies the rows into Building_Number and, with
# sort = TRUE, orders the result by descending count.
tallestbuildings %>%
  count(City, name = "Building_Number", sort = TRUE)
## # A tibble: 38 x 2
## City Building_Number
## <chr> <int>
## 1 Dubai (AE) 18
## 2 New York City (US) 7
## 3 Chicago (US) 6
## 4 Guangzhou (CN) 6
## 5 Hong Kong (CN) 6
## 6 Abu Dhabi (AE) 4
## 7 Moscow (RU) 4
## 8 Shanghai (CN) 4
## 9 Shenzhen (CN) 4
## 10 Kuala Lumpur (MY) 3
## # ... with 28 more rows
1. Please display how many buildings there are in each city represented in the dataset. Arranging the result in either ascending or descending order of the number of buildings is always helpful.
# Bar chart of the number of buildings per city.
# The counts are computed up front, so the bars must be drawn with geom_col(),
# which plots the supplied y values as-is. geom_histogram(stat = "identity")
# is a binning geom and emitted "Ignoring unknown parameters: binwidth, bins,
# pad" when used here.
# No arrange() step is needed: the bar order comes from reorder() in aes().
tallestbuildings %>%
  group_by(City) %>%
  summarise(Building_Number = n()) %>%
  ggplot(aes(x = reorder(City, Building_Number), y = Building_Number)) +
  geom_col(fill = "blue", color = "green") +
  coord_flip()

2. Please plot different cities in order of the mean height of buildings in a city.
# Mean building height (ft) per city, drawn as horizontal bars ordered by
# that mean. geom_col() draws the pre-computed values directly.
tallestbuildings %>%
  group_by(City) %>%
  summarise(Mean.height = mean(`Height (ft)`)) %>%
  arrange(desc(Mean.height)) %>%
  ggplot(aes(x = reorder(City, Mean.height), y = Mean.height)) +
  geom_col(fill = "green", color = "blue") +
  coord_flip()

3. Convert country codes into country names and plot how many buildings are in each country based on problems 1 and 2.
# Split "City (CC)" into a city name and a two-letter country code.
# Step 1: break at the opening parenthesis -> City = "Dubai ", Country = "AE)".
# `Country` is kept as a named intermediate because the original script
# exposed it as a global.
Country <- separate(
  data = tallestbuildings,
  col = City,
  into = c("City", "Country"),
  sep = "\\("
)

# Step 2: strip the closing parenthesis from the code (the empty remainder is
# discarded via the NA slot), trim the whitespace left on City by step 1, and
# translate the ISO 3166-1 alpha-2 code into an English country name.
new_bldg <- Country %>%
  separate(col = Country, into = c("Country", NA), sep = "\\)") %>%
  mutate(
    City = trimws(City),
    CountryName = countrycode(
      Country,
      origin = "iso2c",
      destination = "country.name"
    )
  ) %>%
  # The original dropped column 5 by position, which is `Height (m)`; drop it
  # by name so the step no longer depends on column order.
  select(-`Height (m)`)

# Number of buildings per country.
country_count <- new_bldg %>%
  group_by(CountryName) %>%
  summarise(number = n())

# Horizontal bar chart of building counts, countries ordered by count.
ggplot(country_count, aes(x = reorder(CountryName, number), y = number)) +
  geom_col(fill = "blue", color = "green") +
  coord_flip() +
  xlab("Country Name")

4. Plot the different countries in order of the mean height of the buildings in each country.
# Mean building height (ft) for every country.
mean_country <- new_bldg %>%
  group_by(CountryName) %>%
  summarise(Mean = mean(`Height (ft)`))

# Horizontal bars, countries ordered by their mean height.
ggplot(mean_country, aes(x = reorder(CountryName, Mean), y = Mean)) +
  geom_col(fill = "green", color = "blue") +
  coord_flip() +
  xlab("Country")

5. In this bar graph, please color each bar for a country based on the number of buildings from this dataset that are present in that country.
# Bar chart per country with each bar coloured by the number of buildings in
# that country. The fill aesthetic is mapped to `number` (the building count);
# the original mapped it to CountryName, which only gave every country a
# distinct colour unrelated to its count.
ggplot(
  country_count,
  aes(x = reorder(CountryName, number), y = number, fill = number)
) +
  geom_col() +
  coord_flip() +
  xlab("CountryName") +
  labs(fill = "Number of buildings")

6. What are the mean heights (in feet) of buildings that are used for different purposes? (Here, you will have the different purposes in one column and the corresponding mean height in another column.) In computing this, it is okay to double or triple count a building if it has multiple uses.
# Mean height (ft) per individual use.
# A building with several uses contributes to each of them, so the Use column
# is first expanded to one row per use before grouping.
# NOTE(review): assumes multiple uses are "/"-delimited (e.g. "office / hotel");
# confirm against the Use column of the spreadsheet.
building_use <- tallestbuildings %>%
  separate_rows(Use, sep = "/") %>%
  mutate(Use = trimws(Use)) %>%
  group_by(Use) %>%
  summarise(Mean = mean(`Height (ft)`))

# Horizontal bars, uses ordered by mean height.
ggplot(building_use, aes(x = reorder(Use, Mean), y = Mean)) +
  geom_col(fill = "red") +
  coord_flip() +
  ylab("Mean Height (ft)") +
  xlab("Use")
