library(socviz)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(tidyr)
library(ggplot2)
library(countrycode)
library(readxl)
# Read the spreadsheet of tall buildings from the working directory, then
# echo the resulting tibble as a quick preview.
tallestbuildings <- read_excel(path = "tallestbuildings.xlsx")
tallestbuildings
## # A tibble: 100 x 9
##     Rank `Building Name` City  `Height (m)` `Height (ft)` Floors Completed
##    <dbl> <chr>           <chr>        <dbl>         <dbl>  <dbl>     <dbl>
##  1     1 Burj Khalifa    Duba~         828           2717    163      2010
##  2     2 Shanghai Tower  Shan~         632           2073    128      2015
##  3     3 Makkah Royal C~ Mecc~         601           1972    120      2012
##  4     4 One World Trad~ New ~         541.          1776     94      2014
##  5     5 TAIPEI 101      Taip~         508           1667    101      2004
##  6     6 Shanghai World~ Shan~         492           1614    101      2008
##  7     7 International ~ Hong~         484           1588    108      2010
##  8     8 Petronas Twin ~ Kual~         452.          1483     88      1998
##  9     8 Petronas Twin ~ Kual~         452.          1483     88      1998
## 10    10 Zifeng Tower    Nanj~         450           1476     66      2010
## # ... with 90 more rows, and 2 more variables: Material <chr>, Use <chr>
# Number of buildings per city, most-represented cities first.
tallestbuildings %>%
  group_by(City) %>%
  summarise(Building_Number = n()) %>%
  arrange(desc(Building_Number))
## # A tibble: 38 x 2
##    City               Building_Number
##    <chr>                        <int>
##  1 Dubai (AE)                      18
##  2 New York City (US)               7
##  3 Chicago (US)                     6
##  4 Guangzhou (CN)                   6
##  5 Hong Kong (CN)                   6
##  6 Abu Dhabi (AE)                   4
##  7 Moscow (RU)                      4
##  8 Shanghai (CN)                    4
##  9 Shenzhen (CN)                    4
## 10 Kuala Lumpur (MY)                3
## # ... with 28 more rows

1. Please display how many buildings there are in each city represented in the dataset. Arranging the result in either ascending or descending order of the number of buildings is always helpful.

#tallestbuildings %>% group_by(City) %>% summarise(n())
# Horizontal bar chart of the number of buildings per city, with the
# best-represented city at the top.
#
# geom_col() is the geom for bars whose heights come straight from the data.
# The earlier geom_histogram(stat = "identity") produced the same bars but
# emitted "Ignoring unknown parameters: binwidth, bins, pad".
tallestbuildings %>%
  group_by(City) %>%
  summarise(Building_Number = n()) %>%
  # reorder() sorts the City factor by count, so it alone fixes the bar
  # order; a separate arrange() has no effect on the plot.
  ggplot(aes(x = reorder(City, Building_Number), y = Building_Number)) +
  geom_col(fill = "blue", color = "green") +
  coord_flip() +
  labs(x = "City", y = "Number of buildings")

2. Please plot the cities in order of the mean height of the buildings in each city.

# Horizontal bar chart of the mean building height (ft) per city, with the
# cities ordered by that mean.
tallestbuildings %>%
  group_by(City) %>%
  summarise(Mean.height = mean(`Height (ft)`)) %>%
  arrange(desc(Mean.height)) %>%
  ggplot(mapping = aes(x = reorder(City, Mean.height), y = Mean.height)) +
  geom_bar(stat = "identity", color = "blue", fill = "green") +
  coord_flip()

3. Convert country codes into country names and plot how many buildings are in each country based on problems 1 and 2.

# Split the combined "City (CC)" strings into a city name and a two-letter
# country code, then translate the code into a country name.

# Step 1: everything before "(" stays in City, the rest ("CC)") goes to Country.
Country <- separate(
  data = tallestbuildings,
  col = City,
  into = c("City", "Country"),
  sep = "\\("
)

# Step 2: cut at ")" and discard the empty remainder (the NA slot in `into`).
new_bldg <- separate(
  data = Country,
  col = Country,
  into = c("Country", NA),
  sep = "\\)"
)

# Splitting on "(" leaves the space that preceded it at the end of each city
# name (e.g. "Dubai "); strip it so City holds the clean name.
new_bldg$City <- trimws(new_bldg$City)

# countrycode is already attached at the top of the file.
new_bldg$CountryName <- countrycode(
  new_bldg$Country,
  origin = "iso2c",
  destination = "country.name"
)

# Drop the metric height by name. This was previously done by position
# (column 5), which silently removes a different column if the layout of the
# spreadsheet ever changes.
new_bldg <- select(new_bldg, -`Height (m)`)
# Tally the buildings per country name.
country_count <- new_bldg %>%
  group_by(CountryName) %>%
  summarize(number = n())

# Horizontal bar chart of those tallies, countries ordered by count.
ggplot(country_count, aes(x = reorder(CountryName, number), y = number)) +
  geom_bar(stat = "identity", color = "green", fill = "blue") +
  coord_flip() +
  xlab("Country Name")

4. Plot different cities based on mean height of buildings by country.

# Mean building height (ft) for each country.
mean_country <- new_bldg %>%
  group_by(CountryName) %>%
  summarize(Mean = mean(`Height (ft)`))

# Horizontal bar chart of the country means, ordered by mean height.
ggplot(mean_country, aes(x = reorder(CountryName, Mean), y = Mean)) +
  geom_bar(stat = "identity", color = "blue", fill = "green") +
  coord_flip() +
  xlab("Country")

5. In this bar graph, please color each bar for a country based on the number of buildings from this dataset that are present in that country.

# Horizontal bar chart of buildings per country in which each bar's fill
# colour encodes the number of buildings in that country. The fill was
# previously mapped to CountryName, which gave every country an arbitrary
# distinct colour instead of one that reflects its building count.
ggplot(
  country_count,
  aes(x = reorder(CountryName, number), y = number, fill = number)
) +
  geom_col() +
  coord_flip() +
  xlab("CountryName")

6. What are the mean heights (in feet) of buildings that are used for different purposes? (Here, you will have the different purposes in one column and the corresponding mean heights in another column.) In computing this, it is okay to double or triple count a building if it has multiple uses.

# Mean height (ft) per building use. A building with several uses is counted
# once under each of them, as the task allows; grouping on the raw Use string
# would instead treat every combination as its own category.
# NOTE(review): assumes multiple uses are slash-delimited within Use
# (e.g. "hotel / office") -- confirm against the spreadsheet. Values without
# a slash pass through unchanged.
building_use <- tallestbuildings %>%
  separate_rows(Use, sep = "\\s*/\\s*") %>%
  group_by(Use) %>%
  summarize(Mean = mean(`Height (ft)`))

# Horizontal bar chart of the per-use means, ordered by mean height.
ggplot(building_use, aes(x = reorder(Use, Mean), y = Mean)) +
  geom_col(fill = "red") +
  coord_flip() +
  ylab("Mean Height (ft)") +
  xlab("Use")