#install.packages("readxl")
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(readxl)
# Read the tallest-buildings workbook into a tibble.
tallestbuildings <- read_excel(path = "tallestbuildings.xlsx")
Question #1
Please display how many buildings there are in each city represented in the dataset. Arranging the results in either ascending or descending order of the number of buildings is always helpful.
# Count the buildings recorded for each city. The question asks for an ordered
# arrangement, so the cities are listed from most to fewest buildings.
City_Counts <- tallestbuildings %>%
  group_by(City) %>%
  summarize(number = n()) %>%
  arrange(desc(number))
Question #2
Please plot different cities in order of the mean height of buildings in a city.
# Horizontal bar chart of the number of buildings per city; reorder() sorts the
# bars by building count and coord_flip() puts the city names on the vertical axis.
ggplot(
  data = City_Counts,
  mapping = aes(x = reorder(City, -number), y = number)
) +
  geom_bar(stat = "identity", fill = "dark green") +
  coord_flip()

# Rename the "Height (ft)" column to "Height" so it can be referenced without
# backticks in the summaries below.
names(tallestbuildings)[names(tallestbuildings) == "Height (ft)"] <- "Height"

# Mean building height for each city.
City_Mean <- tallestbuildings %>%
  group_by(City) %>%
  summarize(number = mean(Height))

# Horizontal bar chart of mean building height per city, bars sorted by the mean.
ggplot(
  data = City_Mean,
  mapping = aes(x = reorder(City, -number), y = number)
) +
  geom_bar(stat = "identity", fill = "dark green") +
  coord_flip()

Question #3 & #4
Please redo 1 and 2 using the country information that is given. (Note that the country variable is present with the city variable. Perhaps a split of that variable is necessary.) You may want to check out the countrycode package to get the full names of different countries instead of relying on the cryptic country codes that are present in that dataset.
Load the countrycode package
library(countrycode)
Separate (split) city and country into two columns
# Split the combined city/country field into a city name and a country code.
# Splitting only on the parentheses (together with any whitespace in front of
# them) keeps city names that contain other punctuation intact and avoids a
# trailing space on City. extra = "drop" explicitly discards the empty piece
# left after the closing parenthesis instead of warning on every row.
# NOTE(review): assumes every City value looks like "Name (CC)" - confirm
# against the spreadsheet.
tallestbuildings2 <- tallestbuildings %>%
  separate(City, c("City", "Country"), sep = "\\s*[()]", extra = "drop")
## Warning: Expected 2 pieces. Additional pieces discarded in 100 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Applying the countrycode package to our data set to convert the country codes into full country names
# Replace the two-letter codes (origin scheme "iso2c") in Country with the
# corresponding country names, warning about any code that cannot be matched.
tallestbuildings2$Country <- countrycode(
  tallestbuildings2$Country,
  origin = "iso2c",
  destination = "country.name",
  warn = TRUE,
  custom_dict = NULL,
  custom_match = NULL,
  origin_regex = FALSE
)
Create New Set: Displays Number of Buildings per Country
# Number of buildings recorded for each country.
Country_Counts <- tallestbuildings2 %>%
  group_by(Country) %>%
  summarise(number = n())
Creating Bar Graph with Country Name on Y-Axis & Num of Buildings on X-axis
# Horizontal bar chart of the number of buildings per country, bars sorted by count.
ggplot(
  data = Country_Counts,
  mapping = aes(x = reorder(Country, -number), y = number)
) +
  geom_bar(stat = "identity", fill = "dark green") +
  coord_flip()

Creating a Data Set that Displays Mean Height of Buildings in Each Country
# Mean building height for each country.
Country_Mean <- tallestbuildings2 %>%
  group_by(Country) %>%
  summarise(number = mean(Height))
Creating Bar Graph w/ Country Name on the Y-Axis & Mean Height of Buildings by Country on the X-Axis
# Horizontal bar chart of mean building height per country, bars sorted by the mean.
ggplot(
  data = Country_Mean,
  mapping = aes(x = reorder(Country, -number), y = number)
) +
  geom_bar(stat = "identity", fill = "dark green") +
  coord_flip()

5. In 4 above, you would have plotted different countries in order of the mean height of the buildings in each country. If you did not use a bar graph there, please create one. In this bar graph, please color each country's bar based on the number of buildings from this dataset that are present in that country.
Create data set which displays the country name with the number of buildings in each country and the mean height of the buildings in each country
# One row per country holding both the building count and the mean height.
Country_Mean_Count <- tallestbuildings2 %>%
  group_by(Country) %>%
  summarize(
    Country_Counts = n(),
    Country_Mean = mean(Height)
  )
Create a bar graph with country name on the vertical axis and mean height of the buildings in each country on the horizontal axis. Have each bar be colored on the basis of the building count of each country
# Horizontal bar chart of mean building height per country, with each bar
# colored by that country's building count. The count is converted to a factor
# so every distinct count gets its own discrete color.
ggplot(
  Country_Mean_Count,
  aes(
    x = reorder(Country, -Country_Mean),
    y = Country_Mean,
    fill = as.factor(Country_Counts)
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  # Labels are passed as named arguments: wrapping them in list() is not
  # supported by current ggplot2 releases, so the title and axis labels would
  # not be applied. The fill label replaces the raw expression in the legend.
  labs(
    title = "Countries Ranked by the Mean Height of Tall Buildings",
    x = "",
    y = "Country Mean",
    fill = "Number of Buildings"
  )

6. What are the mean heights (in feet) of buildings that are used for different purposes? (Here, you will have the different purposes in one column and the corresponding mean height in another column.) In computing this, it is okay to double or triple count a building if it has multiple uses.
# Mean height per building use. A building with several uses is expanded into
# one row per use first, so it is counted under each of its purposes (the
# question explicitly allows this double/triple counting) instead of forming a
# separate category for every combination of uses.
# NOTE(review): assumes multiple uses are separated by "/" in the Use column -
# confirm against the spreadsheet; if no "/" is present this step changes nothing.
uses <- tallestbuildings2 %>%
  separate_rows(Use, sep = "\\s*/\\s*") %>%
  group_by(Use) %>%
  summarise(Mean.Height = mean(Height))

# Horizontal bar chart of the mean height for each use, bars sorted by the mean.
uses %>%
  ggplot(aes(x = reorder(Use, Mean.Height), y = Mean.Height)) +
  geom_bar(stat = "identity", fill = "dark green", position = "dodge") +
  coord_flip() +
  labs(x = "Use", y = "Avg. Height")
