```{r setup, include=FALSE}
# Global chunk defaults: show code and cache results. `cache.lazy = FALSE`
# makes knitr load cached objects directly instead of lazy-loading them.
knitr::opts_chunk$set(
  echo = TRUE,
  cache = TRUE,
  cache.lazy = FALSE
)
```
```{r, message = FALSE, warning = FALSE}
# Packages used throughout the document (one call per line; order preserved).
library(naniar)
library(broom)
library(ggmap)
library(knitr)
library(lubridate)
library(rwalkr)
library(sugrrants)
library(timeDate)
library(tsibble)
library(here)
library(readr)
library(tidyverse)
library(ggResidpanel)
library(gridExtra)
library(kableExtra)
```
```{r readingdata, message = FALSE, warning = FALSE, eval = FALSE}
# NOTE(review): `Trees_with_species_and_dimensions_Urban_Forest_` is not
# created anywhere in the visible part of this document - presumably it was
# imported interactively. Confirm, and replace with an explicit read (e.g.
# readr::read_csv()) so the document knits in a clean session.
tree_data0 <- Trees_with_species_and_dimensions_Urban_Forest_
```
# Question 1: Rename the variables *Date Planted* and *Year Planted* to *Dateplanted* and *Yearplanted* using the *rename()* function. Make sure *Dateplanted* is defined as a **date variable**. Then extract from the variable *Dateplanted* the year and store it in a new variable called *Year*. (6pts)
```{r, eval = FALSE}
# Rename the two planting columns, parse the planting date with dmy()
# (day-month-year text) and derive the planting year from the parsed date.
tree_data <- tree_data0 %>%
  rename(
    Dateplanted = `Date Planted`,
    Yearplanted = `Year Planted`
  ) %>%
  mutate(
    Dateplanted = dmy(Dateplanted),
    Year = year(Dateplanted)
  )
```
```{r, eval = FALSE}
# Visual overview of missing values; the large-data warning is switched off.
# Author's note: nothing is discernible in the resulting plot.
vis_miss(tree_data, warn_large_data = FALSE)
```
```{r, eval = FALSE}
# Per-variable missingness, highest percentage of missing values first.
miss_var_summary(tree_data) %>%
  arrange(desc(pct_miss))
```
```{r, eval = FALSE}
# Keep only the trees whose planting date is known.
tree_data_clean0 <- tree_data %>%
  filter(!is.na(Dateplanted))
```

The number of rows in the cleaned data set are --- and the number of columns are ----
```{r, eval = FALSE}
# Count the rows where the year extracted from Dateplanted disagrees with the
# recorded Yearplanted.
# NOTE(review): intent assumed - an ifelse() that returns Year when the two
# are equal and Yearplanted otherwise always yields Yearplanted, and prints
# one value per tree.
tree_data_clean0 %>%
  summarise(n_mismatch = sum(Year != Yearplanted, na.rm = TRUE))
```
```{r, message = FALSE, warning = FALSE, eval = FALSE}
# We have created the map below for you
melb_map <- read_rds("Data/melb-map.rds")

# One small, semi-transparent dark-green point per tree on the base map.
ggmap(melb_map) +
  geom_point(
    data = tree_data_clean0,
    mapping = aes(x = Longitude, y = Latitude),
    colour = "#006400",
    alpha = 0.5,
    size = 0.2
  )
```
# Question 7: Repeat the map and draw trees in the *Genus* group of Eucalyptus, Macadamia, Prunus, Acacia, Quercus. Use the "Dark2" color palette and display the legend at the bottom of the plot. (6pts)
```{r, eval = FALSE}
# Trees belonging to the five genera of interest. No grouping is needed for a
# plain row filter, so the result is left ungrouped.
selected_group <- tree_data %>%
  filter(
    Genus %in% c("Eucalyptus", "Macadamia", "Prunus", "Acacia", "Quercus")
  )
```
```{r, message = FALSE, warning = FALSE, eval = FALSE}
# Map of the selected genera: colour encodes Genus (Dark2 palette) and the
# legend is placed at the bottom of the plot.
ggmap(melb_map) +
  geom_point(
    data = selected_group,
    mapping = aes(x = Longitude, y = Latitude, colour = Genus),
    alpha = 0.5,
    size = 0.2
  ) +
  scale_color_brewer(palette = "Dark2") +
  theme(legend.position = "bottom")
```
# Question 8: How many trees are in Melbourne according to this data set? (1pt)
The number of trees described in the data set is `----`
# Question 9: Filter the data *tree_data_clean* so that you display only the variables "Year", "Located in", "Common Name", arrange the data set by *Year* in descending order and display the first 4 lines. Call this new data set *tree_data_clean_filter*. Then answer the following question using inline R code: When (Year), where and the common name of the tree that was first tree planted in Melbourne according to this data set? (8pts)
```{r, eval = FALSE}
# Keep the three columns of interest and sort by planting year, earliest
# first, so the first rows show the earliest-planted trees.
# eval = FALSE matches the chunks that create tree_data_clean0; without it
# this chunk would fail at knit time because its input does not exist.
tree_data_clean_filter <- tree_data_clean0 %>%
  select(Year, `Located in`, `Common Name`) %>%
  arrange(Year)

head(tree_data_clean_filter, 4)
```
The first tree was planted in 1997 in a park, and its common name is spotted gum.
to call variables that have a space in the name. (4pts)

```{r, eval = FALSE}
# Number of trees per location type, rendered as a styled table.
tree_data_clean0 %>%
  count(`Located in`, name = "n_trees") %>%
  kable(caption = "Trees planted") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
```
```{r, eval = FALSE}
# Number of trees per family. Rows with a missing family are dropped
# explicitly; the literal string "NA" is excluded as well.
tree_data_clean0 %>%
  filter(!is.na(Family), Family != "NA") %>%
  count(Family, name = "n_trees")
```
```{r, eval = FALSE}
# FIXME(review): the statement below cannot run. It pipes the whole data frame
# into seq() with arguments copied from seq()'s own signature (`to`, `from`
# and `length.out` are undefined at the call site), and it would overwrite
# tree_data_clean0. The intent is unclear, so it is disabled until clarified.
# tree_data_clean0 <- tree_data_clean0 %>%
#   seq(from = 1, to = 1, by = ((to - from) / (length.out - 1)),
#       length.out = NULL, along.with = NULL)
```
```{r, eval = FALSE}
library(kableExtra)

# Tree counts for the selected years and common names. %in% tests membership;
# `==` against a longer vector would recycle it element by element instead.
tree_data_clean0 %>%
  filter(
    Year %in% c(1899, 1900, 1995, 2000, 2019, 2020),
    `Common Name` %in% c("Ironbark", "Olive", "Plum", "Oak", "Elm")
  ) %>%
  count(Year, `Common Name`, name = "n_trees") %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
```
# Question 14: Select the trees with diameters (Diameter Breast Height) greater than 80 cm and smaller than 100 cm and comment on where the trees are located (Streets or Parks, max 25 words) (6pts)
```{r, eval = FALSE}
# Trees with a breast-height diameter strictly between 80 and 100, counted by
# location type. The two conditions are passed as separate filter arguments
# (combined with AND); wrapping them in c() would concatenate them into one
# logical vector twice as long as the data.
large_trees_data <- tree_data_clean0 %>%
  filter(
    `Diameter Breast Height` > 80,
    `Diameter Breast Height` < 100
  ) %>%
  count(`Located in`, name = "n_trees")
```
```{r, eval = FALSE}
# Row-level data for trees with a breast-height diameter strictly between 80
# and 100 (all location types are kept).
large_trees_data_parks <- tree_data_clean0 %>%
  filter(
    `Diameter Breast Height` > 80,
    `Diameter Breast Height` < 100
  )
```
Large trees seem to be concentrated in certain streets.
```{r, message = FALSE, warning = FALSE, eval = FALSE}
# Map of the large trees, coloured by location type.
ggmap(melb_map) +
  geom_point(
    data = large_trees_data_parks,
    mapping = aes(x = Longitude, y = Latitude, colour = `Located in`),
    alpha = 0.9,
    size = 0.2
  )
```
# Question 16: Do you see any pattern in the locations of the trees in the map that you produced in Question 15? Comment on the results (max 30 words) (2pts)
No, I cannot see anything; there must be a mistake.
```{r, eval = FALSE}
# Distribution of yearly planting counts per family, from 1999 onwards.
# Column names are spelt `Family` and `Year`, as elsewhere in the document.
tree_data_clean0 %>%
  filter(Year >= 1999) %>%
  count(Family, Year) %>%
  ggplot(aes(x = Family, y = n)) +
  geom_boxplot()
```
```{r, eval = FALSE}
# Number of trees planted per year, from 2006 onwards (count column is `n`).
tree_data_clean_2006 <- tree_data_clean0 %>%
  filter(Year >= 2006) %>%
  count(Year)
```
```{r, eval = FALSE}
# TODO: no answer was provided here (the submission only says "IDK").
```
```{r, eval = FALSE}
# Line plot of the yearly planting counts. Uses tree_data_clean_2006, the
# table that carries the `Year` and `n` columns this plot needs.
ggplot(tree_data_clean_2006, aes(x = Year, y = n)) +
  geom_line() +
  ylab("number of trees") +
  ggtitle("number of trees which is planted per year in 2006")
```
```{r, eval = FALSE}
# Author's note: maybe it is a bad model, but sorry, I cannot run it.
```
```{r, eval = FALSE}
# Active pedestrian sensors only (status "A").
ped_loc <- pull_sensor() %>%
  filter(status == "A")

selected_sensors <- c(
  "Victoria Point",
  "Melbourne Central",
  "Flinders Street Station Underpass",
  "Lonsdale St-Spring St (West)"
)

# identify those sensors that are selected
# NOTE(review): assumes the `sensor` column of ped_loc holds the sensor name
# in the same spelling as `selected_sensors` - confirm.
selected <- ped_loc %>%
  filter(sensor %in% selected_sensors)
nonselected <- ped_loc %>%
  filter(!sensor %in% selected_sensors)
```
# Question 23: Add the sensor locations (both selected and nonselected) to the map below. The *selected sensors* should be displayed with a diamond shape (Hint: shape = 18) (6pts)
```{r, eval = FALSE}
# Base map of Melbourne, resolved relative to the project root.
# NOTE(review): another chunk reads this map from "Data/melb-map.rds" -
# confirm which location is correct.
melb_map <- read_rds(here::here("assignment-2", "data-raw", "melb-map.rds"))
```
```{r, eval = FALSE}
# Non-selected sensors as blue dots; selected sensors as larger diamonds
# (shape 18), coloured by sensor, with a two-row legend.
ggmap(melb_map) +
  geom_point(
    data = nonselected,
    mapping = aes(x = longitude, y = latitude),
    colour = "#2b8cbe",
    alpha = 0.6,
    size = 2
  ) +
  geom_point(
    data = selected,
    mapping = aes(x = longitude, y = latitude, colour = sensor),
    size = 3,
    shape = 18
  ) +
  labs(x = "longitude", y = "latitude") +
  scale_color_brewer(palette = "Spectral", name = "sensor") +
  guides(colour = guide_legend(nrow = 2))
```
rwalkr package (Hint: You should use the melb_walk() function). Please set the R code chunk option to cache = TRUE so it does not run every time you knit (it takes several minutes to extract the data for the first time) (2pts)

```{r, eval = FALSE, cache = TRUE}
library(rwalkr)

# Hourly pedestrian counts from 1 Jan 2019 to 1 Aug 2020.
walk_data <- melb_walk(
  from = as.Date("2019-01-01"),
  to = as.Date("2020-08-01")
)
```
```{r, eval = FALSE}
# Counts for the four selected sensors, with calendar parts of the date.
walk_data_subset <- walk_data %>%
  filter(Sensor %in% selected_sensors) %>%
  mutate(
    Day = day(Date),
    Month = month(Date),
    Year = year(Date)
  )
```
# Question 26: Create a **function** that takes pedestrian counts data as input and produces boxplots for each `year` while faceting over `Sensor` (use facet_wrap). Use this function to create boxplots for year 2019 and 2020 for all 4 sensors selected in *walk_data_subset*. What do you observe in this figure?. Does this figure make comparison between sensors across years easy, explain your answer? (max 30 words). (8pts)
```{r, eval = FALSE}
#' Boxplots of pedestrian counts per year, faceted by sensor
#'
#' @param data A data frame with the columns `Year`, `Count` and `Sensor`.
#' @return A ggplot object with one horizontal boxplot per year in each
#'   sensor facet.
plot_function <- function(data) {
  data %>%
    # Year is converted to text so each year gets its own box.
    mutate(Year = as.character(Year)) %>%
    ggplot(aes(x = Year, y = Count)) +
    geom_boxplot() +
    coord_flip() +
    facet_wrap(~Sensor)
}
```
```{r, eval = FALSE}
# TODO: no code was provided in this chunk (presumably the Question 27 plot).
```
# Question 28: What can you conclude from the plot that you created in Question 27 (max 30 words)? (2pts)
Sorry, I don't know how to do it; I think it is too hard for me.
# Question 29: Read in the weather data and create flagging variables to the data set (10 pts). Below you will find more information about the weather data:
One question we want to answer is: "Does the weather make a difference to the number of people walking out?"
Time of day and day of week are the predominant driving force of the number of pedestrian, depicted in the previous data plots. Apart from these temporal factors, the weather condition could possibly affect how many people are walking in the city. In particular, people are likely to stay indoors, when the day is too hot or too cold, or raining hard.
Daily meteorological data as a separate source, available on [National Climatic Data Center](https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/), is used and joined to the main pedestrian data table using common dates.
Binary variables are created to serve as the tipping points
We have pulled information on weather stations for the Melbourne area - can you combine it together into one dataset?
- high_prcp if `prcp` > 5 (if yes, "rain", if no, "none")
- high_temp if `tmax` > 25 (if yes, "hot", if no, "not")
- low_temp if `tmin` < 6 (if yes, "cold", if no, "not")
```{r, eval = FALSE}
# Read the weather data once and add the three flag variables.
# NOTE(review): an alternative location, "assignment-2/data/melb_ncdc_2020.csv",
# also appeared in this chunk - confirm which path is correct.
melb_weather_2020 <- read_csv("Data/melb_ncdc_2020.csv") %>%
  # Now create some flag variables
  # Thresholds are strict, as specified: prcp > 5, tmax > 25, tmin < 6.
  mutate(
    high_prcp = if_else(
      condition = prcp > 5,
      true = "rain",
      false = "none"
    ),
    high_temp = if_else(
      condition = tmax > 25,
      true = "hot",
      false = "not"
    ),
    low_temp = if_else(
      condition = tmin < 6,
      true = "cold",
      false = "not"
    )
  )
```
# Question 30: Select the pedestrian count data only for the dates recorded in the weather data and combine those two datasets so that the resulting combined data set contains the variables from both datasets (Hint: Look at the materials for week 4). Display the first 3 rows of the new data set and the report the dimension of the new data set. (6pts)
```{r, eval = FALSE}
# Pedestrian counts restricted to the dates present in the weather data.
walk_data_subset_2020 <- walk_data_subset %>%
  filter(Date %in% melb_weather_2020$date)

# Attach the counts to the weather rows; the join key keeps the name `date`.
combined_data <- melb_weather_2020 %>%
  left_join(
    walk_data_subset_2020,
    by = c("date" = "Date")
  )

# First three rows and the dimensions (rows, columns) of the combined data.
head(combined_data, 3)
dim(combined_data)
```
```{r, eval = FALSE}
# Mean daily maximum and minimum temperature per month and sensor (first six
# rows); the result is returned ungrouped.
combined_data %>%
  group_by(Month, Sensor) %>%
  summarise(
    mean_tmax = mean(tmax, na.rm = TRUE),
    mean_tmin = mean(tmin, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  head(6)
```
high_prcp, high_temp, and low_temp). Make sure the legend on the x-axis is fully visible. Answer the following question: Does the weather make a difference to the number of people walking out? (Max 40 words) (9pts)

```{r, eval = FALSE}
# NOTE(review): the plotting call was left unfinished; a boxplot of counts per
# rain flag, faceted by sensor, is assumed here - confirm the intended plot.
# The x-axis labels are rotated so they stay fully visible.
ggplot(combined_data, aes(x = high_prcp, y = Count)) +
  geom_boxplot() +
  facet_wrap(~Sensor) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
```
```{r, eval = FALSE}
# NOTE(review): the plotting call was left unfinished; a boxplot of counts per
# high-temperature flag, faceted by sensor, is assumed - confirm.
ggplot(combined_data, aes(x = high_temp, y = Count)) +
  geom_boxplot() +
  facet_wrap(~Sensor) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
```

```{r, eval = FALSE}
# NOTE(review): the plotting call was left unfinished; a boxplot of counts per
# low-temperature flag, faceted by sensor, is assumed - confirm.
ggplot(combined_data, aes(x = low_temp, y = Count)) +
  geom_boxplot() +
  facet_wrap(~Sensor) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
```