library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.1.1 v purrr 0.3.1
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts --------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
# Load the avocado price data set from GitHub, then preview its first rows
# and print per-column summary statistics.
avocado_price <- read.csv(
  file = "https://raw.githubusercontent.com/Zchen116/tidyverse/master/avocado.csv"
)
avocado_price %>% head()
avocado_price %>% summary()
## X Date AveragePrice Total.Volume
## Min. : 0.00 2015-01-04: 108 Min. :0.440 Min. : 85
## 1st Qu.:10.00 2015-01-11: 108 1st Qu.:1.100 1st Qu.: 10839
## Median :24.00 2015-01-18: 108 Median :1.370 Median : 107377
## Mean :24.23 2015-01-25: 108 Mean :1.406 Mean : 850644
## 3rd Qu.:38.00 2015-02-01: 108 3rd Qu.:1.660 3rd Qu.: 432962
## Max. :52.00 2015-02-08: 108 Max. :3.250 Max. :62505647
## (Other) :17601
## X4046 X4225 X4770
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 854 1st Qu.: 3009 1st Qu.: 0
## Median : 8645 Median : 29061 Median : 185
## Mean : 293008 Mean : 295155 Mean : 22840
## 3rd Qu.: 111020 3rd Qu.: 150207 3rd Qu.: 6243
## Max. :22743616 Max. :20470573 Max. :2546439
##
## Total.Bags Small.Bags Large.Bags
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 5089 1st Qu.: 2849 1st Qu.: 127
## Median : 39744 Median : 26363 Median : 2648
## Mean : 239639 Mean : 182195 Mean : 54338
## 3rd Qu.: 110783 3rd Qu.: 83338 3rd Qu.: 22029
## Max. :19373134 Max. :13384587 Max. :5719097
##
## XLarge.Bags type year
## Min. : 0.0 conventional:9126 Min. :2015
## 1st Qu.: 0.0 organic :9123 1st Qu.:2015
## Median : 0.0 Median :2016
## Mean : 3106.4 Mean :2016
## 3rd Qu.: 132.5 3rd Qu.:2017
## Max. :551693.7 Max. :2018
##
## region
## Albany : 338
## Atlanta : 338
## BaltimoreWashington: 338
## Boise : 338
## Boston : 338
## BuffaloRochester : 338
## (Other) :16221
# Demonstrate the basic dplyr verbs on the avocado data; each statement
# prints only the first rows of its result.

# filter(): keep the rows whose region is Boston.
avocado_price %>%
  filter(region == "Boston") %>%
  head()

# arrange(): sort by average price, highest first and then lowest first.
avocado_price %>%
  arrange(desc(AveragePrice)) %>%
  head()
avocado_price %>%
  arrange(AveragePrice) %>%
  head()

# select(): keep only the region, year, type and average price columns.
avocado_price %>%
  select(region, year, type, AveragePrice) %>%
  head()

# rename(): expose the region column under the name city.
avocado_price %>%
  rename(city = region) %>%
  head()
# Minimum, mean and maximum of the average price for the Boston rows.
avocado_price %>%
  filter(region == "Boston") %>%
  summarise(
    AveragePrice_min = min(AveragePrice),
    AveragePrice_mean = mean(AveragePrice),
    AveragePrice_max = max(AveragePrice)
  )
# Per-region row count and mean of the average price (missing values ignored).
by_region <- avocado_price %>% group_by(region)
sui <- by_region %>%
  summarise(
    count = n(),
    AveragePrice_mean = mean(AveragePrice, na.rm = TRUE)
  )

# Preview the regions with the highest mean price.
sui %>%
  arrange(desc(AveragePrice_mean)) %>%
  head()

# Keep the 20 regions with the highest mean price, with the mean rounded to
# two decimals.
my_data <- sui %>%
  arrange(desc(AveragePrice_mean)) %>%
  mutate(AveragePrice_mean = round(AveragePrice_mean, 2)) %>%
  head(20)
my_data
# Bar chart of the mean average price for each region in my_data, one bar
# per region, coloured by region.
my_data %>%
  ggplot(aes(x = region, y = AveragePrice_mean, fill = region)) +
  # geom_col() draws bar heights straight from the y values; it is the
  # idiomatic form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  # The fill legend would only repeat the x-axis labels, so hide it.
  # "none" replaces the deprecated `fill = FALSE` form, which warns in
  # current ggplot2 releases.
  guides(fill = "none") +
  ggtitle("Average Price mean") +
  # Rotate the region names so they do not overlap.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Part 2: Extend an Existing Example
# Using one of your classmates' examples (as created above), extend his or her
# example with additional annotated code. (15 points)
# 1. Get the dataset from FiveThirtyEight (existing code)
# Load the KCLT weather-history CSV from the FiveThirtyEight data repository
# and print per-column summary statistics.
weather <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/us-weather-history/KCLT.csv"
)
weather %>% summary()
## date actual_mean_temp actual_min_temp actual_max_temp
## 2014-10-1 : 1 Min. :18.00 Min. : 7.00 Min. : 26.00
## 2014-10-10: 1 1st Qu.:47.00 1st Qu.:37.00 1st Qu.: 58.00
## 2014-10-11: 1 Median :63.00 Median :52.00 Median : 73.00
## 2014-10-12: 1 Mean :61.05 Mean :49.96 Mean : 71.63
## 2014-10-13: 1 3rd Qu.:75.00 3rd Qu.:65.00 3rd Qu.: 86.00
## 2014-10-14: 1 Max. :88.00 Max. :75.00 Max. :100.00
## (Other) :359
## average_min_temp average_max_temp record_min_temp record_max_temp
## Min. :29.00 Min. :50.00 Min. :-5.00 Min. : 69.00
## 1st Qu.:36.00 1st Qu.:58.00 1st Qu.:15.00 1st Qu.: 79.00
## Median :48.00 Median :72.00 Median :30.00 Median : 90.00
## Mean :48.82 Mean :70.98 Mean :31.47 Mean : 88.73
## 3rd Qu.:63.00 3rd Qu.:84.00 3rd Qu.:49.00 3rd Qu.: 98.00
## Max. :68.00 Max. :89.00 Max. :62.00 Max. :104.00
##
## record_min_temp_year record_max_temp_year actual_precipitation
## Min. :1879 Min. :1879 Min. :0.0000
## 1st Qu.:1918 1st Qu.:1931 1st Qu.:0.0000
## Median :1963 Median :1953 Median :0.0000
## Mean :1953 Mean :1954 Mean :0.1024
## 3rd Qu.:1983 3rd Qu.:1984 3rd Qu.:0.0300
## Max. :2015 Max. :2015 Max. :2.6500
##
## average_precipitation record_precipitation
## Min. :0.0900 Min. :0.850
## 1st Qu.:0.1000 1st Qu.:1.650
## Median :0.1100 Median :1.980
## Mean :0.1141 Mean :2.209
## 3rd Qu.:0.1200 3rd Qu.:2.540
## Max. :0.1500 Max. :6.880
##
library(tidyr)
# Split the date column on "-" into separate year, month and day columns,
# then summarise the reshaped data.
weather2 <- separate(
  weather,
  col = date,
  into = c("year", "month", "day"),
  sep = "-"
)
weather2 %>% summary()
## year month day actual_mean_temp
## Length:365 Length:365 Length:365 Min. :18.00
## Class :character Class :character Class :character 1st Qu.:47.00
## Mode :character Mode :character Mode :character Median :63.00
## Mean :61.05
## 3rd Qu.:75.00
## Max. :88.00
## actual_min_temp actual_max_temp average_min_temp average_max_temp
## Min. : 7.00 Min. : 26.00 Min. :29.00 Min. :50.00
## 1st Qu.:37.00 1st Qu.: 58.00 1st Qu.:36.00 1st Qu.:58.00
## Median :52.00 Median : 73.00 Median :48.00 Median :72.00
## Mean :49.96 Mean : 71.63 Mean :48.82 Mean :70.98
## 3rd Qu.:65.00 3rd Qu.: 86.00 3rd Qu.:63.00 3rd Qu.:84.00
## Max. :75.00 Max. :100.00 Max. :68.00 Max. :89.00
## record_min_temp record_max_temp record_min_temp_year
## Min. :-5.00 Min. : 69.00 Min. :1879
## 1st Qu.:15.00 1st Qu.: 79.00 1st Qu.:1918
## Median :30.00 Median : 90.00 Median :1963
## Mean :31.47 Mean : 88.73 Mean :1953
## 3rd Qu.:49.00 3rd Qu.: 98.00 3rd Qu.:1983
## Max. :62.00 Max. :104.00 Max. :2015
## record_max_temp_year actual_precipitation average_precipitation
## Min. :1879 Min. :0.0000 Min. :0.0900
## 1st Qu.:1931 1st Qu.:0.0000 1st Qu.:0.1000
## Median :1953 Median :0.0000 Median :0.1100
## Mean :1954 Mean :0.1024 Mean :0.1141
## 3rd Qu.:1984 3rd Qu.:0.0300 3rd Qu.:0.1200
## Max. :2015 Max. :2.6500 Max. :0.1500
## record_precipitation
## Min. :0.850
## 1st Qu.:1.650
## Median :1.980
## Mean :2.209
## 3rd Qu.:2.540
## Max. :6.880
# Preview the year alongside the mean temperature and the record columns.
weather2 %>%
  select(
    year,
    actual_mean_temp,
    record_min_temp,
    record_max_temp,
    record_precipitation
  ) %>%
  head()

# Preview the rows from 2014 (year is compared as text).
weather2 %>%
  filter(year == "2014") %>%
  head()
# Keep only the year and the actual (observed) measurement columns.
weather3 <- select(
  weather2,
  year,
  actual_mean_temp,
  actual_min_temp,
  actual_max_temp,
  actual_precipitation
)

# Preview the 2014 rows of the reduced table.
weather3 %>%
  filter(year == "2014") %>%
  head()

# For 2014: lowest minimum, average of the daily means, and highest maximum
# temperature.
weather3 %>%
  filter(year == "2014") %>%
  summarise(
    min = min(actual_min_temp),
    mean = mean(actual_mean_temp),
    max = max(actual_max_temp)
  )