# install.packages("tidyverse")
library (tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Take a fresh working copy of the built-in airquality dataset and inspect it.
# Reading it from the datasets namespace (rather than `airquality <- airquality`)
# guarantees that re-running the script in the same session starts again from
# the unmodified data instead of from an already-recoded copy.
airquality <- datasets::airquality
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Mean temperature, selecting the column first by name and then by position
# (Temp is the fourth column); both calls give the same value.
with(airquality, mean(Temp))
## [1] 77.88235
mean(airquality[[4]])
## [1] 77.88235
# Median temperature, then the standard deviation and variance of wind speed.
with(airquality, median(Temp))
## [1] 79
with(airquality, sd(Wind))
## [1] 3.523001
with(airquality, var(Wind))
## [1] 12.41154
# Replace the numeric month codes 5-9 with month names, one code at a time.
# The first replacement turns the column into character; the later comparisons
# still match because the number is then compared as text.
for (month_code in 5:9) {
  airquality$Month[airquality$Month == month_code] <- month.name[month_code]
}
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : chr "May" "May" "May" "May" ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Month is now character, so summary() reports its length and class only.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Length:153 Min. : 1.0
## Class :character 1st Qu.: 8.0
## Mode :character Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
# Reorder the months so they appear chronologically rather than alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)
# Plot 1: histogram of temperature with 20 bins, filled by month.
# qplot() ("quick plot") is deprecated as of ggplot2 3.4.0, so the same plot is
# built with ggplot() + geom_histogram(), which works on old and new versions.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
p1
# Plot 2: overlaid, semi-transparent temperature histograms, one per month,
# built with the full ggplot() interface. Bars are 5 units wide and outlined in
# white; the legend is titled "Month" with labels given in calendar order.
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2
# Plot 3: side-by-side boxplots of temperature, one box per month.
# fill = Month gives each box its own colour, and scale_fill_discrete() titles
# the legend for those discrete colours.
# The axis labels describe what is actually plotted: months on x and
# temperature on y (a boxplot has no frequency axis).
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Temperatures") +
  xlab("Month") +
  ylab("Temperature (degrees F)") +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3
# Plot 4: the same monthly temperature boxplots in grey scale.
# fill = Month is still mapped in the aesthetics; scale_fill_grey() swaps the
# colour palette for shades of grey and titles the legend.
# The axis labels describe what is actually plotted: months on x and
# temperature on y (a boxplot has no frequency axis).
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ggtitle("Monthly Temperature Variations") +
  xlab("Month") +
  ylab("Temperature (degrees F)") +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4
# By this point in the script Month has already been recoded to month names
# and converted to a factor, so comparing it with the numeric codes 5-9 matches
# nothing. Recode only if the column still holds the numeric codes (for example
# when this section is run on a fresh copy of the data); otherwise leave it
# untouched.
if (is.numeric(airquality$Month)) {
  airquality$Month <- month.name[airquality$Month]
}
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : Factor w/ 5 levels "May","June","July",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# With Month as a factor, summary() reports the number of days per month.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## May :31 Min. : 1.0
## June :30 1st Qu.: 8.0
## July :31 Median :16.0
## August :31 Mean :15.8
## September:30 3rd Qu.:23.0
## Max. :31.0
##
# Reorder the months so they appear chronologically rather than alphabetically
# (a no-op when Month is already a factor with these levels).
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)
# Plot 1 again: histogram of temperature with 20 bins, filled by month.
# qplot() ("quick plot") is deprecated as of ggplot2 3.4.0, so the same plot is
# built with ggplot() + geom_histogram(), which works on old and new versions.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
p1
# Plot 5a: scatter plot of ozone against temperature, coloured by month.
# geom_point() has no binwidth parameter (it only triggered an "unknown
# parameters" warning), and a fixed white colour is hard to see on the default
# background, so colour is mapped to Month instead. Points use the colour
# aesthetic, so the legend is titled through a colour scale; a fill scale has
# no effect when no fill aesthetic is mapped.
# Days with a missing Ozone reading are dropped explicitly with na.rm = TRUE,
# which also silences the "Removed rows" warning.
p5a <- airquality %>%
  ggplot(aes(x = Temp, y = Ozone, color = Month)) +
  geom_point(alpha = 0.5, na.rm = TRUE) +
  scale_color_discrete(name = "Month")
p5a
# Plot 5b: scatter plot of temperature against ozone, coloured by month.
# Every layer is chained with `+`; without the `+` after geom_point() the scale
# is never added to the plot and is printed to the console as a ggproto object
# instead.
# Colour is mapped to Month (a factor) so the discrete colour scale applies;
# a continuous variable such as Ozone cannot be used with a discrete scale.
# The axis labels name the plotted variables: ozone on x, temperature on y.
# Days with a missing Ozone reading are dropped explicitly with na.rm = TRUE.
p5b <- airquality %>%
  ggplot(aes(x = Ozone, y = Temp, color = Month)) +
  geom_point(na.rm = TRUE) +
  ggtitle("Temperatures") +
  xlab("Ozone (ppb)") +
  ylab("Temperature (degrees F)") +
  scale_color_discrete(name = "Month")
p5b
# Plot 5c: the same temperature-against-ozone scatter plot in grey scale.
# Every layer is chained with `+`; without the `+` after geom_point() the scale
# is never added to the plot and is printed to the console as a ggproto object
# instead.
# Colour is mapped to Month (a factor) and scale_color_grey() renders the
# months in shades of grey; a continuous variable such as Ozone cannot be used
# with this discrete scale.
# The axis labels name the plotted variables: ozone on x, temperature on y.
# Days with a missing Ozone reading are dropped explicitly with na.rm = TRUE.
p5c <- airquality %>%
  ggplot(aes(x = Ozone, y = Temp, color = Month)) +
  geom_point(na.rm = TRUE) +
  ggtitle("Monthly Temperature Variations") +
  xlab("Ozone (ppb)") +
  ylab("Temperature (degrees F)") +
  scale_color_grey(name = "Month")
p5c