# install.packages("tidyverse")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Warning: package ‘tidyverse’ was built under R version 4.1.2

– Attaching packages ————————————— tidyverse 1.3.1 –

v ggplot2 3.3.5 v purrr 0.3.4

v tibble 3.1.6 v dplyr 1.0.7

v tidyr 1.1.4 v stringr 1.4.0

v readr 2.1.1 v forcats 0.5.1

Warning: package ‘ggplot2’ was built under R version 4.1.2

Warning: package ‘tibble’ was built under R version 4.1.2

Warning: package ‘tidyr’ was built under R version 4.1.2

Warning: package ‘readr’ was built under R version 4.1.2

Warning: package ‘purrr’ was built under R version 4.1.2

Warning: package ‘dplyr’ was built under R version 4.1.2

Warning: package ‘stringr’ was built under R version 4.1.2

Warning: package ‘forcats’ was built under R version 4.1.2

– Conflicts —————————————— tidyverse_conflicts() –

x dplyr::filter() masks stats::filter()

x dplyr::lag() masks stats::lag()

# Copy the `airquality` object found on the search path into the workspace
# under the same name so later steps can modify it, then show its structure.
# NOTE(review): in a fresh session this presumably resolves to the built-in
# datasets::airquality -- confirm no other `airquality` object is in scope.
airquality <- airquality
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

‘data.frame’: 153 obs. of 6 variables:

$ Ozone : int 41 36 12 18 NA 28 23 19 8 NA …

$ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 …

$ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 …

$ Temp : int 67 72 74 62 56 66 65 59 61 69 …

$ Month : int 5 5 5 5 5 5 5 5 5 5 …

$ Day : int 1 2 3 4 5 6 7 8 9 10 …

# Mean temperature, selecting the column by name ...
mean(airquality[["Temp"]])
## [1] 77.88235
# ... and by position (Temp is the fourth column).
mean(airquality[[4]])
## [1] 77.88235
# Median temperature.
median(airquality[["Temp"]])
## [1] 79
# Spread of the wind readings: standard deviation, then variance.
sd(airquality[["Wind"]])
## [1] 3.523001
var(airquality[["Wind"]])
## [1] 12.41154
# Recode the numeric month codes (5-9) as month names with one vectorized
# lookup instead of five separate assignments that depend on implicit
# number-to-string coercion once the column has become character.
# Values without an entry in the lookup are not recoded, so running this
# again on an already recoded column changes nothing. Assigning the names
# turns Month into a character column.
month_lookup <- c(May = 5, June = 6, July = 7, August = 8, September = 9)
month_hit <- match(airquality$Month, month_lookup)
is_coded <- !is.na(month_hit)
airquality$Month[is_coded] <- names(month_lookup)[month_hit[is_coded]]
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : chr  "May" "May" "May" "May" ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

‘data.frame’: 153 obs. of 6 variables:

$ Ozone : int 41 36 12 18 NA 28 23 19 8 NA …

$ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 …

$ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 …

$ Temp : int 67 72 74 62 56 66 65 59 61 69 …

$ Month : chr “May” “May” “May” “May” …

$ Day : int 1 2 3 4 5 6 7 8 9 10 …

# Per-column summary of the data frame. Month is a character column at this
# point, so it is reported by Length/Class/Mode rather than by quantiles.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##     Month                Day      
##  Length:153         Min.   : 1.0  
##  Class :character   1st Qu.: 8.0  
##  Mode  :character   Median :16.0  
##                     Mean   :15.8  
##                     3rd Qu.:23.0  
##                     Max.   :31.0  
## 

Ozone Solar.R Wind Temp

Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00

1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00

Median : 31.50 Median :205.0 Median : 9.700 Median :79.00

Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88

3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00

Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00

NA’s :37 NA’s :7

Month Day

Length:153 Min. : 1.0

Class :character 1st Qu.: 8.0

Mode :character Median :16.0

Mean :15.8

3rd Qu.:23.0

Max. :31.0

# Convert Month to a factor whose levels follow calendar order, so axes and
# legends list the months chronologically instead of alphabetically.
airquality$Month <- factor(
  airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)

# Histogram of temperature in 20 bins, filled by month. Built with ggplot()
# rather than qplot(), which is deprecated as of ggplot2 3.4.0.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
p1

# Overlaid, semi-transparent temperature histograms (bin width 5), one per
# month, outlined in white; the fill legend is titled "Month".
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

# Box plot of temperature for each month, filled by month.
# The axis labels name the mapped variables (Month on x, Temp on y); a box
# plot shows the distribution of Temp, not a frequency count.
# NOTE(review): the unit in the y label follows the documentation of the
# built-in airquality data set (degrees Fahrenheit) -- confirm.
p3 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    title = "Temperatures",
    x = "Month",
    y = "Temperature (degrees F)"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p3

# Greyscale box plot of temperature for each month.
# The axis labels name the mapped variables (Month on x, Temp on y); a box
# plot shows the distribution of Temp, not a frequency count.
# NOTE(review): the unit in the y label follows the documentation of the
# built-in airquality data set (degrees Fahrenheit) -- confirm.
p4 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  labs(
    title = "Monthly Temperature Variations",
    x = "Month",
    y = "Temperature (degrees F)"
  ) +
  scale_fill_grey(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p4

# Scatter plot of temperature by month: points are colored by month and
# sized by the Ozone reading.
# The legend is configured through the color scale because Month is mapped
# to `color`; a fill scale has no effect when no fill aesthetic is mapped.
# The axis labels name the mapped variables (Month on x, Temp on y).
# NOTE(review): the unit in the y label follows the documentation of the
# built-in airquality data set (degrees Fahrenheit) -- confirm.
p5 <- airquality %>%
  ggplot(aes(x = Month, y = Temp, color = Month)) +
  geom_point(aes(size = Ozone)) +
  labs(
    title = "Monthly Temperature Variations",
    x = "Month",
    y = "Temperature (degrees F)"
  ) +
  scale_color_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p5
## Warning: Removed 37 rows containing missing values (geom_point).

The dataset airquality is piped into ggplot(). The first argument sets the aesthetics: Month is mapped to the x-axis (the independent variable) and Temp is mapped to the y-axis (the dependent variable). Color is mapped to Month, which means each point is colored according to the month in which it was observed. The discrete-scale call supplies the legend title and lists the months in calendar order, and the last layer, geom_point(), draws the data points with a size that changes with the variable “Ozone”.