R Markdown

# install.packages("tidyverse")
library (tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

show the data

airquality <- airquality

Look at the structure of the data

str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

Calculating summary statistics

mean(airquality$Temp)
## [1] 77.88235
mean(airquality[,4])
## [1] 77.88235

##calculate median,division,and variance

median(airquality$Temp)
## [1] 79
sd(airquality$Wind)
## [1] 3.523001
var(airquality$Wind)
## [1] 12.41154

change the months from 5-9 to may throgh september

airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"

look at the summary statistics of the dataset, and see how month has changed to have characters instead of numbers

str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : chr  "May" "May" "May" "May" ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##     Month                Day      
##  Length:153         Min.   : 1.0  
##  Class :character   1st Qu.: 8.0  
##  Mode  :character   Median :16.0  
##                     Mean   :15.8  
##                     3rd Qu.:23.0  
##                     Max.   :31.0  
## 

month is a categorical variable with different level, called factors

#recorder the month so they do not default to alphabetical

airquality$Month<-factor(airquality$Month, levels=c("May", "June","July", "August", "September"))

plot 1:create a histogram categorized by month with qplot

#quplot stands for “quick-plot”(in the ggplot2 package)

p1 <- qplot(data = airquality,Temp,fill = Month,geom = "histogram", bins = 20)
p1

plot2: make a histogram using ggplot

#ggplot is more sophisticated than qplot, but still uses ggplot2 package #recorder the legend so that it is not the default (alphabetical),but rather in order that months come #outlie the bars in white using the color=“white”command

p2 <- airquality %>%
  ggplot(aes(x=Temp, fill=Month)) +
  geom_histogram(position="identity", alpha=0.5, binwidth = 5, color = "white")+
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
p2

###plot3:create side-by-side boxplots categorized by month #fill=month command fills each boxplot with a different color in the aesthetics #scale_fill_discrite makes the legend on the side for discrete color values

p3 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) + 
  ggtitle("Temperatures") +
  xlab("Monthly Temperatures") +
  ylab("Frequency") +
  geom_boxplot() +
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
p3 

plot4: make the same side-by-side boxplots,but in grey-scale

#use the scale_fill_grey command for the grey-scale legend,and again,and,use fill=month in the aesthetics

p4 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) + 
  ggtitle("Monthly Temperature Variations") +
  xlab("Monthly Temperatures") +
  ylab("Frequency") +
  geom_boxplot()+
  scale_fill_grey(name = "Month", labels = c("May", "June","July", "August", "September"))
p4

plot5:now make one plot on your own of any of the vairiables in this dataset. it may be a scatterplot, histogram, or boxplot.

change the months from 5-9 to may throgh september

airquality$Month[airquality$Month == 5]<- "May"
airquality$Month[airquality$Month == 6]<- "June"
airquality$Month[airquality$Month == 7]<- "July"
airquality$Month[airquality$Month == 8]<- "August"
airquality$Month[airquality$Month == 9]<- "September"

look at the summary statistics of the dataset, and see how month has changed to have characters instead of numbers

str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : Factor w/ 5 levels "May","June","July",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##        Month         Day      
##  May      :31   Min.   : 1.0  
##  June     :30   1st Qu.: 8.0  
##  July     :31   Median :16.0  
##  August   :31   Mean   :15.8  
##  September:30   3rd Qu.:23.0  
##                 Max.   :31.0  
## 

month is a categorical variable with different level, called factors

#recorder the month so they do not default to alphabetical

airquality$Month<-factor(airquality$Month, levels=c("May", "June","July", "August", "September"))

plot 1:create a histogram categorized by month with qplot

#quplot stands for “quick-plot”(in the ggplot2 package)

p1 <- qplot(data = airquality,Temp,fill = Month,geom = "histogram", bins = 20)
p1

plot5a: make a scatorplote using ggplot

#ggplot is more sophisticated than qplot, but still uses ggplot2 package #recorder the legend so that it is not the default (alphabetical),but rather in order that months come #outlie the bars in white using the color=“white”command

p5a <- airquality %>%
  ggplot(aes(x=Temp, y=Ozone)) +
  geom_point(position="identity", alpha=0.5, binwidth = 5, color = "white")+
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
## Warning: Ignoring unknown parameters: binwidth
p5a
## Warning: Removed 37 rows containing missing values (geom_point).

###plot5b:create side-by-side boxplots categorized by month #fill=month command fills each boxplot with a different color in the aesthetics #scale_fill_discrite makes the legend on the side for discrete color values

p5b <- airquality %>%
  ggplot(aes(Ozone, Temp, fill = Ozone)) + 
  ggtitle("Temperatures") +
  xlab("Monthly Temperatures") +
  ylab("Frequency") +
  geom_point()
  scale_fill_discrete(name = "Month", labels = c("May", "June","July", "August", "September"))
## <ggproto object: Class ScaleDiscrete, Scale, gg>
##     aesthetics: fill
##     axis_order: function
##     break_info: function
##     break_positions: function
##     breaks: waiver
##     call: call
##     clone: function
##     dimension: function
##     drop: TRUE
##     expand: waiver
##     get_breaks: function
##     get_breaks_minor: function
##     get_labels: function
##     get_limits: function
##     guide: legend
##     is_discrete: function
##     is_empty: function
##     labels: May June July August September
##     limits: NULL
##     make_sec_title: function
##     make_title: function
##     map: function
##     map_df: function
##     n.breaks.cache: NULL
##     na.translate: TRUE
##     na.value: grey50
##     name: Month
##     palette: function
##     palette.cache: NULL
##     position: left
##     range: <ggproto object: Class RangeDiscrete, Range, gg>
##         range: NULL
##         reset: function
##         train: function
##         super:  <ggproto object: Class RangeDiscrete, Range, gg>
##     rescale: function
##     reset: function
##     scale_name: hue
##     train: function
##     train_df: function
##     transform: function
##     transform_df: function
##     super:  <ggproto object: Class ScaleDiscrete, Scale, gg>
p5b 
## Warning: Removed 37 rows containing missing values (geom_point).

plot5c: make the same side-by-side boxplots,but in grey-scale

#use the scale_fill_grey command for the grey-scale legend,and again,and,use fill=month in the aesthetics

p5c <- airquality %>%
  ggplot(aes(Ozone, Temp, fill = Ozone)) + 
  ggtitle("Monthly Temperature Variations") +
  xlab("Monthly Temperatures") +
  ylab("Frequency") +
  geom_point()
  scale_fill_grey(name = "Month", labels = c("May", "June","July", "August", "September"))
## <ggproto object: Class ScaleDiscrete, Scale, gg>
##     aesthetics: fill
##     axis_order: function
##     break_info: function
##     break_positions: function
##     breaks: waiver
##     call: call
##     clone: function
##     dimension: function
##     drop: TRUE
##     expand: waiver
##     get_breaks: function
##     get_breaks_minor: function
##     get_labels: function
##     get_limits: function
##     guide: legend
##     is_discrete: function
##     is_empty: function
##     labels: May June July August September
##     limits: NULL
##     make_sec_title: function
##     make_title: function
##     map: function
##     map_df: function
##     n.breaks.cache: NULL
##     na.translate: TRUE
##     na.value: red
##     name: Month
##     palette: function
##     palette.cache: NULL
##     position: left
##     range: <ggproto object: Class RangeDiscrete, Range, gg>
##         range: NULL
##         reset: function
##         train: function
##         super:  <ggproto object: Class RangeDiscrete, Range, gg>
##     rescale: function
##     reset: function
##     scale_name: grey
##     train: function
##     train_df: function
##     transform: function
##     transform_df: function
##     super:  <ggproto object: Class ScaleDiscrete, Scale, gg>
p5c
## Warning: Removed 37 rows containing missing values (geom_point).