# install.packages("tidyverse")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Take an explicit, fresh copy of the built-in dataset. Qualifying the name
# with `datasets::` means a previously modified `airquality` left over in the
# workspace is never silently reused when the script is run again.
airquality <- datasets::airquality
# Inspect the structure of the data frame (column names, types, first values).
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
# A bare column name is not visible at top level:
# mean(Temp)    ## Error in mean(Temp) : object 'Temp' not found

# Pick the column by its name ...
mean(airquality[["Temp"]])
## [1] 77.88235
# ... or by its position (Temp is the 4th column).
mean(airquality[[4]])
## [1] 77.88235
# Median of the temperature, then the spread of the wind speed
# (standard deviation and variance)
with(airquality, median(Temp))
## [1] 79
with(airquality, sd(Wind))
## [1] 3.523001
with(airquality, var(Wind))
## [1] 12.41154
# Recode the numeric months 5-9 as their English names (May through September).
# The first replacement coerces the whole column from integer to character;
# the later comparisons still match because the number is compared as a string.
for (month_number in 5:9) {
  airquality$Month[airquality$Month == month_number] <- month.name[month_number]
}
str(airquality)  # confirm that Month is now a character column
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : chr  "May" "May" "May" "May" ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
# Summary statistics of the dataset. Pay attention to "Month": as a character
# column it is summarised by length/class/mode instead of numeric quantiles.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##     Month                Day      
##  Length:153         Min.   : 1.0  
##  Class :character   1st Qu.: 8.0  
##  Mode  :character   Median :16.0  
##                     Mean   :15.8  
##                     3rd Qu.:23.0  
##                     Max.   :31.0  
## 
# Store Month as a factor whose levels follow calendar order, so that plots
# and tables list the months chronologically rather than alphabetically.
airquality$Month <- factor(
  x = airquality$Month,
  levels = c("May", "June", "July", "August", "September")
)

This is Plot 1

There are multiple chunks in Plot 1, from Plot 1-1 to Plot 1-6.

# Plot 1-1 (copied from the instructor): histogram of Temp, filled by Month.
# The original used qplot() ("quick plot"), which ggplot2 has deprecated since
# version 3.4.0. The ggplot() call below builds the same chart with the same
# aesthetics (x = Temp, fill = Month) and the same 20 bins.
p1 <- ggplot(airquality, aes(x = Temp, fill = Month)) +
  geom_histogram(bins = 20)
# Evaluating the plot object at top level draws it.
p1
# Plot 1-2 (my study 1): basic histogram of the temperature readings.
# The values are kept in a standalone vector; hist() derives its default
# title and x-axis label from the name of the variable it is given.
Temperature <- airquality[["Temp"]]
hist(Temperature)

# Plot 1-3 (my study 2): histogram with a custom title, axis label, x-range
# and fill colour
hist(
  Temperature,
  freq = TRUE,        # counts on the y-axis; FALSE can be used instead
  col  = "yellow",    # any colour name works, e.g. "darkmagenta"
  xlim = c(40, 120),  # another range, e.g. c(50, 100), works as well
  xlab = "Temperature in degrees Fahrenheit",
  main = "Maximum daily temperature at La Guardia Airport"
)

# Plot 1-4 (my study 3): besides drawing, hist() returns a list with
# 6 components; capture it and display its contents.
h <- hist(Temperature)

print(h)
## $breaks
##  [1]  55  60  65  70  75  80  85  90  95 100
## 
## $counts
## [1]  8 10 15 19 33 34 20 12  2
## 
## $density
## [1] 0.010457516 0.013071895 0.019607843 0.024836601 0.043137255 0.044444444
## [7] 0.026143791 0.015686275 0.002614379
## 
## $mids
## [1] 57.5 62.5 67.5 72.5 77.5 82.5 87.5 92.5 97.5
## 
## $xname
## [1] "Temperature"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Plot 1-5 (my study 4): write each bar's count above it, using the values
# returned by hist()
h <- hist(Temperature, ylim = c(0, 50))  # y-range widened to leave room for labels
text(
  x = h$mids,          # horizontal centre of every bar
  y = h$counts,        # top of every bar
  labels = h$counts,
  adj = c(0.5, -0.5)   # centred horizontally, nudged above the bar
)

# Plot 1-6 (my study 5): histograms with different numbers of breaks.
# A single number given as `breaks` is only a suggestion; hist() rounds it to
# "pretty" cut points. Each title states the value actually passed to hist().
hist(Temperature, breaks = 4, main = "With breaks=4")

hist(Temperature, breaks = 20, main = "With breaks=20")

This is Plot 2

There are multiple chunks in Plot 2, from Plot 2-1 to Plot 2-2.

# Plot 2-1 (copied from the instructor): overlaid, semi-transparent histograms
# of Temp, one per month, built with ggplot

p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",  # bars overlap instead of stacking
    alpha = 0.5,
    color = "white"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

# Plot 2-2 (my study 1): the same ggplot histogram with some parameter values
# changed (1-degree bins, black bar outlines)
p2 <- ggplot(data = airquality, mapping = aes(x = Temp, fill = Month)) +
  geom_histogram(
    binwidth = 1,
    position = "identity",  # bars overlap instead of stacking
    alpha = 0.5,
    color = "black"
  ) +
  scale_fill_discrete(
    name = "Month",
    labels = c("May", "June", "July", "August", "September")
  )
p2

This is Plot 3

There are multiple chunks in Plot 3, from Plot 3-1 to Plot 3-4.

# Plot 3-1 (copied from the instructor): side-by-side boxplots of Temp,
# one box per Month.
# The axis labels describe what is actually drawn: months on the x-axis and
# temperature on the y-axis (a boxplot has no "Frequency" axis).

p3 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Temperatures") +
  xlab("Month") +
  ylab("Temperature (degrees Fahrenheit)") +
  geom_boxplot() +
  scale_fill_discrete(name = "Month", labels = c("May", "June", "July", "August", "September"))
p3

# Plot 3-2 (my study 1): boxplots from a formula (Temp split by Month)
boxplot(
  Temp ~ Month,
  data   = airquality,
  col    = "red",
  border = "black",
  xlab   = "Month Name",
  ylab   = "Degree Fahrenheit",
  main   = "Different boxplots for each month"
)

# Plot 3-3 (my study 2): boxplot of the ozone readings
boxplot(airquality$Ozone)

# Boxplots of several numeric readings side by side.
# Month is deliberately left out: it is categorical, and boxplot() would only
# draw its internal integer codes as if they were measurements.
# `names` labels each box; without it the boxes are just numbered.
boxplot(
  airquality$Ozone, airquality$Wind, airquality$Temp,
  names = c("Ozone", "Wind", "Temp")
)

# Plot 3-4 (my study 3): besides drawing, boxplot() returns a list with
# 6 components; capture it and display its contents.
b <- boxplot(airquality[["Ozone"]])

print(b)
## $stats
##       [,1]
## [1,]   1.0
## [2,]  18.0
## [3,]  31.5
## [4,]  63.5
## [5,] 122.0
## 
## $n
## [1] 116
## 
## $conf
##          [,1]
## [1,] 24.82518
## [2,] 38.17482
## 
## $out
## [1] 135 168
## 
## $group
## [1] 1 1
## 
## $names
## [1] "1"

This is Plot 4

There are multiple chunks in Plot 4, from Plot 4-1 to Plot 4-3.

# Plot 4-1 (copied from the instructor): the same side-by-side boxplots of
# Temp by Month, filled in grey-scale.
# The axis labels describe what is actually drawn: months on the x-axis and
# temperature on the y-axis (a boxplot has no "Frequency" axis).

p4 <- airquality %>%
  ggplot(aes(Month, Temp, fill = Month)) +
  ggtitle("Monthly Temperature Variations") +
  xlab("Month") +
  ylab("Temperature (degrees Fahrenheit)") +
  geom_boxplot() +
  scale_fill_grey(name = "Month", labels = c("May", "June", "July", "August", "September"))
p4

# Plot 4-2 (my study 1): side-by-side boxplots of Temp for each month (base R)
boxplot(
  Temp ~ Month,
  data = airquality,
  main = "Temp by Month",
  xlab = "Month",
  ylab = "Temp",
  col  = "steelblue"
)

# Side-by-side boxplots drawn horizontally (horizontal = TRUE).
# With horizontal boxes the temperature values run along the x-axis and the
# months along the y-axis, so the axis labels are swapped relative to the
# vertical version of this plot.
boxplot(airquality$Temp ~ airquality$Month,
        col = "green",
        main = "Temp by Month",
        xlab = "Temp",
        ylab = "Month",
        horizontal = TRUE)

# Plot 4-3 (my study 2): vertical side-by-side boxplots in ggplot2.
# The y aesthetic refers to the Temp column of the data frame passed to
# ggplot(), not to a separate global vector, so the plot always reflects the
# data it is given. ylab() keeps the axis title "Temperature".
ggplot(airquality, aes(x = Month, y = Temp, fill = Month)) +
  geom_boxplot() +
  ylab("Temperature") +
  ggtitle("Temperature by Month")

This is Plot 5

There are multiple chunks in Plot 5, from Plot 5-1 to Plot 5-2.

# Plot 5-1: a plot of my own choosing from the variables in this dataset
# (a scatterplot, histogram or boxplot would all qualify).

# Basic scatterplot of Ozone against Temp; the axis limits restrict the view
# to temperatures 55-70 and ozone values 5-50.
plot(
  x = airquality$Temp,
  y = airquality$Ozone,
  main = "Temp vs Ozone",
  xlab = "Temp",
  ylab = "Ozone",
  xlim = c(55, 70),
  ylim = c(5, 50)
)

# Plot 5-2: scatterplot matrix, one panel for every pair of the chosen variables
pairs(
  ~ Month + Ozone + Wind + Temp,
  main = "Scatterplot Matrix",
  data = airquality
)