Section One

Create three functions: f(x) = x^2 - 2x + 1, g(x) = |4x^3 - 2x^2 + 5x - 2|, h(x) = (2x+3)^0.5

# f(x) = x^2 - 2x + 1, evaluated elementwise on a numeric vector x.
f <- function(x) {
  squared_term <- x^2
  linear_term <- 2 * x
  squared_term - linear_term + 1
}

# g(x) = |4x^3 - 2x^2 + 5x - 2|, evaluated elementwise on a numeric vector x.
g <- function(x) {
  cubic <- 4 * x^3 - 2 * x^2 + 5 * x - 2
  abs(cubic)
}

# h(x) = (2x + 3)^0.5, evaluated elementwise on a numeric vector x.
# The result is NaN wherever 2x + 3 is negative (x < -1.5).
h <- function(x) {
  radicand <- 2 * x + 3
  radicand^0.5
}
# Evaluate h(g(f(2.5))) with the pipe: each result feeds the next function.
2.5 %>%
  f() %>%
  g() %>%
  h()
## [1] 9.611191

Double Checking

h(g(f(2.5)))  # same composition written as nested calls (f first, then g, then h) to check the piped result
## [1] 9.611191

Section Two

a)

Convert airquality to long format. Note that I removed the “Day” variable because it is not needed for part b); part c) uses the original data set.

# Reshape airquality to long format, keeping Month as the id column.
# Day is dropped by name (rather than by column position) because it is not
# needed yet.
AC <- airquality[, setdiff(names(airquality), "Day")] %>%
  melt(id.vars = "Month")
head(AC)
tail(AC)
##   Month variable value
## 1     5    Ozone    41
## 2     5    Ozone    36
## 3     5    Ozone    12
## 4     5    Ozone    18
## 5     5    Ozone    NA
## 6     5    Ozone    28
##     Month variable value
## 607     9     Temp    63
## 608     9     Temp    70
## 609     9     Temp    77
## 610     9     Temp    75
## 611     9     Temp    76
## 612     9     Temp    68

b)

Removal of NA’s from the data. Note that I simply removed each row containing an NA; the values could instead be imputed with mice (not shown; if imputed, the SDs of Ozone and Solar.R vary by only a small amount).

# Alternative (not run): impute the missing values with mice instead of
# dropping them.
# AC <- complete(mice(AC))

# The NA's must be removed (or imputed) before the standard deviations are
# calculated; here every row containing an NA is dropped.
AC <- na.omit(AC)
head(AC)  # rows with NA's are gone
##   Month variable value
## 1     5    Ozone    41
## 2     5    Ozone    36
## 3     5    Ozone    12
## 4     5    Ozone    18
## 6     5    Ozone    28
## 7     5    Ozone    23

Data Frame and Two-Way table of the standard deviations, organized by month, of Ozone, Solar, Wind, and Temp.

# Standard deviation of every reading, grouped by Month and variable, returned
# as a plain data frame. summarise() replaces the superseded summarise_at();
# .groups = "drop" makes the result explicitly ungrouped.
AC_SD <- AC %>%
  group_by(Month, variable) %>%
  summarise(sd = sd(value), .groups = "drop") %>%
  as.data.frame()
AC_SD
##    Month variable         sd
## 1      5    Ozone  22.224449
## 2      5  Solar.R 115.075499
## 3      5     Wind   3.531450
## 4      5     Temp   6.854870
## 5      6    Ozone  18.207904
## 6      6  Solar.R  92.882975
## 7      6     Wind   3.769234
## 8      6     Temp   6.598589
## 9      7    Ozone  31.635837
## 10     7  Solar.R  80.568344
## 11     7     Wind   3.035981
## 12     7     Temp   4.315513
## 13     8    Ozone  39.681210
## 14     8  Solar.R  76.834943
## 15     8     Wind   3.225930
## 16     8     Temp   6.585256
## 17     9    Ozone  24.141822
## 18     9  Solar.R  79.118280
## 19     9     Wind   3.461254
## 20     9     Temp   8.355671
# Two-way table: rows are months, columns are readings, and each cell holds the
# standard deviation. aggregate() computes one sd per Month/variable pair and
# xtabs() lays those values out as a table.
xtabs(
  value ~ .,
  data = aggregate(value ~ Month + variable, data = AC, FUN = sd)
)
##      variable
## Month      Ozone    Solar.R       Wind       Temp
##     5  22.224449 115.075499   3.531450   6.854870
##     6  18.207904  92.882975   3.769234   6.598589
##     7  31.635837  80.568344   3.035981   4.315513
##     8  39.681210  76.834943   3.225930   6.585256
##     9  24.141822  79.118280   3.461254   8.355671

c)

Below is a Scatter Plot of Ozone vs Day colored by Month

# Keep every day that has an Ozone reading. Filtering on Ozone only (instead of
# na.omit() on the whole data frame) avoids discarding days whose only missing
# value is in a column that is not plotted.
D <- airquality[!is.na(airquality$Ozone), ]

# Scatter plot of Ozone against Day, with points coloured by Month.
ggplot(D, aes(x = Day, y = Ozone, col = factor(Month))) +
  geom_point() +
  labs(
    x = "Day",
    y = "Ozone",
    title = "Ozone vs. Day",
    caption = "Voronyak 2023",
    col = "Month"
  ) +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title