################################################;
#Author: Tristan Tucker ;
#Collaborators: Same as last assignment, Anthony Wang, John Healy; ;
#Program Purpose: Homework assignment 3 ;
#Date: 3/1/21 ;
################################################;
##Logical Operators##
#1) Drawing a vector of 50 random uniform values and outputting a vector of TRUE/FALSE values that indicate whether each value of unif50 is greater than 0.5.#
# Draw 50 values from the uniform distribution on [0, 1] (the defaults of
# runif), then show, element by element, which draws exceed one half.
unif50 <- runif(n = 50)
unif50 > 0.5
## [1] TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE
## [13] TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE
## [25] TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE
## [37] TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE
## [49] FALSE TRUE
#2) Using sum and mean functions
# A logical vector is coerced to 0/1 in arithmetic, so the total is the number
# of draws above 0.5 ...
sum(unif50 > 0.5)
## [1] 26
# ... and the average is the fraction of draws above 0.5.
mean(unif50 > 0.5)
## [1] 0.52
#TRUE is treated as 1 and FALSE as 0, so sum() returns the number of values that are TRUE (greater than 0.5) and mean() returns the proportion of values that are TRUE
#3) Printing the proportion of values in unif50 that are either smaller than 0.25 or larger than 0.75. Negating a check if the values are both less than 0.75 and larger than 0.25.
# `!` binds more tightly than `&` in R, so the whole "strictly between 0.25 and
# 0.75" test has to be parenthesised before it is negated. Without the outer
# parentheses only the first comparison is negated and the expression collapses
# to `unif50 <= 0.25`.
proportion <- !((0.25 < unif50) & (unif50 < 0.75))
# The mean of a logical vector is the fraction of TRUE values, i.e. the share
# of draws outside (0.25, 0.75).
mean(proportion)
# NOTE: the pasted output below was recorded before the negation was
# parenthesised; re-run to refresh it.
## [1] 0.18
##Titanic Data##
#1) Reading in the data
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
# Load the passenger table from disk; readr reports the column types it guessed.
titanic <- readr::read_csv(
  file = "C:/Users/trist/Documents/Homework 3/Data/titanic.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## pclass = col_double(),
## survived = col_double(),
## name = col_character(),
## sex = col_character(),
## age = col_double(),
## sibsp = col_double(),
## parch = col_double(),
## ticket = col_character(),
## fare = col_double(),
## cabin = col_character(),
## embarked = col_character(),
## boat = col_character(),
## body = col_double(),
## home.dest = col_character()
## )
#2) Printing a tibble with names and ages for females in second class younger than 10 years
# Keep a tibble copy of the passenger data under the name used by later steps.
secondclass <- as_tibble(titanic)
# Second-class female passengers younger than 10, reduced to the two columns
# the task asks for (name and age) instead of all 14.
secondclass %>%
  filter(sex == "female", pclass == 2, age < 10) %>%
  select(name, age)
# NOTE: the pasted output below was captured before select() was added and
# still shows every column; re-run to refresh it.
## # A tibble: 11 x 14
## pclass survived name sex age sibsp parch ticket fare cabin embarked
## <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 2 1 "Becker,~ fema~ 4 2 1 230136 39 F4 S
## 2 2 1 "Collyer~ fema~ 8 0 2 C.A. ~ 26.2 <NA> S
## 3 2 1 "Harper,~ fema~ 6 0 1 248727 33 <NA> S
## 4 2 1 "Hart, M~ fema~ 7 0 2 F.C.C~ 26.2 <NA> S
## 5 2 1 "Laroche~ fema~ 1 1 2 SC/Pa~ 41.6 <NA> C
## 6 2 1 "Laroche~ fema~ 3 1 2 SC/Pa~ 41.6 <NA> C
## 7 2 1 "Quick, ~ fema~ 2 1 1 26360 26 <NA> S
## 8 2 1 "Quick, ~ fema~ 8 1 1 26360 26 <NA> S
## 9 2 1 "Wells, ~ fema~ 4 1 1 29103 23 <NA> S
## 10 2 1 "West, M~ fema~ 0.917 1 2 C.A. ~ 27.8 <NA> S
## 11 2 1 "West, M~ fema~ 5 1 2 C.A. ~ 27.8 <NA> S
## # ... with 3 more variables: boat <chr>, body <dbl>, home.dest <chr>
#3) Printing names, ages, classes and fares for passengers whose fare is more than the average fare
# Drop passengers with a missing fare. is.na() tests for missingness directly;
# comparing with the string "NA" only worked because filter() discards rows
# whose condition evaluates to NA.
farena <- filter(secondclass, !is.na(fare))
mean(farena$fare)
## [1] 33.29548
# Compare against the computed mean rather than a rounded, hand-copied
# constant, and keep only the requested columns.
farena %>%
  filter(fare > mean(fare)) %>%
  select(name, age, pclass, fare)
# NOTE: the pasted output below was captured before select() was added and
# still shows every column; re-run to refresh it.
## # A tibble: 304 x 14
## pclass survived name sex age sibsp parch ticket fare cabin embarked
## <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 1 1 Allen, ~ fema~ 29 0 0 24160 211. B5 S
## 2 1 1 Allison~ male 0.917 1 2 113781 152. C22 ~ S
## 3 1 0 Allison~ fema~ 2 1 2 113781 152. C22 ~ S
## 4 1 0 Allison~ male 30 1 2 113781 152. C22 ~ S
## 5 1 0 Allison~ fema~ 25 1 2 113781 152. C22 ~ S
## 6 1 1 Andrews~ fema~ 63 1 0 13502 78.0 D7 S
## 7 1 1 Appleto~ fema~ 53 2 0 11769 51.5 C101 S
## 8 1 0 Artagav~ male 71 0 0 PC 17~ 49.5 <NA> C
## 9 1 0 Astor, ~ male 47 1 0 PC 17~ 228. C62 ~ C
## 10 1 1 Astor, ~ fema~ 18 1 0 PC 17~ 228. C62 ~ C
## # ... with 294 more rows, and 3 more variables: boat <chr>, body <dbl>,
## # home.dest <chr>
#4) Printing out the same info as in number three, but sorted by descending age
# Same subset and columns as step 3 (fare above the mean; name, age, class,
# fare), now ordered from oldest to youngest. Sorting `farena` directly would
# list every passenger with a known fare, not just the above-average ones.
farena %>%
  filter(fare > mean(fare)) %>%
  select(name, age, pclass, fare) %>%
  arrange(desc(age))
# NOTE: the pasted output below was captured from arrange(farena, desc(age))
# (all 1,308 rows, every column); re-run to refresh it.
## # A tibble: 1,308 x 14
## pclass survived name sex age sibsp parch ticket fare cabin embarked
## <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 1 1 Barkwor~ male 80 0 0 27042 30 A23 S
## 2 1 1 Cavendi~ fema~ 76 1 0 19877 78.8 C46 S
## 3 3 0 Svensso~ male 74 0 0 347060 7.78 <NA> S
## 4 1 0 Artagav~ male 71 0 0 PC 17~ 49.5 <NA> C
## 5 1 0 Goldsch~ male 71 0 0 PC 17~ 34.7 A5 C
## 6 3 0 Connors~ male 70.5 0 0 370369 7.75 <NA> Q
## 7 1 0 Crosby,~ male 70 1 1 WE/P ~ 71 B22 S
## 8 2 0 Mitchel~ male 70 0 0 C.A. ~ 10.5 <NA> S
## 9 1 0 Straus,~ male 67 1 0 PC 17~ 222. C55 ~ S
## 10 2 0 Wheadon~ male 66 0 0 C.A. ~ 10.5 <NA> S
## # ... with 1,298 more rows, and 3 more variables: boat <chr>, body <dbl>,
## # home.dest <chr>
##Airquality Data##
#1) Reading in the data
# Load the space-delimited air-quality table; readr reports the guessed column
# types.
airquality <- readr::read_delim(
  file = "C:/Users/trist/Documents/Homework 3/Data/airquality.txt",
  delim = " "
)
##
## -- Column specification --------------------------------------------------------
## cols(
## Ozone = col_double(),
## Solar.R = col_double(),
## Wind = col_double(),
## Temp = col_double(),
## Month = col_double(),
## Day = col_double()
## )
#2) Using filter to get rid of any ozone rows with missing values
# Each step below starts from the previous step's result, so the rows removed
# in step 2 stay removed in steps 3-5.
# is.na() tests for missing values directly instead of comparing with the
# string "NA".
AllOzone <- filter(airquality, !is.na(Ozone))
#3) Using the select function to drop the Day column from AllOzone
allOzoneNoDay <- select(AllOzone, -Day)
#4) Using select to rearrange columns so that Month and Temp appear first
allOzoneNoDayReorder <- select(allOzoneNoDay, Month, Temp, everything())
#5) Using the rename function to change Solar.R to Solar_Radiation
rename(allOzoneNoDayReorder, Solar_Radiation = Solar.R)
# NOTE: the pasted output below was captured when this step renamed the raw
# `airquality` table (153 rows, 6 columns, NA Ozone rows included); re-run to
# refresh it.
## # A tibble: 153 x 6
## Ozone Solar_Radiation Wind Temp Month Day
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 41 190 7.4 67 5 1
## 2 36 118 8 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## # ... with 143 more rows
#6) Using chaining to do steps 2-5 in one
# Steps 2-5 as one pipeline: drop rows with missing Ozone, keep every column
# except Day with Month and Temp first, then rename Solar.R. A single select()
# both drops Day and fixes the column order, and is.na() replaces the
# comparison with the string "NA".
myAir <- airquality %>%
  filter(!is.na(Ozone)) %>%
  select(Month, Temp, Ozone, Solar.R, Wind) %>%
  rename(Solar_Radiation = Solar.R)