library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.4 v readr 2.1.6
## v forcats 1.0.1 v stringr 1.6.0
## v ggplot2 4.0.1 v tibble 3.3.1
## v lubridate 1.9.4 v tidyr 1.3.2
## v purrr 1.2.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(nycflights13)
library(readr)
library(dplyr)
# Open the help page for the `words` vector used in the exercises below.
?words
## Starting the httpd help server ... done
Words that are exactly four letters long
# Table of every word in `words` together with its character count, followed
# by the subset of words that are exactly four characters long.
word_data <- tibble(
  word = words,
  length = str_length(word)
)
len_4_letters <- filter(word_data, length == 4)
len_4_letters
## # A tibble: 263 x 2
## word length
## <chr> <int>
## 1 able 4
## 2 also 4
## 3 area 4
## 4 away 4
## 5 baby 4
## 6 back 4
## 7 ball 4
## 8 bank 4
## 9 base 4
## 10 bear 4
## # i 253 more rows
Words that are either four or five letters long
# Keep the words whose length is 4 or 5.
len_45_letters <- filter(word_data, length %in% c(4, 5))
len_45_letters
## # A tibble: 463 x 2
## word length
## <chr> <int>
## 1 able 4
## 2 about 5
## 3 admit 5
## 4 after 5
## 5 again 5
## 6 agent 5
## 7 agree 5
## 8 allow 5
## 9 along 5
## 10 also 4
## # i 453 more rows
Words whose second letter is “s” or “t”
# "^.[st]" = any single first character, then "s" or "t" in second position.
# The result is not assigned, so it is auto-printed at top level.
filter(word_data, str_detect(word, "^.[st]"))
## # A tibble: 38 x 2
## word length
## <chr> <int>
## 1 as 2
## 2 ask 3
## 3 associate 9
## 4 assume 6
## 5 at 2
## 6 attend 6
## 7 especial 8
## 8 issue 5
## 9 it 2
## 10 item 4
## # i 28 more rows
Words that contain a pattern like “oxx”, where “o” is one letter and “x” is a different letter
# Words containing an "oxx" pattern: one letter followed by a *different*
# letter that is doubled (e.g. "add", "agree").
#   (.)      first letter ("o")
#   (?!\\1)  the next letter must not repeat the first one
#   (.)\\2   second letter ("x") immediately doubled
# Checking the "different letter" condition inside the match avoids dropping a
# word merely because it contains a tripled letter somewhere else.
word_data %>%
  filter(str_detect(word, "(.)(?!\\1)(.)\\2")) %>%
  print()
## # A tibble: 157 x 2
## word length
## <chr> <int>
## 1 accept 6
## 2 account 7
## 3 across 6
## 4 add 3
## 5 address 7
## 6 affect 6
## 7 afford 6
## 8 afternoon 9
## 9 agree 5
## 10 all 3
## # i 147 more rows
Words that contain “a”, “e” and “o” at the same time
# Keep the words in which all three letters occur, in any order. Multiple
# conditions passed to one filter() call are combined with AND.
word_data %>%
  filter(
    str_detect(word, "a"),
    str_detect(word, "e"),
    str_detect(word, "o")
  ) %>%
  print()
## # A tibble: 14 x 2
## word length
## <chr> <int>
## 1 absolute 8
## 2 afternoon 9
## 3 another 7
## 4 appropriate 11
## 5 associate 9
## 6 colleague 9
## 7 compare 7
## 8 encourage 9
## 9 operate 7
## 10 organize 8
## 11 probable 8
## 12 programme 9
## 13 reason 6
## 14 relation 8
# One row per sentence; the word count is estimated as the number of
# whitespace characters plus one.
df1 <- tibble(sentence = sentences) %>%
  mutate(word_number = str_count(sentence, "\\s") + 1)
a bar plot counting sentences with and without “the” (or “The”).
# Bar plot of how many sentences do / do not contain the word "the".
# The match is case-insensitive ("The" counts) and uses word boundaries (\\b)
# so that words merely containing the letters, such as "there", "other" or
# "then", are not counted as "the".
df1 %>%
  mutate(
    has_the = if_else(
      str_detect(sentence, regex("\\bthe\\b", ignore_case = TRUE)),
      "include the",
      "exclude the"
    )
  ) %>%
  ggplot(aes(x = has_the)) +
  geom_bar()
a scatterplot with x being the average length of words in a sentence, and y being the number of words starting with “a” or “e” or “i” or “o” or “u” in the sentence.
# Per-sentence features:
#   word_number - whitespace count + 1
#   letters     - number of ASCII letters in the sentence
#   num_word    - list-column holding the sentence split on whitespace
df2 <- tibble(
  sentence = sentences,
  word_number = str_count(sentence, "\\s") + 1,
  letters = str_count(sentence, "[a-zA-Z]"),
  num_word = str_split(sentence, "\\s")
)
df2
## # A tibble: 720 x 4
## sentence word_number letters num_word
## <chr> <dbl> <int> <list>
## 1 The birch canoe slid on the smooth planks. 8 34 <chr [8]>
## 2 Glue the sheet to the dark blue background. 8 35 <chr [8]>
## 3 It's easy to tell the depth of a well. 9 28 <chr [9]>
## 4 These days a chicken leg is a rare dish. 9 31 <chr [9]>
## 5 Rice is often served in round bowls. 7 29 <chr [7]>
## 6 The juice of lemons makes fine punch. 7 30 <chr [7]>
## 7 The box was thrown beside the parked truck. 8 35 <chr [8]>
## 8 The hogs were fed chopped corn and garbage. 8 35 <chr [8]>
## 9 Four hours of steady work faced us. 7 28 <chr [7]>
## 10 A large size in stockings is hard to sell. 9 33 <chr [9]>
## # i 710 more rows
# Add the two plotting variables:
#   avg_len - letters per word in the sentence
#   vowels  - number of words in the sentence that start with a vowel
# map_int() is used instead of sapply() so the result is guaranteed to be an
# integer vector (sapply() silently changes its return type on empty or
# irregular input).
df2 <- df2 %>%
  mutate(
    avg_len = letters / word_number,
    vowels = map_int(
      num_word,
      function(w) sum(str_detect(w, "^[aeiouAEIOU]"))
    )
  )
df2
## # A tibble: 720 x 6
## sentence word_number letters num_word avg_len vowels
## <chr> <dbl> <int> <list> <dbl> <int>
## 1 The birch canoe slid on the smoo~ 8 34 <chr> 4.25 1
## 2 Glue the sheet to the dark blue ~ 8 35 <chr> 4.38 0
## 3 It's easy to tell the depth of a~ 9 28 <chr> 3.11 4
## 4 These days a chicken leg is a ra~ 9 31 <chr> 3.44 3
## 5 Rice is often served in round bo~ 7 29 <chr> 4.14 3
## 6 The juice of lemons makes fine p~ 7 30 <chr> 4.29 1
## 7 The box was thrown beside the pa~ 8 35 <chr> 4.38 0
## 8 The hogs were fed chopped corn a~ 8 35 <chr> 4.38 1
## 9 Four hours of steady work faced ~ 7 28 <chr> 4 2
## 10 A large size in stockings is har~ 9 33 <chr> 3.67 3
## # i 710 more rows
# Jittered scatterplot: average word length (x) against the number of
# vowel-initial words (y). Jitter and transparency reduce overplotting of the
# integer-valued y axis.
df2 %>%
  ggplot(aes(avg_len, vowels)) +
  geom_jitter(alpha = 0.7, width = 0.1, height = 0.1)
Read it into RStudio with read_lines() function
# Read the dictionary file as a character vector, one element per line.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
OX_eng <- "D:/lilith/Oxford_English_Dictionary.txt" %>%
  read_lines()
Turn the dictionary into a tibble and remove all blank lines
# Wrap the lines in a one-column tibble and discard the empty lines.
dfox <- filter(tibble(text = OX_eng), text != "")
dfox
## # A tibble: 36,740 x 1
## text
## <chr>
## 1 "A "
## 2 "A- prefix (also an- before a vowel sound) not, without (amoral). [greek]"
## 3 "Aa abbr. 1 automobile association. 2 alcoholics anonymous. 3 anti-aircraft~
## 4 "Aardvark n. Mammal with a tubular snout and a long tongue, feeding on term~
## 5 "Ab- prefix off, away, from (abduct). [latin]"
## 6 "Aback adv. \u007f take aback surprise, disconcert. [old english: related t~
## 7 "Abacus n. (pl. -cuses) 1 frame with wires along which beads are slid for c~
## 8 "Abaft naut. —adv. In the stern half of a ship. —prep. Nearer the stern tha~
## 9 "Abandon —v. 1 give up. 2 forsake, desert. 3 (often foll. By to; often refl~
## 10 "Abandoned adj. 1 deserted, forsaken. 2 unrestrained, profligate."
## # i 36,730 more rows
Use regular expression to extract all words for each item in a separate column named “words”
# Pull every run of ASCII letters out of each entry into a list-column, then
# expand that list so the table has one row per (entry, word) pair.
df_words <- dfox %>%
  mutate(
    words = str_extract_all(text, "[A-Za-z]+")
  ) %>%
  unnest(cols = words)
Find all words in the dictionary that contain “a”, “e”, “i”, “o”, “u” and “y” at the same time
# Find every distinct dictionary word that contains all of "a", "e", "i", "o",
# "u" and "y".
# Words are lower-cased first so a capitalised letter (e.g. a leading "A")
# still counts, and distinct() collapses the many repeated occurrences of the
# same word across entries into a single row.
df_words %>%
  distinct(words = str_to_lower(words)) %>%
  filter(
    str_detect(words, "a"),
    str_detect(words, "e"),
    str_detect(words, "i"),
    str_detect(words, "o"),
    str_detect(words, "u"),
    str_detect(words, "y")
  )
## # A tibble: 90 x 2
## text words
## <chr> <chr>
## 1 "Abstemious adj. Moderate or ascetic, esp. In eating and drinking. \u~ abst~
## 2 "Accident n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
## 3 "Accident n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
## 4 "Accident n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
## 5 "Accident n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
## 6 "Ancien régime n. (pl. Anciens régimes pronunc. Same) 1 political and~ revo~
## 7 "Blabber —n. (also blabbermouth) person who blabs. —v. (often foll. B~ inco~
## 8 "Bolshevik —n. 1 hist. Member of the radical faction of the russian s~ revo~
## 9 "Byelorussian (also belorussian) —n. Native or language of byelorussi~ Byel~
## 10 "Byelorussian (also belorussian) —n. Native or language of byelorussi~ byel~
## # i 80 more rows
# Load the bank churn data set.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
bank_data <- "D:/lilith/BankChurners.csv" %>%
  read_csv()
## Rows: 10127 Columns: 20
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): Attrition_Flag, Gender, Education_Level, Marital_Status, Income_Ca...
## dbl (14): Customer_Age, Dependent_count, Months_on_book, Total_Relationship_...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Q: Which features can be regarded as factors? A: Attrition_Flag, Gender, Marital_Status, Education_Level, Income_Category, Card_Category.
Q: Which features can be regarded as ordered factors (ordinal)? A: Education_Level, Income_Category, Card_Category.
# Convert the categorical columns to factors.
#
# Any value that is not listed in `levels` is silently turned into NA by
# factor(), so every category present in the data must be listed. The level
# vectors below include "Uneducated" (education) and "Unknown" (income), which
# were previously missing and therefore became NA.
# NOTE(review): the level names assume the standard BankChurners categories;
# confirm with e.g. count(bank_data, Education_Level) before relying on them.
bank_data <- bank_data %>%
  mutate(
    # Nominal (unordered) factors.
    Attrition_Flag = factor(Attrition_Flag),
    Gender = factor(Gender),
    Marital_Status = factor(Marital_Status),
    # Ordinal factors, listed from lowest to highest; "Unknown" is placed
    # first so it does not sit between real categories.
    Card_Category = factor(
      Card_Category,
      levels = c("Blue", "Silver", "Gold", "Platinum"),
      ordered = TRUE
    ),
    Education_Level = factor(
      Education_Level,
      levels = c(
        "Unknown", "Uneducated", "High School", "College",
        "Graduate", "Post-Graduate", "Doctorate"
      ),
      ordered = TRUE
    ),
    Income_Category = factor(
      Income_Category,
      levels = c(
        "Unknown",
        "Less than $40K",
        "$40K - $60K",
        "$60K - $80K",
        "$80K - $120K",
        "$120K +"
      ),
      ordered = TRUE
    )
  )
bank_data
## # A tibble: 10,127 x 20
## Attrition_Flag Customer_Age Gender Dependent_count Education_Level
## <chr> <dbl> <fct> <dbl> <ord>
## 1 Existing Customer 45 M 3 High School
## 2 Existing Customer 49 F 5 Graduate
## 3 Existing Customer 51 M 3 Graduate
## 4 Existing Customer 40 F 4 High School
## 5 Existing Customer 40 M 3 <NA>
## 6 Existing Customer 44 M 2 Graduate
## 7 Existing Customer 51 M 4 Unknown
## 8 Existing Customer 32 M 0 High School
## 9 Existing Customer 37 M 3 <NA>
## 10 Existing Customer 48 M 2 Graduate
## # i 10,117 more rows
## # i 15 more variables: Marital_Status <fct>, Income_Category <ord>,
## # Card_Category <fct>, Months_on_book <dbl>, Total_Relationship_Count <dbl>,
## # Months_Inactive_12_mon <dbl>, Contacts_Count_12_mon <dbl>,
## # Credit_Limit <dbl>, Total_Revolving_Bal <dbl>, Avg_Open_To_Buy <dbl>,
## # Total_Amt_Chng_Q4_Q1 <dbl>, Total_Trans_Amt <dbl>, Total_Trans_Ct <dbl>,
## # Total_Ct_Chng_Q4_Q1 <dbl>, Avg_Utilization_Ratio <dbl>
# Distribution of the utilisation ratio within each education level.
bank_data %>%
  ggplot(aes(Education_Level, Avg_Utilization_Ratio)) +
  geom_boxplot()
The impact is minimal, but the average utilisation rate is lowest for undergraduate degrees and highest for postgraduate degrees.
# Open the help page for the `gss_cat` data set used in the next steps.
?gss_cat
What are the levels of the marital variable?
# Show the distinct values observed in `marital`. Because the column is a
# factor, the printout also ends with a "Levels:" line giving the full level
# set; levels(gss_cat$marital) would return those levels directly.
unique(gss_cat$marital)
## [1] Never married Divorced Widowed Married Separated
## [6] No answer
## Levels: No answer Never married Separated Divorced Widowed Married
Combine “Separated”, “Divorced”, “Widowed” into a new category “Once Married”
# Add `marital_new`, a copy of `marital` in which the three formerly-married
# levels are merged into the single level "Once Married"; all other levels are
# left as they are.
gss_cat2 <- mutate(
  gss_cat,
  marital_new = fct_collapse(
    marital,
    "Once Married" = c("Separated", "Divorced", "Widowed")
  )
)
gss_cat2
## # A tibble: 21,483 x 10
## year marital age race rincome partyid relig denom tvhours marital_new
## <int> <fct> <int> <fct> <fct> <fct> <fct> <fct> <int> <fct>
## 1 2000 Never marr~ 26 White $8000 ~ Ind,ne~ Prot~ Sout~ 12 Never marr~
## 2 2000 Divorced 48 White $8000 ~ Not st~ Prot~ Bapt~ NA Once Marri~
## 3 2000 Widowed 67 White Not ap~ Indepe~ Prot~ No d~ 2 Once Marri~
## 4 2000 Never marr~ 39 White Not ap~ Ind,ne~ Orth~ Not ~ 4 Never marr~
## 5 2000 Divorced 25 White Not ap~ Not st~ None Not ~ 1 Once Marri~
## 6 2000 Married 25 White $20000~ Strong~ Prot~ Sout~ NA Married
## 7 2000 Never marr~ 36 White $25000~ Not st~ Chri~ Not ~ 3 Never marr~
## 8 2000 Divorced 44 White $7000 ~ Ind,ne~ Prot~ Luth~ NA Once Marri~
## 9 2000 Married 44 White $25000~ Not st~ Prot~ Other 0 Married
## 10 2000 Married 47 White $25000~ Strong~ Prot~ Sout~ 3 Married
## # i 21,473 more rows
Using the new levels, explore whether marital status has an effect on tvhours.
# Compare the distribution of daily TV hours across the collapsed marital
# categories. Rows with a missing `tvhours` are dropped by the boxplot stat,
# which is what triggers the "Removed ... rows" warning.
gss_cat2 %>%
  ggplot(aes(marital_new, tvhours)) +
  geom_boxplot()
## Warning: Removed 10146 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Print the time-zone names known to this R installation; the "Version"
# attribute at the end of the output identifies the time-zone database used.
OlsonNames()
## [1] "Africa/Abidjan" "Africa/Accra"
## [3] "Africa/Addis_Ababa" "Africa/Algiers"
## [5] "Africa/Asmara" "Africa/Asmera"
## [7] "Africa/Bamako" "Africa/Bangui"
## [9] "Africa/Banjul" "Africa/Bissau"
## [11] "Africa/Blantyre" "Africa/Brazzaville"
## [13] "Africa/Bujumbura" "Africa/Cairo"
## [15] "Africa/Casablanca" "Africa/Ceuta"
## [17] "Africa/Conakry" "Africa/Dakar"
## [19] "Africa/Dar_es_Salaam" "Africa/Djibouti"
## [21] "Africa/Douala" "Africa/El_Aaiun"
## [23] "Africa/Freetown" "Africa/Gaborone"
## [25] "Africa/Harare" "Africa/Johannesburg"
## [27] "Africa/Juba" "Africa/Kampala"
## [29] "Africa/Khartoum" "Africa/Kigali"
## [31] "Africa/Kinshasa" "Africa/Lagos"
## [33] "Africa/Libreville" "Africa/Lome"
## [35] "Africa/Luanda" "Africa/Lubumbashi"
## [37] "Africa/Lusaka" "Africa/Malabo"
## [39] "Africa/Maputo" "Africa/Maseru"
## [41] "Africa/Mbabane" "Africa/Mogadishu"
## [43] "Africa/Monrovia" "Africa/Nairobi"
## [45] "Africa/Ndjamena" "Africa/Niamey"
## [47] "Africa/Nouakchott" "Africa/Ouagadougou"
## [49] "Africa/Porto-Novo" "Africa/Sao_Tome"
## [51] "Africa/Timbuktu" "Africa/Tripoli"
## [53] "Africa/Tunis" "Africa/Windhoek"
## [55] "America/Adak" "America/Anchorage"
## [57] "America/Anguilla" "America/Antigua"
## [59] "America/Araguaina" "America/Argentina/Buenos_Aires"
## [61] "America/Argentina/Catamarca" "America/Argentina/ComodRivadavia"
## [63] "America/Argentina/Cordoba" "America/Argentina/Jujuy"
## [65] "America/Argentina/La_Rioja" "America/Argentina/Mendoza"
## [67] "America/Argentina/Rio_Gallegos" "America/Argentina/Salta"
## [69] "America/Argentina/San_Juan" "America/Argentina/San_Luis"
## [71] "America/Argentina/Tucuman" "America/Argentina/Ushuaia"
## [73] "America/Aruba" "America/Asuncion"
## [75] "America/Atikokan" "America/Atka"
## [77] "America/Bahia" "America/Bahia_Banderas"
## [79] "America/Barbados" "America/Belem"
## [81] "America/Belize" "America/Blanc-Sablon"
## [83] "America/Boa_Vista" "America/Bogota"
## [85] "America/Boise" "America/Buenos_Aires"
## [87] "America/Cambridge_Bay" "America/Campo_Grande"
## [89] "America/Cancun" "America/Caracas"
## [91] "America/Catamarca" "America/Cayenne"
## [93] "America/Cayman" "America/Chicago"
## [95] "America/Chihuahua" "America/Ciudad_Juarez"
## [97] "America/Coral_Harbour" "America/Cordoba"
## [99] "America/Costa_Rica" "America/Coyhaique"
## [101] "America/Creston" "America/Cuiaba"
## [103] "America/Curacao" "America/Danmarkshavn"
## [105] "America/Dawson" "America/Dawson_Creek"
## [107] "America/Denver" "America/Detroit"
## [109] "America/Dominica" "America/Edmonton"
## [111] "America/Eirunepe" "America/El_Salvador"
## [113] "America/Ensenada" "America/Fort_Nelson"
## [115] "America/Fort_Wayne" "America/Fortaleza"
## [117] "America/Glace_Bay" "America/Godthab"
## [119] "America/Goose_Bay" "America/Grand_Turk"
## [121] "America/Grenada" "America/Guadeloupe"
## [123] "America/Guatemala" "America/Guayaquil"
## [125] "America/Guyana" "America/Halifax"
## [127] "America/Havana" "America/Hermosillo"
## [129] "America/Indiana/Indianapolis" "America/Indiana/Knox"
## [131] "America/Indiana/Marengo" "America/Indiana/Petersburg"
## [133] "America/Indiana/Tell_City" "America/Indiana/Vevay"
## [135] "America/Indiana/Vincennes" "America/Indiana/Winamac"
## [137] "America/Indianapolis" "America/Inuvik"
## [139] "America/Iqaluit" "America/Jamaica"
## [141] "America/Jujuy" "America/Juneau"
## [143] "America/Kentucky/Louisville" "America/Kentucky/Monticello"
## [145] "America/Knox_IN" "America/Kralendijk"
## [147] "America/La_Paz" "America/Lima"
## [149] "America/Los_Angeles" "America/Louisville"
## [151] "America/Lower_Princes" "America/Maceio"
## [153] "America/Managua" "America/Manaus"
## [155] "America/Marigot" "America/Martinique"
## [157] "America/Matamoros" "America/Mazatlan"
## [159] "America/Mendoza" "America/Menominee"
## [161] "America/Merida" "America/Metlakatla"
## [163] "America/Mexico_City" "America/Miquelon"
## [165] "America/Moncton" "America/Monterrey"
## [167] "America/Montevideo" "America/Montreal"
## [169] "America/Montserrat" "America/Nassau"
## [171] "America/New_York" "America/Nipigon"
## [173] "America/Nome" "America/Noronha"
## [175] "America/North_Dakota/Beulah" "America/North_Dakota/Center"
## [177] "America/North_Dakota/New_Salem" "America/Nuuk"
## [179] "America/Ojinaga" "America/Panama"
## [181] "America/Pangnirtung" "America/Paramaribo"
## [183] "America/Phoenix" "America/Port-au-Prince"
## [185] "America/Port_of_Spain" "America/Porto_Acre"
## [187] "America/Porto_Velho" "America/Puerto_Rico"
## [189] "America/Punta_Arenas" "America/Rainy_River"
## [191] "America/Rankin_Inlet" "America/Recife"
## [193] "America/Regina" "America/Resolute"
## [195] "America/Rio_Branco" "America/Rosario"
## [197] "America/Santa_Isabel" "America/Santarem"
## [199] "America/Santiago" "America/Santo_Domingo"
## [201] "America/Sao_Paulo" "America/Scoresbysund"
## [203] "America/Shiprock" "America/Sitka"
## [205] "America/St_Barthelemy" "America/St_Johns"
## [207] "America/St_Kitts" "America/St_Lucia"
## [209] "America/St_Thomas" "America/St_Vincent"
## [211] "America/Swift_Current" "America/Tegucigalpa"
## [213] "America/Thule" "America/Thunder_Bay"
## [215] "America/Tijuana" "America/Toronto"
## [217] "America/Tortola" "America/Vancouver"
## [219] "America/Virgin" "America/Whitehorse"
## [221] "America/Winnipeg" "America/Yakutat"
## [223] "America/Yellowknife" "Antarctica/Casey"
## [225] "Antarctica/Davis" "Antarctica/DumontDUrville"
## [227] "Antarctica/Macquarie" "Antarctica/Mawson"
## [229] "Antarctica/McMurdo" "Antarctica/Palmer"
## [231] "Antarctica/Rothera" "Antarctica/South_Pole"
## [233] "Antarctica/Syowa" "Antarctica/Troll"
## [235] "Antarctica/Vostok" "Arctic/Longyearbyen"
## [237] "Asia/Aden" "Asia/Almaty"
## [239] "Asia/Amman" "Asia/Anadyr"
## [241] "Asia/Aqtau" "Asia/Aqtobe"
## [243] "Asia/Ashgabat" "Asia/Ashkhabad"
## [245] "Asia/Atyrau" "Asia/Baghdad"
## [247] "Asia/Bahrain" "Asia/Baku"
## [249] "Asia/Bangkok" "Asia/Barnaul"
## [251] "Asia/Beirut" "Asia/Bishkek"
## [253] "Asia/Brunei" "Asia/Calcutta"
## [255] "Asia/Chita" "Asia/Choibalsan"
## [257] "Asia/Chongqing" "Asia/Chungking"
## [259] "Asia/Colombo" "Asia/Dacca"
## [261] "Asia/Damascus" "Asia/Dhaka"
## [263] "Asia/Dili" "Asia/Dubai"
## [265] "Asia/Dushanbe" "Asia/Famagusta"
## [267] "Asia/Gaza" "Asia/Harbin"
## [269] "Asia/Hebron" "Asia/Ho_Chi_Minh"
## [271] "Asia/Hong_Kong" "Asia/Hovd"
## [273] "Asia/Irkutsk" "Asia/Istanbul"
## [275] "Asia/Jakarta" "Asia/Jayapura"
## [277] "Asia/Jerusalem" "Asia/Kabul"
## [279] "Asia/Kamchatka" "Asia/Karachi"
## [281] "Asia/Kashgar" "Asia/Kathmandu"
## [283] "Asia/Katmandu" "Asia/Khandyga"
## [285] "Asia/Kolkata" "Asia/Krasnoyarsk"
## [287] "Asia/Kuala_Lumpur" "Asia/Kuching"
## [289] "Asia/Kuwait" "Asia/Macao"
## [291] "Asia/Macau" "Asia/Magadan"
## [293] "Asia/Makassar" "Asia/Manila"
## [295] "Asia/Muscat" "Asia/Nicosia"
## [297] "Asia/Novokuznetsk" "Asia/Novosibirsk"
## [299] "Asia/Omsk" "Asia/Oral"
## [301] "Asia/Phnom_Penh" "Asia/Pontianak"
## [303] "Asia/Pyongyang" "Asia/Qatar"
## [305] "Asia/Qostanay" "Asia/Qyzylorda"
## [307] "Asia/Rangoon" "Asia/Riyadh"
## [309] "Asia/Saigon" "Asia/Sakhalin"
## [311] "Asia/Samarkand" "Asia/Seoul"
## [313] "Asia/Shanghai" "Asia/Singapore"
## [315] "Asia/Srednekolymsk" "Asia/Taipei"
## [317] "Asia/Tashkent" "Asia/Tbilisi"
## [319] "Asia/Tehran" "Asia/Tel_Aviv"
## [321] "Asia/Thimbu" "Asia/Thimphu"
## [323] "Asia/Tokyo" "Asia/Tomsk"
## [325] "Asia/Ujung_Pandang" "Asia/Ulaanbaatar"
## [327] "Asia/Ulan_Bator" "Asia/Urumqi"
## [329] "Asia/Ust-Nera" "Asia/Vientiane"
## [331] "Asia/Vladivostok" "Asia/Yakutsk"
## [333] "Asia/Yangon" "Asia/Yekaterinburg"
## [335] "Asia/Yerevan" "Atlantic/Azores"
## [337] "Atlantic/Bermuda" "Atlantic/Canary"
## [339] "Atlantic/Cape_Verde" "Atlantic/Faeroe"
## [341] "Atlantic/Faroe" "Atlantic/Jan_Mayen"
## [343] "Atlantic/Madeira" "Atlantic/Reykjavik"
## [345] "Atlantic/South_Georgia" "Atlantic/St_Helena"
## [347] "Atlantic/Stanley" "Australia/ACT"
## [349] "Australia/Adelaide" "Australia/Brisbane"
## [351] "Australia/Broken_Hill" "Australia/Canberra"
## [353] "Australia/Currie" "Australia/Darwin"
## [355] "Australia/Eucla" "Australia/Hobart"
## [357] "Australia/LHI" "Australia/Lindeman"
## [359] "Australia/Lord_Howe" "Australia/Melbourne"
## [361] "Australia/North" "Australia/NSW"
## [363] "Australia/Perth" "Australia/Queensland"
## [365] "Australia/South" "Australia/Sydney"
## [367] "Australia/Tasmania" "Australia/Victoria"
## [369] "Australia/West" "Australia/Yancowinna"
## [371] "Brazil/Acre" "Brazil/DeNoronha"
## [373] "Brazil/East" "Brazil/West"
## [375] "Canada/Atlantic" "Canada/Central"
## [377] "Canada/Eastern" "Canada/Mountain"
## [379] "Canada/Newfoundland" "Canada/Pacific"
## [381] "Canada/Saskatchewan" "Canada/Yukon"
## [383] "CET" "Chile/Continental"
## [385] "Chile/EasterIsland" "CST6CDT"
## [387] "Cuba" "EET"
## [389] "Egypt" "Eire"
## [391] "EST" "EST5EDT"
## [393] "Etc/GMT" "Etc/GMT-0"
## [395] "Etc/GMT-1" "Etc/GMT-10"
## [397] "Etc/GMT-11" "Etc/GMT-12"
## [399] "Etc/GMT-13" "Etc/GMT-14"
## [401] "Etc/GMT-2" "Etc/GMT-3"
## [403] "Etc/GMT-4" "Etc/GMT-5"
## [405] "Etc/GMT-6" "Etc/GMT-7"
## [407] "Etc/GMT-8" "Etc/GMT-9"
## [409] "Etc/GMT+0" "Etc/GMT+1"
## [411] "Etc/GMT+10" "Etc/GMT+11"
## [413] "Etc/GMT+12" "Etc/GMT+2"
## [415] "Etc/GMT+3" "Etc/GMT+4"
## [417] "Etc/GMT+5" "Etc/GMT+6"
## [419] "Etc/GMT+7" "Etc/GMT+8"
## [421] "Etc/GMT+9" "Etc/GMT0"
## [423] "Etc/Greenwich" "Etc/UCT"
## [425] "Etc/Universal" "Etc/UTC"
## [427] "Etc/Zulu" "Europe/Amsterdam"
## [429] "Europe/Andorra" "Europe/Astrakhan"
## [431] "Europe/Athens" "Europe/Belfast"
## [433] "Europe/Belgrade" "Europe/Berlin"
## [435] "Europe/Bratislava" "Europe/Brussels"
## [437] "Europe/Bucharest" "Europe/Budapest"
## [439] "Europe/Busingen" "Europe/Chisinau"
## [441] "Europe/Copenhagen" "Europe/Dublin"
## [443] "Europe/Gibraltar" "Europe/Guernsey"
## [445] "Europe/Helsinki" "Europe/Isle_of_Man"
## [447] "Europe/Istanbul" "Europe/Jersey"
## [449] "Europe/Kaliningrad" "Europe/Kiev"
## [451] "Europe/Kirov" "Europe/Kyiv"
## [453] "Europe/Lisbon" "Europe/Ljubljana"
## [455] "Europe/London" "Europe/Luxembourg"
## [457] "Europe/Madrid" "Europe/Malta"
## [459] "Europe/Mariehamn" "Europe/Minsk"
## [461] "Europe/Monaco" "Europe/Moscow"
## [463] "Europe/Nicosia" "Europe/Oslo"
## [465] "Europe/Paris" "Europe/Podgorica"
## [467] "Europe/Prague" "Europe/Riga"
## [469] "Europe/Rome" "Europe/Samara"
## [471] "Europe/San_Marino" "Europe/Sarajevo"
## [473] "Europe/Saratov" "Europe/Simferopol"
## [475] "Europe/Skopje" "Europe/Sofia"
## [477] "Europe/Stockholm" "Europe/Tallinn"
## [479] "Europe/Tirane" "Europe/Tiraspol"
## [481] "Europe/Ulyanovsk" "Europe/Uzhgorod"
## [483] "Europe/Vaduz" "Europe/Vatican"
## [485] "Europe/Vienna" "Europe/Vilnius"
## [487] "Europe/Volgograd" "Europe/Warsaw"
## [489] "Europe/Zagreb" "Europe/Zaporozhye"
## [491] "Europe/Zurich" "GB"
## [493] "GB-Eire" "GMT"
## [495] "GMT-0" "GMT+0"
## [497] "GMT0" "Greenwich"
## [499] "Hongkong" "HST"
## [501] "Iceland" "Indian/Antananarivo"
## [503] "Indian/Chagos" "Indian/Christmas"
## [505] "Indian/Cocos" "Indian/Comoro"
## [507] "Indian/Kerguelen" "Indian/Mahe"
## [509] "Indian/Maldives" "Indian/Mauritius"
## [511] "Indian/Mayotte" "Indian/Reunion"
## [513] "Iran" "Israel"
## [515] "Jamaica" "Japan"
## [517] "Kwajalein" "Libya"
## [519] "MET" "Mexico/BajaNorte"
## [521] "Mexico/BajaSur" "Mexico/General"
## [523] "MST" "MST7MDT"
## [525] "Navajo" "NZ"
## [527] "NZ-CHAT" "Pacific/Apia"
## [529] "Pacific/Auckland" "Pacific/Bougainville"
## [531] "Pacific/Chatham" "Pacific/Chuuk"
## [533] "Pacific/Easter" "Pacific/Efate"
## [535] "Pacific/Enderbury" "Pacific/Fakaofo"
## [537] "Pacific/Fiji" "Pacific/Funafuti"
## [539] "Pacific/Galapagos" "Pacific/Gambier"
## [541] "Pacific/Guadalcanal" "Pacific/Guam"
## [543] "Pacific/Honolulu" "Pacific/Johnston"
## [545] "Pacific/Kanton" "Pacific/Kiritimati"
## [547] "Pacific/Kosrae" "Pacific/Kwajalein"
## [549] "Pacific/Majuro" "Pacific/Marquesas"
## [551] "Pacific/Midway" "Pacific/Nauru"
## [553] "Pacific/Niue" "Pacific/Norfolk"
## [555] "Pacific/Noumea" "Pacific/Pago_Pago"
## [557] "Pacific/Palau" "Pacific/Pitcairn"
## [559] "Pacific/Pohnpei" "Pacific/Ponape"
## [561] "Pacific/Port_Moresby" "Pacific/Rarotonga"
## [563] "Pacific/Saipan" "Pacific/Samoa"
## [565] "Pacific/Tahiti" "Pacific/Tarawa"
## [567] "Pacific/Tongatapu" "Pacific/Truk"
## [569] "Pacific/Wake" "Pacific/Wallis"
## [571] "Pacific/Yap" "Poland"
## [573] "Portugal" "PRC"
## [575] "PST8PDT" "ROC"
## [577] "ROK" "Singapore"
## [579] "Turkey" "UCT"
## [581] "Universal" "US/Alaska"
## [583] "US/Aleutian" "US/Arizona"
## [585] "US/Central" "US/East-Indiana"
## [587] "US/Eastern" "US/Hawaii"
## [589] "US/Indiana-Starke" "US/Michigan"
## [591] "US/Mountain" "US/Pacific"
## [593] "US/Samoa" "UTC"
## [595] "W-SU" "WET"
## [597] "Zulu"
## attr(,"Version")
## [1] "2025b"
# Open the help page for the `flights` data set used in the next steps.
?flights
How many timezones are there for all the destination airports (excluding NA)?
# Count the distinct time zones among destination airports.
# semi_join() keeps only the airports whose code appears as a destination in
# `flights`; airports with a missing `tzone` are excluded before counting.
df <- airports %>%
  semi_join(flights, by = c("faa" = "dest")) %>%
  filter(!is.na(tzone)) %>%
  summarise(n_timezones = n_distinct(tzone))
df
## # A tibble: 1 x 1
## n_timezones
## <int>
## 1 7
# Distinct origin airport codes that occur in `flights`.
unique(flights$origin)
## [1] "EWR" "LGA" "JFK"
# Column names of `airports`; `tz` and `tzone` are the time-zone columns used
# in this section.
names(airports)
## [1] "faa" "name" "lat" "lon" "alt" "tz" "dst" "tzone"
Use the data in flights and airports only along with R code to show the time difference (in hours) between New York City and the following cities
# UTC offset (hours) of New York City, taken from the JFK row of `airports`.
# Kept as a one-column tibble; the value is extracted with [["tz"]] below.
nyc_tz <- select(filter(airports, faa == "JFK"), tz)
# Offsets of the six requested airports and their difference from New York.
# A negative time_diff means the city is behind New York.
result <- airports %>%
  filter(faa %in% c("ORD", "DFW", "DEN", "SEA", "ANC", "HNL")) %>%
  select(faa, name, tz) %>%
  mutate(time_diff = tz - nyc_tz[["tz"]])
result
## # A tibble: 6 x 4
## faa name tz time_diff
## <chr> <chr> <dbl> <dbl>
## 1 ANC Ted Stevens Anchorage Intl -9 -4
## 2 DEN Denver Intl -7 -2
## 3 DFW Dallas Fort Worth Intl -6 -1
## 4 HNL Honolulu Intl -10 -5
## 5 ORD Chicago Ohare Intl -6 -1
## 6 SEA Seattle Tacoma Intl -8 -3
Write a function Time_difference_NYC(dest)
#' Time difference (in hours) between a destination airport and New York City
#'
#' Looks up the `tz` value of each destination in the `airports` table and
#' subtracts the `tz` value of JFK, which stands in for New York City.
#'
#' @param dest Character vector of airport codes matched against
#'   `airports$faa`. May contain several codes.
#' @return Numeric vector the same length as `dest`: destination offset minus
#'   the JFK offset (negative when the destination is behind New York).
#'   Codes that are not found in `airports` yield `NA`.
Time_difference_NYC <- function(dest) {
  faa_codes <- airports[["faa"]]
  # match() gives one index per requested code (NA when the code is unknown),
  # which keeps the lookup vectorised and aligned with `dest`.
  nyc_tz <- airports[["tz"]][match("JFK", faa_codes)]
  dest_tz <- airports[["tz"]][match(dest, faa_codes)]
  dest_tz - nyc_tz
}
Write a function flight_time(dep_time, arr_time, origin, dest)
#' Flight duration in minutes from local departure and arrival clock times
#'
#' Converts both HHMM clock times to minutes after midnight, takes their
#' difference and removes the offset difference between the two airports
#' (looked up in the `tz` column of `airports`).
#'
#' @param dep_time Local departure time as an HHMM number (e.g. 1430).
#' @param arr_time Local arrival time as an HHMM number.
#' @param origin Airport code(s) of the origin, matched against `airports$faa`.
#' @param dest Airport code(s) of the destination, matched the same way.
#' @return Numeric vector of flight durations in minutes, in `[0, 1440)`.
#'   `NA` when an airport code is not found in `airports`. All arguments are
#'   vectorised.
#' NOTE(review): only the `tz` column is used, so daylight-saving differences
#' between airports are not accounted for.
flight_time <- function(dep_time, arr_time, origin, dest) {
  # HHMM clock value -> minutes after midnight.
  to_minutes <- function(hhmm) (hhmm %/% 100) * 60 + hhmm %% 100

  faa_codes <- airports[["faa"]]
  origin_tz <- airports[["tz"]][match(origin, faa_codes)]
  dest_tz <- airports[["tz"]][match(dest, faa_codes)]

  elapsed <- to_minutes(arr_time) - to_minutes(dep_time) -
    (dest_tz - origin_tz) * 60

  # A flight that lands after midnight has an arrival clock time earlier than
  # its departure, which makes the raw difference negative; wrapping modulo
  # one day restores the true duration (flights are assumed shorter than 24h).
  elapsed %% (24 * 60)
}