library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.1.4     v readr     2.1.6
## v forcats   1.0.1     v stringr   1.6.0
## v ggplot2   4.0.1     v tibble    3.3.1
## v lubridate 1.9.4     v tidyr     1.3.2
## v purrr     1.2.1     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(nycflights13)
library(readr)
library(dplyr)
# Open the help page for `words` (only useful in an interactive session).
?words
## 打开httpd帮助服务器… 好了

Words that are exactly four letters long

# Put every entry of `words` into a tibble next to its character count.
word_data <- mutate(tibble(word = words), length = str_length(word))

# Keep the rows whose word is exactly four characters long.
len_4_letters <- filter(word_data, length == 4)

len_4_letters
## # A tibble: 263 x 2
##    word  length
##    <chr>  <int>
##  1 able       4
##  2 also       4
##  3 area       4
##  4 away       4
##  5 baby       4
##  6 back       4
##  7 ball       4
##  8 bank       4
##  9 base       4
## 10 bear       4
## # i 253 more rows

Words that are either four or five letters long

# Keep the rows whose word is four or five characters long.
len_45_letters <- filter(word_data, length %in% c(4L, 5L))

len_45_letters
## # A tibble: 463 x 2
##    word  length
##    <chr>  <int>
##  1 able       4
##  2 about      5
##  3 admit      5
##  4 after      5
##  5 again      5
##  6 agent      5
##  7 agree      5
##  8 allow      5
##  9 along      5
## 10 also       4
## # i 453 more rows

Words whose second letter is “s” or “t”

# "^." consumes the first character of the word; "[st]" then requires the
# second character to be "s" or "t".
print(filter(word_data, str_detect(word, "^.[st]")))
## # A tibble: 38 x 2
##    word      length
##    <chr>      <int>
##  1 as             2
##  2 ask            3
##  3 associate      9
##  4 assume         6
##  5 at             2
##  6 attend         6
##  7 especial       8
##  8 issue          5
##  9 it             2
## 10 item           4
## # i 28 more rows

Words that contain a pattern like “oxx”, where “o” is one letter and “x” is a different letter that appears twice in a row

# Find words containing an "oxx" pattern: one character followed by a
# *different* character that is doubled.
#   (.)      the leading "o" character
#   (?!\\1)  look-ahead: the next character must not repeat the leading one
#   (.)\\2   the "x" character, immediately followed by itself
# Enforcing "o" != "x" inside the regex checks the condition at the position
# of the match. A separate "drop every word with a tripled character" filter
# would also discard words such as "abbb", which do contain a valid "abb".
word_data %>%
  filter(str_detect(word, "(.)(?!\\1)(.)\\2")) %>%
  print()
## # A tibble: 157 x 2
##    word      length
##    <chr>      <int>
##  1 accept         6
##  2 account        7
##  3 across         6
##  4 add            3
##  5 address        7
##  6 affect         6
##  7 afford         6
##  8 afternoon      9
##  9 agree          5
## 10 all            3
## # i 147 more rows

Words that contain “a”, “e” and “o” at the same time

# A word qualifies only if it contains "a", "e" and "o"; conditions passed to
# a single filter() call are combined with a logical AND.
word_data %>%
  filter(
    str_detect(word, "a"),
    str_detect(word, "e"),
    str_detect(word, "o")
  ) %>%
  print()
## # A tibble: 14 x 2
##    word        length
##    <chr>        <int>
##  1 absolute         8
##  2 afternoon        9
##  3 another          7
##  4 appropriate     11
##  5 associate        9
##  6 colleague        9
##  7 compare          7
##  8 encourage        9
##  9 operate          7
## 10 organize         8
## 11 probable         8
## 12 programme        9
## 13 reason           6
## 14 relation         8
# One row per entry of `sentences`. The word count is taken as the number of
# whitespace characters plus one.
df1 <- tibble(sentence = sentences) %>%
  mutate(word_number = str_count(sentence, "\\s") + 1)

A bar plot counting the sentences with and without the word “the” (or “The”).

# Bar plot of how many sentences do / do not contain the word "the".
# "\\b" anchors the match at word boundaries so that words merely containing
# the letters "the" ("then", "other", "they", ...) are not counted, and
# ignore_case = TRUE accepts "The" as well as "the".
df1 %>%
  mutate(
    has_the = if_else(
      str_detect(sentence, regex("\\bthe\\b", ignore_case = TRUE)),
      "include the",
      "exclude the"
    )
  ) %>%
  ggplot(aes(x = has_the)) +
  geom_bar()

A scatterplot where x is the average word length in a sentence and y is the number of words in that sentence starting with “a”, “e”, “i”, “o” or “u”.

# Per-sentence features used for the scatterplot.
df2 <- tibble(sentence = sentences) %>%
  mutate(
    # number of whitespace characters plus one
    word_number = str_count(sentence, "\\s") + 1,
    # number of ASCII letters in the sentence
    letters = str_count(sentence, "[a-zA-Z]"),
    # list-column: the sentence split on whitespace
    num_word = str_split(sentence, "\\s")
  )

df2
## # A tibble: 720 x 4
##    sentence                                    word_number letters num_word 
##    <chr>                                             <dbl>   <int> <list>   
##  1 The birch canoe slid on the smooth planks.            8      34 <chr [8]>
##  2 Glue the sheet to the dark blue background.           8      35 <chr [8]>
##  3 It's easy to tell the depth of a well.                9      28 <chr [9]>
##  4 These days a chicken leg is a rare dish.              9      31 <chr [9]>
##  5 Rice is often served in round bowls.                  7      29 <chr [7]>
##  6 The juice of lemons makes fine punch.                 7      30 <chr [7]>
##  7 The box was thrown beside the parked truck.           8      35 <chr [8]>
##  8 The hogs were fed chopped corn and garbage.           8      35 <chr [8]>
##  9 Four hours of steady work faced us.                   7      28 <chr [7]>
## 10 A large size in stockings is hard to sell.            9      33 <chr [9]>
## # i 710 more rows
# Add the two plotting variables:
#   avg_len - letters per word in the sentence
#   vowels  - number of tokens in the sentence that start with a vowel
# vapply() is used instead of sapply() so the result is guaranteed to be an
# integer vector with one count per sentence, whatever the input looks like.
df2 <- df2 %>%
  mutate(
    avg_len = letters / word_number,
    vowels = vapply(
      num_word,
      function(tokens) sum(str_detect(tokens, "^[aeiouAEIOU]")),
      FUN.VALUE = integer(1)
    )
  )

df2
## # A tibble: 720 x 6
##    sentence                          word_number letters num_word avg_len vowels
##    <chr>                                   <dbl>   <int> <list>     <dbl>  <int>
##  1 The birch canoe slid on the smoo~           8      34 <chr>       4.25      1
##  2 Glue the sheet to the dark blue ~           8      35 <chr>       4.38      0
##  3 It's easy to tell the depth of a~           9      28 <chr>       3.11      4
##  4 These days a chicken leg is a ra~           9      31 <chr>       3.44      3
##  5 Rice is often served in round bo~           7      29 <chr>       4.14      3
##  6 The juice of lemons makes fine p~           7      30 <chr>       4.29      1
##  7 The box was thrown beside the pa~           8      35 <chr>       4.38      0
##  8 The hogs were fed chopped corn a~           8      35 <chr>       4.38      1
##  9 Four hours of steady work faced ~           7      28 <chr>       4         2
## 10 A large size in stockings is har~           9      33 <chr>       3.67      3
## # i 710 more rows
# Scatterplot of vowel-initial word count against average word length; the
# points are jittered slightly and drawn semi-transparent.
df2 %>%
  ggplot(mapping = aes(x = avg_len, y = vowels)) +
  geom_jitter(alpha = 0.7, width = 0.1, height = 0.1)

Read the dictionary file into R with the read_lines() function

# Read the dictionary text file into a character vector, one element per line.
OX_eng <- read_lines(file = "D:/lilith/Oxford_English_Dictionary.txt")

Turn the dictionary into a tibble and remove all blank lines

# Wrap the lines in a one-column tibble and drop the empty-string lines.
dfox <- filter(tibble(text = OX_eng), text != "")
dfox
## # A tibble: 36,740 x 1
##    text                                                                         
##    <chr>                                                                        
##  1 "A "                                                                         
##  2 "A-  prefix (also an- before a vowel sound) not, without (amoral). [greek]"  
##  3 "Aa  abbr. 1 automobile association. 2 alcoholics anonymous. 3 anti-aircraft~
##  4 "Aardvark  n. Mammal with a tubular snout and a long tongue, feeding on term~
##  5 "Ab-  prefix off, away, from (abduct). [latin]"                              
##  6 "Aback  adv. \u007f take aback surprise, disconcert. [old english: related t~
##  7 "Abacus  n. (pl. -cuses) 1 frame with wires along which beads are slid for c~
##  8 "Abaft  naut. —adv. In the stern half of a ship. —prep. Nearer the stern tha~
##  9 "Abandon  —v. 1 give up. 2 forsake, desert. 3 (often foll. By to; often refl~
## 10 "Abandoned  adj. 1 deserted, forsaken. 2 unrestrained, profligate."          
## # i 36,730 more rows

Use a regular expression to extract all the words of each item into a separate column named “words”

# Extract every word of each dictionary line and give each word its own row.
# "\\p{L}+" matches a run of letters in any script. An ASCII-only class such
# as "[A-Za-z]+" would break accented words (the dictionary contains entries
# like "Ancien régime") into separate fragments at the accented letter.
df_words <- dfox %>%
  mutate(words = str_extract_all(text, "\\p{L}+")) %>%
  unnest(cols = words)

Find all words in the dictionary that contain “a”, “e”, “i”, “o”, “u” and “y” at the same time

# Keep the words that contain all of "a", "e", "i", "o", "u" and "y".
# Each test accepts either case: extracted words keep their original
# capitalisation, so a capitalised word whose only occurrence of one of these
# letters is its initial capital would otherwise be missed.
df_words %>%
  filter(
    str_detect(words, "[aA]"),
    str_detect(words, "[eE]"),
    str_detect(words, "[iI]"),
    str_detect(words, "[oO]"),
    str_detect(words, "[uU]"),
    str_detect(words, "[yY]")
  )
## # A tibble: 90 x 2
##    text                                                                    words
##    <chr>                                                                   <chr>
##  1 "Abstemious  adj. Moderate or ascetic, esp. In eating and drinking. \u~ abst~
##  2 "Accident  n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
##  3 "Accident  n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
##  4 "Accident  n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
##  5 "Accident  n. 1 unfortunate esp. Harmful event, caused unintentionally~ unin~
##  6 "Ancien régime  n. (pl. Anciens régimes pronunc. Same) 1 political and~ revo~
##  7 "Blabber  —n. (also blabbermouth) person who blabs. —v. (often foll. B~ inco~
##  8 "Bolshevik  —n. 1 hist. Member of the radical faction of the russian s~ revo~
##  9 "Byelorussian  (also belorussian) —n. Native or language of byelorussi~ Byel~
## 10 "Byelorussian  (also belorussian) —n. Native or language of byelorussi~ byel~
## # i 80 more rows
# Load the bank customer data set from CSV into a tibble.
bank_data <- read_csv(file = "D:/lilith/BankChurners.csv")
## Rows: 10127 Columns: 20
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (6): Attrition_Flag, Gender, Education_Level, Marital_Status, Income_Ca...
## dbl (14): Customer_Age, Dependent_count, Months_on_book, Total_Relationship_...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Q: Which features can be regarded as factors? A: Attrition_Flag, Gender, Marital_Status, Education_Level, Income_Category and Card_Category.

Q: Which features can be regarded as ordered factors (ordinal)? A: Education_Level, Income_Category and Card_Category.

# Convert the categorical columns of bank_data to factors.
#
# factor() silently turns every value that is not listed in `levels` into NA,
# so each explicit level set below must cover all values present in the data.
# NOTE(review): the level names "Uneducated", "Unknown" (income) and the four
# card tiers are taken from the public BankChurners data set; confirm them
# with count(bank_data, <column>) on the raw data before relying on them.
bank_data <- bank_data %>%
  mutate(
    # Nominal (unordered) categories.
    Attrition_Flag = factor(Attrition_Flag),
    Gender = factor(Gender),
    Marital_Status = factor(Marital_Status),

    # Ordinal categories, listed from lowest to highest. "Unknown" is kept as
    # the first level so those rows stay in the data instead of becoming NA.
    Card_Category = factor(
      Card_Category,
      levels = c("Blue", "Silver", "Gold", "Platinum"),
      ordered = TRUE
    ),

    Education_Level = factor(
      Education_Level,
      levels = c("Unknown", "Uneducated", "High School", "College",
                 "Graduate", "Post-Graduate", "Doctorate"),
      ordered = TRUE
    ),

    Income_Category = factor(
      Income_Category,
      levels = c("Unknown",
                 "Less than $40K",
                 "$40K - $60K",
                 "$60K - $80K",
                 "$80K - $120K",
                 "$120K +"),
      ordered = TRUE
    )
  )
bank_data
## # A tibble: 10,127 x 20
##    Attrition_Flag    Customer_Age Gender Dependent_count Education_Level
##    <chr>                    <dbl> <fct>            <dbl> <ord>          
##  1 Existing Customer           45 M                    3 High School    
##  2 Existing Customer           49 F                    5 Graduate       
##  3 Existing Customer           51 M                    3 Graduate       
##  4 Existing Customer           40 F                    4 High School    
##  5 Existing Customer           40 M                    3 <NA>           
##  6 Existing Customer           44 M                    2 Graduate       
##  7 Existing Customer           51 M                    4 Unknown        
##  8 Existing Customer           32 M                    0 High School    
##  9 Existing Customer           37 M                    3 <NA>           
## 10 Existing Customer           48 M                    2 Graduate       
## # i 10,117 more rows
## # i 15 more variables: Marital_Status <fct>, Income_Category <ord>,
## #   Card_Category <fct>, Months_on_book <dbl>, Total_Relationship_Count <dbl>,
## #   Months_Inactive_12_mon <dbl>, Contacts_Count_12_mon <dbl>,
## #   Credit_Limit <dbl>, Total_Revolving_Bal <dbl>, Avg_Open_To_Buy <dbl>,
## #   Total_Amt_Chng_Q4_Q1 <dbl>, Total_Trans_Amt <dbl>, Total_Trans_Ct <dbl>,
## #   Total_Ct_Chng_Q4_Q1 <dbl>, Avg_Utilization_Ratio <dbl>
# Boxplot of the utilization ratio for each education level.
bank_data %>%
  ggplot(mapping = aes(x = Education_Level, y = Avg_Utilization_Ratio)) +
  geom_boxplot()

The impact is minimal, but the average utilisation rate is lowest for undergraduate degrees and highest for postgraduate degrees.

# Open the help page for `gss_cat` (only useful in an interactive session).
?gss_cat

What are the levels of the marital variable?

# Distinct values of the marital column; printing the factor also lists its
# levels.
gss_cat %>%
  pull(marital) %>%
  unique()
## [1] Never married Divorced      Widowed       Married       Separated    
## [6] No answer    
## Levels: No answer Never married Separated Divorced Widowed Married

Combine “Separated”, “Divorced”, “Widowed” into a new category “Once Married”

# Add marital_new: a copy of marital in which "Separated", "Divorced" and
# "Widowed" are merged into the single level "Once Married".
gss_cat2 <- mutate(
  gss_cat,
  marital_new = fct_collapse(
    marital,
    "Once Married" = c("Separated", "Divorced", "Widowed")
  )
)
gss_cat2
## # A tibble: 21,483 x 10
##     year marital       age race  rincome partyid relig denom tvhours marital_new
##    <int> <fct>       <int> <fct> <fct>   <fct>   <fct> <fct>   <int> <fct>      
##  1  2000 Never marr~    26 White $8000 ~ Ind,ne~ Prot~ Sout~      12 Never marr~
##  2  2000 Divorced       48 White $8000 ~ Not st~ Prot~ Bapt~      NA Once Marri~
##  3  2000 Widowed        67 White Not ap~ Indepe~ Prot~ No d~       2 Once Marri~
##  4  2000 Never marr~    39 White Not ap~ Ind,ne~ Orth~ Not ~       4 Never marr~
##  5  2000 Divorced       25 White Not ap~ Not st~ None  Not ~       1 Once Marri~
##  6  2000 Married        25 White $20000~ Strong~ Prot~ Sout~      NA Married    
##  7  2000 Never marr~    36 White $25000~ Not st~ Chri~ Not ~       3 Never marr~
##  8  2000 Divorced       44 White $7000 ~ Ind,ne~ Prot~ Luth~      NA Once Marri~
##  9  2000 Married        44 White $25000~ Not st~ Prot~ Other       0 Married    
## 10  2000 Married        47 White $25000~ Strong~ Prot~ Sout~       3 Married    
## # i 21,473 more rows

Using the new levels, explore whether marital status has an effect on tvhours.

# Boxplot of TV hours for each (collapsed) marital status.
# tvhours contains missing values; na.rm = TRUE states explicitly that they
# are left out of the boxplot statistics, instead of relying on ggplot2 to
# drop them with a warning.
ggplot(gss_cat2, aes(x = marital_new, y = tvhours)) +
  geom_boxplot(na.rm = TRUE)
## Warning: Removed 10146 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Print the time zone names returned by OlsonNames().
OlsonNames()
##   [1] "Africa/Abidjan"                   "Africa/Accra"                    
##   [3] "Africa/Addis_Ababa"               "Africa/Algiers"                  
##   [5] "Africa/Asmara"                    "Africa/Asmera"                   
##   [7] "Africa/Bamako"                    "Africa/Bangui"                   
##   [9] "Africa/Banjul"                    "Africa/Bissau"                   
##  [11] "Africa/Blantyre"                  "Africa/Brazzaville"              
##  [13] "Africa/Bujumbura"                 "Africa/Cairo"                    
##  [15] "Africa/Casablanca"                "Africa/Ceuta"                    
##  [17] "Africa/Conakry"                   "Africa/Dakar"                    
##  [19] "Africa/Dar_es_Salaam"             "Africa/Djibouti"                 
##  [21] "Africa/Douala"                    "Africa/El_Aaiun"                 
##  [23] "Africa/Freetown"                  "Africa/Gaborone"                 
##  [25] "Africa/Harare"                    "Africa/Johannesburg"             
##  [27] "Africa/Juba"                      "Africa/Kampala"                  
##  [29] "Africa/Khartoum"                  "Africa/Kigali"                   
##  [31] "Africa/Kinshasa"                  "Africa/Lagos"                    
##  [33] "Africa/Libreville"                "Africa/Lome"                     
##  [35] "Africa/Luanda"                    "Africa/Lubumbashi"               
##  [37] "Africa/Lusaka"                    "Africa/Malabo"                   
##  [39] "Africa/Maputo"                    "Africa/Maseru"                   
##  [41] "Africa/Mbabane"                   "Africa/Mogadishu"                
##  [43] "Africa/Monrovia"                  "Africa/Nairobi"                  
##  [45] "Africa/Ndjamena"                  "Africa/Niamey"                   
##  [47] "Africa/Nouakchott"                "Africa/Ouagadougou"              
##  [49] "Africa/Porto-Novo"                "Africa/Sao_Tome"                 
##  [51] "Africa/Timbuktu"                  "Africa/Tripoli"                  
##  [53] "Africa/Tunis"                     "Africa/Windhoek"                 
##  [55] "America/Adak"                     "America/Anchorage"               
##  [57] "America/Anguilla"                 "America/Antigua"                 
##  [59] "America/Araguaina"                "America/Argentina/Buenos_Aires"  
##  [61] "America/Argentina/Catamarca"      "America/Argentina/ComodRivadavia"
##  [63] "America/Argentina/Cordoba"        "America/Argentina/Jujuy"         
##  [65] "America/Argentina/La_Rioja"       "America/Argentina/Mendoza"       
##  [67] "America/Argentina/Rio_Gallegos"   "America/Argentina/Salta"         
##  [69] "America/Argentina/San_Juan"       "America/Argentina/San_Luis"      
##  [71] "America/Argentina/Tucuman"        "America/Argentina/Ushuaia"       
##  [73] "America/Aruba"                    "America/Asuncion"                
##  [75] "America/Atikokan"                 "America/Atka"                    
##  [77] "America/Bahia"                    "America/Bahia_Banderas"          
##  [79] "America/Barbados"                 "America/Belem"                   
##  [81] "America/Belize"                   "America/Blanc-Sablon"            
##  [83] "America/Boa_Vista"                "America/Bogota"                  
##  [85] "America/Boise"                    "America/Buenos_Aires"            
##  [87] "America/Cambridge_Bay"            "America/Campo_Grande"            
##  [89] "America/Cancun"                   "America/Caracas"                 
##  [91] "America/Catamarca"                "America/Cayenne"                 
##  [93] "America/Cayman"                   "America/Chicago"                 
##  [95] "America/Chihuahua"                "America/Ciudad_Juarez"           
##  [97] "America/Coral_Harbour"            "America/Cordoba"                 
##  [99] "America/Costa_Rica"               "America/Coyhaique"               
## [101] "America/Creston"                  "America/Cuiaba"                  
## [103] "America/Curacao"                  "America/Danmarkshavn"            
## [105] "America/Dawson"                   "America/Dawson_Creek"            
## [107] "America/Denver"                   "America/Detroit"                 
## [109] "America/Dominica"                 "America/Edmonton"                
## [111] "America/Eirunepe"                 "America/El_Salvador"             
## [113] "America/Ensenada"                 "America/Fort_Nelson"             
## [115] "America/Fort_Wayne"               "America/Fortaleza"               
## [117] "America/Glace_Bay"                "America/Godthab"                 
## [119] "America/Goose_Bay"                "America/Grand_Turk"              
## [121] "America/Grenada"                  "America/Guadeloupe"              
## [123] "America/Guatemala"                "America/Guayaquil"               
## [125] "America/Guyana"                   "America/Halifax"                 
## [127] "America/Havana"                   "America/Hermosillo"              
## [129] "America/Indiana/Indianapolis"     "America/Indiana/Knox"            
## [131] "America/Indiana/Marengo"          "America/Indiana/Petersburg"      
## [133] "America/Indiana/Tell_City"        "America/Indiana/Vevay"           
## [135] "America/Indiana/Vincennes"        "America/Indiana/Winamac"         
## [137] "America/Indianapolis"             "America/Inuvik"                  
## [139] "America/Iqaluit"                  "America/Jamaica"                 
## [141] "America/Jujuy"                    "America/Juneau"                  
## [143] "America/Kentucky/Louisville"      "America/Kentucky/Monticello"     
## [145] "America/Knox_IN"                  "America/Kralendijk"              
## [147] "America/La_Paz"                   "America/Lima"                    
## [149] "America/Los_Angeles"              "America/Louisville"              
## [151] "America/Lower_Princes"            "America/Maceio"                  
## [153] "America/Managua"                  "America/Manaus"                  
## [155] "America/Marigot"                  "America/Martinique"              
## [157] "America/Matamoros"                "America/Mazatlan"                
## [159] "America/Mendoza"                  "America/Menominee"               
## [161] "America/Merida"                   "America/Metlakatla"              
## [163] "America/Mexico_City"              "America/Miquelon"                
## [165] "America/Moncton"                  "America/Monterrey"               
## [167] "America/Montevideo"               "America/Montreal"                
## [169] "America/Montserrat"               "America/Nassau"                  
## [171] "America/New_York"                 "America/Nipigon"                 
## [173] "America/Nome"                     "America/Noronha"                 
## [175] "America/North_Dakota/Beulah"      "America/North_Dakota/Center"     
## [177] "America/North_Dakota/New_Salem"   "America/Nuuk"                    
## [179] "America/Ojinaga"                  "America/Panama"                  
## [181] "America/Pangnirtung"              "America/Paramaribo"              
## [183] "America/Phoenix"                  "America/Port-au-Prince"          
## [185] "America/Port_of_Spain"            "America/Porto_Acre"              
## [187] "America/Porto_Velho"              "America/Puerto_Rico"             
## [189] "America/Punta_Arenas"             "America/Rainy_River"             
## [191] "America/Rankin_Inlet"             "America/Recife"                  
## [193] "America/Regina"                   "America/Resolute"                
## [195] "America/Rio_Branco"               "America/Rosario"                 
## [197] "America/Santa_Isabel"             "America/Santarem"                
## [199] "America/Santiago"                 "America/Santo_Domingo"           
## [201] "America/Sao_Paulo"                "America/Scoresbysund"            
## [203] "America/Shiprock"                 "America/Sitka"                   
## [205] "America/St_Barthelemy"            "America/St_Johns"                
## [207] "America/St_Kitts"                 "America/St_Lucia"                
## [209] "America/St_Thomas"                "America/St_Vincent"              
## [211] "America/Swift_Current"            "America/Tegucigalpa"             
## [213] "America/Thule"                    "America/Thunder_Bay"             
## [215] "America/Tijuana"                  "America/Toronto"                 
## [217] "America/Tortola"                  "America/Vancouver"               
## [219] "America/Virgin"                   "America/Whitehorse"              
## [221] "America/Winnipeg"                 "America/Yakutat"                 
## [223] "America/Yellowknife"              "Antarctica/Casey"                
## [225] "Antarctica/Davis"                 "Antarctica/DumontDUrville"       
## [227] "Antarctica/Macquarie"             "Antarctica/Mawson"               
## [229] "Antarctica/McMurdo"               "Antarctica/Palmer"               
## [231] "Antarctica/Rothera"               "Antarctica/South_Pole"           
## [233] "Antarctica/Syowa"                 "Antarctica/Troll"                
## [235] "Antarctica/Vostok"                "Arctic/Longyearbyen"             
## [237] "Asia/Aden"                        "Asia/Almaty"                     
## [239] "Asia/Amman"                       "Asia/Anadyr"                     
## [241] "Asia/Aqtau"                       "Asia/Aqtobe"                     
## [243] "Asia/Ashgabat"                    "Asia/Ashkhabad"                  
## [245] "Asia/Atyrau"                      "Asia/Baghdad"                    
## [247] "Asia/Bahrain"                     "Asia/Baku"                       
## [249] "Asia/Bangkok"                     "Asia/Barnaul"                    
## [251] "Asia/Beirut"                      "Asia/Bishkek"                    
## [253] "Asia/Brunei"                      "Asia/Calcutta"                   
## [255] "Asia/Chita"                       "Asia/Choibalsan"                 
## [257] "Asia/Chongqing"                   "Asia/Chungking"                  
## [259] "Asia/Colombo"                     "Asia/Dacca"                      
## [261] "Asia/Damascus"                    "Asia/Dhaka"                      
## [263] "Asia/Dili"                        "Asia/Dubai"                      
## [265] "Asia/Dushanbe"                    "Asia/Famagusta"                  
## [267] "Asia/Gaza"                        "Asia/Harbin"                     
## [269] "Asia/Hebron"                      "Asia/Ho_Chi_Minh"                
## [271] "Asia/Hong_Kong"                   "Asia/Hovd"                       
## [273] "Asia/Irkutsk"                     "Asia/Istanbul"                   
## [275] "Asia/Jakarta"                     "Asia/Jayapura"                   
## [277] "Asia/Jerusalem"                   "Asia/Kabul"                      
## [279] "Asia/Kamchatka"                   "Asia/Karachi"                    
## [281] "Asia/Kashgar"                     "Asia/Kathmandu"                  
## [283] "Asia/Katmandu"                    "Asia/Khandyga"                   
## [285] "Asia/Kolkata"                     "Asia/Krasnoyarsk"                
## [287] "Asia/Kuala_Lumpur"                "Asia/Kuching"                    
## [289] "Asia/Kuwait"                      "Asia/Macao"                      
## [291] "Asia/Macau"                       "Asia/Magadan"                    
## [293] "Asia/Makassar"                    "Asia/Manila"                     
## [295] "Asia/Muscat"                      "Asia/Nicosia"                    
## [297] "Asia/Novokuznetsk"                "Asia/Novosibirsk"                
## [299] "Asia/Omsk"                        "Asia/Oral"                       
## [301] "Asia/Phnom_Penh"                  "Asia/Pontianak"                  
## [303] "Asia/Pyongyang"                   "Asia/Qatar"                      
## [305] "Asia/Qostanay"                    "Asia/Qyzylorda"                  
## [307] "Asia/Rangoon"                     "Asia/Riyadh"                     
## [309] "Asia/Saigon"                      "Asia/Sakhalin"                   
## [311] "Asia/Samarkand"                   "Asia/Seoul"                      
## [313] "Asia/Shanghai"                    "Asia/Singapore"                  
## [315] "Asia/Srednekolymsk"               "Asia/Taipei"                     
## [317] "Asia/Tashkent"                    "Asia/Tbilisi"                    
## [319] "Asia/Tehran"                      "Asia/Tel_Aviv"                   
## [321] "Asia/Thimbu"                      "Asia/Thimphu"                    
## [323] "Asia/Tokyo"                       "Asia/Tomsk"                      
## [325] "Asia/Ujung_Pandang"               "Asia/Ulaanbaatar"                
## [327] "Asia/Ulan_Bator"                  "Asia/Urumqi"                     
## [329] "Asia/Ust-Nera"                    "Asia/Vientiane"                  
## [331] "Asia/Vladivostok"                 "Asia/Yakutsk"                    
## [333] "Asia/Yangon"                      "Asia/Yekaterinburg"              
## [335] "Asia/Yerevan"                     "Atlantic/Azores"                 
## [337] "Atlantic/Bermuda"                 "Atlantic/Canary"                 
## [339] "Atlantic/Cape_Verde"              "Atlantic/Faeroe"                 
## [341] "Atlantic/Faroe"                   "Atlantic/Jan_Mayen"              
## [343] "Atlantic/Madeira"                 "Atlantic/Reykjavik"              
## [345] "Atlantic/South_Georgia"           "Atlantic/St_Helena"              
## [347] "Atlantic/Stanley"                 "Australia/ACT"                   
## [349] "Australia/Adelaide"               "Australia/Brisbane"              
## [351] "Australia/Broken_Hill"            "Australia/Canberra"              
## [353] "Australia/Currie"                 "Australia/Darwin"                
## [355] "Australia/Eucla"                  "Australia/Hobart"                
## [357] "Australia/LHI"                    "Australia/Lindeman"              
## [359] "Australia/Lord_Howe"              "Australia/Melbourne"             
## [361] "Australia/North"                  "Australia/NSW"                   
## [363] "Australia/Perth"                  "Australia/Queensland"            
## [365] "Australia/South"                  "Australia/Sydney"                
## [367] "Australia/Tasmania"               "Australia/Victoria"              
## [369] "Australia/West"                   "Australia/Yancowinna"            
## [371] "Brazil/Acre"                      "Brazil/DeNoronha"                
## [373] "Brazil/East"                      "Brazil/West"                     
## [375] "Canada/Atlantic"                  "Canada/Central"                  
## [377] "Canada/Eastern"                   "Canada/Mountain"                 
## [379] "Canada/Newfoundland"              "Canada/Pacific"                  
## [381] "Canada/Saskatchewan"              "Canada/Yukon"                    
## [383] "CET"                              "Chile/Continental"               
## [385] "Chile/EasterIsland"               "CST6CDT"                         
## [387] "Cuba"                             "EET"                             
## [389] "Egypt"                            "Eire"                            
## [391] "EST"                              "EST5EDT"                         
## [393] "Etc/GMT"                          "Etc/GMT-0"                       
## [395] "Etc/GMT-1"                        "Etc/GMT-10"                      
## [397] "Etc/GMT-11"                       "Etc/GMT-12"                      
## [399] "Etc/GMT-13"                       "Etc/GMT-14"                      
## [401] "Etc/GMT-2"                        "Etc/GMT-3"                       
## [403] "Etc/GMT-4"                        "Etc/GMT-5"                       
## [405] "Etc/GMT-6"                        "Etc/GMT-7"                       
## [407] "Etc/GMT-8"                        "Etc/GMT-9"                       
## [409] "Etc/GMT+0"                        "Etc/GMT+1"                       
## [411] "Etc/GMT+10"                       "Etc/GMT+11"                      
## [413] "Etc/GMT+12"                       "Etc/GMT+2"                       
## [415] "Etc/GMT+3"                        "Etc/GMT+4"                       
## [417] "Etc/GMT+5"                        "Etc/GMT+6"                       
## [419] "Etc/GMT+7"                        "Etc/GMT+8"                       
## [421] "Etc/GMT+9"                        "Etc/GMT0"                        
## [423] "Etc/Greenwich"                    "Etc/UCT"                         
## [425] "Etc/Universal"                    "Etc/UTC"                         
## [427] "Etc/Zulu"                         "Europe/Amsterdam"                
## [429] "Europe/Andorra"                   "Europe/Astrakhan"                
## [431] "Europe/Athens"                    "Europe/Belfast"                  
## [433] "Europe/Belgrade"                  "Europe/Berlin"                   
## [435] "Europe/Bratislava"                "Europe/Brussels"                 
## [437] "Europe/Bucharest"                 "Europe/Budapest"                 
## [439] "Europe/Busingen"                  "Europe/Chisinau"                 
## [441] "Europe/Copenhagen"                "Europe/Dublin"                   
## [443] "Europe/Gibraltar"                 "Europe/Guernsey"                 
## [445] "Europe/Helsinki"                  "Europe/Isle_of_Man"              
## [447] "Europe/Istanbul"                  "Europe/Jersey"                   
## [449] "Europe/Kaliningrad"               "Europe/Kiev"                     
## [451] "Europe/Kirov"                     "Europe/Kyiv"                     
## [453] "Europe/Lisbon"                    "Europe/Ljubljana"                
## [455] "Europe/London"                    "Europe/Luxembourg"               
## [457] "Europe/Madrid"                    "Europe/Malta"                    
## [459] "Europe/Mariehamn"                 "Europe/Minsk"                    
## [461] "Europe/Monaco"                    "Europe/Moscow"                   
## [463] "Europe/Nicosia"                   "Europe/Oslo"                     
## [465] "Europe/Paris"                     "Europe/Podgorica"                
## [467] "Europe/Prague"                    "Europe/Riga"                     
## [469] "Europe/Rome"                      "Europe/Samara"                   
## [471] "Europe/San_Marino"                "Europe/Sarajevo"                 
## [473] "Europe/Saratov"                   "Europe/Simferopol"               
## [475] "Europe/Skopje"                    "Europe/Sofia"                    
## [477] "Europe/Stockholm"                 "Europe/Tallinn"                  
## [479] "Europe/Tirane"                    "Europe/Tiraspol"                 
## [481] "Europe/Ulyanovsk"                 "Europe/Uzhgorod"                 
## [483] "Europe/Vaduz"                     "Europe/Vatican"                  
## [485] "Europe/Vienna"                    "Europe/Vilnius"                  
## [487] "Europe/Volgograd"                 "Europe/Warsaw"                   
## [489] "Europe/Zagreb"                    "Europe/Zaporozhye"               
## [491] "Europe/Zurich"                    "GB"                              
## [493] "GB-Eire"                          "GMT"                             
## [495] "GMT-0"                            "GMT+0"                           
## [497] "GMT0"                             "Greenwich"                       
## [499] "Hongkong"                         "HST"                             
## [501] "Iceland"                          "Indian/Antananarivo"             
## [503] "Indian/Chagos"                    "Indian/Christmas"                
## [505] "Indian/Cocos"                     "Indian/Comoro"                   
## [507] "Indian/Kerguelen"                 "Indian/Mahe"                     
## [509] "Indian/Maldives"                  "Indian/Mauritius"                
## [511] "Indian/Mayotte"                   "Indian/Reunion"                  
## [513] "Iran"                             "Israel"                          
## [515] "Jamaica"                          "Japan"                           
## [517] "Kwajalein"                        "Libya"                           
## [519] "MET"                              "Mexico/BajaNorte"                
## [521] "Mexico/BajaSur"                   "Mexico/General"                  
## [523] "MST"                              "MST7MDT"                         
## [525] "Navajo"                           "NZ"                              
## [527] "NZ-CHAT"                          "Pacific/Apia"                    
## [529] "Pacific/Auckland"                 "Pacific/Bougainville"            
## [531] "Pacific/Chatham"                  "Pacific/Chuuk"                   
## [533] "Pacific/Easter"                   "Pacific/Efate"                   
## [535] "Pacific/Enderbury"                "Pacific/Fakaofo"                 
## [537] "Pacific/Fiji"                     "Pacific/Funafuti"                
## [539] "Pacific/Galapagos"                "Pacific/Gambier"                 
## [541] "Pacific/Guadalcanal"              "Pacific/Guam"                    
## [543] "Pacific/Honolulu"                 "Pacific/Johnston"                
## [545] "Pacific/Kanton"                   "Pacific/Kiritimati"              
## [547] "Pacific/Kosrae"                   "Pacific/Kwajalein"               
## [549] "Pacific/Majuro"                   "Pacific/Marquesas"               
## [551] "Pacific/Midway"                   "Pacific/Nauru"                   
## [553] "Pacific/Niue"                     "Pacific/Norfolk"                 
## [555] "Pacific/Noumea"                   "Pacific/Pago_Pago"               
## [557] "Pacific/Palau"                    "Pacific/Pitcairn"                
## [559] "Pacific/Pohnpei"                  "Pacific/Ponape"                  
## [561] "Pacific/Port_Moresby"             "Pacific/Rarotonga"               
## [563] "Pacific/Saipan"                   "Pacific/Samoa"                   
## [565] "Pacific/Tahiti"                   "Pacific/Tarawa"                  
## [567] "Pacific/Tongatapu"                "Pacific/Truk"                    
## [569] "Pacific/Wake"                     "Pacific/Wallis"                  
## [571] "Pacific/Yap"                      "Poland"                          
## [573] "Portugal"                         "PRC"                             
## [575] "PST8PDT"                          "ROC"                             
## [577] "ROK"                              "Singapore"                       
## [579] "Turkey"                           "UCT"                             
## [581] "Universal"                        "US/Alaska"                       
## [583] "US/Aleutian"                      "US/Arizona"                      
## [585] "US/Central"                       "US/East-Indiana"                 
## [587] "US/Eastern"                       "US/Hawaii"                       
## [589] "US/Indiana-Starke"                "US/Michigan"                     
## [591] "US/Mountain"                      "US/Pacific"                      
## [593] "US/Samoa"                         "UTC"                             
## [595] "W-SU"                             "WET"                             
## [597] "Zulu"                            
## attr(,"Version")
## [1] "2025b"
# Open the help page for the `flights` data set.
help("flights")

How many timezones are there for all the destination airports (excluding NA)?

# Count the distinct time zones among the destination airports of all flights.
# Destinations without a match in `airports` (or whose `tzone` is missing)
# carry an NA time zone after the join and are dropped before counting.
df <- flights %>%
  left_join(airports, by = c("dest" = "faa")) %>%
  drop_na(tzone) %>%
  distinct(tzone) %>%
  summarise(n_timezones = n())
df
## # A tibble: 1 x 1
##   n_timezones
##         <int>
## 1           7
# List the distinct origin airports that appear in `flights`.
unique(flights[["origin"]])
## [1] "EWR" "LGA" "JFK"
# Show which columns the `airports` table provides.
colnames(airports)
## [1] "faa"   "name"  "lat"   "lon"   "alt"   "tz"    "dst"   "tzone"

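Using only the data in flights and airports, write R code to show the time difference (in hours) between New York City and each of the following cities: Chicago (ORD), Dallas–Fort Worth (DFW), Denver (DEN), Seattle (SEA), Anchorage (ANC) and Honolulu (HNL).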

# `tz` value of New York City, taken from the JFK row of `airports` and kept
# as a one-column tibble.
nyc_tz <- select(filter(airports, faa == "JFK"), tz)

# For each airport of interest, subtract New York's `tz` from the airport's
# own `tz`. Presumably `tz` is the offset from UTC in hours, so a negative
# `time_diff` means the airport's clock is behind New York's.
result <- airports %>%
  filter(faa %in% c("ORD", "DFW", "DEN", "SEA", "ANC", "HNL")) %>%
  select(faa, name, tz) %>%
  mutate(time_diff = tz - nyc_tz$tz)

result
## # A tibble: 6 x 4
##   faa   name                          tz time_diff
##   <chr> <chr>                      <dbl>     <dbl>
## 1 ANC   Ted Stevens Anchorage Intl    -9        -4
## 2 DEN   Denver Intl                   -7        -2
## 3 DFW   Dallas Fort Worth Intl        -6        -1
## 4 HNL   Honolulu Intl                -10        -5
## 5 ORD   Chicago Ohare Intl            -6        -1
## 6 SEA   Seattle Tacoma Intl           -8        -3

Write a function Time_difference_NYC(dest)

# Time difference (in hours) between New York City and destination airports.
#
# dest: character vector of FAA airport codes (e.g. "ORD"), looked up in
#       `airports[["faa"]]`.
#
# Returns a numeric vector with one element per entry of `dest`, holding the
# destination's `tz` minus the `tz` of JFK (both read from `airports`).
# Codes that do not occur in `airports` yield NA.
Time_difference_NYC <- function(dest) {
  faa_codes <- airports[["faa"]]
  tz_offsets <- airports[["tz"]]

  # JFK stands in for New York City as the reference airport.
  nyc_tz <- tz_offsets[match("JFK", faa_codes)]

  # match() keeps the result aligned with `dest`, so the function also works
  # on a whole column of codes; an `==` filter supports only a single code
  # and silently returns an empty vector for an unknown one.
  dest_tz <- tz_offsets[match(dest, faa_codes)]

  dest_tz - nyc_tz
}

Write a function flight_time(dep_time, arr_time, origin, dest)

# Flight duration in minutes, computed from local clock times.
#
# dep_time, arr_time: local departure and arrival times encoded as HHMM
#   numbers (e.g. 1530 for 15:30); may be vectors.
# origin, dest: FAA airport codes, looked up in `airports[["faa"]]`; may be
#   vectors of the same length as the times.
#
# Returns a numeric vector of minutes in [0, 1440). A missing time or an
# airport code that does not occur in `airports` yields NA.
#
# NOTE(review): only the fixed `tz` offset is used, so airports that handle
# daylight saving time differently from the origin may be off by an hour --
# confirm whether that matters for the intended use.
flight_time <- function(dep_time, arr_time, origin, dest) {
  # Convert an HHMM clock value to minutes after local midnight.
  to_minutes <- function(hhmm) (hhmm %/% 100) * 60 + hhmm %% 100

  faa_codes <- airports[["faa"]]
  tz_offsets <- airports[["tz"]]

  # match() keeps the lookups aligned with the inputs, so the function can be
  # applied to whole columns.
  origin_tz <- tz_offsets[match(origin, faa_codes)]
  dest_tz <- tz_offsets[match(dest, faa_codes)]
  tz_diff <- dest_tz - origin_tz

  # Remove the time zone shift so both clock readings are on the origin's
  # clock.
  elapsed <- to_minutes(arr_time) - to_minutes(dep_time) - tz_diff * 60

  # An arrival after local midnight makes the raw difference negative; wrap
  # it into a single day, since clock times alone carry no date.
  elapsed %% (24 * 60)
}