Module 6 Lab

Exercise 1

# Read the eleven monthly CSV extracts and stack them into one data frame.
# NOTE(review): setwd() changes the working directory for everything that runs
# afterwards; it is kept because the file names below are relative to it.
setwd("~/BANA7025/Module 6 Lab/data")

# File names follow the pattern Month-01.csv ... Month-11.csv.
file_names <- paste0("Month-", sprintf("%02d", 1:11), ".csv")

# Read every file and row-bind the results.
final_data <- file_names %>%
  map_dfr(~ read_csv(.))

dim(final_data)
#> [1] 698159 10

Exercise 2

  Account_ID Transaction_Timestamp Factor_A
1    numeric               POSIXct  numeric
2    numeric                POSIXt  numeric
  Factor_B  Factor_C Factor_D  Factor_E Response
1  numeric character  numeric character  numeric
2  numeric character  numeric character  numeric
  Transaction_Status     Month
1          character character
2          character character

Exercise 3

[1] "character"
class_info[["Transaction_Status"]]
[1] "character"
class_info[["Response"]]
[1] "numeric"
class_info[["Factor_E"]]
[1] "character"
class_info[["Factor_D"]]
[1] "numeric"
class_info[["Factor_C"]]
[1] "character"
class_info[["Factor_B"]]
[1] "numeric"
class_info[["Factor_A"]]
[1] "numeric"
class_info[["Transaction_Timestamp"]]
[2] "POSIXct" "POSIXt"
class_info[["Account_ID"]]
[1] "numeric"

Exercise 4

# Recode Factor_D value 26 to 25, then report the number of distinct values
# and the row count for each value.
# NOTE(review): the start of this pipeline is missing from the write-up. The
# `final_data <- final_data %>%` prefix is reconstructed from the later use of
# final_data$Factor_D and the absence of 26 in the printed counts - confirm.
final_data <- final_data %>%
  mutate(Factor_D = if_else(Factor_D == 26, 25, Factor_D))

unique_values_count <- n_distinct(final_data$Factor_D)

factor_counts <- final_data %>%
  group_by(Factor_D) %>%
  summarise(Count = n())

print(paste("Number of unique values in Factor_D:", unique_values_count))
#> [1] "Number of unique values in Factor_D: 14"

print(factor_counts)
#> # A tibble: 14 × 2
#>    Factor_D  Count
#>  1       10   4595
#>  2       15   1089
#>  3       20 527882
#>  4       21  68072
#>  5       25  41021
#>  6       30   7030
#>  7       31    512
#>  8       35  25298
#>  9       40   2720
#> 10       50   3709
#> 11       55  15200
#> 12       70     54
#> 13       85      4
#> 14       90    973

Exercise 5

# Keep only the rows in which no Factor_* column holds the literal string
# "NULL". filter() also drops rows where the comparison evaluates to NA.
# if_all() replaces the superseded filter_at()/all_vars() pair.
final_data_cleaned <- final_data %>%
  filter(if_all(starts_with("Factor_"), ~ .x != "NULL"))

# Opens the result in the interactive data viewer.
View(final_data_cleaned)

The cleaned data (final_data_cleaned) has 489,537 observations of 10 variables.

## Exercise 8
# Map three-letter month abbreviations ("Jan" ... "Dec") to calendar quarters.
#
# Month: vector of month abbreviations; it is coerced with as.character(), so
#   factors are matched by label.
# Returns: an unnamed character vector the same length as Month holding
#   "Q1", "Q2", "Q3" or "Q4". Any value that is not one of the twelve
#   abbreviations (matching is case-sensitive), including NA, yields NA.
convert_to_qtr <- function(Month) {
  # Named lookup: Jan/Feb/Mar -> Q1, Apr/May/Jun -> Q2, and so on.
  quarter_of <- setNames(rep(paste0("Q", 1:4), each = 3), month.abb)
  unname(quarter_of[as.character(Month)])
}

3 observations per quarter

Exercise 9

# Pull the `name` element out of every entry of sw_people as a character vector.
map_chr(sw_people, function(person) person$name)

87 observations

## Exercise 10

# Count the entries in each character's `films` element (one integer per
# element of sw_people).
sw_people %>%
  map_int(~ length(.x$films))
#>  [1] 5 6 7 4 5 3 3 1 1 6 3 2 5 4 1 3 3 1 5 5 3 1 1 2
#> [25] 1 2 1 1 1 1 1 3 1 2 1 1 1 2 1 1 2 1 1 3 1 1 1 3
#> [49] 3 3 2 2 2 1 3 2 1 1 1 2 2 1 1 2 2 1 1 1 1 1 1 1
#> [73] 2 1 1 2 1 1 2 2 1 1 1 1 1 1 3

Exercise 11

# Dot plot of the number of films per character, ordered by film count.
# Each step sits on its own line so the trailing comments no longer swallow
# the rest of the pipeline.
sw_people %>%
  map_chr(~ .x$name) %>%               # extract the names
  set_names(sw_people, nm = .) %>%     # set the list names as character names
  map_df(~ length(.x$films)) %>%       # extract number of films and make a data frame
  pivot_longer(                        # tidy data frame
    cols = everything(),
    names_to = "Character",
    values_to = "Films"
  ) %>%
  ggplot(aes(Films, reorder(Character, Films))) +  # plot results
  geom_point()