Import your data

data <- read_excel("mydatasal.xlsx")

#
head(data)
## # A tibble: 6 × 13
##     age workclass     degree marital_status occupation relationship race  gender
##   <dbl> <chr>         <chr>  <chr>          <chr>      <chr>        <chr> <chr> 
## 1    39 State-gov     Bache… Never-married  Adm-cleri… Not-in-fami… White Male  
## 2    50 Self-emp-not… Bache… Married-civ-s… Exec-mana… Husband      White Male  
## 3    38 Private       HS-gr… Divorced       Handlers-… Not-in-fami… White Male  
## 4    53 Private       11th   Married-civ-s… Handlers-… Husband      Black Male  
## 5    28 Private       Bache… Married-civ-s… Prof-spec… Wife         Black Female
## 6    37 Private       Maste… Married-civ-s… Exec-mana… Wife         White Female
## # ℹ 5 more variables: Column11 <dbl>, Column12 <dbl>, hoursperweek <dbl>,
## #   country <chr>, salary <chr>
data_small <- data %>%
  select(occupation, relationship, race, gender, hoursperweek, country, salary) %>%
  filter(country %in% c("United-States", "Canada"))

Pivoting

long to wide form

wide_hours_by_gender <- data_small %>%
group_by(occupation, gender) %>%
summarise(avg_hours = mean(hoursperweek, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = gender, values_from = avg_hours)

kable(wide_hours_by_gender)
occupation Female Male
? 29.92980 33.35027
Adm-clerical 36.78301 39.34810
Armed-Forces NA 40.66667
Craft-repair 39.75510 42.52112
Exec-managerial 41.41061 46.32423
Farming-fishing 37.38983 48.22141
Handlers-cleaners 35.53472 37.97708
Machine-op-inspct 38.87982 41.51958
Other-service 33.00254 35.21270
Priv-house-serv 30.25301 39.85714
Prof-specialty 39.51367 44.27804
Protective-serv 38.35211 43.60149
Sales 34.13737 44.25170
Tech-support 37.34375 40.79362
Transport-moving 37.12941 45.25177
###2

wide_salary_by_country <- data_small %>%
count(country, salary) %>%
pivot_wider(names_from = salary, values_from = n, values_fill = 0)

kable(wide_salary_by_country)
country <=50K >50K
Canada 82 39
United-States 21999 7171

wide to long form

long_hours_by_gender <- wide_hours_by_gender %>%
pivot_longer(cols = -occupation,
names_to = "gender",
values_to = "avg_hours")

kable(long_hours_by_gender)
occupation gender avg_hours
? Female 29.92980
? Male 33.35027
Adm-clerical Female 36.78301
Adm-clerical Male 39.34810
Armed-Forces Female NA
Armed-Forces Male 40.66667
Craft-repair Female 39.75510
Craft-repair Male 42.52112
Exec-managerial Female 41.41061
Exec-managerial Male 46.32423
Farming-fishing Female 37.38983
Farming-fishing Male 48.22141
Handlers-cleaners Female 35.53472
Handlers-cleaners Male 37.97708
Machine-op-inspct Female 38.87982
Machine-op-inspct Male 41.51958
Other-service Female 33.00254
Other-service Male 35.21270
Priv-house-serv Female 30.25301
Priv-house-serv Male 39.85714
Prof-specialty Female 39.51367
Prof-specialty Male 44.27804
Protective-serv Female 38.35211
Protective-serv Male 43.60149
Sales Female 34.13737
Sales Male 44.25170
Tech-support Female 37.34375
Tech-support Male 40.79362
Transport-moving Female 37.12941
Transport-moving Male 45.25177
data_small_unique <- data_small %>% distinct(occupation, gender, .keep_all = TRUE)

data_wide_unique <- data_small_unique %>%
group_by(occupation, gender) %>%
summarise(avg_hours = mean(hoursperweek, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = gender, values_from = avg_hours)

kable(data_wide_unique)
occupation Female Male
? 32 40
Adm-clerical 30 40
Armed-Forces NA 40
Craft-repair 40 40
Exec-managerial 40 13
Farming-fishing 45 35
Handlers-cleaners 40 40
Machine-op-inspct 25 40
Other-service 20 15
Priv-house-serv 30 25
Prof-specialty 50 60
Protective-serv 40 52
Sales 40 50
Tech-support 40 40
Transport-moving 30 40
data_long_unique <- data_wide_unique %>%
pivot_longer(cols = -occupation,
names_to = "gender",
values_to = "avg_hours")

kable(data_long_unique)
occupation gender avg_hours
? Female 32
? Male 40
Adm-clerical Female 30
Adm-clerical Male 40
Armed-Forces Female NA
Armed-Forces Male 40
Craft-repair Female 40
Craft-repair Male 40
Exec-managerial Female 40
Exec-managerial Male 13
Farming-fishing Female 45
Farming-fishing Male 35
Handlers-cleaners Female 40
Handlers-cleaners Male 40
Machine-op-inspct Female 25
Machine-op-inspct Male 40
Other-service Female 20
Other-service Male 15
Priv-house-serv Female 30
Priv-house-serv Male 25
Prof-specialty Female 50
Prof-specialty Male 60
Protective-serv Female 40
Protective-serv Male 52
Sales Female 40
Sales Male 50
Tech-support Female 40
Tech-support Male 40
Transport-moving Female 30
Transport-moving Male 40

Separating and Uniting

Separate a column

# Make a  column table3 "hoursperweek/salary"

data_with_pair <- data_small %>%
mutate(pair = paste0(hoursperweek, "/", salary))

head(data_with_pair %>% select(hoursperweek, salary, pair))
## # A tibble: 6 × 3
##   hoursperweek salary pair    
##          <dbl> <chr>  <chr>   
## 1           40 <=50K  40/<=50K
## 2           13 <=50K  13/<=50K
## 3           40 <=50K  40/<=50K
## 4           40 <=50K  40/<=50K
## 5           40 <=50K  40/<=50K
## 6           45 >50K   45/>50K
# Separate pair 

separated <- data_with_pair %>%
separate(pair, into = c("hoursperweek_num", "salary_str"), sep = "/", convert = TRUE)

head(separated %>% select(hoursperweek, salary, hoursperweek_num, salary_str))
## # A tibble: 6 × 4
##   hoursperweek salary hoursperweek_num salary_str
##          <dbl> <chr>             <int> <chr>     
## 1           40 <=50K                40 <=50K     
## 2           13 <=50K                13 <=50K     
## 3           40 <=50K                40 <=50K     
## 4           40 <=50K                40 <=50K     
## 5           40 <=50K                40 <=50K     
## 6           45 >50K                 45 >50K

Unite two columns

# Unite relationship and race into a column

united_underscore <- data_small %>%
unite(rel_race, relationship, race, sep = "_")

head(united_underscore %>% select(rel_race, gender, hoursperweek, country, salary))
## # A tibble: 6 × 5
##   rel_race            gender hoursperweek country       salary
##   <chr>               <chr>         <dbl> <chr>         <chr> 
## 1 Not-in-family_White Male             40 United-States <=50K 
## 2 Husband_White       Male             13 United-States <=50K 
## 3 Not-in-family_White Male             40 United-States <=50K 
## 4 Husband_Black       Male             40 United-States <=50K 
## 5 Wife_White          Female           40 United-States <=50K 
## 6 Husband_White       Male             45 United-States >50K
united_nosep <- data_small %>%
unite(relrace, relationship, race, sep = "", remove = FALSE)  # keep originals

head(united_nosep %>% select(relationship, race, relrace) )
## # A tibble: 6 × 3
##   relationship  race  relrace           
##   <chr>         <chr> <chr>             
## 1 Not-in-family White Not-in-familyWhite
## 2 Husband       White HusbandWhite      
## 3 Not-in-family White Not-in-familyWhite
## 4 Husband       Black HusbandBlack      
## 5 Wife          White WifeWhite         
## 6 Husband       White HusbandWhite

Missing Values

stocks <- tibble(
year   = c(2015, 2015, 2015, 2015, 2016, 2016, 2016),
qtr    = c(   1,    2,    3,    4,    2,    3,    4),
return = c(1.88, 0.59, 0.35,   NA, 0.92, 0.17, 2.66)
)

stocks
## # A tibble: 7 × 3
##    year   qtr return
##   <dbl> <dbl>  <dbl>
## 1  2015     1   1.88
## 2  2015     2   0.59
## 3  2015     3   0.35
## 4  2015     4  NA   
## 5  2016     2   0.92
## 6  2016     3   0.17
## 7  2016     4   2.66
treatment <- tribble(
~person,           ~treatment, ~response,
"Derrick Whitmore", 1,           7,
NA,                 2,          10,
NA,                 3,           9,
"Katherine Burke",  1,           4
)

treatment
## # A tibble: 4 × 3
##   person           treatment response
##   <chr>                <dbl>    <dbl>
## 1 Derrick Whitmore         1        7
## 2 <NA>                     2       10
## 3 <NA>                     3        9
## 4 Katherine Burke          1        4
treatment %>%
fill(person)
## # A tibble: 4 × 3
##   person           treatment response
##   <chr>                <dbl>    <dbl>
## 1 Derrick Whitmore         1        7
## 2 Derrick Whitmore         2       10
## 3 Derrick Whitmore         3        9
## 4 Katherine Burke          1        4