Six variables with ten records

library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.6     v dplyr   1.0.8
v tidyr   1.2.0     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
# Raw data: six parallel vectors, one element per respondent (10 each).
# Respondent first names
name <- c(
  "Chris", "Knut", "Edgar", "Tunde", "Tayo",
  "Sade", "Gbemi", "Ify", "Kunmi", "John"
)
# Sex, coded "M" / "F"
sex <- c("M", "M", "M", "M", "F", "F", "F", "F", "F", "M")
# Age (presumably in years -- confirm)
age <- c(23, 24, 21, 25, 33, 31, 34, 20, 31, 22)
# Height (presumably centimetres: the BMI step divides it by 100 -- confirm)
height <- c(180, 170, 188, 185, 160, 168, 172, 162, 165, 200)
# Weight (presumably kilograms -- confirm)
weight <- c(80, 60, 82, 72, 82, 82, 81, 84, 83, 75)
# Education level as a numeric code 1-3 (meaning of the codes is not given here)
education <- c(3, 2, 2, 3, 3, 3, 3, 2, 1, 1)

# Combine the six vectors defined above into one 10-row tibble.
# The vectors are reused rather than retyped so there is a single copy of the
# data; `education` is stored under the shorter column name `educ`.
dt <- tibble(
  name = name,
  sex = sex,
  age = age,
  height = height,
  weight = weight,
  educ = education
)

# Print the tibble to inspect it
dt
# A tibble: 10 x 6
   name  sex     age height weight  educ
   <chr> <chr> <dbl>  <dbl>  <dbl> <dbl>
 1 Chris M        23    180     80     3
 2 Knut  M        24    170     60     2
 3 Edgar M        21    188     82     2
 4 Tunde M        25    185     72     3
 5 Tayo  F        33    160     82     3
 6 Sade  F        31    168     82     3
 7 Gbemi F        34    172     81     3
 8 Ify   F        20    162     84     2
 9 Kunmi F        31    165     83     1
10 John  M        22    200     75     1
# Save the raw table as data.csv (path is relative to the working directory)
readr::write_csv(dt, "data.csv")

Recode age to age group

# Bin age into three bands and store the result as a factor column
dt <- dt %>%
  mutate(
    # Intervals are [20, 24], (24, 29] and (29, 34]; ages outside them become NA
    age_group = cut(
      age,
      breaks = c(20, 24, 29, 34),
      labels = c("20-24", "25-29", "30-34"),
      include.lowest = TRUE
    )
  )

Compute BMI from height and weight

# Add a bmi column: weight divided by the square of (height / 100)
dt <- mutate(dt, bmi = weight / (height / 100)^2)

# Save the table, which now carries the bmi column, as data2.csv
readr::write_csv(dt, "data2.csv")

Frequencies

Name

Sex

Age

Height

Weight

Education

Age group

Body Mass Index

Cross tabulation of sex and education

Interpretation of the results of the cross tabulation and the p value

  1. Chi-squared = 0.5333
  2. p value = 0.7659
  3. degrees of freedom = 2

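The p-value (0.7659) is greater than 0.05, so we fail to reject the null hypothesis that sex and education are independent; that is, there is no evidence of an association between them. However, the sample size is only 10, and one text states that “Chi-squared test is invalid if we have fewer than 5 observations in a cell”. The data used here do not meet this criterion (every cell contains fewer than 5 observations), so the result should be interpreted with caution. The contingency table below is from the data used: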


Performing the chi-squared test in R gives the same result:

# Education codes and sex for the ten respondents
education <- c(3, 2, 2, 3, 3, 3, 3, 2, 1, 1)
sex <- c("M", "M", "M", "M", "F", "F", "F", "F", "F", "M")

# Factors whose levels follow order of first appearance (3, 2, 1 and M, F)
ed <- factor(education, levels = unique(education))
sx <- factor(sex, levels = unique(sex))

# Contingency table: sex in rows, education in columns.
# Built directly from the factors so that no object named `table` masks
# base::table(); deparse.level = 0 leaves the two dimensions unlabelled.
contingency_table <- table(sx, ed, deparse.level = 0)

# Pearson chi-squared test of independence.
# `correct` (continuity correction) only applies to 2 x 2 tables, so it has no
# effect on this 2 x 3 table. With counts this small R warns that the
# approximation may be incorrect; fisher.test() is an exact alternative.
chisq.test(contingency_table, correct = TRUE)
Warning in chisq.test(contingency_table, correct = TRUE): Chi-squared
approximation may be incorrect

    Pearson's Chi-squared test

data:  contingency_table
X-squared = 0.53333, df = 2, p-value = 0.7659