The data was collated from over 2000 female police officers in relation to the wearing of body armour, bra type and size, discomfort and ability to perform simple tasks. The data relates to officers from the police forces of England and Wales only, collected in 2016.

###Original Survey

cat(" Original Survey
I confirm that I have been informed about the aim and objectives of this research project and agree to give my inputs.  
- I understand that all information that I provide will be treated with the strictest confidence and my name will not be used in any report publication or presentation.
- I understand that the information I provide will be used by Cranfield University for the purpose of research only. The data will be stored on a secure network accessed only by authorised users in accordance with the Data Protection Act (1998).
- I understand that the results of the research may be published in scientific journals and an anonymised version of the data may be published in support of these results.
- I understand that I am not required to answer a question if I prefer not to provide a response.
- I understand that I am free to withdraw from this survey at any stage.
By ticking this box I confirm that I am happy to participate. [ ]  

Q1: What is your primary role?
- Routine Patrol
- Firearms
- Public Order
- Dog Handler
- Mounted Branch
- Traffic
- Other (please specify)

Q2: How many years have you been wearing body armour?
- < 2
- 2 to 5
- 6 to 10
- 11 to 15
- 16 to 20
- >20

Q3: What size bra do you normally wear?

Q4: What type of bra do you normally wear under your body armour?
- No bra
- Sports bra
- Underwired bra
- Padded/push up bra
- Other (please specify)

Q5: When standing wearing your body armour how comfortable is it?
- Very Comfortable
- Comfortable
- Uncomfortable
- Very Uncomfortable

Q6: On the images below please indicate any areas that are uncomfortable or rub when wearing your body armour.

Q7: Whilst wearing your body armour please rate the following actions (if applicable):
- Sitting in a car (passenger)
- Driving a car
- Walking
- Running
- Self defence techniques
- Using a pistol/TASER
- Using a carbine
- Riding (horse)
- Riding (pedal cycle)
- Riding (motorbike)

Q8: Please indicate which items of equipment you routinely carry on either your body armour or on a belt:
- Body Armour
- Belt
- Not applicable
- Handcuffs
- Irritant spray
- TASER
- Police radio
- Police mobile phone
- Personal mobile phone
- Notebook
- Baton
- Body worn video
- Other (please specify)
")
##  Original Survey
## I confirm that I have been informed about the aim and objectives of this research project and agree to give my inputs.  
## - I understand that all information that I provide will be treated with the strictest confidence and my name will not be used in any report publication or presentation.
## - I understand that the information I provide will be used by Cranfield University for the purpose of research only. The data will be stored on a secure network accessed only by authorised users in accordance with the Data Protection Act (1998).
## - I understand that the results of the research may be published in scientific journals and an anonymised version of the data may be published in support of these results.
## - I understand that I am not required to answer a question if I prefer not to provide a response.
## - I understand that I am free to withdraw from this survey at any stage.
## By ticking this box I confirm that I am happy to participate. [ ]  
## 
## Q1: What is your primary role?
## - Routine Patrol
## - Firearms
## - Public Order
## - Dog Handler
## - Mounted Branch
## - Traffic
## - Other (please specify)
## 
## Q2: How many years have you been wearing body armour?
## - < 2
## - 2 to 5
## - 6 to 10
## - 11 to 15
## - 16 to 20
## - >20
## 
## Q3: What size bra do you normally wear?
## 
## Q4: What type of bra do you normally wear under your body armour?
## - No bra
## - Sports bra
## - Underwired bra
## - Padded/push up bra
## - Other (please specify)
## 
## Q5: When standing wearing your body armour how comfortable is it?
## - Very Comfortable
## - Comfortable
## - Uncomfortable
## - Very Uncomfortable
## 
## Q6: On the images below please indicate any areas that are uncomfortable or rub when wearing your body armour.
## 
## Q7: Whilst wearing your body armour please rate the following actions (if applicable):
## - Sitting in a car (passenger)
## - Driving a car
## - Walking
## - Running
## - Self defence techniques
## - Using a pistol/TASER
## - Using a carbine
## - Riding (horse)
## - Riding (pedal cycle)
## - Riding (motorbike)
## 
## Q8: Please indicate which items of equipment you routinely carry on either your body armour or on a belt:
## - Body Armour
## - Belt
## - Not applicable
## - Handcuffs
## - Irritant spray
## - TASER
## - Police radio
## - Police mobile phone
## - Personal mobile phone
## - Notebook
## - Baton
## - Body worn video
## - Other (please specify)
# Load necessary libraries
pacman::p_load(pacman, Amelia, corrplot, tidyverse, ggplot2, GGally, reshape2, scales, ggbiplot)

# Load the data
file_path <- "Survey_data_UK_female_police-2016.csv"
data <- read.csv(file_path)

# Replace '#NULL!' with NA
data[data == "#NULL!"] <- NA

# Convert relevant columns to factors
categorical_cols <- c("Role", "Years_wearing", "Bra_size", "Bra_type", "Comfort", "sit_car", 
                      "Drive_car", "Walking", "Running", "SDAR", "Pistol_TASER", "Carbine", 
                      "Horse", "Bike", "Motorbike", "Handcuffs", "spray", "TASER", "Radio", 
                      "Mobile_police", "Mobile_personal", "Notebook", "Baton", "BWV", "Other")

data[categorical_cols] <- lapply(data[categorical_cols], as.factor)

# Convert columns to numeric, removing non-numeric characters where applicable
numeric_cols <- c("Front_RH_Breast", "Front_LH_breast", "LHS_breast", "Back_breast", 
                  "RHS_Abdo", "Front_Abdo", "LHS_Abdo", "Back_abdo", "RHS_shoulder", 
                  "Front_LH_shoulder", "Front_RH_shoulder", "LHS_shoulder", 
                  "Rear_LHS_shoulder", "Rear_RHS_Shoulder", "Total_discomfort", "other_count")

data[numeric_cols] <- lapply(data[numeric_cols], function(x) {
  x <- gsub("[^0-9.]", "", x)  # Remove non-numeric characters
  as.numeric(x)
})

# Recheck the structure after conversion
str(data)
## 'data.frame':    2634 obs. of  44 variables:
##  $ ID               : int  1 2 3 4 5 6 8 9 10 11 ...
##  $ Role             : Factor w/ 9 levels "Custody / Offender management",..: 2 8 8 8 8 2 2 4 8 8 ...
##  $ Years_wearing    : Factor w/ 6 levels "< 2","> 20","11 to 15",..: 3 6 4 3 4 3 4 6 5 5 ...
##  $ Bra_size         : Factor w/ 107 levels "","23B","23F",..: 29 61 47 77 46 46 62 60 77 24 ...
##  $ Bra_type         : Factor w/ 5 levels "No bra","Other",..: 5 5 5 5 5 5 5 5 2 3 ...
##  $ Comfort          : Factor w/ 4 levels "Comfortable",..: 3 2 2 1 1 2 2 2 1 2 ...
##  $ RHS_breast       : chr  NA NA NA NA ...
##  $ Front_RH_Breast  : num  NA NA 1 1 NA 1 NA 2 NA NA ...
##  $ Front_LH_breast  : num  NA NA 1 1 NA 1 NA 3 NA NA ...
##  $ LHS_breast       : num  NA NA 1 1 1 NA NA NA NA NA ...
##  $ Back_breast      : num  NA NA NA NA NA NA NA NA NA 2 ...
##  $ RHS_Abdo         : num  NA NA NA 1 NA NA 1 NA 1 1 ...
##  $ Front_Abdo       : num  NA NA 1 NA NA NA 2 NA 2 NA ...
##  $ LHS_Abdo         : num  NA NA 1 NA NA NA 1 NA 1 1 ...
##  $ Back_abdo        : num  NA 2 3 NA NA 2 NA NA 4 5 ...
##  $ RHS_shoulder     : num  NA NA NA NA 1 NA 1 NA NA NA ...
##  $ Front_LH_shoulder: num  NA 1 1 NA NA NA 1 NA NA NA ...
##  $ Front_RH_shoulder: num  NA 1 NA 1 NA NA 1 NA NA NA ...
##  $ LHS_shoulder     : num  NA NA NA NA NA NA 1 NA NA NA ...
##  $ Rear_LHS_shoulder: num  NA NA 1 NA NA 1 1 3 NA NA ...
##  $ Rear_RHS_Shoulder: num  NA NA NA 1 NA 1 1 1 NA NA ...
##  $ Total_discomfort : num  NA 4 1 6 2 6 1 9 8 9 ...
##  $ sit_car          : Factor w/ 5 levels "Difficult","Easy",..: 1 1 1 1 1 1 4 4 2 2 ...
##  $ Drive_car        : Factor w/ 5 levels "Difficult","Easy",..: 1 1 1 1 1 1 4 4 2 1 ...
##  $ Walking          : Factor w/ 5 levels "Difficult","Easy",..: 2 2 2 2 2 1 1 1 2 2 ...
##  $ Running          : Factor w/ 5 levels "Difficult","Easy",..: 4 1 4 2 1 4 4 4 4 4 ...
##  $ SDAR             : Factor w/ 5 levels "Difficult","Easy",..: 1 1 2 2 1 1 4 1 2 4 ...
##  $ Pistol_TASER     : Factor w/ 5 levels "Difficult","Easy",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Carbine          : Factor w/ 5 levels "Difficult","Easy",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Horse            : Factor w/ 4 levels "Difficult","Easy",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Bike             : Factor w/ 5 levels "Difficult","Easy",..: 3 3 3 3 3 3 3 3 3 4 ...
##  $ Motorbike        : Factor w/ 3 levels "Difficult","Not applicable",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Handcuffs        : Factor w/ 3 levels "Body armour",..: 3 3 1 3 NA 3 1 1 3 3 ...
##  $ spray            : Factor w/ 3 levels "Body armour",..: 3 3 1 3 NA 3 1 1 3 3 ...
##  $ TASER            : Factor w/ 3 levels "Body armour",..: 2 2 NA 2 NA 2 NA 2 2 2 ...
##  $ Radio            : Factor w/ 3 levels "Body armour",..: 1 1 1 3 NA 1 1 1 1 1 ...
##  $ Mobile_police    : Factor w/ 3 levels "Body armour",..: NA 1 1 2 NA 2 NA 1 1 1 ...
##  $ Mobile_personal  : Factor w/ 3 levels "Body armour",..: 1 1 1 2 NA 2 1 1 1 1 ...
##  $ Notebook         : Factor w/ 3 levels "Body armour",..: 1 1 1 1 NA 2 1 1 1 1 ...
##  $ Baton            : Factor w/ 3 levels "Body armour",..: 3 3 1 3 NA 3 1 1 3 3 ...
##  $ BWV              : Factor w/ 3 levels "Body armour",..: 2 2 NA 2 NA 2 NA 2 1 1 ...
##  $ Other            : Factor w/ 3 levels "Body armour",..: NA 3 NA 2 NA NA NA NA NA 1 ...
##  $ Q8_10_TEXT       : chr  "" "First aid pouch" "" "" ...
##  $ other_count      : num  NA 1 NA NA NA NA NA NA NA 2 ...
# Summary of the cleaned dataset
summary(data)
##        ID                                    Role       Years_wearing
##  Min.   :   1.0   Routine Patrol               :2013   < 2     :180  
##  1st Qu.: 706.2   Detective                    : 311   > 20    :101  
##  Median :1571.5   Other                        :  83   11 to 15:788  
##  Mean   :1575.3   Traffic                      :  66   16 to 20:266  
##  3rd Qu.:2446.8   Custody / Offender management:  59   2 to 5  :382  
##  Max.   :3211.0   (Other)                      : 101   6 to 10 :915  
##                   NA's                         :   1   NA's    :  2  
##     Bra_size                    Bra_type                  Comfort    
##  34B    : 223   No bra              :  12   Comfortable       : 817  
##  36C    : 212   Other               :  87   Uncomfortable     :1378  
##  34C    : 208   Padded / push up bra: 221   Very comfortable  :  37  
##  34D    : 152   Sports bra          : 445   Very uncomfortable: 381  
##  36B    : 148   Underwired bra      :1858   NA's              :  21  
##  34DD   : 145   NA's                :  11                            
##  (Other):1546                                                        
##   RHS_breast        Front_RH_Breast Front_LH_breast   LHS_breast   
##  Length:2634        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Mode  :character   Median :1.000   Median :1.000   Median :1.000  
##                     Mean   :1.282   Mean   :1.332   Mean   :1.218  
##                     3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:1.000  
##                     Max.   :6.000   Max.   :7.000   Max.   :4.000  
##                     NA's   :1526    NA's   :1471    NA's   :1698   
##   Back_breast       RHS_Abdo       Front_Abdo      LHS_Abdo    
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.00   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.00   Median :1.000  
##  Mean   :1.915   Mean   :1.163   Mean   :1.99   Mean   :1.151  
##  3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.00   3rd Qu.:1.000  
##  Max.   :6.000   Max.   :3.000   Max.   :8.00   Max.   :4.000  
##  NA's   :2117    NA's   :2259    NA's   :2019   NA's   :2284   
##    Back_abdo       RHS_shoulder   Front_LH_shoulder Front_RH_shoulder
##  Min.   : 1.000   Min.   :1.000   Min.   :1.000     Min.   :1.000    
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:1.000     1st Qu.:1.000    
##  Median : 2.000   Median :1.000   Median :1.000     Median :1.000    
##  Mean   : 1.946   Mean   :1.058   Mean   :1.113     Mean   :1.096    
##  3rd Qu.: 2.000   3rd Qu.:1.000   3rd Qu.:1.000     3rd Qu.:1.000    
##  Max.   :10.000   Max.   :3.000   Max.   :5.000     Max.   :5.000    
##  NA's   :1826     NA's   :2360    NA's   :1988      NA's   :2001     
##   LHS_shoulder   Rear_LHS_shoulder Rear_RHS_Shoulder Total_discomfort
##  Min.   :1.000   Min.   :1.000     Min.   :1.000     Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.:1.000     1st Qu.:1.000     1st Qu.: 2.000  
##  Median :1.000   Median :1.000     Median :1.000     Median : 4.000  
##  Mean   :1.098   Mean   :1.218     Mean   :1.205     Mean   : 4.395  
##  3rd Qu.:1.000   3rd Qu.:1.000     3rd Qu.:1.000     3rd Qu.: 6.000  
##  Max.   :3.000   Max.   :5.000     Max.   :4.000     Max.   :13.000  
##  NA's   :2358    NA's   :2019      NA's   :2043      NA's   :297     
##            sit_car              Drive_car              Walking    
##  Difficult     :1200   Difficult     :1302   Difficult     : 556  
##  Easy          : 928   Easy          : 633   Easy          :1785  
##  Not applicable:  11   Not applicable:  43   Not applicable:   3  
##  Very difficult: 330   Very difficult: 506   Very difficult:  64  
##  Very easy     :  44   Very easy     :  31   Very easy     :  82  
##  NA's          : 121   NA's          : 119   NA's          : 144  
##                                                                   
##            Running                 SDAR              Pistol_TASER 
##  Difficult     :1127   Difficult     :1392   Difficult     :  76  
##  Easy          : 186   Easy          : 563   Easy          : 101  
##  Not applicable:  12   Not applicable:  61   Not applicable:2222  
##  Very difficult:1177   Very difficult: 443   Very difficult:  30  
##  Very easy     :  12   Very easy     :  14   Very easy     :   7  
##  NA's          : 120   NA's          : 161   NA's          : 198  
##                                                                   
##            Carbine                Horse                  Bike     
##  Difficult     :  25   Difficult     :   5   Difficult     : 192  
##  Easy          :  30   Easy          :   8   Easy          :  46  
##  Not applicable:2342   Not applicable:2395   Not applicable:1981  
##  Very difficult:  12   Very difficult:   8   Very difficult: 218  
##  Very easy     :   1   NA's          : 218   Very easy     :   3  
##  NA's          : 224                         NA's          : 194  
##                                                                   
##           Motorbike             Handcuffs               spray     
##  Difficult     :   2   Body armour   : 785   Body armour   : 772  
##  Not applicable:2399   Not applicable: 117   Not applicable: 134  
##  Very difficult:   8   Utility belt  :1575   Utility belt  :1564  
##  NA's          : 225   NA's          : 157   NA's          : 164  
##                                                                   
##                                                                   
##                                                                   
##             TASER                 Radio             Mobile_police 
##  Body armour   : 123   Body armour   :2340   Body armour   : 942  
##  Not applicable:2003   Not applicable:  33   Not applicable:1228  
##  Utility belt  :  64   Utility belt  : 105   Utility belt  : 118  
##  NA's          : 444   NA's          : 156   NA's          : 346  
##                                                                   
##                                                                   
##                                                                   
##        Mobile_personal           Notebook               Baton     
##  Body armour   :1326   Body armour   :1832   Body armour   : 751  
##  Not applicable: 861   Not applicable: 327   Not applicable: 127  
##  Utility belt  : 115   Utility belt  : 226   Utility belt  :1571  
##  NA's          : 332   NA's          : 249   NA's          : 185  
##                                                                   
##                                                                   
##                                                                   
##              BWV                  Other       Q8_10_TEXT         other_count   
##  Body armour   : 984   Body armour   : 307   Length:2634        Min.   :1.000  
##  Not applicable:1275   Not applicable: 394   Class :character   1st Qu.:1.000  
##  Utility belt  :  23   Utility belt  : 153   Mode  :character   Median :1.000  
##  NA's          : 352   NA's          :1780                      Mean   :1.969  
##                                                                 3rd Qu.:3.000  
##                                                                 Max.   :8.000  
##                                                                 NA's   :2115
# Check for missing values
missing_values <- colSums(is.na(data))
print(missing_values)
##                ID              Role     Years_wearing          Bra_size 
##                 0                 1                 2                 0 
##          Bra_type           Comfort        RHS_breast   Front_RH_Breast 
##                11                21              1732              1526 
##   Front_LH_breast        LHS_breast       Back_breast          RHS_Abdo 
##              1471              1698              2117              2259 
##        Front_Abdo          LHS_Abdo         Back_abdo      RHS_shoulder 
##              2019              2284              1826              2360 
## Front_LH_shoulder Front_RH_shoulder      LHS_shoulder Rear_LHS_shoulder 
##              1988              2001              2358              2019 
## Rear_RHS_Shoulder  Total_discomfort           sit_car         Drive_car 
##              2043               297               121               119 
##           Walking           Running              SDAR      Pistol_TASER 
##               144               120               161               198 
##           Carbine             Horse              Bike         Motorbike 
##               224               218               194               225 
##         Handcuffs             spray             TASER             Radio 
##               157               164               444               156 
##     Mobile_police   Mobile_personal          Notebook             Baton 
##               346               332               249               185 
##               BWV             Other        Q8_10_TEXT       other_count 
##               352              1780                 0              2115
# Visualise missing values
missmap(data, main = "Missing values in the dataset after cleaning", col = c("yellow", "black"), legend = FALSE)

# Exploratory Data Analysis (EDA)

# Distribution of categorical variables
for (col in categorical_cols) {
  print(
    ggplot(data, aes(x = !!sym(col))) + 
      geom_bar(fill = "steelblue", na.rm = TRUE) + 
      theme_minimal() + 
      labs(title = paste("Distribution of", col)) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels
  )
}

# Distribution of numerical variables
for (col in numeric_cols) {
  print(
    ggplot(data, aes(x = !!sym(col))) + 
      geom_histogram(fill = "lightgreen", color = "black", bins = 30, na.rm = TRUE) + 
      theme_minimal() + 
      labs(title = paste("Distribution of", col))
  )
}

# Bra Size Distribution
ggplot(data, aes(x = Bra_size)) +
  geom_bar(fill = "steelblue", na.rm = TRUE) +
  theme_minimal() +
  labs(title = "Distribution of Bra Size") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))  # Rotate x-axis labels

# Breast Size vs. Discomfort (Total Discomfort Score)
ggplot(data, aes(x = Bra_size, y = Total_discomfort, fill = Bra_size)) +
  geom_boxplot(na.rm = TRUE) +
  theme_minimal() +
  labs(title = "Total Discomfort by Bra Size", x = "Bra Size", y = "Total Discomfort Score") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))  # Rotate x-axis labels

# Correlation Between Discomfort and Equipment Used
equipment_cols <- c("Pistol_TASER", "Carbine", "Radio", "Mobile_police", "Mobile_personal", 
                    "Notebook", "Baton", "TASER", "spray", "BWV", "Other")

for (col in equipment_cols) {
  print(
    ggplot(data, aes_string(x = col, y = "Total_discomfort", fill = col)) +
      geom_boxplot(na.rm = TRUE) +
      theme_minimal() +
      labs(title = paste("Total Discomfort by", col), x = col, y = "Total Discomfort Score") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels
  )
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Comparison of Discomfort Across Roles
ggplot(data, aes(x = Role, y = Total_discomfort, fill = Role)) +
  geom_boxplot(na.rm = TRUE) +
  theme_minimal() +
  labs(title = "Total Discomfort by Role", x = "Role", y = "Total Discomfort Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels

# Bra Type and Discomfort
ggplot(data, aes(x = Bra_type, y = Total_discomfort, fill = Bra_type)) +
  geom_boxplot(na.rm = TRUE) +
  theme_minimal() +
  labs(title = "Total Discomfort by Bra Type", x = "Bra Type", y = "Total Discomfort Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels

# Handling missing values before correlation analysis
# Removing columns with too many NAs
numeric_cols_clean <- numeric_cols[colSums(is.na(data[, numeric_cols])) < 0.5 * nrow(data)]

# Correlation matrix of numerical variables
if (length(numeric_cols_clean) > 1) {
  corr_matrix <- cor(data[, numeric_cols_clean], use = "complete.obs")
  corrplot(corr_matrix, method = "circle", type = "lower", tl.cex = 0.7)
} else {
  print("Not enough data to compute a correlation matrix.")
}
## [1] "Not enough data to compute a correlation matrix."