# Read in HHS data
# Household survey (HHS) exports stored under data/raw.
fastfield <- read_csv(here("data", "raw", "hhs_fastfield.csv"))
#kobo_1 <- read_excel(here("data", "raw", "hhs_kobo_mod_1.xlsx")) # Same as fp
kobo_1_fp <- read_csv(here("data", "raw", "hhs_fp.csv"))
#kobo_2 <- read_excel(here("data", "raw", "hhs_kobo_mod_2.xlsx")) # Same as fp (divided for hon and phi)
# devtools::install_github("datadotworld/data.world-r")
# devtools::install_github("datadotworld/dwapi-r")

# SECURITY: the data.world API token must never be committed to the script.
# It is read from the DW_AUTH_TOKEN environment variable (e.g. set in
# ~/.Renviron). The token that used to be hard-coded here should be treated
# as leaked and revoked in the data.world account settings.
dw_auth_token <- Sys.getenv("DW_AUTH_TOKEN")
if (!nzchar(dw_auth_token)) {
  stop(
    "Environment variable DW_AUTH_TOKEN is not set; ",
    "it is required to query data.world.",
    call. = FALSE
  )
}
dwapi::configure(auth_token = dw_auth_token)

# The Honduras table is queried from the data.world dataset
# "rare/household-surveys". Both calls are namespaced so they work whether or
# not the data.world package is attached.
sql_stmt <- data.world::qry_sql("SELECT * FROM hhs_fp_hnd")
kobo_2_fp_hon <- data.world::query(
  sql_stmt, "rare/household-surveys"
)
kobo_2_fp_phi <- read_csv(here("data", "raw", "hhs_fp_phl.csv"))
#kobo_3 <- read_excel(here("data", "raw", "hhs_kobo_mod_3.xlsx")) #Check which sheets are relevant!



# Read PPP-adjusted international dollars dataset
# The first 4 lines of the file are skipped before the header row is read.
ppp <- read_csv(here("data", "raw", "PPP_world_bank", "PPP_world_bank.csv"),
                skip = 4)

FF Household Surveys Conducted

# Sites
# Build a normalised site key `ma_name` (spaces -> underscores, lower case)
# in every dataset, then drop the rows that have no site.
fastfield <- fastfield %>%
  mutate(ma_name = tolower(gsub(" ", "_", maa))) %>%
  filter(!is.na(ma_name))

kobo_1_fp <- kobo_1_fp %>%
  mutate(ma_name = tolower(gsub(" ", "_", ma_name))) %>%
  filter(!is.na(ma_name))

kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(ma_name = tolower(gsub(" ", "_", ma_name))) %>%
  filter(!is.na(ma_name))

# The Philippines export keeps the site name in `level4_name`.
kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(ma_name = tolower(gsub(" ", "_", level4_name))) %>%
  filter(!is.na(ma_name))

# Get years
# Survey year is taken from the submission timestamp of each Kobo dataset.
kobo_1_fp <- mutate(kobo_1_fp, year = year(ymd(submission_time)))
kobo_2_fp_hon <- mutate(kobo_2_fp_hon, year = year(ymd(submission_time)))
kobo_2_fp_phi <- mutate(kobo_2_fp_phi, year = year(ymd(submission_time)))

# Country
# Single ISO3 -> country-name lookup shared by every survey source.
# Previously each dataset carried its own copy of this table and they had
# drifted apart (e.g. "GTM" was missing for kobo_1_fp even though "Guatemala"
# is filtered out of that dataset further down). Codes that are absent from a
# dataset are simply not used by recode(); unknown codes are left unchanged.
iso3_to_country <- c(
  "HND" = "Honduras",
  "BRA" = "Brazil",
  "FSM" = "Federated States of Micronesia",
  "IDN" = "Indonesia",
  "PLW" = "Palau",
  "GTM" = "Guatemala",
  "MOZ" = "Mozambique",
  "PHL" = "Philippines"
)

# FastField stores the code in `iso3`; rename it so all sources share `country`.
fastfield <- fastfield %>%
  rename(country = iso3) %>%
  mutate(country = recode(country, !!!iso3_to_country))

kobo_1_fp <- kobo_1_fp %>%
  filter(!is.na(country)) %>% # Remove rows with NA in 'country'
  mutate(country = recode(country, !!!iso3_to_country))

kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(country = recode(country, !!!iso3_to_country))

kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(country = recode(country, !!!iso3_to_country))



# Stack the site / year / country keys of all four survey sources.
combined_hhs <- rbind(
  fastfield %>% select(ma_name, year, country),
  kobo_1_fp %>% select(ma_name, year, country),
  kobo_2_fp_hon %>% select(ma_name, year, country),
  kobo_2_fp_phi %>% select(ma_name, year, country)
)

# Number of surveys per site, year and country.
# `.groups = "drop_last"` is what summarise() did implicitly before; spelling
# it out keeps the resulting grouping unchanged and silences the message.
survey_count <- combined_hhs %>%
  group_by(ma_name, year, country) %>%
  summarise(survey_count = n(), .groups = "drop_last") %>%
  arrange(desc(survey_count))

# Per site: which years were surveyed, how many surveys in total, and in how
# many distinct years. `year_count` is computed directly with n_distinct()
# instead of re-splitting the pasted `years` string with sapply(), which was
# type-unstable (it returns a list for empty input).
survey_count_by_ma <- survey_count %>%
  group_by(ma_name, country) %>%
  summarise(
    years = paste(unique(year), collapse = ", "),
    total_survey_count = sum(survey_count),
    year_count = n_distinct(year),
    .groups = "drop_last"
  ) %>%
  arrange(desc(year_count))

# Total number of surveys per year and country (ungrouped result).
survey_summary <- survey_count %>%
  group_by(year, country) %>%
  summarise(total_surveys = sum(survey_count), .groups = "drop")

# Bar plot --> DONT LIKE THIS PLOT
# ggplot(survey_summary, aes(x = year, y = total_surveys, fill = country)) +
#   geom_bar(stat = "identity", position = "dodge") +
#   labs(title = "Total Number of Surveys per Year by Country",
#        x = "Year",
#        y = "Total Surveys") +
#   theme_minimal() +
#   theme(legend.title = element_blank()) +
#   scale_fill_brewer(palette = "Set3")

# Range of years
# na.rm = TRUE: a single unparsed submission date (NA year) would otherwise
# make min()/max() return NA and seq() fail.
year_range <- range(survey_summary$year, na.rm = TRUE)
years <- seq(year_range[1], year_range[2], by = 1)

# Stacked bar plot
ggplot(survey_summary, aes(x = year, y = total_surveys, fill = country)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_x_continuous(breaks = years) +  # Show all years on x-axis
  labs(title = "Total Number of Surveys per Year by Country",
       x = "Year",
       y = "Total Surveys") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_fill_brewer(palette = "Set3")



# Faceted plot for each country
# One panel per country with its own y scale; the legend is dropped because
# the facet strips already name the country.
ggplot(survey_summary, aes(x = year, y = total_surveys, fill = country)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_x_continuous(breaks = years) +
  labs(title = "Total Number of Surveys per Year by Country",
       x = "Year",
       y = "Total Surveys") +
  theme_minimal() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set3") +
  facet_wrap(~ country, scales = "free_y")

 

# 40.000 surveys
# Remove countries with one year or few surveys
# (rows whose country is NA are kept, as `%in%` never returns NA)

fastfield <- filter(fastfield, !(country %in% c("Palau", "Guatemala")))
kobo_1_fp <- filter(kobo_1_fp, !(country %in% c("Palau", "Guatemala")))

HH Income and Income Source Data

Question 83: Household Income

What is your household’s average monthly income from all activities, including salaried and non-salaried labor?

(local currency) → PPP-adjusted income (World Bank)

Question 14: Income Source Data

Please indicate from the following list all sources of household income this year, and estimate their contribution to overall household income as a percentage.

  1. Farming (growing crops and/ or raising livestock)

  2. Harvesting forest products (timber, charcoal, non-timber forest products)

  3. Artisanal Fishing (capture of fish, crustaceans and other marine resources for sale or consumption)

  4. Fish buying or trading

  5. Fish processing (cleaning, drying, etc)

  6. Aquaculture (fish, shrimp, seaweed, etc.; includes grow out pens/ raising small captured fish for sale at larger size)

  7. Extraction of non-renewable marine resources (includes coral mining, sand mining, harvest of live coral, etc.)

  8. Marine tourism (includes scuba, snorkel, glass-bottom boats, sailing, water-skiing, jet skis, etc.)

  9. Other wage labor (e.g. teacher, medical, handcraft professionals, forestry or mining concession worker)

  10. Industrial Fishing (capture of fish, crustaceans and other marine resources for sale or consumption)

  11. Other (specify)

# Q83

# Find data
# Locate the household-income column in each dataset; the echoed output below
# shows it is numbered 83 in FastField / Kobo 1 and 57 in the two Kobo 2 sets.
grep("83", colnames(fastfield), value = TRUE)
#> [1] "83_hh_average_income"
grep("average_income", colnames(kobo_1_fp), value = TRUE) # 83_hh_average_income
#> [1] "83_hh_average_income"
grep("average_income", colnames(kobo_2_fp_hon), value = TRUE) # 57_hh_average_income
#> [1] "57_hh_average_income"
grep("average_income", colnames(kobo_2_fp_phi), value = TRUE)
#> [1] "57_hh_average_income"


# Rename
# Give the Kobo 2 income column the same name as in the other datasets.
kobo_2_fp_hon <- rename(
  kobo_2_fp_hon,
  `83_hh_average_income` = `57_hh_average_income`
)
kobo_2_fp_phi <- rename(
  kobo_2_fp_phi,
  `83_hh_average_income` = `57_hh_average_income`
)



# Answers: local currency

# Compare the country labels used in the survey data with those in the PPP
# table so the two can be matched.
unique(fastfield[["country"]])
#> [1] "Honduras"                       "Brazil"                        
#> [3] "Federated States of Micronesia" "Indonesia"                     
#> [5] "Mozambique"                     "Philippines"
ppp[["Country Name"]] #Brazil, Micronesia, Fed. Sts., Honduras, Mozambique, Philippines
#>   [1] "Aruba"                                               
#>   [2] "Africa Eastern and Southern"                         
#>   [3] "Afghanistan"                                         
#>   [4] "Africa Western and Central"                          
#>   [5] "Angola"                                              
#>   [6] "Albania"                                             
#>   [7] "Andorra"                                             
#>   [8] "Arab World"                                          
#>   [9] "United Arab Emirates"                                
#>  [10] "Argentina"                                           
#>  [11] "Armenia"                                             
#>  [12] "American Samoa"                                      
#>  [13] "Antigua and Barbuda"                                 
#>  [14] "Australia"                                           
#>  [15] "Austria"                                             
#>  [16] "Azerbaijan"                                          
#>  [17] "Burundi"                                             
#>  [18] "Belgium"                                             
#>  [19] "Benin"                                               
#>  [20] "Burkina Faso"                                        
#>  [21] "Bangladesh"                                          
#>  [22] "Bulgaria"                                            
#>  [23] "Bahrain"                                             
#>  [24] "Bahamas, The"                                        
#>  [25] "Bosnia and Herzegovina"                              
#>  [26] "Belarus"                                             
#>  [27] "Belize"                                              
#>  [28] "Bermuda"                                             
#>  [29] "Bolivia"                                             
#>  [30] "Brazil"                                              
#>  [31] "Barbados"                                            
#>  [32] "Brunei Darussalam"                                   
#>  [33] "Bhutan"                                              
#>  [34] "Botswana"                                            
#>  [35] "Central African Republic"                            
#>  [36] "Canada"                                              
#>  [37] "Central Europe and the Baltics"                      
#>  [38] "Switzerland"                                         
#>  [39] "Channel Islands"                                     
#>  [40] "Chile"                                               
#>  [41] "China"                                               
#>  [42] "Cote d'Ivoire"                                       
#>  [43] "Cameroon"                                            
#>  [44] "Congo, Dem. Rep."                                    
#>  [45] "Congo, Rep."                                         
#>  [46] "Colombia"                                            
#>  [47] "Comoros"                                             
#>  [48] "Cabo Verde"                                          
#>  [49] "Costa Rica"                                          
#>  [50] "Caribbean small states"                              
#>  [51] "Cuba"                                                
#>  [52] "Curacao"                                             
#>  [53] "Cayman Islands"                                      
#>  [54] "Cyprus"                                              
#>  [55] "Czechia"                                             
#>  [56] "Germany"                                             
#>  [57] "Djibouti"                                            
#>  [58] "Dominica"                                            
#>  [59] "Denmark"                                             
#>  [60] "Dominican Republic"                                  
#>  [61] "Algeria"                                             
#>  [62] "East Asia & Pacific (excluding high income)"         
#>  [63] "Early-demographic dividend"                          
#>  [64] "East Asia & Pacific"                                 
#>  [65] "Europe & Central Asia (excluding high income)"       
#>  [66] "Europe & Central Asia"                               
#>  [67] "Ecuador"                                             
#>  [68] "Egypt, Arab Rep."                                    
#>  [69] "Euro area"                                           
#>  [70] "Eritrea"                                             
#>  [71] "Spain"                                               
#>  [72] "Estonia"                                             
#>  [73] "Ethiopia"                                            
#>  [74] "European Union"                                      
#>  [75] "Fragile and conflict affected situations"            
#>  [76] "Finland"                                             
#>  [77] "Fiji"                                                
#>  [78] "France"                                              
#>  [79] "Faroe Islands"                                       
#>  [80] "Micronesia, Fed. Sts."                               
#>  [81] "Gabon"                                               
#>  [82] "United Kingdom"                                      
#>  [83] "Georgia"                                             
#>  [84] "Ghana"                                               
#>  [85] "Gibraltar"                                           
#>  [86] "Guinea"                                              
#>  [87] "Gambia, The"                                         
#>  [88] "Guinea-Bissau"                                       
#>  [89] "Equatorial Guinea"                                   
#>  [90] "Greece"                                              
#>  [91] "Grenada"                                             
#>  [92] "Greenland"                                           
#>  [93] "Guatemala"                                           
#>  [94] "Guam"                                                
#>  [95] "Guyana"                                              
#>  [96] "High income"                                         
#>  [97] "Hong Kong SAR, China"                                
#>  [98] "Honduras"                                            
#>  [99] "Heavily indebted poor countries (HIPC)"              
#> [100] "Croatia"                                             
#> [101] "Haiti"                                               
#> [102] "Hungary"                                             
#> [103] "IBRD only"                                           
#> [104] "IDA & IBRD total"                                    
#> [105] "IDA total"                                           
#> [106] "IDA blend"                                           
#> [107] "Indonesia"                                           
#> [108] "IDA only"                                            
#> [109] "Isle of Man"                                         
#> [110] "India"                                               
#> [111] "Not classified"                                      
#> [112] "Ireland"                                             
#> [113] "Iran, Islamic Rep."                                  
#> [114] "Iraq"                                                
#> [115] "Iceland"                                             
#> [116] "Israel"                                              
#> [117] "Italy"                                               
#> [118] "Jamaica"                                             
#> [119] "Jordan"                                              
#> [120] "Japan"                                               
#> [121] "Kazakhstan"                                          
#> [122] "Kenya"                                               
#> [123] "Kyrgyz Republic"                                     
#> [124] "Cambodia"                                            
#> [125] "Kiribati"                                            
#> [126] "St. Kitts and Nevis"                                 
#> [127] "Korea, Rep."                                         
#> [128] "Kuwait"                                              
#> [129] "Latin America & Caribbean (excluding high income)"   
#> [130] "Lao PDR"                                             
#> [131] "Lebanon"                                             
#> [132] "Liberia"                                             
#> [133] "Libya"                                               
#> [134] "St. Lucia"                                           
#> [135] "Latin America & Caribbean"                           
#> [136] "Least developed countries: UN classification"        
#> [137] "Low income"                                          
#> [138] "Liechtenstein"                                       
#> [139] "Sri Lanka"                                           
#> [140] "Lower middle income"                                 
#> [141] "Low & middle income"                                 
#> [142] "Lesotho"                                             
#> [143] "Late-demographic dividend"                           
#> [144] "Lithuania"                                           
#> [145] "Luxembourg"                                          
#> [146] "Latvia"                                              
#> [147] "Macao SAR, China"                                    
#> [148] "St. Martin (French part)"                            
#> [149] "Morocco"                                             
#> [150] "Monaco"                                              
#> [151] "Moldova"                                             
#> [152] "Madagascar"                                          
#> [153] "Maldives"                                            
#> [154] "Middle East & North Africa"                          
#> [155] "Mexico"                                              
#> [156] "Marshall Islands"                                    
#> [157] "Middle income"                                       
#> [158] "North Macedonia"                                     
#> [159] "Mali"                                                
#> [160] "Malta"                                               
#> [161] "Myanmar"                                             
#> [162] "Middle East & North Africa (excluding high income)"  
#> [163] "Montenegro"                                          
#> [164] "Mongolia"                                            
#> [165] "Northern Mariana Islands"                            
#> [166] "Mozambique"                                          
#> [167] "Mauritania"                                          
#> [168] "Mauritius"                                           
#> [169] "Malawi"                                              
#> [170] "Malaysia"                                            
#> [171] "North America"                                       
#> [172] "Namibia"                                             
#> [173] "New Caledonia"                                       
#> [174] "Niger"                                               
#> [175] "Nigeria"                                             
#> [176] "Nicaragua"                                           
#> [177] "Netherlands"                                         
#> [178] "Norway"                                              
#> [179] "Nepal"                                               
#> [180] "Nauru"                                               
#> [181] "New Zealand"                                         
#> [182] "OECD members"                                        
#> [183] "Oman"                                                
#> [184] "Other small states"                                  
#> [185] "Pakistan"                                            
#> [186] "Panama"                                              
#> [187] "Peru"                                                
#> [188] "Philippines"                                         
#> [189] "Palau"                                               
#> [190] "Papua New Guinea"                                    
#> [191] "Poland"                                              
#> [192] "Pre-demographic dividend"                            
#> [193] "Puerto Rico"                                         
#> [194] "Korea, Dem. People's Rep."                           
#> [195] "Portugal"                                            
#> [196] "Paraguay"                                            
#> [197] "West Bank and Gaza"                                  
#> [198] "Pacific island small states"                         
#> [199] "Post-demographic dividend"                           
#> [200] "French Polynesia"                                    
#> [201] "Qatar"                                               
#> [202] "Romania"                                             
#> [203] "Russian Federation"                                  
#> [204] "Rwanda"                                              
#> [205] "South Asia"                                          
#> [206] "Saudi Arabia"                                        
#> [207] "Sudan"                                               
#> [208] "Senegal"                                             
#> [209] "Singapore"                                           
#> [210] "Solomon Islands"                                     
#> [211] "Sierra Leone"                                        
#> [212] "El Salvador"                                         
#> [213] "San Marino"                                          
#> [214] "Somalia"                                             
#> [215] "Serbia"                                              
#> [216] "Sub-Saharan Africa (excluding high income)"          
#> [217] "South Sudan"                                         
#> [218] "Sub-Saharan Africa"                                  
#> [219] "Small states"                                        
#> [220] "Sao Tome and Principe"                               
#> [221] "Suriname"                                            
#> [222] "Slovak Republic"                                     
#> [223] "Slovenia"                                            
#> [224] "Sweden"                                              
#> [225] "Eswatini"                                            
#> [226] "Sint Maarten (Dutch part)"                           
#> [227] "Seychelles"                                          
#> [228] "Syrian Arab Republic"                                
#> [229] "Turks and Caicos Islands"                            
#> [230] "Chad"                                                
#> [231] "East Asia & Pacific (IDA & IBRD countries)"          
#> [232] "Europe & Central Asia (IDA & IBRD countries)"        
#> [233] "Togo"                                                
#> [234] "Thailand"                                            
#> [235] "Tajikistan"                                          
#> [236] "Turkmenistan"                                        
#> [237] "Latin America & the Caribbean (IDA & IBRD countries)"
#> [238] "Timor-Leste"                                         
#> [239] "Middle East & North Africa (IDA & IBRD countries)"   
#> [240] "Tonga"                                               
#> [241] "South Asia (IDA & IBRD)"                             
#> [242] "Sub-Saharan Africa (IDA & IBRD countries)"           
#> [243] "Trinidad and Tobago"                                 
#> [244] "Tunisia"                                             
#> [245] "Turkiye"                                             
#> [246] "Tuvalu"                                              
#> [247] "Tanzania"                                            
#> [248] "Uganda"                                              
#> [249] "Ukraine"                                             
#> [250] "Upper middle income"                                 
#> [251] "Uruguay"                                             
#> [252] "United States"                                       
#> [253] "Uzbekistan"                                          
#> [254] "St. Vincent and the Grenadines"                      
#> [255] "Venezuela, RB"                                       
#> [256] "British Virgin Islands"                              
#> [257] "Virgin Islands (U.S.)"                               
#> [258] "Viet Nam"                                            
#> [259] "Vanuatu"                                             
#> [260] "World"                                               
#> [261] "Samoa"                                               
#> [262] "Kosovo"                                              
#> [263] "Yemen, Rep."                                         
#> [264] "South Africa"                                        
#> [265] "Zambia"                                              
#> [266] "Zimbabwe"


# Keep only the keys and the household-income answer from each dataset.
fastfield_83 <- select(fastfield, country, year, `83_hh_average_income`)
kobo_1_fp_83 <- select(kobo_1_fp, country, year, `83_hh_average_income`)
kobo_2_fp_hon_83 <- select(kobo_2_fp_hon, country, year, `83_hh_average_income`)
kobo_2_fp_phi_83 <- select(kobo_2_fp_phi, country, year, `83_hh_average_income`)


# Convert to PPP-adjusted income

# Labels found in the PPP table (names) -> labels used in the survey data
# (values). Only "Micronesia, Fed. Sts." actually differs.
country_mapping <- c(
  "Honduras" = "Honduras",
  "Brazil" = "Brazil",
  "Micronesia, Fed. Sts." = "Federated States of Micronesia",
  "Indonesia" = "Indonesia",
  "Mozambique" = "Mozambique",
  "Philippines" = "Philippines"
)


# using 2023 values for 2024 since the dataset has no values for 2024
ppp <- mutate(ppp, `2024` = `2023`)


# Reshape the PPP table from one column per year to one row per country and
# year, keeping only the three columns needed for the join with the surveys.
ppp_long <- ppp %>%
  rename(country = `Country Name`) %>%
  mutate(country = recode(country, !!!country_mapping)) %>%
  pivot_longer(
    cols = starts_with("19") | starts_with("20"),
    names_to = "year",
    values_to = "ppp_value"
  ) %>%
  mutate(year = as.numeric(year)) %>%
  select(country, year, ppp_value)


# Convert the household income answer to PPP-adjusted income.
#
# Joins `ppp_table` onto `data` by country and year, divides
# `83_hh_average_income` by the matching `ppp_value` and stores the result in
# a new column `83_hh_average_income_ppp`.
#
# Arguments:
#   data       Data frame with columns `country`, `year` and
#              `83_hh_average_income`.
#   ppp_table  Lookup with columns `country`, `year` and `ppp_value`.
#              Defaults to the script-level `ppp_long`, so existing calls
#              `convert_to_ppp(data)` behave as before.
#
# Returns `data` with the extra column `83_hh_average_income_ppp`; rows with
# no matching country/year in `ppp_table` get NA.
convert_to_ppp <- function(data, ppp_table = ppp_long) {
  income <- data[["83_hh_average_income"]]
  if (is.null(income)) {
    stop("`data` has no column `83_hh_average_income`.", call. = FALSE)
  }
  # Survey exports may deliver the answer as text; dividing a character
  # column fails, so coerce it (unparseable entries become NA with a warning).
  if (!is.numeric(income)) {
    data[["83_hh_average_income"]] <- as.numeric(as.character(income))
  }

  # Join only the columns that are needed so nothing else from the lookup
  # leaks into the result.
  ppp_lookup <- select(ppp_table, country, year, ppp_value)

  data %>%
    left_join(ppp_lookup, by = c("country", "year")) %>%
    mutate(`83_hh_average_income_ppp` = `83_hh_average_income` / ppp_value) %>%
    select(-ppp_value)  # Remove PPP column after calculation
}

# Apply the function to each dataset to get PPP-adjusted income
fastfield_83 <- fastfield_83 %>% convert_to_ppp()
kobo_1_fp_83 <- kobo_1_fp_83 %>% convert_to_ppp()
kobo_2_fp_hon_83 <- kobo_2_fp_hon_83 %>% convert_to_ppp()
kobo_2_fp_phi_83 <- kobo_2_fp_phi_83 %>% convert_to_ppp()


# sum(is.na(fastfield_83$'83_hh_average_income_ppp'))
# sum(is.na(kobo_1_fp_83$'83_hh_average_income_ppp'))
# sum(is.na(kobo_2_fp_hon_83$'83_hh_average_income_ppp'))
# sum(is.na(kobo_2_fp_phi_83$'83_hh_average_income_ppp'))

# Combine data for question 83, then in one pass:
#   1. make sure the PPP income is numeric,
#   2. remove NAs,
#   3. round PPP-adjusted income to 2 decimals.
q83_data <- bind_rows(
  select(fastfield_83, year, country, `83_hh_average_income_ppp`),
  select(kobo_1_fp_83, year, country, `83_hh_average_income_ppp`),
  select(kobo_2_fp_hon_83, year, country, `83_hh_average_income_ppp`),
  select(kobo_2_fp_phi_83, year, country, `83_hh_average_income_ppp`)
) %>%
  mutate(`83_hh_average_income_ppp` = as.numeric(`83_hh_average_income_ppp`)) %>%
  filter(!is.na(`83_hh_average_income_ppp`)) %>%
  mutate(`83_hh_average_income_ppp` = round(`83_hh_average_income_ppp`, 2))

# Q14

# Find data
# List every FastField column whose name contains "14".
grep("14", colnames(fastfield), value = TRUE)
#>  [1] "14a_months_farming"             "14a_income_farming"            
#>  [3] "14b_months_harvesting"          "14b_income_harvesting"         
#>  [5] "14c_months_fishing_artisanal"   "14c_income_fishing_artisanal"  
#>  [7] "14j_months_industrial"          "14j_income_industrial"         
#>  [9] "14e_months_buying_trading"      "14e_income_buying_trading"     
#> [11] "14f_months_processing"          "14f_income_processing"         
#> [13] "14d_months_fishing_aquaculture" "14d_income_fishing_aquaculture"
#> [15] "14g_months_extraction"          "14g_income_extraction"         
#> [17] "14h_months_tourism"             "14h_income_tourism"            
#> [19] "14i_months_other_wage"          "14i_income_other_wage"         
#> [21] "14k_other_source"               "14k_months_other"              
#> [23] "14k_income_other"               "14a_farming_yes_no"            
#> [25] "14b_harvesting_yes_no"          "14c_artisanal_yes_no"          
#> [27] "14d_aquaculture_yes_no"         "14e_fishbuying_trading_yes_no" 
#> [29] "14f_processing_yes_no"          "14g_extraction_yes_no"         
#> [31] "14h_tourism_yes_no"             "14i_other_wage_labor_yes_no"   
#> [33] "14j_industrial_yes_no"          "14k_other_yes_no"
grep("14", colnames(kobo_1_fp), value = TRUE) # 14*_income_* (e.g. 14a_income_farming)
#>  [1] "14a_farming_yes_no"             "14a_months_farming"            
#>  [3] "14a_income_farming"             "14b_harvesting_yes_no"         
#>  [5] "14b_months_harvesting"          "14b_income_harvesting"         
#>  [7] "14c_artisanal_yes_no"           "14c_months_fishing_artisanal"  
#>  [9] "14c_income_fishing_artisanal"   "14d_aquaculture_yes_no"        
#> [11] "14d_months_fishing_aquaculture" "14d_income_fishing_aquaculture"
#> [13] "14e_fishbuying_trading_yes_no"  "14e_months_buying_trading"     
#> [15] "14e_income_buying_trading"      "14f_processing_yes_no"         
#> [17] "14f_months_processing"          "14f_income_processing"         
#> [19] "14g_extraction_yes_no"          "14g_months_extraction"         
#> [21] "14g_income_extraction"          "14h_tourism_yes_no"            
#> [23] "14h_months_tourism"             "14h_income_tourism"            
#> [25] "14i_other_wage_labor_yes_no"    "14i_months_other_wage"         
#> [27] "14i_income_other_wage"          "14j_industrial_yes_no"         
#> [29] "14j_months_industrial"          "14j_income_industrial"         
#> [31] "14k_other_yes_no"               "14k_other_source"              
#> [33] "14k_months_other"               "14k_income_other"
grep("10", colnames(kobo_2_fp_hon), value = TRUE) # 10*_income_* (e.g. 10a_income_farming)
#>  [1] "note_q10"                       "10a_farming_yes_no"            
#>  [3] "10a_months_farming"             "10a_income_farming"            
#>  [5] "10b_harvesting_yes_no"          "10b_months_harvesting"         
#>  [7] "10b_income_harvesting"          "10c_artisanal_yes_no"          
#>  [9] "10c_months_fishing_artisanal"   "10c_income_fishing_artisanal"  
#> [11] "10d_aquaculture_yes_no"         "10d_months_fishing_aquaculture"
#> [13] "10d_income_fishing_aquaculture" "10e_fishbuying_trading_yes_no" 
#> [15] "10e_months_buying_trading"      "10e_income_buying_trading"     
#> [17] "10f_processing_yes_no"          "10f_months_processing"         
#> [19] "10f_income_processing"          "10g_extraction_yes_no"         
#> [21] "10g_months_extraction"          "10g_income_extraction"         
#> [23] "10h_tourism_yes_no"             "10h_months_tourism"            
#> [25] "10h_income_tourism"             "10i_other_wage_labor_yes_no"   
#> [27] "10i_months_other_wage"          "10i_income_other_wage"         
#> [29] "10j_industrial_yes_no"          "10j_months_industrial"         
#> [31] "10j_income_industrial"          "10k_other_yes_no"              
#> [33] "10k_other_source"               "10k_months_other"              
#> [35] "10k_income_other"               "note_q10_total"                
#> [37] "calculate_q10_total_proportion" "note_q10_display"
# NOTE(review): this looks up "current_fish_catch", not the "10*_income_*"
# columns selected for this dataset below -- confirm this is intended.
grep("current_fish_catch", colnames(kobo_2_fp_phi), value = TRUE)
#> [1] "17_current_fish_catch"


# Select % income answers
# The income-share columns are numbered 14 in FastField / Kobo 1 and selected
# with the prefix 10 in the two Kobo 2 datasets.
fastfield_14 <- select(fastfield, country, year, matches("14.*income"))

# Check if all 14*_income add up to 100%
# (row-wise sum of every column except the keys, ignoring NAs)
fastfield_14_total <- fastfield_14 %>%
  rowwise() %>%
  mutate(
    `14_income_total` = sum(c_across(-c(country, year)), na.rm = TRUE)
  ) %>%
  ungroup()

kobo_1_fp_14 <- select(kobo_1_fp, country, year, matches("14.*income"))

kobo_2_fp_hon_14 <- select(kobo_2_fp_hon, country, year, matches("10.*income"))

kobo_2_fp_phi_14 <- select(kobo_2_fp_phi, country, year, matches("10.*income"))


# Rename columns
# Drop everything up to and including the question prefix so that all four
# surveys share the same "income_<source>" column names.
fastfield_14 <- rename_with(
  fastfield_14,
  function(col_name) sub(".*income_", "income_", col_name),
  starts_with("14")
)

kobo_1_fp_14 <- rename_with(
  kobo_1_fp_14,
  function(col_name) sub(".*income_", "income_", col_name),
  starts_with("14")
)

kobo_2_fp_hon_14 <- rename_with(
  kobo_2_fp_hon_14,
  function(col_name) sub(".*income_", "income_", col_name),
  starts_with("10")
)

kobo_2_fp_phi_14 <- rename_with(
  kobo_2_fp_phi_14,
  function(col_name) sub(".*income_", "income_", col_name),
  starts_with("10")
)

# colnames(fastfield_14)
# colnames(kobo_1_fp_14)
# colnames(kobo_2_fp_hon_14)
# colnames(kobo_2_fp_phi_14)

# Combine data for question 14
q14_data <- bind_rows(fastfield_14, kobo_1_fp_14, kobo_2_fp_hon_14, kobo_2_fp_phi_14)

# Rename all NAs as 0
# Only numeric columns are touched: replace_na() type-checks its replacement,
# so applying a numeric 0 to a character column such as `country` errors as
# soon as that column contains an NA.
q14_data <- q14_data %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0)))


# Q83 and Q14 (TOGETHER):

# For each survey: keep the household average income (Q83) together with the
# income-share columns, harmonise the share names to "income_<source>", then
# run the result through convert_to_ppp().
fastfield_83_14 <- fastfield %>%
  select(country, year, "83_hh_average_income", matches("14.*income")) %>%
  rename_with(function(nm) sub(".*income_", "income_", nm), starts_with("14")) %>%
  convert_to_ppp()

kobo_1_fp_83_14 <- kobo_1_fp %>%
  select(country, year, "83_hh_average_income", matches("14.*income")) %>%
  rename_with(function(nm) sub(".*income_", "income_", nm), starts_with("14")) %>%
  convert_to_ppp()

kobo_2_fp_hon_83_14 <- kobo_2_fp_hon %>%
  select(country, year, "83_hh_average_income", matches("10.*income")) %>%
  rename_with(function(nm) sub(".*income_", "income_", nm), starts_with("10")) %>%
  convert_to_ppp()

kobo_2_fp_phi_83_14 <- kobo_2_fp_phi %>%
  select(country, year, "83_hh_average_income", matches("10.*income")) %>%
  rename_with(function(nm) sub(".*income_", "income_", nm), starts_with("10")) %>%
  convert_to_ppp()

# Combine data for question 83 and 14
q83_14_data <- bind_rows(
  fastfield_83_14,
  kobo_1_fp_83_14,
  kobo_2_fp_hon_83_14,
  kobo_2_fp_phi_83_14
)

# Coerce the PPP-adjusted income to numeric, drop rows where it is missing,
# and round it to two decimals.
q83_14_data <- q83_14_data %>%
  mutate(`83_hh_average_income_ppp` = as.numeric(`83_hh_average_income_ppp`)) %>%
  filter(!is.na(`83_hh_average_income_ppp`)) %>%
  mutate(`83_hh_average_income_ppp` = round(`83_hh_average_income_ppp`, 2))

# summary(q83_data$`83_hh_average_income_ppp`)

# Remove values above the 99th percentile (errors)
# Rows are kept only when strictly below each table's own 99th percentile.
q83_data_filtered <- filter(
  q83_data,
  `83_hh_average_income_ppp` < quantile(`83_hh_average_income_ppp`, 0.99, na.rm = TRUE)
)
q83_14_data_filtered <- filter(
  q83_14_data,
  `83_hh_average_income_ppp` < quantile(`83_hh_average_income_ppp`, 0.99, na.rm = TRUE)
)

# Recalculate the trend with filtered data
# Mean PPP income per country and year. `.groups = "drop"` returns an
# ungrouped table and silences summarise()'s regrouping message.
q83_trend_filtered <- q83_data_filtered %>%
  group_by(country, year) %>%
  summarize(
    avg_income = mean(`83_hh_average_income_ppp`, na.rm = TRUE),
    .groups = "drop"
  )

# Plot with filtered data
# Violins show the per-year income distribution; the black line and points
# overlay the yearly mean. One panel per country.
ggplot(q83_data_filtered, aes(x = as.factor(year), y = `83_hh_average_income_ppp`, fill = country)) +
  geom_violin(alpha = 0.5) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = q83_trend_filtered, aes(x = as.factor(year), y = avg_income, group = 1), color = "black", linewidth = 0.5) +
  geom_point(data = q83_trend_filtered, aes(x = as.factor(year), y = avg_income), color = "black", size = 2) +
  labs(
    title = "Trend of Average Monthly Household Income (Q83) by Country",
    x = "Year",
    y = "Average Monthly Income per Household (PPP)"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(~ country)

Trend of Average Monthly Household Income (Q83) by Country, 2019–2024: The violin plots illustrate the distribution of household income per month for each year, showing the density of values, while the black dots and lines represent the annual average income trend.

# Long format: one row per household record and income source, holding the
# reported share of income for that source.
income_data_long <- pivot_longer(
  select(q83_14_data_filtered, country, year, starts_with("income_")),
  cols = starts_with("income_"),
  names_to = "income_source",
  values_to = "income_proportion"
)

# Stacked bars normalised to 100% per year, one panel per country
ggplot(
  data = income_data_long,
  mapping = aes(x = as.factor(year), y = income_proportion, fill = income_source)
) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Proportion of Income from Different Sources (Q14) by Year",
    x = "Year",
    y = "Proportion of Total Income",
    fill = "Income Source"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right"
  ) +
  facet_wrap(~ country)

Proportion of Income from Different Sources (Q14) by Year, 2019–2024: Stacked bar plots show the proportion of household income derived from various sources across different countries. Each bar represents a year, segmented by income source, highlighting changes in income composition over time.

# Adjust calculations by dividing proportions by 100
# Income-share columns (percentages) that are turned into PPP amounts. Listed
# explicitly so that a missing column fails loudly instead of being skipped.
income_share_cols <- c(
  "income_farming",
  "income_harvesting",
  "income_fishing_artisanal",
  "income_industrial",
  "income_buying_trading",
  "income_processing",
  "income_fishing_aquaculture",
  "income_extraction",
  "income_tourism",
  "income_other_wage",
  "income_other"
)

# Wide table: each share is replaced by household income * (share / 100).
q83_14_data_filtered_inc <- q83_14_data_filtered %>%
  mutate(across(
    all_of(income_share_cols),
    ~ `83_hh_average_income_ppp` * (.x / 100)
  ))

#colnames(q83_14_data_filtered_inc)

# Long table for plotting: built from the wide table above, so the conversion
# is written once and the two tables cannot drift apart.
q83_14_data_filtered_long <- q83_14_data_filtered_inc %>%
  pivot_longer(
    cols = starts_with("income_"),
    names_to = "income_source",
    values_to = "income_amount"
  )

# Calculate the average income per country, year, and income source
# `.groups = "drop"` returns an ungrouped table and silences summarise()'s
# regrouping message.
average_income_data <- q83_14_data_filtered_long %>%
  group_by(country, year, income_source) %>%
  summarize(avg_income = mean(income_amount, na.rm = TRUE), .groups = "drop")

# From here on both tables only hold the artisanal-fishing rows.
q83_14_data_filtered_long <- q83_14_data_filtered_long %>%
  filter(income_source == "income_fishing_artisanal")
average_income_data <- average_income_data %>%
  filter(income_source == "income_fishing_artisanal")

# Yearly mean artisanal-fishing income per country (trend overlay)
q83_14_trend_filtered <- q83_14_data_filtered_long %>%
  group_by(country, year) %>%
  summarize(avg_income = mean(income_amount, na.rm = TRUE), .groups = "drop")

# Plot with filtered data
ggplot(q83_14_data_filtered_long, aes(x = as.factor(year), y = income_amount, fill = country)) +
  geom_violin(alpha = 0.5, scale = "width", adjust = 0.8) + # Adjusting scale and smoothness
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = q83_14_trend_filtered, aes(x = as.factor(year), y = avg_income, group = 1), color = "black", linewidth = 0.5) +
  geom_point(data = q83_14_trend_filtered, aes(x = as.factor(year), y = avg_income), color = "black", size = 2) +
  labs(
    title = "Trend of Average Monthly Household Income from Artisanal Fishing (Q14) by Country",
    x = "Year",
    y = "Average Monthly Income per Household from Artisanal Fishing (PPP)"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(~ country)



# Plotting the average income by income source per country and year
# ggplot(average_income_data, aes(x = as.factor(year), y = avg_income, fill = income_source)) +
#   geom_bar(stat = "identity", position = "stack") +
#    scale_fill_brewer(palette = "Set3") +  
#   labs(
#     title = "Average Income by Source per Country and Year",
#     x = "Year",
#     y = "Average Income (PPP)",
#     fill = "Income Source"
#   ) +
#   theme_minimal() +
#   theme(
#     axis.text.x = element_text(angle = 45, hjust = 1),
#     legend.position = "right"
#   ) +
#   facet_wrap(~ country)


# Long format of the per-source income amounts: one row per record and
# income source.
income_data_long <- pivot_longer(
  select(q83_14_data_filtered_inc, country, year, starts_with("income_")),
  cols = starts_with("income_"),
  names_to = "income_source",
  values_to = "income_amount"
)

# One "Paired" palette colour per distinct income source
colors <- brewer.pal(
  n = n_distinct(income_data_long$income_source),
  name = "Paired"
)

# Create the violin plot with colors based on income source
# ggplot(income_data_long, aes(x = as.factor(year), y = income_amount, fill = income_source, color = income_source)) + 
#   geom_jitter(width = 0.3, size = 0.7, alpha = 0.4) + 
#   scale_fill_manual(values = colors) +
#   scale_color_manual(values = colors) +
#   labs(
#     title = "Distribution of Income by Source per Year, Faceted by Country",
#     x = "Year",
#     y = "Income Amount (PPP)",
#     fill = "Income Source",
#     color = "Income Source"
#   ) +
#   theme_minimal() +
#   theme(
#     axis.text.x = element_text(angle = 45, hjust = 1),
#     legend.position = "right"
#   ) +
#   facet_wrap(~ country)

# ggplot(income_data_long, aes(x = as.factor(year), y = income_amount, fill = income_source, color = income_source)) + 
#   geom_jitter(position = position_dodge(width = 0.8), size = 0.7, alpha = 0.4) + 
#   scale_fill_manual(values = colors) +
#   scale_color_manual(values = colors) +
#   labs(
#     title = "Distribution of Income by Source per Year",
#     x = "Year",
#     y = "Income Amount (PPP)",
#     fill = "Income Source",
#     color = "Income Source"
#   ) +
#   theme_minimal() +
#   theme(
#     axis.text.x = element_text(angle = 45, hjust = 1),
#     legend.position = "right"
#   ) +
#   facet_wrap(~ country)

Trend of Average Monthly Household Income from Artisanal Fishing (Q14) by Country, 2019–2024: The violin plots illustrate the distribution of household income per month derived from artisanal fishing for each year, showing the density of values, while the black dots and lines represent the annual average income trend across six countries.

Fish Catch

Question 21: Current Fish Catch

Compared to 2 years ago, the current fish catch has…

Possible answers:

  1. Declined a lot

  2. Declined slightly

  3. Stayed the same

  4. Improved slightly

  5. Improved heavily

Question 24: Expected Fish Catch

In the next 5 years, how do you think the fish catch will be compared to today?

Possible answers:

  1. Declines a lot

  2. Declines slightly

  3. Stays the same

  4. Improves slightly

  5. Improves heavily

# Q21

# Find data
colnames(fastfield)[grepl("21", colnames(fastfield))]
#> [1] "21_current_fish_catch"
colnames(kobo_1_fp)[grepl("21", colnames(kobo_1_fp))] # 21_current_fish_catch
#> [1] "21_current_fish_catch"
colnames(kobo_2_fp_hon)[grepl("current_fish_catch", colnames(kobo_2_fp_hon))] # 17_current_fish_catch
#> [1] "17_current_fish_catch"
colnames(kobo_2_fp_phi)[grepl("current_fish_catch", colnames(kobo_2_fp_phi))]
#> [1] "17_current_fish_catch"

# Rename
# Lower-case the answers and replace spaces with underscores. The two Kobo
# module 2 exports store this question as column 17; it is copied into a
# `21_current_fish_catch` column so all four surveys share one name.
fastfield <- fastfield %>%
  mutate(`21_current_fish_catch` = tolower(gsub(" ", "_", `21_current_fish_catch`)))
kobo_1_fp <- kobo_1_fp %>%
  mutate(`21_current_fish_catch` = tolower(gsub(" ", "_", `21_current_fish_catch`)))
kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(`21_current_fish_catch` = tolower(gsub(" ", "_", `17_current_fish_catch`)))
kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(`21_current_fish_catch` = tolower(gsub(" ", "_", `17_current_fish_catch`)))

# Answers: "declined_a_lot","not_a_fisher", "not_answered", "improved_slightly", "declined_slightly", "stayed_the_same", "improved_heavily"
unique(fastfield$'21_current_fish_catch')
#> [1] "declined_a_lot"    "not_a_fisher"      "not_answered"     
#> [4] "improved_slightly" "declined_slightly" "stayed_the_same"  
#> [7] "improved_heavily"
unique(kobo_1_fp$'21_current_fish_catch')
#> [1] "declined_slightly" "stayed_the_same"   "declined_alot"    
#> [4] NA                  "improved_slightly" "improved_heavily"
unique(kobo_2_fp_hon$'21_current_fish_catch')
#> [1] "stayed_the_same"   "declined_alot"     NA                 
#> [4] "declined_slightly" "improved_slightly" "improved_heavily"
unique(kobo_2_fp_phi$'21_current_fish_catch')
#> [1] "stayed_the_same"   NA                  "declined_slightly"
#> [4] "declined_alot"     "improved_heavily"  "improved_slightly"

# Align the Kobo answer labels with FastField: "declined_alot" becomes
# "declined_a_lot" and missing answers become "not_answered".
kobo_1_fp <- kobo_1_fp %>%
  mutate(`21_current_fish_catch` = replace_na(
    gsub("declined_alot", "declined_a_lot", `21_current_fish_catch`),
    "not_answered"
  ))
kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(`21_current_fish_catch` = replace_na(
    gsub("declined_alot", "declined_a_lot", `21_current_fish_catch`),
    "not_answered"
  ))
kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(`21_current_fish_catch` = replace_na(
    gsub("declined_alot", "declined_a_lot", `21_current_fish_catch`),
    "not_answered"
  ))


# Q24

# Find data
colnames(fastfield)[grepl("24", colnames(fastfield))]
#> [1] "24_catch_5yrs"
colnames(kobo_1_fp)[grepl("catch_5yrs", colnames(kobo_1_fp))] # 24_catch_5yrs
#> [1] "24_catch_5yrs"
colnames(kobo_2_fp_hon)[grepl("catch_5yrs", colnames(kobo_2_fp_hon))] # 19_catch_5yrs
#> [1] "19_catch_5yrs"
colnames(kobo_2_fp_phi)[grepl("catch_5yrs", colnames(kobo_2_fp_phi))]
#> [1] "19_catch_5yrs"

# Rename
# Lower-case the answers and replace spaces with underscores. The two Kobo
# module 2 exports store this question as column 19; it is copied into a
# `24_catch_5yrs` column so all four surveys share one name.
fastfield <- fastfield %>%
  mutate(`24_catch_5yrs` = tolower(gsub(" ", "_", `24_catch_5yrs`)))
kobo_1_fp <- kobo_1_fp %>%
  mutate(`24_catch_5yrs` = tolower(gsub(" ", "_", `24_catch_5yrs`)))
kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(`24_catch_5yrs` = tolower(gsub(" ", "_", `19_catch_5yrs`)))
kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(`24_catch_5yrs` = tolower(gsub(" ", "_", `19_catch_5yrs`)))

# Answers: "improves_slightly", "not_a_fisher", "stays_the_same", "improves_heavily", "declines_slightly", "declines_a_lot", "not_answered"
unique(fastfield$'24_catch_5yrs')
#> [1] "improves_slightly" "not_a_fisher"      "stays_the_same"   
#> [4] "improves_heavily"  "declines_slightly" "declines_a_lot"   
#> [7] "not_answered"
unique(kobo_1_fp$'24_catch_5yrs')
#> [1] "declines_slightly" "improves_slightly" "declines_alot"    
#> [4] "stays_the_same"    "improves_heavily"  NA
unique(kobo_2_fp_hon$'24_catch_5yrs')
#> [1] "declines_slightly" "declines_alot"     "improves_slightly"
#> [4] "stays_the_same"    "improves_heavily"
unique(kobo_2_fp_phi$'24_catch_5yrs')
#> [1] "improves_slightly" "declines_slightly" "declines_alot"    
#> [4] "stays_the_same"    "improves_heavily"

# Align the Kobo answer labels with FastField: "declines_alot" becomes
# "declines_a_lot" and missing answers become "not_answered".
kobo_1_fp <- kobo_1_fp %>%
  mutate(`24_catch_5yrs` = replace_na(
    gsub("declines_alot", "declines_a_lot", `24_catch_5yrs`),
    "not_answered"
  ))
kobo_2_fp_hon <- kobo_2_fp_hon %>%
  mutate(`24_catch_5yrs` = replace_na(
    gsub("declines_alot", "declines_a_lot", `24_catch_5yrs`),
    "not_answered"
  ))
kobo_2_fp_phi <- kobo_2_fp_phi %>%
  mutate(`24_catch_5yrs` = replace_na(
    gsub("declines_alot", "declines_a_lot", `24_catch_5yrs`),
    "not_answered"
  ))

# Combine data for question 21
# Stack the four surveys; the answer column is renamed to `response` while
# selecting.
q21_data <- bind_rows(
  select(fastfield, year, country, response = `21_current_fish_catch`),
  select(kobo_1_fp, year, country, response = `21_current_fish_catch`),
  select(kobo_2_fp_hon, year, country, response = `21_current_fish_catch`),
  select(kobo_2_fp_phi, year, country, response = `21_current_fish_catch`)
)

# Combine data for question 24
q24_data <- bind_rows(
  select(fastfield, year, country, response = `24_catch_5yrs`),
  select(kobo_1_fp, year, country, response = `24_catch_5yrs`),
  select(kobo_2_fp_hon, year, country, response = `24_catch_5yrs`),
  select(kobo_2_fp_phi, year, country, response = `24_catch_5yrs`)
)

# Q21: stacked counts of each response per year, one panel per country
ggplot(data = q21_data, mapping = aes(x = year, fill = response)) +
  geom_bar(position = "stack") +
  scale_fill_brewer(palette = "Set3") +
  facet_wrap(~ country) +
  labs(
    title = "Distribution of Responses for Current Fish Catch (Q21) by Year and Country",
    x = "Year",
    y = "Count of Responses",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Q24: same chart for the expected catch in five years
ggplot(data = q24_data, mapping = aes(x = year, fill = response)) +
  geom_bar(position = "stack") +
  scale_fill_brewer(palette = "Set3") +
  facet_wrap(~ country) +
  labs(
    title = "Distribution of Responses for Expected Fish Catch in 5 Years (Q24) by Year and Country",
    x = "Year",
    y = "Count of Responses",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Map responses to 0-100 scale for q21
# "not_answered" and "not_a_fisher" rows are dropped first. Any response not
# listed below has no matching case and gets an NA score.
q21_data <- q21_data %>%
  filter(!response %in% c("not_answered", "not_a_fisher")) %>%
  mutate(score = case_when(
    response == "declined_a_lot" ~ 0,
    response == "declined_slightly" ~ 25,
    response == "stayed_the_same" ~ 50,
    response == "improved_slightly" ~ 75,
    response == "improved_heavily" ~ 100
  ))

# Map responses to 0-100 scale for q24
q24_data <- q24_data %>%
  filter(!response %in% c("not_answered", "not_a_fisher")) %>%
  mutate(score = case_when(
    response == "declines_a_lot" ~ 0,
    response == "declines_slightly" ~ 25,
    response == "stays_the_same" ~ 50,
    response == "improves_slightly" ~ 75,
    response == "improves_heavily" ~ 100
  ))

# Calculate yearly averages for q21 and q24
# `.groups = "drop"` returns ungrouped tables and silences summarise()'s
# regrouping message.
q21_trend <- q21_data %>%
  group_by(year, country) %>%
  summarise(avg_score = mean(score, na.rm = TRUE), .groups = "drop")

q24_trend <- q24_data %>%
  group_by(year, country) %>%
  summarise(avg_score = mean(score, na.rm = TRUE), .groups = "drop")

# # Plotting trends for q21
# ggplot(q21_trend, aes(x = year, y = avg_score, color = country, group = country)) +
#   geom_line() +
#   geom_point() +
#   labs(title = "Trend of Current Fish Catch Perception (Q21) by Country",
#        x = "Year",
#        y = "Average Score (0-100)",
#        color = "Country") +
#   theme_minimal()
# 
# # Plotting trends for q24
# ggplot(q24_trend, aes(x = year, y = avg_score, color = country, group = country)) +
#   geom_line() +
#   geom_point() +
#   labs(title = "Trend of Expected Fish Catch in 5 Years (Q24) by Country",
#        x = "Year",
#        y = "Average Score (0-100)",
#        color = "Country") +
#   theme_minimal()


# Plotting trends for q21 with individual observations and trend line
# Violins show the per-year score distribution; the black line and points
# overlay the yearly mean from q21_trend. One panel per country.
ggplot(q21_data, aes(x = as.factor(year), y = score, fill = country)) +
  geom_violin(alpha = 0.5) +  # Violin plot to show distribution
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = q21_trend, aes(x = as.factor(year), y = avg_score, group = 1), color = "black", linewidth = 0.5) +
  geom_point(data = q21_trend, aes(x = as.factor(year), y = avg_score), color = "black", size = 2) +
  labs(title = "Trend of Current Fish Catch Perception (Q21) by Country",
       x = "Year",
       y = "Average Score (0-100)") +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(~ country)




# Plotting trends for q24 with individual observations and trend line
ggplot(q24_data, aes(x = as.factor(year), y = score, fill = country)) +
  geom_violin(alpha = 0.5) +  # Violin plot to show distribution
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = q24_trend, aes(x = as.factor(year), y = avg_score, group = 1), color = "black", linewidth = 0.5) +
  geom_point(data = q24_trend, aes(x = as.factor(year), y = avg_score), color = "black", size = 2) +
  labs(title = "Trend of Expected Fish Catch in 5 Years (Q24) by Country",
       x = "Year",
       y = "Average Score (0-100)") +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(~ country)

Food Security Data

Question 72: Food Availability

Are you confident that you will be able to procure enough food for you and your family for the next 12 months?

Possible answers:

  1. Certain to have shortage

  2. High chance of having shortage

  3. Uncertain

  4. Confident to procure enough food

  5. Very confident to procure enough food


Other relevant questions regarding Food Security (to add to the analysis):

  • Q70: How do you rate the last year in terms of food availability?

  • Q71: Consider the following statement: “I worry about not having enough food for everyone in the household.” Was that often, sometimes, or never true for you in the last 12 months?

  • Q73: In the last 12 months, how often did your household eat fish?


# Find data
# colnames(fastfield)[grepl("72", colnames(fastfield))]
# colnames(kobo_1_fp)[grepl("food_procurement", colnames(kobo_1_fp))]
# colnames(kobo_2_fp_hon)[grepl("food_procurement", colnames(kobo_2_fp_hon))]


# Select Q72 (food procurement), site, country and year from each survey and
# normalise the answers to lower-case, underscore-separated labels.
# NOTE(review): kobo_2_fp_phi is not included here, although it is part of the
# Q21/Q24 analysis above - confirm whether the Philippines export has a
# food-procurement column and should be added.
fastfield_q72 <- fastfield %>% 
  select(ma_name, country, year, '72_food_procurement')
fastfield_q72$'72_food_procurement'<- tolower(gsub(" ", "_", fastfield_q72$'72_food_procurement'))

kobo_1_fp_q72 <- kobo_1_fp %>% 
  select(ma_name, country, year, '72_food_procurement')
kobo_1_fp_q72$'72_food_procurement'<- tolower(gsub(" ", "_", kobo_1_fp_q72$'72_food_procurement'))

kobo_2_fp_hon_q72 <- kobo_2_fp_hon %>% 
  select(ma_name, country, year, '50_food_procurement') # in this export the question is column 50, not 72
kobo_2_fp_hon_q72$'72_food_procurement'<- tolower(gsub(" ", "_", kobo_2_fp_hon_q72$'50_food_procurement')) # store under the shared 72_* column name
# Drop the original 50_* column so the table has the same columns as the others
kobo_2_fp_hon_q72 <- kobo_2_fp_hon_q72 %>% 
  select(ma_name, country, year, '72_food_procurement')

# Check answers
# Possible answers:
# 1. Certain to have shortage
# 2. High chance of having shortage
# 3. Uncertain
# 4. Confident to procure enough food
# 5. Very confident to procure enough food

unique(fastfield_q72$`72_food_procurement`)
#> [1] "uncertain"          "confident_not"      "certain"           
#> [4] "high_chance"        "very_confident_not" "not_answered"
unique(kobo_1_fp_q72$`72_food_procurement`)
#> [1] "uncertain"          "confident_not"      "very_confident_not"
#> [4] "high_chance"        "certain"
unique(kobo_2_fp_hon_q72$`72_food_procurement`)
#> [1] "confident_not"      "certain"            "high_chance"       
#> [4] "uncertain"          "very_confident_not"

# Combine datasets
# Each per-survey table already holds exactly ma_name, country, year and
# `72_food_procurement`, so they can be stacked as they are.
q72 <- bind_rows(fastfield_q72, kobo_1_fp_q72, kobo_2_fp_hon_q72)

# To continuous
# Map the five answer labels onto a 0-100 scale. Any other value (e.g.
# "not_answered") matches no case and becomes NA.
q72 <- q72 %>%
  mutate(q72_continuous = case_when(
    `72_food_procurement` == "certain" ~ 0,
    `72_food_procurement` == "high_chance" ~ 25,
    `72_food_procurement` == "uncertain" ~ 50,
    `72_food_procurement` == "confident_not" ~ 75,
    `72_food_procurement` == "very_confident_not" ~ 100
  ))

# Yearly mean score per country. `.groups = "drop"` returns an ungrouped table
# and silences summarise()'s regrouping message.
q72_trend <- q72 %>%
  group_by(year, country) %>%
  summarise(avg_score = mean(q72_continuous, na.rm = TRUE), .groups = "drop")

# Plotting trends for q72 with individual observations and trend line
# Violins show the per-year score distribution; the black line and points
# overlay the yearly mean from q72_trend. One panel per country.
ggplot(q72, aes(x = as.factor(year), y = q72_continuous, fill = country)) +
  geom_violin(alpha = 0.5) +  # Violin plot to show distribution
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(data = q72_trend, aes(x = as.factor(year), y = avg_score, group = 1), color = "black", linewidth = 0.5) +
  geom_point(data = q72_trend, aes(x = as.factor(year), y = avg_score), color = "black", size = 2) +
  # The title previously said Q24; the plotted data is Q72 (food procurement)
  labs(title = "Trend of Food Security (Q72) by Country",
       x = "Year",
       y = "Average Score (0-100)") +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(~ country)