# Proportion of images per location within 5-month age bins ----

# Join images to their recording metadata, bin age to the nearest multiple of
# 5, and count images per (age bin, location). Cells with 30 or fewer images
# are dropped *before* proportions are computed, so within each age bin the
# proportions sum to 1 over the locations that survive the filter.
# The result is wide: one row per age bin, one column per location, with 0 for
# any bin/location combination that is absent after filtering.
# NOTE(review): rows are filtered on `age` but binned on `age_mo`; confirm both
# columns are always missing together. Rows whose `age_mo` is NA end up in an
# NA bin that is removed when `plot_data` is built below.
locations_data <- image_locations %>%
  left_join(recordings, by = "superseded_gcp_name_feb25") %>%
  filter(!is.na(age)) %>%
  mutate(rounded_age = round(age_mo / 5) * 5) %>%
  group_by(rounded_age, location_clean) %>%
  summarise(n = n(), .groups = "drop") %>%
  filter(n > 30) %>% # keep only age-bin/location cells with n > 30
  group_by(rounded_age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age bin
  ungroup() %>%
  select(rounded_age, location_clean, prop) %>%
  pivot_wider(
    id_cols = rounded_age,
    names_from = location_clean,
    values_from = prop,
    values_fill = 0
  )

# Back to long format for plotting. The wide/long round trip is what turns
# missing age-bin/location combinations into explicit zero-proportion rows.
plot_data <- locations_data %>%
  pivot_longer(
    cols = -rounded_age,
    names_to = "location_clean",
    values_to = "prop"
  ) %>%
  filter(!is.na(rounded_age))

# One linear trend line per location, drawn over the per-bin points.
# `se = FALSE` turns off the confidence band (the original comment asked for
# no shading, but the default is `se = TRUE`); the explicit formula avoids the
# "using formula = 'y ~ x'" message.
ggplot(
  plot_data,
  aes(
    x = rounded_age,
    y = prop,
    color = location_clean,
    group = location_clean
  )
) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_point() +
  labs(
    x = "Rounded Age (months)",
    y = "Proportion of Location",
    title = "Proportion of Locations by Age",
    color = "Location"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
# Per-age location proportions in wide form: one row per `age`, one column per
# location, 0 where a location has no images at that age.
# NOTE: this reassigns `locations_data`.
locations_data <- image_locations %>%
  # Explicit key: the same column dplyr would otherwise pick by itself, now
  # stated so that a new shared column cannot silently change the join.
  left_join(recordings, by = "superseded_gcp_name_feb25") %>%
  group_by(age, location_clean) %>%
  # "drop_last" keeps the data grouped by `age` for the proportion step.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ungroup() %>%
  # `id_cols = age` keeps the leftover count column `n` from acting as an
  # identifier, which would split each age across several mostly-zero rows.
  pivot_wider(
    id_cols = age,
    names_from = location_clean,
    values_from = prop,
    values_fill = 0
  )
Joining with `by = join_by(superseded_gcp_name_feb25)`