1. Lectura de datasets
Trabajaré con los datasets individualmente para mirar posibles
inconsistencias antes de intentar juntar todo.
- Haré una operación de enriquecimiento de la columna
timestamp para crear distintos formatos de datas;
- Haré un split en la columna code para facilitar la agregación de los
datasets;
1.1 Satelite Data - Zone 7
sat_data_r07 <- readxl::read_excel("./data/Satellitedata_SpatialJoinISRAR07_fishing_2017_2021.xlsx") %>%
janitor::clean_names() %>%
mutate(isra_code = substr(code, 1, 4),
isra_zone = substr(code, 5, 5),
juri_zone = substr(code, 6, 8),
juri_code = substr(code, 9, 12),
datetime = as_datetime(timestamp),
date = as_date(datetime),
year_month = zoo::as.yearmon(date)) %>%
janitor::clean_names() %>%
relocate(starts_with('isra'), .before = 'name') %>%
relocate(starts_with('juri'), .before = 'jurisdicti') %>%
relocate(c(timestamp, date, datetime, year_month), .before = objectid)
head(sat_data_r07)
## # A tibble: 6 × 38
## timestamp date datetime year_month objectid join_count
## <chr> <date> <dttm> <yearmon> <dbl> <dbl>
## 1 <NA> NA NA <NA> 1 0
## 2 <NA> NA NA <NA> 2 0
## 3 <NA> NA NA <NA> 3 0
## 4 2021-06-06 02:1… 2021-06-06 2021-06-06 02:15:51 jun 2021 4 1
## 5 2018-05-10 02:2… 2018-05-10 2018-05-10 02:23:42 mai 2018 5 1
## 6 2019-04-20 14:3… 2019-04-20 2019-04-20 14:33:08 abr 2019 6 1
## # ℹ 32 more variables: target_fid <dbl>, join_fid <dbl>, code <chr>,
## # status <chr>, isra_code <chr>, isra_zone <chr>, name <chr>,
## # jurisdicti <chr>, juri_zone <chr>, juri_code <chr>, region <chr>,
## # criteria <chr>, subcriteri <chr>, query_crite <chr>, query_subcr <chr>,
## # min_depth <dbl>, max_depth <dbl>, order <chr>, family <chr>, url <chr>,
## # scene_id <chr>, lat <dbl>, lon <dbl>, year <dbl>, mmsi <dbl>,
## # matching_s <dbl>, fishing_sc <dbl>, length_m <dbl>, matched_ca <chr>, …
skim(sat_data_r07)
Data summary
| Name |
sat_data_r07 |
| Number of rows |
1656 |
| Number of columns |
38 |
| _______________________ |
|
| Column type frequency: |
|
| character |
20 |
| Date |
1 |
| numeric |
16 |
| POSIXct |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| timestamp |
101 |
0.94 |
25 |
25 |
0 |
1104 |
0 |
| year_month |
101 |
0.94 |
4 |
16 |
0 |
60 |
0 |
| code |
0 |
1.00 |
12 |
12 |
0 |
124 |
0 |
| status |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_code |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_zone |
0 |
1.00 |
1 |
1 |
0 |
1 |
0 |
| name |
0 |
1.00 |
4 |
35 |
0 |
125 |
0 |
| jurisdicti |
0 |
1.00 |
4 |
84 |
0 |
33 |
0 |
| juri_zone |
0 |
1.00 |
3 |
3 |
0 |
26 |
0 |
| juri_code |
0 |
1.00 |
4 |
4 |
0 |
124 |
0 |
| region |
0 |
1.00 |
20 |
57 |
0 |
4 |
0 |
| criteria |
0 |
1.00 |
26 |
121 |
0 |
13 |
0 |
| subcriteri |
0 |
1.00 |
2 |
214 |
0 |
38 |
0 |
| query_crite |
0 |
1.00 |
1 |
10 |
0 |
8 |
0 |
| query_subcr |
0 |
1.00 |
2 |
22 |
0 |
23 |
0 |
| order |
0 |
1.00 |
11 |
100 |
0 |
33 |
0 |
| family |
0 |
1.00 |
8 |
173 |
0 |
63 |
0 |
| url |
0 |
1.00 |
51 |
81 |
0 |
124 |
0 |
| scene_id |
101 |
0.94 |
67 |
67 |
0 |
1104 |
0 |
| matched_ca |
101 |
0.94 |
9 |
18 |
0 |
4 |
0 |
Variable type: Date
| date |
101 |
0.94 |
2017-01-08 |
2021-12-27 |
2019-05-26 |
799 |
Variable type: numeric
| objectid |
0 |
1.00 |
828.50 |
478.19 |
1.00 |
414.75 |
828.50 |
1.24225e+03 |
1656.00 |
▇▇▇▇▇ |
| join_count |
0 |
1.00 |
0.94 |
0.24 |
0.00 |
1.00 |
1.00 |
1.00000e+00 |
1.00 |
▁▁▁▁▇ |
| target_fid |
0 |
1.00 |
85.26 |
31.67 |
0.00 |
71.00 |
108.00 |
1.08000e+02 |
124.00 |
▁▂▁▂▇ |
| join_fid |
0 |
1.00 |
92573.01 |
57387.24 |
-1.00 |
36492.00 |
95688.00 |
1.41189e+05 |
186761.00 |
▇▅▆▆▆ |
| min_depth |
0 |
1.00 |
4.05 |
28.13 |
0.00 |
0.00 |
0.00 |
0.00000e+00 |
220.00 |
▇▁▁▁▁ |
| max_depth |
0 |
1.00 |
256.14 |
499.62 |
3.00 |
100.00 |
100.00 |
1.00000e+02 |
1928.00 |
▇▁▁▁▁ |
| lat |
101 |
0.94 |
20.68 |
13.31 |
-28.86 |
20.77 |
25.86 |
2.65200e+01 |
29.99 |
▁▁▁▁▇ |
| lon |
101 |
0.94 |
56.34 |
9.21 |
32.06 |
53.11 |
56.10 |
5.74100e+01 |
77.75 |
▁▁▇▁▂ |
| year |
101 |
0.94 |
2018.91 |
1.41 |
2017.00 |
2018.00 |
2019.00 |
2.02000e+03 |
2021.00 |
▇▇▇▇▆ |
| mmsi |
1347 |
0.19 |
469458307.39 |
113915512.66 |
8617172.00 |
419006400.00 |
422445300.00 |
4.70956e+08 |
994018107.00 |
▁▂▇▂▁ |
| matching_s |
101 |
0.94 |
0.38 |
3.66 |
0.00 |
0.00 |
0.00 |
0.00000e+00 |
87.52 |
▇▁▁▁▁ |
| fishing_sc |
101 |
0.94 |
0.94 |
0.02 |
0.90 |
0.93 |
0.95 |
9.60000e-01 |
0.96 |
▁▁▂▂▇ |
| length_m |
101 |
0.94 |
21.46 |
5.81 |
9.80 |
17.49 |
20.01 |
2.42100e+01 |
45.07 |
▃▇▃▁▁ |
| overpasses |
101 |
0.94 |
328.34 |
122.22 |
67.00 |
284.00 |
289.00 |
4.26000e+02 |
575.00 |
▃▁▇▇▁ |
| shape_length |
0 |
1.00 |
26.51 |
18.21 |
0.01 |
5.21 |
42.43 |
4.24300e+01 |
91.28 |
▆▁▇▁▁ |
| shape_area |
0 |
1.00 |
5.56 |
6.71 |
0.00 |
0.48 |
8.20 |
8.20000e+00 |
156.32 |
▇▁▁▁▁ |
Variable type: POSIXct
| datetime |
101 |
0.94 |
2017-01-08 00:55:46 |
2021-12-27 02:15:31 |
2019-05-26 14:33:35 |
1104 |
1.2 Satelite Data - Zone 12
sat_data_r12 <- readxl::read_excel("./data/Satellitedata_SpatialJoinISRAR12_fishing_2017_2021.xlsx") %>%
janitor::clean_names() %>%
mutate(isra_code = substr(code, 1, 4),
isra_zone = substr(code, 5, 6),
juri_zone = substr(code, 7, 9),
juri_code = substr(code, 10, 13),
datetime = as_datetime(timestamp),
date = as_date(datetime),
year_month = zoo::as.yearmon(date)) %>%
janitor::clean_names() %>%
relocate(starts_with('isra'), .before = 'name') %>%
relocate(starts_with('juri'), .before = 'jurisdicti') %>%
relocate(c(timestamp, date, datetime, year_month), .before = objectid)
head(sat_data_r12)
## # A tibble: 6 × 41
## timestamp date datetime year_month objectid join_count
## <chr> <date> <dttm> <yearmon> <dbl> <dbl>
## 1 2019-09-19 10:0… 2019-09-19 2019-09-19 10:08:12 set 2019 1 1
## 2 2019-10-25 10:0… 2019-10-25 2019-10-25 10:08:13 out 2019 2 1
## 3 2020-10-25 10:0… 2020-10-25 2020-10-25 10:09:08 out 2020 3 1
## 4 2020-01-23 10:0… 2020-01-23 2020-01-23 10:08:59 jan 2020 4 1
## 5 2018-11-28 23:1… 2018-11-28 2018-11-28 23:15:12 nov 2018 5 1
## 6 2018-10-23 23:1… 2018-10-23 2018-10-23 23:14:48 out 2018 6 1
## # ℹ 35 more variables: target_fid <dbl>, join_fid <dbl>, code <chr>,
## # status <chr>, isra_code <chr>, isra_zone <chr>, name <chr>,
## # jurisdicti <chr>, juri_zone <chr>, juri_code <chr>, region <chr>,
## # criteria <chr>, subcriteri <chr>, query_crite <chr>, query_subcr <chr>,
## # min_depth <dbl>, max_depth <dbl>, order <chr>, family <chr>, url <chr>,
## # area_hseas <dbl>, field <dbl>, field1 <dbl>, scene_id <chr>, lat <dbl>,
## # lon <dbl>, year <dbl>, mmsi <dbl>, matching_s <dbl>, fishing_sc <dbl>, …
skim(sat_data_r12)
Data summary
| Name |
sat_data_r12 |
| Number of rows |
3083 |
| Number of columns |
41 |
| _______________________ |
|
| Column type frequency: |
|
| character |
20 |
| Date |
1 |
| numeric |
19 |
| POSIXct |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| timestamp |
37 |
0.99 |
25 |
25 |
0 |
1459 |
0 |
| year_month |
37 |
0.99 |
4 |
16 |
0 |
60 |
0 |
| code |
0 |
1.00 |
12 |
13 |
0 |
65 |
0 |
| status |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_code |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_zone |
0 |
1.00 |
2 |
2 |
0 |
2 |
0 |
| name |
0 |
1.00 |
6 |
62 |
0 |
65 |
0 |
| jurisdicti |
0 |
1.00 |
4 |
100 |
0 |
18 |
0 |
| juri_zone |
0 |
1.00 |
3 |
3 |
0 |
13 |
0 |
| juri_code |
0 |
1.00 |
3 |
4 |
0 |
65 |
0 |
| region |
0 |
1.00 |
34 |
34 |
0 |
1 |
0 |
| criteria |
0 |
1.00 |
26 |
121 |
0 |
8 |
0 |
| subcriteri |
0 |
1.00 |
2 |
243 |
0 |
30 |
0 |
| query_crite |
0 |
1.00 |
1 |
10 |
0 |
8 |
0 |
| query_subcr |
0 |
1.00 |
2 |
26 |
0 |
27 |
0 |
| order |
0 |
1.00 |
10 |
128 |
0 |
38 |
0 |
| family |
0 |
1.00 |
9 |
226 |
0 |
55 |
0 |
| url |
0 |
1.00 |
53 |
96 |
0 |
64 |
0 |
| scene_id |
37 |
0.99 |
67 |
67 |
0 |
1461 |
0 |
| matched_ca |
37 |
0.99 |
9 |
18 |
0 |
4 |
0 |
Variable type: Date
| date |
37 |
0.99 |
2017-01-04 |
2021-12-26 |
2019-07-06 |
881 |
Variable type: numeric
| objectid |
0 |
1.00 |
1542.00 |
890.13 |
1.00 |
771.50 |
1542.00 |
2312.50 |
3083.00 |
▇▇▇▇▇ |
| join_count |
0 |
1.00 |
0.99 |
0.11 |
0.00 |
1.00 |
1.00 |
1.00 |
1.00 |
▁▁▁▁▇ |
| target_fid |
0 |
1.00 |
31.55 |
17.42 |
0.00 |
23.00 |
24.00 |
48.00 |
64.00 |
▂▇▃▂▃ |
| join_fid |
0 |
1.00 |
98654.87 |
54216.52 |
-1.00 |
57066.50 |
98574.00 |
144084.00 |
186670.00 |
▇▇▇▇▇ |
| min_depth |
0 |
1.00 |
2.70 |
34.32 |
0.00 |
0.00 |
0.00 |
0.00 |
1650.00 |
▇▁▁▁▁ |
| max_depth |
0 |
1.00 |
966.37 |
723.17 |
20.00 |
200.00 |
1000.00 |
1928.00 |
1928.00 |
▇▁▆▁▆ |
| area_hseas |
0 |
1.00 |
733598.59 |
1210183.55 |
0.00 |
21700.00 |
60510.00 |
248693.00 |
3020979.00 |
▇▁▁▁▂ |
| field |
0 |
1.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
▁▁▇▁▁ |
| field1 |
0 |
1.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
▁▁▇▁▁ |
| lat |
37 |
0.99 |
4.71 |
15.49 |
-43.70 |
-7.84 |
-2.56 |
24.00 |
31.36 |
▁▁▇▃▅ |
| lon |
37 |
0.99 |
-88.39 |
13.61 |
-114.61 |
-107.27 |
-80.66 |
-79.48 |
-70.56 |
▅▁▁▇▃ |
| year |
37 |
0.99 |
2018.99 |
1.42 |
2017.00 |
2018.00 |
2019.00 |
2020.00 |
2021.00 |
▇▇▇▆▇ |
| mmsi |
2865 |
0.07 |
607281715.38 |
231968085.69 |
14334.00 |
371124500.00 |
760000421.00 |
760012394.00 |
990999121.00 |
▁▂▁▇▁ |
| matching_s |
37 |
0.99 |
0.25 |
3.68 |
0.00 |
0.00 |
0.00 |
0.00 |
106.20 |
▇▁▁▁▁ |
| fishing_sc |
37 |
0.99 |
0.94 |
0.02 |
0.90 |
0.93 |
0.95 |
0.96 |
0.96 |
▂▂▂▂▇ |
| length_m |
37 |
0.99 |
22.97 |
6.57 |
10.72 |
18.54 |
21.44 |
25.52 |
61.78 |
▇▇▂▁▁ |
| overpasses |
37 |
0.99 |
267.92 |
113.47 |
34.00 |
147.00 |
267.00 |
292.00 |
797.00 |
▅▇▂▁▁ |
| shape_length |
0 |
1.00 |
61.25 |
78.22 |
0.04 |
12.53 |
23.87 |
32.48 |
208.76 |
▇▁▁▁▂ |
| shape_area |
0 |
1.00 |
59.92 |
98.60 |
0.00 |
1.93 |
4.96 |
20.35 |
246.27 |
▇▁▁▁▂ |
Variable type: POSIXct
| datetime |
37 |
0.99 |
2017-01-04 01:40:11 |
2021-12-26 13:06:12 |
2019-07-06 23:34:14 |
1459 |
Comparacion de columnas entre los datasets de cada región
janitor::compare_df_cols(sat_data_r07, sat_data_r12, strict_description = T)
## column_name sat_data_r07 sat_data_r12
## 1 area_hseas <NA> numeric
## 2 code character character
## 3 criteria character character
## 4 date Date Date
## 5 datetime POSIXct, POSIXt POSIXct, POSIXt
## 6 family character character
## 7 field <NA> numeric
## 8 field1 <NA> numeric
## 9 fishing_sc numeric numeric
## 10 isra_code character character
## 11 isra_zone character character
## 12 join_count numeric numeric
## 13 join_fid numeric numeric
## 14 juri_code character character
## 15 juri_zone character character
## 16 jurisdicti character character
## 17 lat numeric numeric
## 18 length_m numeric numeric
## 19 lon numeric numeric
## 20 matched_ca character character
## 21 matching_s numeric numeric
## 22 max_depth numeric numeric
## 23 min_depth numeric numeric
## 24 mmsi numeric numeric
## 25 name character character
## 26 objectid numeric numeric
## 27 order character character
## 28 overpasses numeric numeric
## 29 query_crite character character
## 30 query_subcr character character
## 31 region character character
## 32 scene_id character character
## 33 shape_area numeric numeric
## 34 shape_length numeric numeric
## 35 status character character
## 36 subcriteri character character
## 37 target_fid numeric numeric
## 38 timestamp character character
## 39 url character character
## 40 year numeric numeric
## 41 year_month yearmon yearmon
| OBJECTID |
Numero de fila |
| Join_Count |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| TARGET_FID |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| JOIN_FID |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| code |
Codigo numerico de la ISRA (poligono) |
| Status |
Todos los poligonos en el mapa son ISRAS entonces
esta columna tampoco sirve |
| Name |
Nombre particular de cada ISRA |
| Jurisdicti |
Pais en el que esta la ISRA |
| Region |
Region del mundo, en este caso van a ser 3 regiones
(12 Central and South American Pacific, 7 Western Indian Ocean, 9
Asia) |
| Criteria |
Las caracteristicas biologicas y ecologicas que
cumplen las especies que cualificaron para que se determinara la
ISRA |
| Subcriteri |
Las caracteristicas biologicas y ecologicas que
cumplen las especies que cualificaron para que se determinara la
ISRA |
| QueryCrite |
Codigos del criteria |
| QuerySubcr |
Codigos del subcriteria |
| MinDepth |
profundidad minima en metros de la isra (0 es desde
la superficie) |
| MaxDepth |
profundidad maxima de la ISRA |
| Order |
Orden de las especies que cualificaron para la ISRA
(QS) |
| Family |
Familia de las especies que cualificaron para la ISRA
(QS) |
| URL |
En esta direccion puedes encontrar el factseet de
cada area donde explican porq ha sido designada como una ISRA |
| AreaHseas |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| Field |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| Field1 |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| scene_id |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| timestamp |
Fecha y hora del evento de pesca |
| lat |
latitud del evento de pesca |
| lon |
longitud del evento de pesca |
| year |
ano del evento de pesca |
| mmsi |
NO SIRVE. Valores del estudio de Paolo et al. 2024
que usaron para determinar si estaban pescando o no esos barcos |
| matching_s |
NO SIRVE. Valores del estudio de Paolo et al. 2024
que usaron para determinar si estaban pescando o no esos barcos |
| fishing_sc |
Aqui ya filtre todos los que tenian un valor igual o
mayor a 0.9 lo que significa que estan pescando |
| length_m |
longitud en metros del barco |
| matched_ca |
Aqui solo aparecen los que hicieron match para decir
que estaban pescando |
| overpasses |
Esto CREO que es el numero de veces que el satelite
paso sobre ese punto durante el estudio 2017-2021 |
| Shape_Length |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| Shape_Area |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
Consideraciones sobre los datasets
- La zona 7 no tiene las columnas field,
field1 y area_hseas;
- Los tipos de columnas están iguales;
- La zona 12 posee 82 días a más de muestreo;
- La zona 7, tienes más sítios muestreados;
- En la zona 7, la juri_zone 0514, Oman, posee los
name Gulf of Masirah y Muscat.
Todavía, en la zona 12, el número de juri_zone y
name son iguales. Confirmar si es un
error;
Note 1: había piensado en crear una columna llamada
n_regions para tener el número de regiones que posee
cada ISRA, todavía solamente hay 5 registros de ISRAS que poseen más de
una region.
sat_data_r07 %>%
mutate(n_regions = case_when(region == unique(sat_data_r07$region)[1] ~ 1,
region == unique(sat_data_r07$region)[2] ~ 2,
region == unique(sat_data_r07$region)[3] ~ 3,
region == unique(sat_data_r07$region)[4] ~ 4)) %>% count(n_regions)
## # A tibble: 4 × 2
## n_regions n
## <dbl> <int>
## 1 1 1651
## 2 2 1
## 3 3 1
## 4 4 3
Definicion de columnas
El dataset final para satelite fue definido con las columnas:
objectid, code, timestamp, datetime, date, year, year_month, isra_zone,
jurisdicti, juri_zone, juri_code, name, region, criteria, subcriteri,
query_crite, query_subcr, min_depth, max_depth, order, family, lat, lon,
fishing_sc, length_m, matched_ca, overpasses
sat_data <- full_join(sat_data_r07, sat_data_r12) %>%
select(objectid, code, timestamp, datetime, date, year, year_month,
isra_zone, jurisdicti, juri_zone, juri_code, name, region,
criteria, subcriteri, query_crite, query_subcr, min_depth,
max_depth, order, family, lat, lon, fishing_sc, length_m,
matched_ca, overpasses)
head(sat_data)
## # A tibble: 6 × 27
## objectid code timestamp datetime date year year_month
## <dbl> <chr> <chr> <dttm> <date> <dbl> <yearmon>
## 1 1 ISRA7ARE02… <NA> NA NA NA <NA>
## 2 2 ISRA7ARE02… <NA> NA NA NA <NA>
## 3 3 ISRA7ARE03… <NA> NA NA NA <NA>
## 4 4 ISRA7ARE03… 2021-06-… 2021-06-06 02:15:51 2021-06-06 2021 jun 2021
## 5 5 ISRA7ARE03… 2018-05-… 2018-05-10 02:23:42 2018-05-10 2018 mai 2018
## 6 6 ISRA7ARE03… 2019-04-… 2019-04-20 14:33:08 2019-04-20 2019 abr 2019
## # ℹ 20 more variables: isra_zone <chr>, jurisdicti <chr>, juri_zone <chr>,
## # juri_code <chr>, name <chr>, region <chr>, criteria <chr>,
## # subcriteri <chr>, query_crite <chr>, query_subcr <chr>, min_depth <dbl>,
## # max_depth <dbl>, order <chr>, family <chr>, lat <dbl>, lon <dbl>,
## # fishing_sc <dbl>, length_m <dbl>, matched_ca <chr>, overpasses <dbl>
1.1.3 GPS Data - Zone 7
# RECORDAR: list.files no ejectuta si algunos de los archivos de Excel está abierto
gps_data_r7 <- list.files(path = "./data/Fishing Region 07 AIS/", pattern = "\\.xlsx$", full.names = TRUE) %>%
map_df(~ readxl::read_excel(.x, col_types = c("text", "numeric", "numeric", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "text", "numeric", "numeric", "text", "text", "text", "text", "numeric", "numeric", "date", "text", "text", "text", "numeric", "numeric", "numeric"))) %>%
janitor::clean_names() %>%
mutate(isra_code = substr(code, 1, 4),
isra_zone = substr(code, 5, 5),
juri_zone = substr(code, 6, 8),
juri_code = substr(code, 9, 12),
date = as_date(time_range),
year = year(date),
year_month = zoo::as.yearmon(date)) %>%
relocate(starts_with('isra'), .before = 'name') %>%
relocate(starts_with('juri'), .before = 'jurisdicti') %>%
relocate(objectid, .before = 'objectid_1') %>%
relocate(c(time_range, date, year_month), .before = 'objectid') %>%
mutate(time_range = as.character(time_range))
head(gps_data_r7)
## # A tibble: 6 × 35
## time_range date year_month objectid objectid_1 join_count target_fid
## <chr> <date> <yearmon> <chr> <chr> <dbl> <dbl>
## 1 <NA> NA <NA> <NA> 1 0 0
## 2 <NA> NA <NA> <NA> 2 0 1
## 3 <NA> NA <NA> <NA> 3 0 2
## 4 <NA> NA <NA> <NA> 4 0 3
## 5 <NA> NA <NA> <NA> 5 0 4
## 6 2014-05-01 2014-05-01 mai 2014 1 6 1 5
## # ℹ 28 more variables: join_fid <dbl>, code <chr>, status <chr>,
## # isra_code <chr>, isra_zone <chr>, name <chr>, jurisdicti <chr>,
## # juri_zone <chr>, juri_code <chr>, region <chr>, criteria <chr>,
## # subcriteri <chr>, query_crite <chr>, query_subcr <chr>, min_depth <dbl>,
## # max_depth <dbl>, order <chr>, family <chr>, url <chr>, lat <dbl>,
## # lon <dbl>, flag <chr>, geartype <chr>, vessel_i_ds <chr>, apparent_f <dbl>,
## # shape_length <dbl>, shape_area <dbl>, year <dbl>
Hay que verificar las diferencias en el formato de fechas antes de
dar merge en los datasets
skim(gps_data_r7)
Data summary
| Name |
gps_data_r7 |
| Number of rows |
407406 |
| Number of columns |
35 |
| _______________________ |
|
| Column type frequency: |
|
| character |
23 |
| Date |
1 |
| numeric |
11 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| time_range |
154165 |
0.62 |
10 |
10 |
0 |
84 |
0 |
| year_month |
154165 |
0.62 |
4 |
16 |
0 |
84 |
0 |
| objectid |
895 |
1.00 |
1 |
5 |
0 |
64349 |
0 |
| objectid_1 |
0 |
1.00 |
1 |
5 |
0 |
65274 |
0 |
| code |
0 |
1.00 |
12 |
12 |
0 |
124 |
0 |
| status |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_code |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_zone |
0 |
1.00 |
1 |
1 |
0 |
1 |
0 |
| name |
0 |
1.00 |
4 |
35 |
0 |
125 |
0 |
| jurisdicti |
0 |
1.00 |
4 |
84 |
0 |
33 |
0 |
| juri_zone |
0 |
1.00 |
3 |
3 |
0 |
26 |
0 |
| juri_code |
0 |
1.00 |
4 |
4 |
0 |
124 |
0 |
| region |
0 |
1.00 |
20 |
57 |
0 |
4 |
0 |
| criteria |
0 |
1.00 |
26 |
121 |
0 |
13 |
0 |
| subcriteri |
0 |
1.00 |
2 |
214 |
0 |
38 |
0 |
| query_crite |
0 |
1.00 |
1 |
10 |
0 |
8 |
0 |
| query_subcr |
0 |
1.00 |
2 |
22 |
0 |
23 |
0 |
| order |
0 |
1.00 |
11 |
100 |
0 |
33 |
0 |
| family |
0 |
1.00 |
8 |
173 |
0 |
63 |
0 |
| url |
0 |
1.00 |
51 |
81 |
0 |
124 |
0 |
| flag |
5658 |
0.99 |
3 |
3 |
0 |
85 |
0 |
| geartype |
895 |
1.00 |
7 |
18 |
0 |
17 |
0 |
| vessel_i_ds |
895 |
1.00 |
1 |
2 |
0 |
50 |
0 |
Variable type: Date
| date |
154165 |
0.62 |
2014-01-01 |
2023-12-01 |
2018-11-01 |
84 |
Variable type: numeric
| join_count |
0 |
1.00 |
1.00 |
0.05 |
0.00 |
1.00 |
1.00 |
1.00 |
1.00 |
▁▁▁▁▇ |
| target_fid |
0 |
1.00 |
62.99 |
48.82 |
0.00 |
7.00 |
71.00 |
112.00 |
124.00 |
▇▁▂▁▇ |
| join_fid |
0 |
1.00 |
24112.64 |
16270.51 |
-1.00 |
10053.25 |
21914.00 |
36394.00 |
64349.00 |
▇▆▆▃▂ |
| min_depth |
0 |
1.00 |
1.89 |
18.63 |
0.00 |
0.00 |
0.00 |
0.00 |
220.00 |
▇▁▁▁▁ |
| max_depth |
0 |
1.00 |
389.44 |
505.25 |
3.00 |
180.00 |
256.00 |
300.00 |
1928.00 |
▇▁▁▁▁ |
| lat |
895 |
1.00 |
-14.75 |
20.03 |
-45.20 |
-33.00 |
-9.90 |
-2.20 |
29.90 |
▇▂▇▂▂ |
| lon |
895 |
1.00 |
55.67 |
16.03 |
18.80 |
46.40 |
50.90 |
72.50 |
86.80 |
▁▃▇▂▅ |
| apparent_f |
895 |
1.00 |
7.03 |
25.31 |
0.00 |
1.33 |
2.74 |
6.01 |
1864.13 |
▇▁▁▁▁ |
| shape_length |
0 |
1.00 |
50.50 |
21.27 |
0.01 |
42.43 |
54.45 |
57.58 |
91.28 |
▃▂▇▇▂ |
| shape_area |
0 |
1.00 |
75.22 |
52.11 |
0.00 |
16.26 |
64.04 |
125.88 |
156.32 |
▆▁▇▁▇ |
| year |
154165 |
0.62 |
2019.56 |
3.08 |
2014.00 |
2017.00 |
2018.00 |
2023.00 |
2023.00 |
▂▃▃▁▇ |
1.1.4 GPS Data - Zone 12
# RECORDAR: list.files no ejectuta si algunos de los archivos de Excel está abierto
gps_data_r12 <- list.files(path = "./data/Fishing Region 12 AIS/", pattern = "\\.xlsx$", full.names = TRUE) %>%
map_df(~ readxl::read_excel(.x, col_types = c("text", "numeric", "numeric", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "text", "numeric", "numeric", "text", "text", "text", "text", "numeric", "numeric", "guess", "text", "text", "text", "numeric", "numeric", "numeric"))) %>%
janitor::clean_names() %>%
mutate(isra_code = substr(code, 1, 4),
isra_zone = substr(code, 5, 6),
juri_zone = substr(code, 7, 9),
juri_code = substr(code, 10, 13),
date = ym(time_range),
year = year(date),
year_month = zoo::as.yearmon(date)) %>%
relocate(starts_with('isra'), .before = 'name') %>%
relocate(starts_with('juri'), .before = 'jurisdicti') %>%
relocate(objectid, .before = 'objectid_1') %>%
relocate(c(time_range, date, year_month), .before = 'objectid')
head(gps_data_r12)
## # A tibble: 6 × 35
## time_range date year_month objectid objectid_1 join_count target_fid
## <chr> <date> <yearmon> <chr> <chr> <dbl> <dbl>
## 1 2014-03 2014-03-01 mar 2014 105 1 1 0
## 2 2014-03 2014-03-01 mar 2014 107 2 1 0
## 3 2014-03 2014-03-01 mar 2014 109 3 1 0
## 4 2014-03 2014-03-01 mar 2014 110 4 1 0
## 5 2014-03 2014-03-01 mar 2014 111 5 1 0
## 6 2014-03 2014-03-01 mar 2014 113 6 1 0
## # ℹ 28 more variables: join_fid <dbl>, code <chr>, status <chr>,
## # isra_code <chr>, isra_zone <chr>, name <chr>, jurisdicti <chr>,
## # juri_zone <chr>, juri_code <chr>, region <chr>, criteria <chr>,
## # subcriteri <chr>, query_crite <chr>, query_subcr <chr>, min_depth <dbl>,
## # max_depth <dbl>, order <chr>, family <chr>, url <chr>, lat <dbl>,
## # lon <dbl>, flag <chr>, geartype <chr>, vessel_i_ds <chr>, apparent_f <dbl>,
## # shape_length <dbl>, shape_area <dbl>, year <dbl>
skim(gps_data_r12)
Data summary
| Name |
gps_data_r12 |
| Number of rows |
319544 |
| Number of columns |
35 |
| _______________________ |
|
| Column type frequency: |
|
| character |
23 |
| Date |
1 |
| numeric |
11 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| time_range |
317 |
1.00 |
7 |
7 |
0 |
119 |
0 |
| year_month |
317 |
1.00 |
4 |
16 |
0 |
119 |
0 |
| objectid |
317 |
1.00 |
1 |
5 |
0 |
41301 |
0 |
| objectid_1 |
0 |
1.00 |
1 |
5 |
0 |
52242 |
0 |
| code |
0 |
1.00 |
12 |
13 |
0 |
65 |
0 |
| status |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_code |
0 |
1.00 |
4 |
4 |
0 |
1 |
0 |
| isra_zone |
0 |
1.00 |
2 |
2 |
0 |
2 |
0 |
| name |
0 |
1.00 |
6 |
62 |
0 |
65 |
0 |
| jurisdicti |
0 |
1.00 |
4 |
100 |
0 |
18 |
0 |
| juri_zone |
0 |
1.00 |
3 |
3 |
0 |
13 |
0 |
| juri_code |
0 |
1.00 |
3 |
4 |
0 |
65 |
0 |
| region |
0 |
1.00 |
34 |
34 |
0 |
1 |
0 |
| criteria |
0 |
1.00 |
26 |
121 |
0 |
8 |
0 |
| subcriteri |
0 |
1.00 |
2 |
243 |
0 |
30 |
0 |
| query_crite |
0 |
1.00 |
1 |
10 |
0 |
8 |
0 |
| query_subcr |
0 |
1.00 |
2 |
26 |
0 |
27 |
0 |
| order |
0 |
1.00 |
10 |
128 |
0 |
38 |
0 |
| family |
0 |
1.00 |
9 |
226 |
0 |
55 |
0 |
| url |
0 |
1.00 |
53 |
96 |
0 |
64 |
0 |
| flag |
26895 |
0.92 |
3 |
3 |
0 |
68 |
0 |
| geartype |
317 |
1.00 |
7 |
18 |
0 |
15 |
0 |
| vessel_i_ds |
317 |
1.00 |
1 |
3 |
0 |
92 |
0 |
Variable type: Date
| date |
317 |
1 |
2014-01-01 |
2023-12-01 |
2020-10-01 |
119 |
Variable type: numeric
| join_count |
0 |
1 |
1.00 |
0.03 |
0.00 |
1.00 |
1.00 |
1.00 |
1.00 |
▁▁▁▁▇ |
| target_fid |
0 |
1 |
29.80 |
14.07 |
0.00 |
23.00 |
23.00 |
50.00 |
64.00 |
▁▇▂▃▁ |
| join_fid |
0 |
1 |
14857.44 |
10706.00 |
-1.00 |
5479.00 |
12879.00 |
22522.00 |
41301.00 |
▇▆▅▃▂ |
| min_depth |
0 |
1 |
0.83 |
13.88 |
0.00 |
0.00 |
0.00 |
0.00 |
1650.00 |
▇▁▁▁▁ |
| max_depth |
0 |
1 |
1164.70 |
775.52 |
20.00 |
200.00 |
1112.00 |
1928.00 |
1928.00 |
▅▁▃▁▇ |
| lat |
317 |
1 |
0.27 |
7.87 |
-43.70 |
-4.50 |
1.00 |
4.10 |
31.50 |
▁▁▇▇▁ |
| lon |
317 |
1 |
-91.39 |
11.28 |
-116.70 |
-98.50 |
-90.20 |
-81.10 |
-70.50 |
▃▃▇▇▅ |
| apparent_f |
317 |
1 |
7.75 |
36.69 |
0.00 |
1.01 |
2.20 |
5.62 |
4293.34 |
▇▁▁▁▁ |
| shape_length |
0 |
1 |
95.93 |
84.57 |
0.04 |
23.87 |
54.14 |
208.76 |
208.76 |
▇▅▁▁▇ |
| shape_area |
0 |
1 |
117.67 |
100.17 |
0.00 |
20.35 |
95.59 |
246.27 |
246.27 |
▇▅▁▁▇ |
| year |
317 |
1 |
2019.96 |
2.38 |
2014.00 |
2018.00 |
2020.00 |
2022.00 |
2023.00 |
▂▃▅▇▇ |
janitor::compare_df_cols(gps_data_r7, gps_data_r12, strict_description = T)
## column_name gps_data_r7 gps_data_r12
## 1 apparent_f numeric numeric
## 2 code character character
## 3 criteria character character
## 4 date Date Date
## 5 family character character
## 6 flag character character
## 7 geartype character character
## 8 isra_code character character
## 9 isra_zone character character
## 10 join_count numeric numeric
## 11 join_fid numeric numeric
## 12 juri_code character character
## 13 juri_zone character character
## 14 jurisdicti character character
## 15 lat numeric numeric
## 16 lon numeric numeric
## 17 max_depth numeric numeric
## 18 min_depth numeric numeric
## 19 name character character
## 20 objectid character character
## 21 objectid_1 character character
## 22 order character character
## 23 query_crite character character
## 24 query_subcr character character
## 25 region character character
## 26 shape_area numeric numeric
## 27 shape_length numeric numeric
## 28 status character character
## 29 subcriteri character character
## 30 target_fid numeric numeric
## 31 time_range character character
## 32 url character character
## 33 vessel_i_ds character character
## 34 year numeric numeric
## 35 year_month yearmon yearmon
| OBJECTID_1 |
Numero de fila |
| Join_Count |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| TARGET_FID |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| JOIN_FID |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| code |
Codigo numerico de la ISRA (poligono) |
| Status |
Todos los poligonos en el mapa son ISRAS entonces
esta columna tampoco sirve |
| Name |
Nombre particular de cada ISRA |
| Jurisdicti |
Pais en el que esta la ISRA |
| Region |
Region del mundo, en este caso van a ser 3 regiones
(12 Central and South American Pacific, 7 Western Indian Ocean, 9
Asia) |
| Criteria |
Las caracteristicas biologicas y ecologicas que
cumplen las especies que cualificaron para que se determinara la
ISRA |
| Subcriteri |
Las caracteristicas biologicas y ecologicas que
cumplen las especies que cualificaron para que se determinara la
ISRA |
| QueryCrite |
Codigos del criteria |
| QuerySubcr |
Codigos del subcriteria |
| MinDepth |
profundidad minima en metros de la isra (0 es desde
la superficie) |
| MaxDepth |
profundidad maxima de la ISRA |
| Order_ |
Orden de las especies que cualificaron para la ISRA
(QS) |
| Family |
Familia de las especies que cualificaron para la ISRA
(QS) |
| URL |
En esta direccion puedes encontrar el factseet de
cada area donde explican porq ha sido designada como una ISRA |
| OBJECTID |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| Lat |
latitud del evento de pesca |
| Lon |
longitud del evento de pesca |
| Time_Range |
mes y ano del evento de pesca |
| Flag |
bandera del barco de pesca |
| Geartype |
tipo de arte de pesca (esta te tengo que enviar la
lista definitiva porque hay algunas artes de pesca que las vamos a
agrupar en una sola categoria) |
| Vessel_IDs |
Numero de la embarcacion (no sirve) |
| Apparent_F |
tiempo de pesca en horas (este sirve para calcular el
esfuerzo pesquero y es lo que hay que sacar con la ecuacion de los datos
satelitates |
| Shape_Length |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
| Shape_Area |
Estas columnas son generadas por ARCGIS Pro al hacer
el spatial joint (no sirven) |
Consideraciones sobre los datasets
Definicion de columnas
El dataset final para satelite fue definido con las columnas:
objectid, objectid_1, code, time_range, date, year, year_month,
isra_zone, jurisdicti, juri_zone, juri_code, name, region, criteria,
subcriteri, query_crite, query_subcr, min_depth, max_depth, order,
family, lat, lon, geartype, vessel_i_ds, apparent_f
gps_data <- full_join(gps_data_r7, gps_data_r12) %>%
select(objectid, objectid_1, code, time_range, date, year, year_month,
isra_zone, jurisdicti, juri_zone, juri_code, name, region,
criteria, subcriteri, query_crite, query_subcr, min_depth,
max_depth, order, family, lat, lon, geartype, vessel_i_ds,
apparent_f)
head(gps_data)
## # A tibble: 6 × 26
## objectid objectid_1 code time_range date year year_month isra_zone
## <chr> <chr> <chr> <chr> <date> <dbl> <yearmon> <chr>
## 1 <NA> 1 ISRA7ARE… <NA> NA NA <NA> 7
## 2 <NA> 2 ISRA7ARE… <NA> NA NA <NA> 7
## 3 <NA> 3 ISRA7ARE… <NA> NA NA <NA> 7
## 4 <NA> 4 ISRA7ARE… <NA> NA NA <NA> 7
## 5 <NA> 5 ISRA7BHR… <NA> NA NA <NA> 7
## 6 1 6 ISRA7BNJ… 2014-05-01 2014-05-01 2014 mai 2014 7
## # ℹ 18 more variables: jurisdicti <chr>, juri_zone <chr>, juri_code <chr>,
## # name <chr>, region <chr>, criteria <chr>, subcriteri <chr>,
## # query_crite <chr>, query_subcr <chr>, min_depth <dbl>, max_depth <dbl>,
## # order <chr>, family <chr>, lat <dbl>, lon <dbl>, geartype <chr>,
## # vessel_i_ds <chr>, apparent_f <dbl>