Here are the steps to download the data. Here, I am using GlobColour as an example:
After receiving an email with a username and a password, you have two options to download the data: i) use FileZilla (follow steps 4 to 15), or ii) use wget (follow steps 16 and 17).
For i) Filezilla, go to the FileZilla webpage and download the version that matches your platform. I use a MacBook Pro, so I downloaded FileZilla Client for Mac OS X.
Note that the username will be different. If you are using an FTP server from another website, like the one I have for SST, you may not need a username and password, so it could look like this instead:
Click “Connect” and you will be connected to the GlobColour server. If an error occurs, make sure you have typed the correct ftp address, username and password.
Once you are connected to the GlobColour FTP server, locate the files you are interested in on the RIGHT side, under “Remote site:” (the left side shows your local files). Say we are interested in global data, the merged product (where all sensors are merged using different algorithms), daily data for Jan 1st 1998. These are the steps you would be following:
As you can see, there are a lot of files within this very specific folder:
To narrow this down, we can use filters to filter OUT what we do not need. To do this, click this icon on the top:
You will see something like this:
Here, click “Edit filter rules”, where you will be able to create your own filters. Click “New” and edit the name. Here, I am using “My filters”.
I am interested in NetCDF data (which ends in “.nc”), the merged products (which begins with “L3m_”), daily data (which contains “DAY”), at 25 km spatial resolution (which contains “25”), CHL Type I (which contains “CHL1”), in which the data have been merged using weighted averaging (which contains “_AV”). You can read more about these features in the Product User Guide. Make sure the drop-down menu is exactly like I have here “Filter out items matching not all of the following” if you want to see all the files with these features, as in the above figure.
Click “OK” and you will be sent back to the filters list. Tick the filter you have just created, and click “Apply”.
The files will start downloading. The time it takes will depend on the amount and type of files you have selected.
For ii) wget, open a terminal window and go to the folder where you want the files to be stored.
Type the following command, but make sure the syntax matches the type of file you want. You can read more about the file naming structure GlobColour uses in their Product User Guide. The following command matches the example I am using for this tutorial.
# Flags used below:
#   -r        recurse into the sub-directories of the given FTP folder
#   -l10      limit the recursion depth to 10 levels
#   -t10      retry each file up to 10 times
#   -A "..."  keep only files whose names match the pattern
#   -w3       wait 3 seconds between retrievals
#   -Q1000m   stop once the 1000 MB download quota is reached
wget -r --user=your_username --password='your_password' -l10 -t10 -A "L3m*_25_AV*CHL1*.nc" -w3 -Q1000m ftp://ftp.hermes.acri.fr/GLOB/merged/day/
and the download will start.
## Script to move and rename files
# Get the packages needed for doing this.
# This requires tidync, which is currently only on GitHub.
# If you don't have it, run this:
# devtools::install_github("hypertidy/tidync", dependencies = TRUE)
library(stringr)
library(tidyverse)
library(tidync)
library(ncdf4)
# Set your working directory. In this case, this is where you stored the
# downloaded files. In my case, this is the path below.
# NOTE: "~" already expands to the home directory, so the path is written in
# absolute form instead of "~/Users/<name>/...".
setwd("/Users/sofiaferreira/Downloads/ftp.hermes.acri.fr/GLOB/merged/8-day/")
# Read in a map of the world.
world_map <- map_data("world")
# Get all of the chl files and create a clean data frame detailing them.
# The listing is recursive, so File is a relative path that includes the
# sub-directories; the date is therefore parsed from the bare file name.
# Assumes GlobColour names of the form "L3m_YYYYMMDD..." (year at characters
# 5-8, month at 9-10, day at 11-12) - confirm against the Product User Guide.
files_names <- tibble(File = dir(pattern = "\\.nc$", recursive = TRUE)) %>%
  mutate(
    Name = basename(File),
    Year = as.integer(str_sub(Name, 5, 8)),
    Month = as.integer(str_sub(Name, 9, 10)),
    Day = as.integer(str_sub(Name, 11, 12)),
    # Drop the "L3m_" prefix so the copied file names start with the year.
    NewFile = str_replace(Name, "^L3m_", "")
  ) %>%
  select(-Name)
# Set the directory where you want to move the files.
out_dir <- "/Users/sofiaferreira/Downloads/"
# Copy every file into out_dir under its new name. file.copy() is vectorised,
# so no loop is needed, and it also copes with an empty file listing.
copied <- file.copy(files_names$File, str_c(out_dir, files_names$NewFile))
if (!all(copied)) {
  # file.copy() does not overwrite by default; report what was skipped.
  warning(
    sum(!copied), " file(s) could not be copied to ", out_dir,
    call. = FALSE
  )
}
## Set path to the directory where the files were moved to and list all NetCDF
## files within it. Only names ending in ".nc" are kept, so other files in the
## folder (likely, if you stored them in your "Downloads" directory) are ignored.
# NOTE: "~" already expands to the home directory, so the path is written in
# absolute form instead of "~/Users/<name>/...".
setwd("/Users/sofiaferreira/Downloads/")
files <- list.files(pattern = "\\.nc$")
if (length(files) == 0) {
  stop("No .nc files found in ", getwd(), call. = FALSE)
}
## Open one file to check latitude and longitude coordinates.
nc <- nc_open(files[1])
## Subset latitudes. In this example, I am looking into a box in the North East
## Pacific. The latitudes are stored reversed so that they line up with the
## reversed second dimension of the data read in the loop below.
temp_lat <- nc$dim$lat$vals
lat_inds <- which(temp_lat >= 25 & temp_lat <= 70)
lats <- rev(temp_lat[lat_inds])
## Subset longitudes.
temp_lon <- nc$dim$lon$vals
lon_inds <- which(temp_lon >= -180 & temp_lon <= -110)
lons <- temp_lon[lon_inds]
## Close file.
nc_close(nc)
## Set years of interest.
years <- 1998:2016
## Create empty array: longitude x latitude x file-within-year x year.
chls <- array(NA, dim = c(length(lons), length(lats), 366, length(years)))
## Gather data.
for (yr in seq_along(years)) {
  ## Gather files for specific year (file names start with the year).
  temp_ind <- which(substr(files, 1, 4) == years[yr])
  ## seq_along() skips years without files instead of iterating over c(1, 0).
  for (te in seq_along(temp_ind)) {
    print(c(years[yr], temp_ind[te]))
    nc <- nc_open(files[temp_ind[te]])
    ## Read only the lon/lat box, then reverse the latitude dimension.
    chls[, , te, yr] <- ncvar_get(
      nc, "CHL1_mean",
      start = c(lon_inds[1], lat_inds[1]),
      count = c(length(lon_inds), length(lat_inds))
    )[, length(lats):1]
    nc_close(nc)
  }
  rm(temp_ind)
}
## Save files.
save(chls, file = "chls.rda")
save(lons, file = "lons.rda")
save(lats, file = "lats.rda")
The beginning is essentially the same as in the previous script.
## Script to move and rename files
# Get the packages needed for doing this.
# This requires tidync, which is currently only on GitHub.
# If you don't have it, run this:
# devtools::install_github("hypertidy/tidync", dependencies = TRUE)
library(stringr)
library(tidyverse)
library(tidync)
library(ncdf4)
# Set your working directory. In this case, this is where you stored the
# downloaded files. In my case, this is the path below.
# NOTE: "~" already expands to the home directory, so the path is written in
# absolute form instead of "~/Users/<name>/...".
setwd("/Users/sofiaferreira/Downloads/ftp.hermes.acri.fr/GLOB/merged/8-day/")
# Read in a map of the world.
world_map <- map_data("world")
# Get all of the chl files and create a clean data frame detailing them.
# The listing is recursive, so File is a relative path that includes the
# sub-directories; the date is therefore parsed from the bare file name.
# Assumes GlobColour names of the form "L3m_YYYYMMDD..." (year at characters
# 5-8, month at 9-10, day at 11-12) - confirm against the Product User Guide.
files_names <- tibble(File = dir(pattern = "\\.nc$", recursive = TRUE)) %>%
  mutate(
    Name = basename(File),
    Year = as.integer(str_sub(Name, 5, 8)),
    Month = as.integer(str_sub(Name, 9, 10)),
    Day = as.integer(str_sub(Name, 11, 12)),
    # Drop the "L3m_" prefix so the copied file names start with the year.
    NewFile = str_replace(Name, "^L3m_", "")
  ) %>%
  select(-Name)
# Set the directory where you want to move the files.
out_dir <- "/Users/sofiaferreira/Downloads/"
# Copy every file into out_dir under its new name. file.copy() is vectorised,
# so no loop is needed, and it also copes with an empty file listing.
copied <- file.copy(files_names$File, str_c(out_dir, files_names$NewFile))
if (!all(copied)) {
  # file.copy() does not overwrite by default; report what was skipped.
  warning(
    sum(!copied), " file(s) could not be copied to ", out_dir,
    call. = FALSE
  )
}
# Now, create a function to read the netcdf file in and create a long/lat/chl
# data frame.
# Longitudes and latitudes in all netcdf files are the same, so the grid is
# read only once, from the first file.
# The coordinates come straight from the NetCDF dimension values rather than
# from hyper_tibble(): by default hyper_tibble() drops cells in which every
# variable is NA, so its lon/lat columns need not match the full grid.
nc_first <- nc_open(files_names$File[1])
lon_vals <- nc_first$dim$lon$vals
lat_vals <- nc_first$dim$lat$vals
nc_close(nc_first)
# as.numeric() flattens a matrix with its first dimension varying fastest.
# Assumes CHL1_mean is stored as (lon, lat) - TODO confirm for your product.
lon <- rep(lon_vals, times = length(lat_vals))
lat <- rep(lat_vals, each = length(lon_vals))

# Function to read in the ncdf file and convert to a data frame.
#
# ff: path to one NetCDF file containing the variable "CHL1_mean".
# Returns a tibble with columns Chl, Longitude and Latitude, one row per grid
# cell. Uses the full-grid `lon` and `lat` vectors defined above, and errors if
# the file's grid size does not match them.
ncdf_df <- function(ff) {
  print(str_c("reading in ", ff))
  nc_raw <- nc_open(ff)
  # Close the file even if reading the variable fails.
  on.exit(nc_close(nc_raw), add = TRUE)
  chl <- as.numeric(ncdf4::ncvar_get(nc_raw, "CHL1_mean"))
  tibble(Chl = chl, Longitude = lon, Latitude = lat)
}

## Now read all files into a single nested data frame.
all_data <- files_names %>%
  mutate(data = purrr::map(File, ncdf_df))

# Check that it worked.
all_data %>%
  filter(Year == 2010) %>%
  unnest(data) %>%
  ggplot(aes(Longitude, Latitude, fill = Chl)) +
  geom_raster() +
  facet_wrap(~Month)

save(all_data, file = "all_data.rda")
I do not have a script to create b) a data frame in MATLAB. If anyone would like to fill in this gap, let me know at asofiaaferreira@gmail.com.
%% Copy nc files
clear all
% Start with a folder and get a list of all subfolders.
% Finds all nc files in that folder and all of its subfolders, and copies
% them into a single destination folder.
% Similar to imageSet() function in the Computer Vision System Toolbox: http://www.mathworks.com/help/vision/ref/imageset-class.html
clc; % Clear the command window.
format long g;
format compact;
% Define a starting folder wherever the files are stored.
start_path = fullfile('/Users/sofiaferreira/Downloads/ftp.hermes.acri.fr/GLOB/merged/8-day');
% Folder the nc files are copied into. This is the folder the array-building
% script reads from, so the copies must land here rather than in start_path.
dest_path = '/Users/sofiaferreira/Downloads';
% Ask user to confirm or change.
topLevelFolder = uigetdir(start_path);
if isequal(topLevelFolder, 0)
    % The user cancelled the dialog.
    return;
end
% Get list of all subfolders.
allSubFolders = genpath(topLevelFolder);
% Parse into a cell array. pathsep is the platform's path-list separator
% (':' on Mac/Linux, ';' on Windows), so drive letters are not split.
listOfFolderNames = strsplit(allSubFolders, pathsep);
listOfFolderNames = listOfFolderNames(~cellfun(@isempty, listOfFolderNames));
numberOfFolders = length(listOfFolderNames);
fullFileNames = {};
baseNames = {};
% Process all nc files in those folders.
for k = 1 : numberOfFolders
    % Get this folder and print it out.
    thisFolder = listOfFolderNames{k};
    fprintf('Processing folder %s\n', thisFolder);
    % Get filenames of all nc files.
    baseFileNames = dir(fullfile(thisFolder, '*.nc'));
    numberOfFiles = length(baseFileNames);
    % Now we have a list of all nc files in this folder.
    if numberOfFiles >= 1
        % Go through all those nc files.
        for f = 1 : numberOfFiles
            fullFileNames{end+1} = fullfile(thisFolder, baseFileNames(f).name);
            baseNames{end+1} = baseFileNames(f).name;
        end
    else
        fprintf('Folder %s has no nc files in it.\n', thisFolder);
    end
end
% Copy every nc file into the destination folder, keeping its base name.
for j = 1:length(fullFileNames)
    copyfile(fullFileNames{j}, fullfile(dest_path, baseNames{j}));
end
% Set path to where all the files were moved to.
nc_path = '/Users/sofiaferreira/Downloads';
cd(nc_path);
% Get info of files/folders in current directory.
list = dir;
% Determine index of files vs folders.
is_file = ~[list.isdir];
% Create cell array of file names.
filenames = {list(is_file).name};
% Check if files are netCDF (name ends in ".nc").
is_nc = endsWith(filenames, '.nc');
% Keep only those which are netCDF.
nc_files = filenames(is_nc);
if isempty(nc_files)
    error('No .nc files found in %s.', nc_path);
end
% Subset latitudes to the box 25N-70N. The kept values are sorted ascending;
% the data read below is flipped along its latitude dimension.
lats = ncread(nc_files{1}, 'lat');
lat_mask = lats >= 25 & lats <= 70;
lat_inds = find(lat_mask);
lats = sort(lats(lat_mask));
% Subset longitudes to the box 180W-110W.
lons = ncread(nc_files{1}, 'lon');
lon_mask = lons >= -180 & lons <= -110;
lon_inds = find(lon_mask);
lons = sort(lons(lon_mask));
years = 1998:2016;
% Longitude x latitude x 8-day period (46 slots per year) x year.
chls = nan(length(lons), length(lats), 46, length(years));
for yr = 1:length(years)
    % Match the year right after the "L3m_" prefix only. Searching the whole
    % name would also match digits inside other dates (e.g. "20120101"
    % contains "2010").
    temp_ind = find(startsWith(nc_files, ['L3m_' int2str(years(yr))]));
    if length(temp_ind) > 46
        error('Found %d files for %d; at most 46 are expected.', ...
            length(temp_ind), years(yr));
    end
    for num_ncs = 1:length(temp_ind)
        disp([years(yr) num_ncs]);
        % Right-align the files in the 46 slots, so a year with fewer files
        % fills the last slots.
        chls(:,:,46-length(temp_ind)+num_ncs, yr) = ...
            flip(ncread(nc_files{temp_ind(num_ncs)},'CHL1_mean',...
            [lon_inds(1) lat_inds(1)],[length(lons) length(lat_inds)]), 2);
    end
end
save('chls.mat', 'chls');
save('lats.mat', 'lats');
save('lons.mat', 'lons');
This is all. Let me know if you have any questions.