#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Retrieve the orbit files (EOF) matching the SAFE repositories found in a directory.

The script walks an input directory looking for ``*.SAFE`` repositories, parses
the sensor, acquisition dates and orbit number out of each SAFE name, selects
the matching EOF files published on the orbit server (first by publication day,
then by validity dates, finally by absolute orbit number) and downloads the
selected files into an output directory.
"""

import argparse
import datetime
import os
import re
import shutil
import sys
import time
import xml.etree.ElementTree

import requests

# HTTP status codes accepted as a successful answer from the server
_HTTP_OK = (200, 201)

# Timeout (in seconds) applied to every request so that a stalled server
# cannot block the script forever
_REQUEST_TIMEOUT = 60

# Timestamp layout shared by SAFE names and EOF file names
_TIMESTAMP_FORMAT = "%Y%m%dT%H%M%S"

# Offsets (in days, relative to the acquisition day) of the server directories
# to inspect for each kind of orbit: precise orbits are published around
# 21 days after the acquisition, restituted orbits within a few hours.
_DAY_OFFSETS = {
    "POEORB": (21, 20, 22, 19, 23),
    "RESORB": (0, 1, 2),
}

# NOTE(security): every request below is sent with verify=False, i.e. without
# checking the TLS certificate of the server. This is kept on purpose (the
# script asks the user to check the proxy/ssl setup beforehand) but it exposes
# the downloads to man-in-the-middle tampering.


def searchForSafe(input_dir):
    """
    Search for SAFE repositories into input_dir (recursively)

    :input_dir: (str) input directory
    :returns: (list) names (not full paths) of the directories ending with ".SAFE"
    """
    list_SAFE = []
    for _root, dirs, _files in os.walk(input_dir):
        list_SAFE.extend(name for name in dirs if name.endswith(".SAFE"))

    return list_SAFE


# Get url for orbits files
def get_url_orbs(orbs_type, sensor, year, month, day,
                 url="https://aux.sentinel1.eo.esa.int/",
                 orb_list=None):
    """
    Get the set of available orbs from url

    :orbs_type: (str|None) either POEORB or RESORB (None is handled as POEORB)
    :sensor: (str) either S1A or S1B
    :year: (str|int) year
    :month: (str|int) month
    :day: (str|int) day in the month
    :url: (str) url where to gather the data, must start with https
    :orb_list: (list|None) optional list extended in place with the urls found
    :returns: (list) urls of the EOF files; an empty list when a request fails
              (orb_list is then left untouched)
    :raises RuntimeError: when url does not start with https
    """
    url = url.strip('/')

    # Precise orbits are tried when no type is given
    if orbs_type is None:
        orbs_type = "POEORB"

    tacquisition0 = datetime.datetime(int(year), int(month), int(day))

    if not url.startswith("https"):
        raise RuntimeError("malformed url or too recent date, should start by https://")

    # Pattern to find the quoted EOF file names into the response; a match may
    # not run across a quote, so it always stays inside a single name
    pattern = re.compile('"' + re.escape(sensor) + '_[^"]*?_'
                         + re.escape(orbs_type) + r'_[^"]*?\.EOF"')

    found = []
    # An unknown orbs_type has no directory to inspect and yields an empty list
    for offset in _DAY_OFFSETS.get(orbs_type, ()):
        tacquisition = tacquisition0 + datetime.timedelta(days=offset)

        # Build the url with "ESA format" : <url>/<type>/<yyyy>/<mm>/<dd>/
        url_orb = "{}/{}/{}/{:02d}/{:02d}/".format(url, orbs_type,
                                                   tacquisition.year,
                                                   tacquisition.month,
                                                   tacquisition.day)

        try:
            # verify = False to avoid ssl issues (see the security note above)
            response = requests.get(url_orb, verify=False,
                                    timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print("exception for url {} ".format(e))
            return []

        if response.status_code not in _HTTP_OK:
            print("problem with the url {} ".format(url_orb))
            continue

        # url_orb already ends with "/" : the file name is appended directly
        found.extend(url_orb + name.strip().strip('"').strip()
                     for name in pattern.findall(response.text))

    # Remove duplicates while keeping a deterministic order
    found = list(dict.fromkeys(found))

    # Callers may either read the returned list or pass a list to be extended
    if orb_list is not None:
        orb_list += found

    return found


# Get contents of orbit files on disk
def get_orb_content_ondisk(orb_list, output_dir):
    """
    Retrieve on disk the set of selected orbs

    A failure on one file is reported and does not prevent the retrieval of
    the following ones.

    :orb_list: (list) list of urls (to get orbit files)
    :output_dir: (str) output directory (on disk) to put orbit files
    :returns: None
    """
    for orb in orb_list:
        try:
            # Get the content of eof_file; the response is closed on exit
            with requests.get(orb, verify=False, stream=True,
                              timeout=_REQUEST_TIMEOUT) as response:

                if response.status_code not in _HTTP_OK:
                    print("problem with the url {} ".format(orb))
                    continue

                # Have the raw stream undo any gzip/deflate transfer encoding,
                # otherwise compressed bytes would be written as the EOF file
                response.raw.decode_content = True

                # Copy the content on disk
                out_path = os.path.join(output_dir, os.path.basename(orb))
                with open(out_path, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)

        # Deliberately broad (best effort per file): reading the raw stream
        # and writing the file can raise errors that are not requests ones
        except Exception as e:
            print("exception for url {} ".format(e))


def _to_timestamp(stamp):
    """Convert a yyyymmddThhmmss string into seconds since the epoch (local time)."""
    return time.mktime(time.strptime(stamp, _TIMESTAMP_FORMAT))


def _parse_safe_name(safe_dir):
    """
    Extract (dates, sensors, orbit_number) from a SAFE name.

    dates and sensors are the lists of all matches found into the name;
    orbit_number is the first 6-digit field enclosed by underscores.

    :raises IndexError: when the name holds no orbit field
    """
    dates = re.findall(r"\d{8}T\d{6}", safe_dir)
    sensors = re.findall(r"S1.", safe_dir)
    orbit_number = int(re.findall(r"_\d{6}_", safe_dir)[0][1:-1])
    return dates, sensors, orbit_number


def _eof_validity(eof_url):
    """
    Return the (start, end) timestamps encoded in the last two fields of an
    EOF file name (..._V<start>_<end>.EOF).
    """
    fields = eof_url.split('/')[-1].split(".EOF")[0].split('_')
    start_eofDate = fields[-2].split("V")[1]
    end_eofDate = fields[-1]
    return _to_timestamp(start_eofDate), _to_timestamp(end_eofDate)


def _eof_lists_orbit(eof_url, orbit_number):
    """
    Download an EOF file and tell whether orbit_number is one of the absolute
    orbits it lists. An unsuccessful HTTP answer is reported and gives False.
    """
    # verify = False to avoid ssl issues (see the security note above)
    response = requests.get(eof_url, verify=False, timeout=_REQUEST_TIMEOUT)

    if response.status_code not in _HTTP_OK:
        print("problem with the url {} ".format(eof_url))
        return False

    tree = xml.etree.ElementTree.fromstring(response.text)
    orbit_elts = tree.findall("Data_Block/List_of_OSVs/OSV/Absolute_Orbit")

    return orbit_number in {int(elt.text) for elt in orbit_elts}


def main():
    """
    Command line entry point : select and retrieve the EOF files matching the
    SAFE repositories found into the input directory.
    """
    ###### Get the main argument : indir and outdir ######
    parser = argparse.ArgumentParser()
    parser.add_argument("indir", help="input directory to search SAFE repository")
    parser.add_argument("outdir", help="output directory to store EOF files")
    parser.add_argument("--type", "-t", type=str, choices=['POEORB', 'RESORB'],
                        default='POEORB',
                        help=("The type of data request. If set should be either POEORB or RESORB; "
                              "if not set test first POEORB and backoff to RESORB else"))
    args = parser.parse_args()

    # Check input and output directory (if exist)
    for directory in (args.indir, args.outdir):
        if not os.path.exists(directory):
            print(directory + " does not exists")
            sys.exit()

    # Require a check of user for connexion parameters (proxy, ...)
    answer = input("Check all your parameters for internet connexion (proxy, ssl ...), "
                   "before continuing. "
                   "\nReady to continue (yes/no) :")

    if answer != "yes":
        print("Set all your parameters for internet connexion to relaunch this script")
        sys.exit()

    list_InSAFE = searchForSafe(args.indir)

    if not list_InSAFE:
        print("None SAFE repository was found into {}".format(args.indir))
        sys.exit()

    # For each SAFE found, retrieve URLs of EOF files (with date correspondance)
    orb_list = []

    for safeDir in list_InSAFE:

        try:
            dates, sensor, orbit_number = _parse_safe_name(safeDir)
        except IndexError as e:
            print("Safe name does not match with usual pattern and causes an exception : "
                  "{}".format(e))
            # Next safe
            continue

        # A SAFE name is expected to hold two dates (start/end) and one sensor
        if len(dates) != 2 or len(sensor) != 1:
            print("Wrong SAFE format for {}".format(safeDir))
            # Next safe
            continue

        # First selection for S1A and S1B sensor at corresponding dates
        orb_list_firstSelect = get_url_orbs(args.type, sensor[0], dates[0][0:4],
                                            dates[0][4:6], dates[0][6:8])

        # Second selection with times : keep the EOF files whose validity
        # period overlaps the acquisition of the current image
        time_start = _to_timestamp(dates[0])
        time_end = _to_timestamp(dates[1])

        orb_list_secondSelect = []
        for orb in orb_list_firstSelect:
            time_start_eofDate, time_end_eofDate = _eof_validity(orb)
            if time_end > time_start_eofDate and time_start <= time_end_eofDate:
                orb_list_secondSelect.append(orb)

        # Last selection/check with orbit : the EOF file has to list the
        # absolute orbit number of the SAFE
        for orb in orb_list_secondSelect:
            try:
                if _eof_lists_orbit(orb, orbit_number):
                    orb_list.append(orb)
            except Exception as e:
                # Any failure here (network, malformed XML, ...) stops the script
                print("Exception {}".format(e))
                sys.exit()

    print("Selected orbit files (Ready to be retrieved on disk) : ")
    print(orb_list)

    # Get selection into output_dir
    get_orb_content_ondisk(orb_list, args.outdir)


###################
###### Main #######
###################
if __name__ == "__main__":
    main()