from datetime import datetime
from urllib import request
from urllib.parse import urlencode
from csep.utils.time_utils import datetime_to_utc_epoch, utc_now_datetime
from csep.core.catalogs import CSEPCatalog
import git
import requests
import hashlib
import os
import sys
import shutil
HOST_CATALOG = "https://service.iris.edu/fdsnws/event/1/query?"
TIMEOUT = 180
def query_gcmt(start_time, end_time, min_magnitude=5.0,
max_depth=None,
catalog_id=None,
min_latitude=None, max_latitude=None,
min_longitude=None, max_longitude=None):
eventlist = _query_gcmt(start_time=start_time,
end_time=end_time,
min_magnitude=min_magnitude,
min_latitude=min_latitude,
max_latitude=max_latitude,
min_longitude=min_longitude,
max_longitude=max_longitude,
max_depth=max_depth)
catalog = CSEPCatalog(data=eventlist,
name='gCMT',
catalog_id=catalog_id,
date_accessed=utc_now_datetime())
return catalog
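
# A minimal usage sketch for ``query_gcmt`` (hedged: the date range and the
# magnitude threshold below are illustrative, and the call needs network
# access to service.iris.edu):
#
#     from datetime import datetime
#
#     start = datetime(2010, 1, 1)
#     end = datetime(2010, 2, 1)
#     catalog = query_gcmt(start, end, min_magnitude=5.5)
#     print(catalog)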
def from_zenodo(record_id, folder, force=False):
"""
Download data from a Zenodo repository.
Downloads if file does not exist, checksum has changed in local respect to
url or force
Args:
record_id: corresponding to the Zenodo repository
folder: where the repository files will be downloaded
force: force download even if file exists and checksum passes
Returns:
"""
# Grab the urls and filenames and checksums
r = requests.get(f"https://zenodo.org/api/records/{record_id}")
download_urls = [f['links']['self'] for f in r.json()['files']]
filenames = [(f['key'], f['checksum']) for f in r.json()['files']]
# Download and verify checksums
for (fname, checksum), url in zip(filenames, download_urls):
full_path = os.path.join(folder, fname)
if os.path.exists(full_path):
value, digest = _check_hash(full_path, checksum)
if value != digest:
print(
f"Checksum is different: re-downloading {fname}"
f" from Zenodo...")
_download_file(url, full_path)
elif force:
print(f"Re-downloading {fname} from Zenodo...")
_download_file(url, full_path)
else:
print(f'Found file {fname}. Checksum OK.')
else:
print(f"Downloading {fname} from Zenodo...")
_download_file(url, full_path)
value, digest = _check_hash(full_path, checksum)
if value != digest:
print("Error: Checksum does not match")
sys.exit(-1)
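
# A minimal usage sketch for ``from_zenodo`` (hedged: the record id below is
# a placeholder, not a real Zenodo record; the target folder must be
# writable). Re-running the call skips files whose checksums still match;
# pass force=True to re-download everything:
#
#     from_zenodo(record_id=1234567, folder='./zenodo_data')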
def from_git(url, path, branch=None, depth=1, **kwargs):
"""
Clones a shallow repository from a git url
Args:
url (str): url of the repository
path (str): path/folder where to clone the repo
branch (str): repository's branch to clone (default: main)
depth (int): depth history of commits
**kwargs: keyword args passed to Repo.clone_from
Returns:
the pygit repository
"""
    kwargs.update({'depth': depth})
    # Ensure GitPython has located a usable git executable
    git.refresh()
    try:
        # Reuse an existing repository at the target path, if present
        repo = git.Repo(path)
    except (git.NoSuchPathError, git.InvalidGitRepositoryError):
        # Otherwise clone, then drop the .git directory so that only a
        # plain snapshot of the files remains
        repo = git.Repo.clone_from(url, path, branch=branch, **kwargs)
        git_dir = os.path.join(path, '.git')
        if os.path.isdir(git_dir):
            shutil.rmtree(git_dir)
return repo
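
# A minimal usage sketch for ``from_git`` (hedged: the URL and path are
# illustrative). Note that after a fresh clone the .git directory is removed,
# so the folder holds a plain snapshot rather than a working clone:
#
#     repo = from_git('https://github.com/SCECcode/pycsep.git',
#                     './pycsep-snapshot', depth=1)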
def _query_gcmt(start_time, end_time, min_magnitude=3.50,
min_latitude=None, max_latitude=None,
min_longitude=None, max_longitude=None,
max_depth=1000, extra_gcmt_params=None):
"""
Return GCMT eventlist from IRIS web service.
For details see "https://service.iris.edu/fdsnws/event/1/"
Args:
start_time (datetime.datetime): start time of catalog query
end_time (datetime.datetime): end time of catalog query
min_magnitude (float): minimum magnitude of query
min_latitude (float): minimum latitude of query
max_latitude (float): maximum latitude of query
min_longitude (float): minimum longitude of query
max_longitude (float): maximum longitude of query
max_depth (float): maximum depth of query
extra_gcmt_params (dict): additional parameters to pass to IRIS search
function
    Returns:
        eventlist: list of tuples ``(id, epoch_time, latitude, longitude,
        depth, magnitude)``
"""
extra_gcmt_params = extra_gcmt_params or {}
eventlist = gcmt_search(minmagnitude=min_magnitude,
minlatitude=min_latitude,
maxlatitude=max_latitude,
minlongitude=min_longitude,
maxlongitude=max_longitude,
starttime=start_time.isoformat(),
endtime=end_time.isoformat(),
maxdepth=max_depth, **extra_gcmt_params)
return eventlist
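
# A hedged sketch of ``extra_gcmt_params``: any additional FDSN query
# parameters are forwarded verbatim to ``gcmt_search`` (``start`` and ``end``
# below are illustrative datetimes):
#
#     _query_gcmt(start, end, extra_gcmt_params={'orderby': 'magnitude'})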
def gcmt_search(format='text',
starttime=None,
endtime=None,
updatedafter=None,
minlatitude=None,
maxlatitude=None,
minlongitude=None,
maxlongitude=None,
latitude=None,
longitude=None,
maxradius=None,
catalog='GCMT',
contributor=None,
maxdepth=1000,
maxmagnitude=10.0,
mindepth=-100,
minmagnitude=0,
offset=1,
orderby='time-asc',
host=None,
verbose=False):
"""Search the IRIS database for events matching input criteria.
This search function is a wrapper around the ComCat Web API described here:
https://service.iris.edu/fdsnws/event/1/
This function returns a list of SummaryEvent objects, described elsewhere in this package.
Args:
starttime (datetime):
Python datetime - Limit to events on or after the specified start time.
endtime (datetime):
Python datetime - Limit to events on or before the specified end time.
updatedafter (datetime):
Python datetime - Limit to events updated after the specified time.
minlatitude (float):
Limit to events with a latitude larger than the specified minimum.
maxlatitude (float):
Limit to events with a latitude smaller than the specified maximum.
minlongitude (float):
Limit to events with a longitude larger than the specified minimum.
maxlongitude (float):
Limit to events with a longitude smaller than the specified maximum.
latitude (float):
Specify the latitude to be used for a radius search.
longitude (float):
Specify the longitude to be used for a radius search.
maxradius (float):
Limit to events within the specified maximum number of degrees
from the geographic point defined by the latitude and longitude parameters.
catalog (str):
Limit to events from a specified catalog.
contributor (str):
Limit to events contributed by a specified contributor.
maxdepth (float):
Limit to events with depth less than the specified maximum.
maxmagnitude (float):
Limit to events with a magnitude smaller than the specified maximum.
mindepth (float):
Limit to events with depth more than the specified minimum.
minmagnitude (float):
Limit to events with a magnitude larger than the specified minimum.
offset (int):
Return results starting at the event count specified, starting at 1.
orderby (str):
Order the results. The allowed values are:
- time order by origin descending time
- time-asc order by origin ascending time
- magnitude order by descending magnitude
- magnitude-asc order by ascending magnitude
        host (str):
            Replace the default IRIS host (service.iris.edu) with a custom host.
    Returns:
        list: list of event tuples ``(id, epoch_time, latitude, longitude,
        depth, magnitude)``.
"""
# getting the inputargs must be the first line of the method!
inputargs = locals().copy()
newargs = {}
for key, value in inputargs.items():
if value is True:
newargs[key] = 'true'
continue
if value is False:
newargs[key] = 'false'
continue
if value is None:
continue
newargs[key] = value
del newargs['verbose']
events = _search_gcmt(**newargs)
return events
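
# A minimal usage sketch for ``gcmt_search`` (hedged: parameter values are
# illustrative; times must be ISO-8601 strings, as ``_query_gcmt`` prepares
# them):
#
#     events = gcmt_search(starttime='2010-01-01T00:00:00',
#                          endtime='2010-02-01T00:00:00',
#                          minmagnitude=5.5)
#     for event_id, epoch, lat, lon, depth, mag in events[:3]:
#         print(event_id, mag)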
def _search_gcmt(**_newargs):
"""
Performs de-query at ISC API and returns event list and access date
"""
paramstr = urlencode(_newargs)
url = HOST_CATALOG + paramstr
fh = request.urlopen(url, timeout=TIMEOUT)
data = fh.read().decode('utf8').split('\n')
fh.close()
eventlist = []
    # The first row is the header; the remaining rows are pipe-delimited
    # event records
    for line in data[1:]:
        line_ = line.split('|')
        if len(line_) != 1:  # skip empty lines
id_ = line_[0]
time_ = datetime.fromisoformat(line_[1])
dt = datetime_to_utc_epoch(time_)
lat = float(line_[2])
lon = float(line_[3])
depth = float(line_[4])
mag = float(line_[10])
eventlist.append((id_, dt, lat, lon, depth, mag))
return eventlist
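
# For reference, a hedged sketch of a pipe-delimited row ``_search_gcmt``
# expects (field layout assumed from the indices parsed above: 0=id, 1=time,
# 2=latitude, 3=longitude, 4=depth, 10=magnitude; other fields are ignored):
#
#     600012345|2010-01-12T21:53:10|18.44|-72.57|13.0|GCMT|GCMT|GCMT|600012345|MW|7.0|GCMT|HAITI REGION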
def _download_file(url: str, filename: str) -> None:
"""
Downloads files (from zenodo)
Args:
url (str): the url where the file is located
filename (str): the filename required.
"""
progress_bar_length = 72
block_size = 1024
r = requests.get(url, stream=True)
total_size = r.headers.get('content-length', False)
    if not total_size:
        # Fall back to a HEAD request when the GET response lacks a
        # Content-Length header
        with requests.head(url) as h:
try:
total_size = int(h.headers.get('Content-Length', 0))
except TypeError:
total_size = 0
else:
total_size = int(total_size)
download_size = 0
if total_size:
print(
f'Downloading file with size of {total_size / block_size:.3f} kB')
else:
        print('Downloading file with unknown size')
with open(filename, 'wb') as f:
for data in r.iter_content(chunk_size=block_size):
download_size += len(data)
f.write(data)
if total_size:
progress = int(
progress_bar_length * download_size / total_size)
sys.stdout.write(
'\r[{}{}] {:.1f}%'.format('█' * progress, '.' *
(progress_bar_length - progress),
100 * download_size / total_size)
)
sys.stdout.flush()
sys.stdout.write('\n')
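
# A minimal usage sketch for ``_download_file`` (hedged: the URL and target
# path are placeholders):
#
#     _download_file('https://example.org/data.bin', '/tmp/data.bin')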
def _check_hash(filename, checksum):
"""
Checks if existing file hash matches checksum from url
"""
algorithm, value = checksum.split(':')
if not os.path.exists(filename):
return value, 'invalid'
h = hashlib.new(algorithm)
with open(filename, 'rb') as f:
while True:
data = f.read(4096)
if not data:
break
h.update(data)
digest = h.hexdigest()
return value, digest
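
# A minimal usage sketch for ``_check_hash`` (hedged: the checksum string
# mirrors Zenodo's ``algorithm:value`` format; the md5 value below is the
# hash of an empty file, used as a placeholder):
#
#     expected, digest = _check_hash('/tmp/data.bin',
#                                    'md5:d41d8cd98f00b204e9800998ecf8427e')
#     if expected != digest:
#         print('Checksum mismatch')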