Source code for seismic.gps_corrections.relative_tt_residuals_plotter

#!/bin/env python
"""
Bulk analysis script for analysing relative traveltime residuals from a pick ensemble
for the purpose of identifying time periods of GPS clock error in specific stations.

Example usage, which plots 7X.MA11 and 7X.MA12 residuals relative to all common events on
AU network:

``relative_tt_residuals_plotter.py --network1=AU --networks2=7X --stations2="MA11,MA12" /c/data_cache/Picks/20190320/ensemble.p.txt``

"""

import os
import sys
import datetime
import logging

import numpy as np
import pandas as pd
from pandas.plotting import register_matplotlib_converters, deregister_matplotlib_converters
import pytz
import matplotlib
import matplotlib.dates
import matplotlib.pyplot as plt

import click
# Progress bar helper to indicate that slow tasks have not stalled
import tqdm
import obspy

if sys.version_info[0] < 3:
    import pathlib2 as pathlib  # pylint: disable=import-error
else:
    import pathlib  # pylint: disable=import-error

from seismic.gps_corrections.picks_reader_utils import (read_picks_ensemble,
                                                        get_network_stations,
                                                        compute_matching_network_mask,
                                                        generate_large_events_catalog)

# pylint: disable=invalid-name, fixme, too-many-locals, too-many-statements
# pylint: disable=attribute-defined-outside-init, logging-format-interpolation, logging-not-lazy

logging.basicConfig()

# Priority order of trusted channels
CHANNEL_PREF_NO_SHZ = ['HHZ', 'HHZ_10', 'H?Z', 'BHZ_00', 'BHZ', 'BHZ_10', 'B?Z', 'EHZ']
CHANNEL_PREF_BALANCED = CHANNEL_PREF_NO_SHZ + ['S?Z', 'SHZ']
CHANNEL_PREF_GREEDY = CHANNEL_PREF_BALANCED + ['???', '?']
CHANNEL_PREF = CHANNEL_PREF_BALANCED

# Default filter options
DEFAULT_MIN_DISTANCE = 30.0
DEFAULT_MAX_DISTANCE = 90.0
DEFAULT_MIN_EVENT_SNR = 10
DEFAULT_CWT_CUTOFF = 15
DEFAULT_SLOPE_CUTOFF = 3
DEFAULT_NSIGMA_CUTOFF = 4
DEFAULT_MIN_EVENT_MAG = 5.5
DEFAULT_STRICT_FILTERING = True

# IRIS format station catalogs providing additional station metadata to merge with the target network.
# Useful for pulling in additional results from comprehensive networks such as GE, IR and IU.
IRIS_ALTERNATE_STATIONS_FILE = {
    "AU": "AU_irisws-fedcatalog_20190305T012747Z.txt"
    }


[docs]class FilterOptions: """Simple container type for filtering options. """ def __init__(self): # Setting strict_filtering to False will generate a lot more events per station chart, sometimes making it # easier to spot drift. But it may also add many events with significant non-zero residual (e.g. poor picks). self.strict_filtering = DEFAULT_STRICT_FILTERING self.min_event_snr = DEFAULT_MIN_EVENT_SNR self.cwt_cutoff = DEFAULT_CWT_CUTOFF self.slope_cutoff = DEFAULT_SLOPE_CUTOFF self.nsigma_cutoff = DEFAULT_NSIGMA_CUTOFF self.min_event_mag = DEFAULT_MIN_EVENT_MAG self.channel_preference = CHANNEL_PREF
[docs]class DisplayOptions: """Simple container type for display options. """ def __init__(self): self.deployments = None # Historical events to add to the plot self.events = None
[docs]class BatchOptions: """ Simple container type for run time options. """ def __init__(self): # pragma: no cover self.save_file = True self.batch_label = '' # X-axis time range self.x_range = None # Path to folder in which to save the clock errors to csv file self.export_path = None
[docs]def get_iris_station_codes(src_file, original_network): """ Extract the station codes for a given network code from a IRIS query result file. :param src_file: IRIS catalog query result file containing network and station information, in stationtxt format :type src_file: str or path :param original_network: Network code whose station codes will be extracted from IRIS file :type original_network: str :return: Pandas dataframe with station codes as the index and station latitude, longitude as columns :rtype: pandas.DataFrame """ # Get station codes listed in IRIS whose network is original_network. # We need this to ensure we get complete coverage of chosen network, as it is # possible some such codes appear only under other network codes such as IR, GE, etc.. in the event catalog. # Returns a Pandas dataframe consisting of each station code and its mean (latitude, longitude) position. df = pd.read_csv(src_file, header=0, sep='|') df.columns = [c.strip() for c in df.columns.tolist()] orig_net_df = df.loc[(df['Network'] == original_network)] orig_net_df.columns = [c.strip() for c in orig_net_df.columns.tolist()] orig_net_perm_stations = sorted(orig_net_df['Station'].unique()) mean_lat = [] mean_lon = [] for sta in orig_net_perm_stations: mean_lat.append(orig_net_df.loc[(orig_net_df['Station'] == sta), 'Latitude'].mean()) std_dev = orig_net_df.loc[(orig_net_df['Station'] == sta), 'Latitude'].std(ddof=0) assert std_dev < 1.0, "{}: {}".format(sta, std_dev) mean_lon.append(orig_net_df.loc[(orig_net_df['Station'] == sta), 'Longitude'].mean()) std_dev = orig_net_df.loc[(orig_net_df['Station'] == sta), 'Longitude'].std(ddof=0) assert std_dev < 1.0, "{}: {}".format(sta, std_dev) df_dict = {'sta': orig_net_perm_stations, 'lat': mean_lat, 'lon': mean_lon} result_df = pd.DataFrame(df_dict) return result_df.set_index(['sta'])
[docs]def determine_alternate_matching_codes(df, iris_file, original_network): """ Find stations from other networks in df with the same station codes, but different network codes, whose positions match the stations of the same code in the original network. :param df: Pandas dataframe loaded from a pick event ensemble :type df: pandas.DataFrame :param iris_file: IRIS catalog query result file containing network and station information in stationtxt format :type iris_file: str or path :param original_network: Network code whose station codes will be extracted from IRIS file :type original_network: str :return: Matching sequences of network and station codes :rtype: tuple(str), tuple(str) """ log = logging.getLogger(__name__) # Get all the IRIS station codes for the given network matching_network_stn_iris_df = get_iris_station_codes(iris_file, original_network) # Generate mask for all station codes from IRIS that are present in df mask_iris_stns = df['sta'].isin(matching_network_stn_iris_df.index) # Generate mask for all records not belonging to given network mask_not_orig = (df['net'] != original_network) # Generate new dataframe of station codes from IRIS for given network that # are listed under a different network code. df_orig_stns_codes = df.loc[mask_iris_stns & mask_not_orig] # For each non-original network record, compute its distance from the known corresponding original station location from obspy.geodetics import locations2degrees def dist_to_orig_stn(row, orig_df): row_sta = row['sta'] orig_df_sta = orig_df.loc[row_sta] return locations2degrees(row['stationLat'], row['stationLon'], orig_df_sta['lat'], orig_df_sta['lon']) # For each station code, compute its distance in degrees from the same station code location in IRIS. # If distance is too far away, station will be considered different from the AU network same station code. log.info("Computing distances to original network station locations...") distances_from_orig = df_orig_stns_codes.apply(lambda r: dist_to_orig_stn(r, matching_network_stn_iris_df), axis=1) # Only keep stations which are close to the same station code from original network. angle_tolerance_deg = 1.0 df_orig_stns_codes_matching = df_orig_stns_codes.loc[(distances_from_orig < angle_tolerance_deg)] # Split out the network and matching station codes from the dataframe new_codes = [(n, s) for (n, s), _ in df_orig_stns_codes_matching.groupby(['net', 'sta'])] new_nets, new_stas = zip(*new_codes) return new_nets, new_stas
[docs]def pandas_timestamp_to_plottable_datetime(data): """ Convert float UTC timestamp to equivalent type that is plottable by matplotlib :param data: Pandas series of float timestamps :type data: pandas.Series :return: Array of Python datetimes :rtype: numpy.array(datetime) """ return data.transform(datetime.datetime.utcfromtimestamp).astype('datetime64[ms]').dt.to_pydatetime()
def _plot_target_network_rel_residuals(df, target, ref, batch_options, filter_options, tt_scale=50, snr_scale=(0, 60), annotator=None): file_label = batch_options.batch_label save_file = batch_options.save_file def _plot_dataset(ds, net_code, ref_code): # Sort ds rows by SNR, so that the weakest SNR points are drawn first and the high SNR point last, # to make sure high SNR point are in the top rendering layer. ds = ds.sort_values('snr') times = pandas_timestamp_to_plottable_datetime(ds['originTimestamp']) vals = ds[yaxis].values qual = ds['snr'].values min_mag = 4.0 mag = ds['mag'].values - min_mag ylabel = 'Relative TT residual (sec)' title = r"Station {} TT residuals relative to network {} (filtering: ref SNR$\geq${}, CWT$\geq${}, "\ r"slope$\geq${}, $n\sigma\geq{}$)".format(ref_code, net_code, str(filter_options.min_event_snr), str(filter_options.cwt_cutoff), str(filter_options.slope_cutoff), str(filter_options.nsigma_cutoff)) if vals.any(): plt.figure(figsize=(32, 9)) sc = plt.scatter(times, vals, c=qual, alpha=0.5, cmap='gnuplot_r', s=np.maximum(50 * mag, 10), edgecolors=None, linewidths=0) time_formatter = matplotlib.dates.DateFormatter("%Y-%m-%d") plt.gca().xaxis.set_major_formatter(time_formatter) cb = plt.colorbar(sc, drawedges=False) cb.set_label('Signal to noise ratio', fontsize=12) plt.grid(color='#808080', linestyle=':', alpha=0.7) plt.xlabel(xlabel, fontsize=14) plt.ylabel(ylabel, fontsize=14) plt.xticks(fontsize=14) plt.yticks(fontsize=14) plt.xlim(time_range) plt.ylim((-tt_scale, tt_scale)) plt.clim(snr_scale) plt.title(title, fontsize=18) plt.legend(['Point size = Mag - {}, Color = SNR'.format(min_mag)], fontsize=12, loc=1) plt.text(0.01, 0.96, "Channel selection: {}".format(filter_options.channel_preference), transform=plt.gca().transAxes, fontsize=12) plt.text(0.01, 0.92, "Start date: {}".format(str(time_range[0])), transform=plt.gca().transAxes, fontsize=12) plt.text(0.01, 0.88, " End date: {}".format(str(time_range[1])), transform=plt.gca().transAxes, fontsize=12) plt.tight_layout(pad=1.05) if annotator is not None: annotator() if save_file: subfolder = os.path.join(ref_code.split('.')[0] + file_label, net_code) # subfolder = net_code pathlib.Path(subfolder).mkdir(exist_ok=True, parents=True) plt_file = os.path.join(subfolder, '_'.join([ref_code, net_code]) + '_' + ylabel.replace(" ", "").replace(".*", "") + ".png") plt.savefig(plt_file, dpi=150) plt.close() else: # pragma: no cover plt.show() # end plot_dataset df_times = pandas_timestamp_to_plottable_datetime(df['originTimestamp']) if batch_options.x_range is not None: time_range = batch_options.x_range else: time_range = (df_times.min(), df_times.max()) ref_code = ".".join([ref['net'][0], ref['sta'][0]]) log = logging.getLogger(__name__) log.info("Plotting time range " + " to ".join([t.strftime("%Y-%m-%d %H:%M:%S") for t in time_range]) + " for " + ref_code) yaxis = 'relTtResidual' xlabel = 'Event Origin Timestamp' # Remove reference station from target set before producing composite image. # The reference station may not be there, but remove it if it is. mask_ref = compute_matching_network_mask(df, ref) mask_targ = compute_matching_network_mask(df, target) df_agg = df[(mask_targ) & (~mask_ref)] _plot_dataset(df_agg, ','.join(np.unique(target['net'])), ref_code) # Export median error for each origin event if export path is provided if batch_options.export_path is not None: pathlib.Path(batch_options.export_path).mkdir(parents=True, exist_ok=True) df_agg = df_agg.sort_values('originTimestamp') df_export = df_agg[['originTimestamp', 'relTtResidual']] median_errors = {'originTimestamp': [], 'rawClockError': []} for origin_ts, df_event in df_export.groupby('originTimestamp'): median_errors['originTimestamp'].append(origin_ts) median_errors['rawClockError'].append(df_event['relTtResidual'].median()) df_export = pd.DataFrame(median_errors) fname = ref_code + ".raw_clock_error.csv" fname = os.path.join(batch_options.export_path, fname) df_export.to_csv(fname, index=False) def _plot_network_relative_to_ref_station(df_plot, ref, target_stns, batch_options, filter_options, display_options): """ Compute relative residuals and send to plotting function. :param df_plot: Pandas dataframe containing only events which are common to ref station and target stations. :type df_plot: pandas.DataFrame :param ref_stn: Network and station codes for reference network (expected to be just one entry) :type ref_stn: dict of corresponding network and station codes under keys 'net' and 'sta' (expected to be just one entry) :param target_stns: Network and station codes for target network :type target_stns: dict of corresponding network and station codes under keys 'net' and 'sta' :param batch_options: Runtime options. :type batch_options: class BatchOptions :param filter_options: Filter options. :type filter_options: class FilterOptions :param display_options: Display options. :type display_options: class DisplayOptions """ register_matplotlib_converters() df_plot = df_plot.assign(relTtResidual=(df_plot['ttResidual'] - df_plot['ttResidualRef'])) # Re-order columns df_plot = df_plot[['#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm', 'net', 'sta', 'cha', 'pickTimestamp', 'phase', 'stationLon', 'stationLat', 'distance', 'snr', 'ttResidual', 'ttResidualRef', 'relTtResidual', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']] # Sort data by event origin time df_plot = df_plot.sort_values(['#eventID', 'originTimestamp']) def _plot_decorator(opts): if opts.events is not None: _add_event_marker_lines(opts.events) if opts.deployments is not None: _add_temporary_deployment_intervals(opts.deployments) _plot_target_network_rel_residuals(df_plot, target_stns, ref, batch_options, filter_options, annotator=lambda: _plot_decorator(display_options)) deregister_matplotlib_converters() def _add_event_marker_lines(events): time_lims = plt.xlim() y_lims = plt.ylim() for date, event in events.iterrows(): event_time = pytz.utc.localize(datetime.datetime.strptime(date, "%Y-%m-%d")) if event_time < matplotlib.dates.num2date(time_lims[0]) or \ event_time >= matplotlib.dates.num2date(time_lims[1]): continue plt.axvline(event_time, linestyle='--', linewidth=1, color='#00800080') plt.text(event_time, y_lims[0] + 0.01 * (y_lims[1] - y_lims[0]), event['name'], horizontalalignment='center', verticalalignment='bottom', fontsize=12, fontstyle='italic', color='#008000c0', rotation=90)
[docs]def utc_time_string_to_plottable_datetime(utc_timestamp_str): """ Convert a UTC timestamp string to datetime type that is plottable by matplotlib :param utc_timestamp_str: ISO-8601 UTC timestamp string :type utc_timestamp_str: str :return: Plottable datetime value :rtype: datetime.datetime with tzinfo """ utc_time = obspy.UTCDateTime(utc_timestamp_str) return pytz.utc.localize(datetime.datetime.utcfromtimestamp(float(utc_time)))
def _add_temporary_deployment_intervals(deployments): """ Graphical decorator for the TT residual charts to add visual indication of the time intervals during which selected temporary network deployments took place. """ def _render_deployment_interval(rec_x, rec_y, rec_width, rec_height, rec_color, text): plt.gca().add_patch(plt.Rectangle((rec_x, rec_y), width=rec_width, height=rec_height, color=rec_color, alpha=0.8, clip_on=True)) plt.text(rec_x, rec_y + rec_height / 2.0, text, fontsize=12, verticalalignment='center', clip_on=True) # Keep these in chronological order of start date to ensure optimal y-position staggering sorted_deployments = sorted(deployments, key=lambda v: v[0]) height = 5 base_ypos = -40 - height / 2.0 stagger = height / 2.0 for k in sorted_deployments: d = deployments[k] ypos = base_ypos + stagger _render_deployment_interval(d[0], ypos, (d[1] - d[0]), height, d[2], d[3]) stagger = -stagger
[docs]def apply_event_quality_filtering(df, ref_stn, filter_options): """ Apply event quality requirements to pick events. The event magnitude threshold is only used to filter rows where the quality metrics ['snr', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma'] are all zero. :param df: Pandas dataframe loaded from a pick event ensemble :type df: pandas.DataFrame :param ref_stn: Network and station codes for reference network (expected to be just one entry) :type ref_stn: dict of corresponding network and station codes under keys 'net' and 'sta' (expected to be just one entry) :param filter_options: Filter options. :type filter_options: class FilterOptions :return: Filtered dataframe of pick events :rtype: pandas.DataFrame """ if 'net' not in ref_stn: raise KeyError('Expect \'net\' in dict keys') if 'sta' not in ref_stn: raise KeyError('Expect \'sta\' in dict keys') if not len(ref_stn['net']) == len(ref_stn['sta']) == 1: raise ValueError('Expect only one network and station code when calling apply_event_quality_filtering') # Remove records where the SNR is too low mask_snr = (df['snr'] >= filter_options.min_event_snr) # Filter to constrained quality metrics mask_cwt = (df['qualityMeasureCWT'] >= filter_options.cwt_cutoff) mask_slope = (df['qualityMeasureSlope'] >= filter_options.slope_cutoff) mask_sigma = (df['nSigma'] >= filter_options.nsigma_cutoff) # For events from ISC catalogs the quality metrics are zero, so we use event magnitude instead. mask_zero_quality_stats = (df[['snr', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']] == 0).all(axis=1) mask_origin_mag = (df['mag'] >= filter_options.min_event_mag) quality_mask = ((~mask_zero_quality_stats & mask_snr & mask_cwt & mask_slope & mask_sigma) | (mask_zero_quality_stats & mask_origin_mag)) mask_ref = compute_matching_network_mask(df, ref_stn) if filter_options.strict_filtering: # But never apply quality mask to ref stations that have all zero quality stats, as we just can't judge quality # and don't want to arbitrarily exclude them. quality_mask = (mask_zero_quality_stats & mask_ref) | quality_mask else: # Only apply quality mask to stations that are not the reference station, i.e. use all ref station events # regardless of pick quality at the ref station. This gives more results, but possibly more noise. quality_mask = mask_ref | (~mask_ref & quality_mask) df_qual = df[quality_mask] return df_qual
[docs]def broadcast_ref_residual_per_event(df_plot, ref_netcode, ref_stacode, filter_options): """For each event in the dataframe, figure out the best reference residual and add it to new column 'ttResidualRef' :param df: Pandas dataframe loaded from a pick event ensemble :type df: pandas.DataFrame :param ref_netcode: The reference network code :type ref_netcode: str :param ref_stacode: The reference station code :type ref_stacode: str :param filter_options: Filter options. :type filter_options: class FilterOptions :return: Dataframe with a consistent reference residual populated for each event for given reference station. :rtype: pandas.DataFrame """ # Create column for entire table first df_plot['ttResidualRef'] = np.nan log = logging.getLogger(__name__) pbar = tqdm.tqdm(total=len(df_plot), ascii=True) pbar.set_description("Broadcasting {}.{} residuals".format(ref_netcode, ref_stacode)) # Assuming we only process one ref station at a time. df_plot['match_ref'] = (df_plot['net'] == ref_netcode) & (df_plot['sta'] == ref_stacode) for eventid, grp in df_plot.groupby('#eventID'): pbar.update(len(grp)) ref_mask = grp['match_ref'] grp_ref = grp[ref_mask] if grp_ref.empty: continue # Choose most favourable channel cha = None available_cha = grp_ref['cha'].values for c in filter_options.channel_preference: if c in available_cha: cha = c break # We must find a channel if cha is None: log.warning("Channels {} are not amongst allowed channels {}".format(available_cha, filter_options.channel_preference)) continue cha_mask = (grp_ref['cha'] == cha) grp_cha = grp_ref[cha_mask] tt_ref_series = grp_cha['ttResidual'].unique() if len(tt_ref_series) > 1: log.warning("WARNING: Multiple reference times found for event {}\n{}," " choosing smallest absolute residual".format(eventid, tt_ref_series)) # In this case, choose the smallest reference tt residual grp_cha['absTTResidual'] = np.abs(grp_cha['ttResidual'].values) grp_cha = grp_cha.sort_values('absTTResidual') tt_ref_series = grp_cha['ttResidual'].unique() ref_time = tt_ref_series[0] assert not np.isnan(ref_time) df_plot.loc[grp.index, 'ttResidualRef'] = ref_time pbar.close() # Quality check - each event should have only one unique reference tt residual assert np.all([len(df['ttResidualRef'].unique()) == 1 for e, df in df_plot.groupby('#eventID')]) return df_plot
[docs]def analyze_target_relative_to_ref(df_picks, ref_stn, target_stns, filter_options): """ Analyze a single (reference) station's residuals relative to all the other stations in a (target) network. :param df_picks: Pandas dataframe loaded from a pick event ensemble :type df_picks: pandas.DataFrame :param ref_stn: Network and station codes for reference network (expected to be just one entry) :type ref_stn: dict of corresponding network and station codes under keys 'net' and 'sta' (expected to be just one entry) :param target_stns: Network and station codes for target network :type target_stns: dict of corresponding network and station codes under keys 'net' and 'sta' :param filter_options: Filter options. :type filter_options: class FilterOptions :return: Dataframe with filtering applied and reference residual populated ready for plotting. :rtype: pandas.DataFrame """ assert len(ref_stn['net']) == 1 assert len(ref_stn['sta']) == 1 # Event quality filtering df_qual = apply_event_quality_filtering(df_picks, ref_stn, filter_options) # Filter to desired ref and target networks mask_ref = compute_matching_network_mask(df_qual, ref_stn) mask_targ = compute_matching_network_mask(df_qual, target_stns) mask = mask_ref | mask_targ # np.any(mask) df_nets = df_qual.loc[mask] # Filter out events in which ref_stn and TARGET stations are not both present log = logging.getLogger(__name__) log.info("Narrowing dataframe to events common to REF and TARGET networks...") df_nets = df_nets.assign(ref_match=compute_matching_network_mask(df_nets, ref_stn)) df_nets = df_nets.assign(target_match=compute_matching_network_mask(df_nets, target_stns)) keep_events = [e for e, d in df_nets.groupby('#eventID') if np.any(d['ref_match']) and np.any(d['target_match'])] event_mask = df_nets['#eventID'].isin(keep_events) df_nets = df_nets[event_mask] log.info("Remaining picks after narrowing to common events: {}".format(len(df_nets))) if df_nets.empty: log.warning("No events left to analyze!") return # print(get_overlapping_date_range(df_nets, ref_stn, target_stns)) # Compute the reference TT residual for each event and store in new column df_final = broadcast_ref_residual_per_event(df_nets, ref_stn['net'][0], ref_stn['sta'][0], filter_options) assert 'ttResidualRef' in df_final.columns return df_final
[docs]def filter_limit_channels(df_picks, channel_pref): """Filter picks dataframe to a limited range of preferred channel types. :param df_picks: Picks dataframe to filter :type df_picks: pandas.DataFrame :return: Filtered picks dataframe with only preferred channels :rtype: pandas.DataFrame """ df_picks = df_picks[df_picks['cha'].isin(channel_pref)].reset_index() return df_picks
[docs]def filter_duplicated_network_codes(df_picks): """Filter picks dataframe to remove records for selected known duplicate network codes based on date of the records we want to keep. :param df_picks: Picks dataframe to filter :type df_picks: pandas.DataFrame :return: Filtered picks dataframe with unwanted records removed. :rtype: pandas.DataFrame """ DATE_FILTER = ( ('7D', pd.Timestamp(datetime.datetime(2010, 1, 1))), ('7G', pd.Timestamp(datetime.datetime(2010, 1, 1))), ) for net, min_date in DATE_FILTER: date_mask = (df_picks['net'] == net) & (df_picks['originTimestamp'] < min_date.timestamp()) df_picks = df_picks[~date_mask] return df_picks
[docs]def filter_to_teleseismic(df_picks, min_dist_deg, max_dist_deg): """Filter picks dataframe to limited range of teleseismic distances of the original events. :param df_picks: Picks dataframe to filter :type df_picks: pandas.DataFrame :param min_dist_deg: Minimum teleseismic distance (degrees) :type min_dist_deg: float :param max_dist_deg: Maximum teleseismic distance (degrees) :type max_dist_deg: float :return: Filtered picks dataframe containing only events with limited range of teleseismic distance. :rtype: pandas.DataFrame """ mask_tele = (df_picks['distance'] >= min_dist_deg) & (df_picks['distance'] <= max_dist_deg) df_picks = df_picks.loc[mask_tele] return df_picks
def _get_known_temporary_deployments(): # TODO: Change implementation to read this data from JSON file temp_deps = {} # Data fields are (start date, end date, plot color, plot label) temp_deps['7X'] = (utc_time_string_to_plottable_datetime('2009-06-16T03:42:00.000000Z'), utc_time_string_to_plottable_datetime('2011-04-01T23:18:49.000000Z'), 'C7', 'Deployment 7X') temp_deps['7D'] = (utc_time_string_to_plottable_datetime('2012-01-01T00:01:36.000000Z'), utc_time_string_to_plottable_datetime('2014-03-27T15:09:51.000000Z'), 'C1', 'Deployment 7D') temp_deps['7F'] = (utc_time_string_to_plottable_datetime('2012-12-31T23:59:59.000000Z'), utc_time_string_to_plottable_datetime('2014-11-15T00:43:14.000000Z'), 'C3', 'Deployment 7F') temp_deps['7G'] = (utc_time_string_to_plottable_datetime('2014-01-01T00:00:06.000000Z'), utc_time_string_to_plottable_datetime('2016-02-09T21:04:29.000000Z'), 'C4', 'Deployment 7G') temp_deps['OA'] = (utc_time_string_to_plottable_datetime('2017-09-13T23:59:13.000000Z'), utc_time_string_to_plottable_datetime('2018-11-28T01:11:14.000000Z'), 'C8', 'Deployment OA') return temp_deps @click.command() @click.argument('picks-file', type=click.Path('r'), required=True) @click.option('--network1', type=str, required=True, help='Network against which to compute residuals') @click.option('--stations1', type=str, required=False, help='Comma separated list of specific network 1 station codes to use.' 'If empty, use all stations in network 1.') @click.option('--networks2', type=str, required=True, help='Comma separated list of network(s) ' 'for which to compute residuals. Can be same as network1 to use itself as reference.') @click.option('--stations2', type=str, required=False, help='Comma separated list of specific network 2 ' 'station codes to use. If empty, use all stations for each network in networks2. Should not ' 'be used unless networks2 has just a single network.') @click.option('--min-distance', type=float, default=DEFAULT_MIN_DISTANCE, show_default=True, help='Minimum teleseismic distance (degrees) of events to plot') @click.option('--max-distance', type=float, default=DEFAULT_MAX_DISTANCE, show_default=True, help='Maximum teleseismic distance (degrees) of events to plot') @click.option('--min-event-snr', type=float, default=DEFAULT_MIN_EVENT_SNR, show_default=True, help='Filter out events with signal-to-noise ratio less than this') @click.option('--cwt-cutoff', type=float, default=DEFAULT_CWT_CUTOFF, show_default=True, help='Filter out events with CWT quality value less than this') @click.option('--slope-cutoff', type=float, default=DEFAULT_SLOPE_CUTOFF, show_default=True, help='Filter out events with slope quality value less than this') @click.option('--nsigma-cutoff', type=int, default=DEFAULT_NSIGMA_CUTOFF, show_default=True, help='Filter out events with nsigma quality value less than this') @click.option('--min-event-magnitude', type=float, default=DEFAULT_MIN_EVENT_MAG, show_default=True, help='Filter out events with magnitude value less than this. ' '(Only applies when pick quality stats are all zero.)') @click.option('--strict-filtering/--no-strict-filtering', default=DEFAULT_STRICT_FILTERING, show_default=True, help='Whether to apply quality filters to picks of the station being analyzed/plotted.' 'If False, quality filters are still applied to the reference station picks, but' 'not to the target station, resulting in a larger number of plotted events per station.') @click.option('--show-deployments', is_flag=True, default=False, show_default=True, help='Show temporary deployments time durations on the plots.') @click.option('--show-historical/--no-show-historical', default=True, show_default=True, help='Show historical events on the plots.') @click.option('--include-alternate-catalog/--no-include-alternate-catalog', default=True, show_default=True, help='Add matching stations from alternate networks in IRIS catalog') @click.option('--export-path', type=click.Path(file_okay=False), help='Folder in which to store raw time series of TT residuals') @click.option('--interactive', is_flag=True, default=False, show_default=True, help='If True, plots will be displayed as popup windows instead of saving to file. ' 'Use this option to interact with the data.') def main(picks_file, network1, networks2, stations1=None, stations2=None, min_distance=DEFAULT_MIN_DISTANCE, max_distance=DEFAULT_MAX_DISTANCE, min_event_snr=DEFAULT_MIN_EVENT_SNR, cwt_cutoff=DEFAULT_CWT_CUTOFF, slope_cutoff=DEFAULT_SLOPE_CUTOFF, nsigma_cutoff=DEFAULT_NSIGMA_CUTOFF, min_event_magnitude=DEFAULT_MIN_EVENT_MAG, strict_filtering=DEFAULT_STRICT_FILTERING, show_deployments=False, show_historical=True, include_alternate_catalog=True, export_path=None, interactive=False): # pragma: no cover """ Main function for running relative traveltime residual plotting. The picks ensemble file should have column headings in accordance with picks_reader_utils.PICKS_TABLE_SCHEMA. :param picks_file: Picks ensemble file :type picks_file: str or path :param network1: Reference network code :type network1: str :param networks2: Comma separated list of network code(s) to analyze :type networks2: str :param stations1: Comma separated list of reference station codes to use for network1, defaults to None :param stations1: str, optional :param stations2: Comma separated list of station codes to analyze for network2, if only one network specified in networks2. Otherwise should not be used. Defaults to None. :param stations2: str, optional """ # TODO: invert naming of "target" and "reference" in the code. The main help documentation is correct, # but internal naming is actually swapped. log = logging.getLogger(__name__) # log.setLevel(logging.INFO) log.setLevel(logging.DEBUG) log.info("Pandas version: " + pd.__version__) log.info("Matplotlib version: " + matplotlib.__version__) # Load in event ensemble log.info("Loading picks file {}".format(picks_file)) df_raw_picks = read_picks_ensemble(picks_file) # Remove unused columns df_raw_picks = df_raw_picks[['#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm', 'net', 'sta', 'cha', 'pickTimestamp', 'phase', 'stationLon', 'stationLat', 'az', 'baz', 'distance', 'ttResidual', 'snr', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']] log.debug("Number of raw picks: {}".format(len(df_raw_picks))) # Log time period for source dataset log.debug("Raw picks date range: {} to {}".format(obspy.UTCDateTime(df_raw_picks['originTimestamp'].min()), obspy.UTCDateTime(df_raw_picks['originTimestamp'].max()))) filter_options = FilterOptions() filter_options.strict_filtering = strict_filtering filter_options.min_event_snr = min_event_snr filter_options.min_event_mag = min_event_magnitude filter_options.cwt_cutoff = cwt_cutoff filter_options.slope_cutoff = slope_cutoff filter_options.nsigma_cutoff = nsigma_cutoff # Perform series of global filters on picks df_picks = df_raw_picks log.debug("Picks before global filter: {}".format(len(df_picks))) filter_chain = [lambda _df: filter_limit_channels(_df, filter_options.channel_preference), filter_duplicated_network_codes, lambda _df: filter_to_teleseismic(_df, min_distance, max_distance)] for f in filter_chain: df_picks = f(df_picks) log.debug("Picks after global filter: {}".format(len(df_picks))) # Populate temporary deployments details temporary_deployments = _get_known_temporary_deployments() TARGET_NET = network1 TARGET_STN = stations1.split(',') if stations1 else get_network_stations(df_picks, TARGET_NET) TARGET_STNS = {'net': [TARGET_NET] * len(TARGET_STN), 'sta': [s for s in TARGET_STN]} # Find additional network.station codes that match AU network, and add them to target if include_alternate_catalog and TARGET_NET in IRIS_ALTERNATE_STATIONS_FILE: alt_file = IRIS_ALTERNATE_STATIONS_FILE[TARGET_NET] alt_file = os.path.join(os.path.split(__file__)[0], alt_file) new_nets, new_stas = determine_alternate_matching_codes(df_picks, alt_file, TARGET_NET) log.info("Adding {} more stations from alternate networks file {}".format(len(new_nets), alt_file)) TARGET_STNS['net'].extend(list(new_nets)) TARGET_STNS['sta'].extend(list(new_stas)) REF_NETS = networks2.split(',') assert len(REF_NETS) == 1 or not stations2, \ "Can't specify multiple networks and custom station list at the same time!" display_options = DisplayOptions() if show_historical: # Generate catalog of major regional events (mag 8+) for overlays display_options.events = generate_large_events_catalog(df_raw_picks, 8.0) if show_deployments: display_options.deployments = temporary_deployments batch_options = BatchOptions() batch_options.save_file = not interactive batch_options.batch_label = '_strict' if strict_filtering else '_no_strict' batch_options.export_path = export_path assert len(TARGET_STNS['net']) == len(TARGET_STNS['sta']) for ref_net in REF_NETS: if len(REF_NETS) == 1 and stations2: REF_STN = stations2.split(',') else: REF_STN = get_network_stations(df_picks, ref_net) # end if REF_STNS = {'net': [ref_net] * len(REF_STN), 'sta': [s for s in REF_STN]} if ref_net == 'AU' and TARGET_NET == 'AU': REF_STNS['net'].extend(list(new_nets)) REF_STNS['sta'].extend(list(new_stas)) # For certain temporary deployments, force a fixed time range on the x-axis so that all stations # in the deployment can be compared on a common time range. if ref_net in temporary_deployments: batch_options.x_range = (temporary_deployments[ref_net][0], temporary_deployments[ref_net][1]) else: batch_options.x_range = None assert len(REF_STNS['net']) == len(REF_STNS['sta']) for net, sta in zip(REF_STNS['net'], REF_STNS['sta']): log.info("Plotting against REF: " + ".".join([net, sta])) single_ref = {'net': [net], 'sta': [sta]} # Peform analysis df_final = analyze_target_relative_to_ref(df_picks, single_ref, TARGET_STNS, filter_options) # Plot results _plot_network_relative_to_ref_station(df_final, single_ref, TARGET_STNS, batch_options, filter_options, display_options) # end for # end for if __name__ == "__main__": main() # pylint: disable=no-value-for-parameter