Source code for seismic.inversion.wavefield_decomp.plot_nd_batch

#!/usr/bin/env python
# coding: utf-8
"""
Batch plotting a MCMC solution for batch of stations to a single pdf file.
"""

import os
import logging
import json
import copy

import click
from tqdm.auto import tqdm

import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
from matplotlib.table import table
import rf

from seismic.inversion.wavefield_decomp.runners import load_mcmc_solution
from seismic.inversion.wavefield_decomp.wfd_plot import plot_Nd
from seismic.stream_io import read_h5_stream
from seismic.receiver_fn.rf_deconvolution import rf_iter_deconv


default_fig_width_inches = 6.4
RF_TRIM_WINDOW = (-10.0, 25.0)


[docs]def convert_Vs_to_k(soln, config):
    """
    Transform Vs variable into k variable in MCMC solution. Modifies soln in-place.

    :param soln: Solution container
    :type soln: Customized scipy.optimize.OptimizeResult
    :param config: Solution configuration
    :type config: dict
    :return: None
    """
    layers = config['layers']
    for i, layer in enumerate(layers):
        Vp = layer['Vp']
        if 'k_range' not in layer:
            assert 'Vs_range' in layer
            Vs_range = layer['Vs_range']
            layer.update({'k_range': [Vp/Vs_range[1], Vp/Vs_range[0]]})
        # end if
        # Scale Vs content in bounds, x, clusters, samples, bins and distribution to Vp/Vs
        lb = soln.bounds.lb[2*i + 1]
        soln.bounds.lb[2*i + 1] = Vp/soln.bounds.ub[2*i + 1]
        soln.bounds.ub[2 * i + 1] = Vp/lb
        soln.x[:, 2*i + 1] = Vp/soln.x[:, 2*i + 1]
        for cluster in soln.clusters:
            cluster[:, 2*i + 1] = Vp/cluster[:, 2*i + 1]
        # end for
        soln.samples[:, 2*i + 1] = Vp/soln.samples[:, 2*i + 1]
        soln.bins[2*i + 1] = np.flip(Vp/soln.bins[2*i + 1])
        soln.distribution[2*i + 1] = np.flip(soln.distribution[2*i + 1])
    # end for
# end func


def _compute_rf(data, config, log):
    st = rf.RFStream()
    event_ids = config.get("event_ids")
    src_file = config.get("waveform_file")
    if event_ids is None:
        log.error("Unable to generate RF without event IDs")
        return st
    # end if
    if src_file is None:
        log.error("Unable to generate RF without path to source file")
        return st
    # end if
    if not os.path.isfile(src_file):
        log.error("Source file {} for trace metadata not found, cannot generate RF".format(src_file))
        return
    # end if
    net, sta, loc = config["station_id"].split('.')
    src_waveforms = read_h5_stream(src_file, net, sta, loc)
    assert data.shape[0] == len(event_ids)
    for i, event_data in enumerate(data):
        evid = event_ids[i]
        src_stream = rf.RFStream([tr for tr in src_waveforms if tr.stats.event_id == evid])
        # Z component
        z_header = src_stream.select(component='Z')[0].stats
        su_opts = config["su_energy_opts"]
        z_header.starttime = z_header.onset + su_opts["time_window"][0]
        z_header.sampling_rate = su_opts["sampling_rate"]
        z_header.delta = 1.0/z_header.sampling_rate
        z_header.npts = event_data.shape[1]
        assert np.isclose(float(z_header.endtime - z_header.starttime),
                          su_opts["time_window"][1] - su_opts["time_window"][0])
        tr = rf.rfstream.RFTrace(event_data[1, :], header=z_header)
        st += tr
        # R component
        r_header = z_header.copy()
        r_header.channel = z_header.channel[:-1] + 'R'
        tr = rf.rfstream.RFTrace(event_data[0, :], header=r_header)
        st += tr
    # end for

    st.filter('bandpass', freqmin=0.05, freqmax=1.0, corners=2, zerophase=True)
    normalize = 0  # Use Z-component for normalization
    st.rf(rotate=None, method='P', deconvolve='func', func=rf_iter_deconv,
          normalize=normalize, min_fit_threshold=75.0)

    return st
# end func


[docs]def plot_aux_data(soln, config, log, scale):
    f = plt.figure(constrained_layout=False, figsize=(6.4*scale, 6.4*scale))
    f.suptitle(config["station_id"], y=0.96, fontsize=16)
    gs = f.add_gridspec(2, 1, left=0.1, right=0.9, bottom=0.1, top=0.87, hspace=0.3,
                        wspace=0.3, height_ratios=[1, 2])
    gs_top = gs[0].subgridspec(1, 2)
    ax0 = f.add_subplot(gs_top[0, 0])
    ax1 = f.add_subplot(gs_top[0, 1])

    hist_alpha = 0.5
    soln_alpha = 0.3
    axis_font_size = 6*scale
    title_font_size = 6*scale
    nbins = 100

    # Plot energy distribution of samples and solution clusters
    energy_hist, bins = np.histogram(soln.sample_funvals, bins=nbins)
    energy_hist = energy_hist.astype(float)/np.max(energy_hist)
    ax0.bar(bins[:-1], energy_hist, width=np.diff(bins), align='edge', color='#808080', alpha=hist_alpha)

    for i, cluster_energies in enumerate(soln.cluster_funvals):
        color = 'C' + str(i)
        cluster_hist, _ = np.histogram(cluster_energies, bins)
        cluster_hist = cluster_hist.astype(float)/np.max(cluster_hist)
        ax0.bar(bins[:-1], cluster_hist, width=np.diff(bins), align='edge', color=color, alpha=soln_alpha)
    # end for
    ax0.set_title('Energy distribution of random samples and solution clusters',
                  fontsize=title_font_size)
    ax0.set_xlabel('$E_{SU}$ energy (arb. units)')
    ax0.set_ylabel('Normalized counts')
    ax0.tick_params(labelsize=axis_font_size)
    ax0.xaxis.label.set_size(axis_font_size)
    ax0.yaxis.label.set_size(axis_font_size)

    # Plot sorted per-event upwards S-wave energy at top of mantle per solution.
    # Collect event IDs of worst fit traces and present as table of waveform IDs.
    event_ids = config["event_ids"]
    events_best3 = []
    events_worst3 = []
    for i, esu in enumerate(soln.esu):
        assert len(esu) == len(event_ids)
        color = 'C' + str(i)
        esu_sorted = sorted(zip(esu, event_ids))
        events_best3.extend(esu_sorted[:3])
        events_worst3.extend(esu_sorted[-3:])
        esu_sorted = [e[0] for e in esu_sorted]
        ax1.plot(esu_sorted, color=color, alpha=soln_alpha)
    # end for
    events_best3 = sorted(events_best3)
    events_worst3 = sorted(events_worst3, reverse=True)
    best_events_set = set()
    worst_events_set = set()
    for _, evid in events_best3:
        best_events_set.add(evid)
        if len(best_events_set) >= 3:
            break
        # end if
    # end for
    for _, evid in events_worst3:
        worst_events_set.add(evid)
        if len(worst_events_set) >= 3:
            break
        # end if
    # end for
    _tab1 = table(ax1, cellText=[[e] for e in best_events_set], colLabels=['BEST'],
                  cellLoc='left', colWidths=[0.35], loc='upper left',
                  edges='horizontal', fontsize=8, alpha=0.6)  # alpha broken in matplotlib.table!
    _tab2 = table(ax1, cellText=[[e] for e in worst_events_set], colLabels=['WORST'],
                  cellLoc='left', colWidths=[0.35], loc='upper right',
                  edges='horizontal', fontsize=8, alpha=0.6)
    ax1.set_title('Ranked per-event energy for each solution point',
                  fontsize=title_font_size)
    ax1.set_xlabel('Rank (out of # source events)')
    ax1.set_ylabel('Event $E_{SU}$ energy (arb. units)')
    ax1.tick_params(labelsize=axis_font_size)
    ax1.xaxis.label.set_size(axis_font_size)
    ax1.yaxis.label.set_size(axis_font_size)

    # Plot receiver function at base of selected layers
    axis_font_size = 6*scale
    max_solutions = config["solver"].get("max_solutions", 3)
    for layer in config["layers"]:
        lname = layer["name"]
        if soln.subsurface and lname in soln.subsurface:
            base_seismogms = soln.subsurface[lname]
            # Generate RF and plot.
            gs_bot = gs[1].subgridspec(max_solutions, 1, hspace=0.4)
            for i, seismogm in enumerate(base_seismogms):
                soln_rf = _compute_rf(seismogm, config, log)
                assert isinstance(soln_rf, rf.RFStream)
                # Remove any traces for which deconvolution failed.
                # First, find their unique ID. Then remove all traces with that ID.
                exclude_ids = set([tr.stats.event_id for tr in soln_rf if len(tr) == 0])
                soln_rf = rf.RFStream([tr for tr in soln_rf if tr.stats.event_id not in exclude_ids])
                axn = f.add_subplot(gs_bot[i])
                if soln_rf:
                    color = 'C' + str(i)
                    rf_R = soln_rf.select(component='R').trim2(RF_TRIM_WINDOW[0], RF_TRIM_WINDOW[1], reftime='onset')
                    num_RFs = len(rf_R)
                    times = rf_R[0].times() + RF_TRIM_WINDOW[0]
                    data = rf_R.stack()[0].data
                    axn.plot(times, data, color=color, alpha=soln_alpha, linewidth=2)
                    axn.text(0.95, 0.95, 'N = {}'.format(num_RFs), fontsize=10,
                             ha='right', va='top', transform=axn.transAxes)
                    axn.set_xlabel('Time (sec)')
                    axn.grid(color='#80808080', linestyle=':')
                else:
                    axn.annotate('Empty RF plot', (0.5, 0.5), xycoords='axes fraction', ha='center')
                # end if
                axn.set_title(' '.join([config["station_id"], lname, 'base RF', '(soln {})'.format(i)]),
                              fontsize=title_font_size, y=0.92, va='top')
                axn.tick_params(labelsize=axis_font_size)
                axn.xaxis.label.set_size(axis_font_size)
                axn.yaxis.label.set_size(axis_font_size)
            # end for
            break  # TODO: Figure out how to add more layers if needed
        # end if
    # end for

    return f
# end func


@click.command()
@click.argument('solution_file', type=click.Path(exists=True, dir_okay=False), required=True)
@click.option('--output-file', type=click.Path(dir_okay=False), required=True,
              help='Name of the output PDF file in which to save plots')
def main(solution_file, output_file):
    """Plot all the solutions found in a batch run of N-dimensional solver.

    Example usage:
        python seismic/inversion/wavefield_decomp/plot_nd_batch.py --output-file OA_wfd_out.pdf OA_wfd_out.h5

    :param solution_file: Input solution filename
    :param output_file: Output filename
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    soln_config, job_id = load_mcmc_solution(solution_file, logger=log)

    out_basename, ext = os.path.splitext(output_file)
    out_basename += '_' + job_id
    output_file = out_basename + ext

    with PdfPages(output_file) as pdf:
        for soln, config in tqdm(soln_config):
            empty_soln = (soln.x.shape[0] == 0)
            if empty_soln:
                continue
            assert soln.x.shape[-1] == len(config['layers'])*2
            vars = []
            for layer in config['layers']:
                layer_name = layer['name']
                vars += ['$H_{{{}}}$'.format(layer_name), '$k_{{{}}}$'.format(layer_name)]
            # end for
            vars = tuple(vars)

            # Dump settings page (per station)
            config_no_evids = copy.deepcopy(config)
            config_no_evids.pop('event_ids', None)
            config_text = json.dumps(config_no_evids, indent=4)
            f = plt.figure(figsize=(default_fig_width_inches, default_fig_width_inches*1.414))
            plt.gca().xaxis.set_visible(False)
            plt.gca().yaxis.set_visible(False)
            plt.title(config['station_id'] + ' Processing Settings')
            f.text(0.02, 0.98, 'Settings:\n' + config_text, fontsize=6, va='top',
                   fontname='monospace', transform=plt.gca().transAxes)
            pdf.savefig(dpi=300, papertype='a3', orientation='portrait')
            plt.close()

            #  Convert Vs parameter to k = Vp/Vs
            convert_Vs_to_k(soln, config)
            p, _, _ = plot_Nd(soln, title=config['station_id'], vars=vars)
            scale = p.fig.get_size_inches()[0]/default_fig_width_inches
            # Annotate top left axes with number of events use in the solver.
            ndims = len(soln.bounds.lb)
            ax = p.axes[0, ndims - 1]
            ax.text(0.95, 0.95, 'N = {}'.format(soln.num_input_seismograms), fontsize=10,
                    ha='right', va='top', transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a3', orientation='portrait')
            plt.close()

            _p = plot_aux_data(soln, config, log, scale)
            pdf.savefig(dpi=300, papertype='a3', orientation='portrait')
            plt.close()
        # end for
    # end with
    log.info('Produced file {}'.format(output_file))

# end func


if __name__ == "__main__":
    main()
# end if