#!/usr/bin/env seiscomp-python # -*- coding: utf-8 -*- ############################################################################ # Copyright (C) GFZ Potsdam # # All rights reserved. # # # # GNU Affero General Public License Usage # # This file may be used under the terms of the GNU Affero # # Public License version 3.0 as published by the Free Software Foundation # # and appearing in the file LICENSE included in the packaging of this # # file. Please review the following information to ensure the GNU Affero # # Public License version 3.0 requirements will be met: # # https://www.gnu.org/licenses/agpl-3.0.html. # ############################################################################ from __future__ import absolute_import, division, print_function import sys import os import re import argparse import seiscomp.core import seiscomp.io class MyArgumentParser(argparse.ArgumentParser): def format_epilog(self): return self.epilog def str2time(timestring): """ Liberally accept many time string formats and convert them to a seiscomp.core.Time """ timestring = timestring.strip() for c in ["-", "/", ":", "T", "Z"]: timestring = timestring.replace(c, " ") timestring = timestring.split() assert 3 <= len(timestring) <= 6 timestring.extend((6 - len(timestring)) * ["0"]) timestring = " ".join(timestring) timeFormat = "%Y %m %d %H %M %S" if timestring.find(".") != -1: timeFormat += ".%f" time = seiscomp.core.Time() time.fromString(timestring, timeFormat) return time def time2str(time): """ Convert a seiscomp.core.Time to a string """ return time.toString("%Y-%m-%d %H:%M:%S.%f000000")[:23] def recordInput(filename=None, datatype=seiscomp.core.Array.INT): """ Simple Record iterator that reads from a file (to be specified by filename) or -- if no filename was specified -- reads from standard input """ stream = seiscomp.io.RecordStream.Create("file") if not stream: raise IOError("failed to create a RecordStream") if not filename: filename = "-" if filename == "-": print( "Waiting for data input from stdin. Use Ctrl + C to interrupt.", file=sys.stderr, ) else: if not os.path.exists(filename): print("Cannot find file {}".format(filename), file=sys.stderr) sys.exit() if not stream.setSource(filename): print(" + failed to assign source file to RecordStream", file=sys.stderr) sys.exit() records = seiscomp.io.RecordInput(stream, datatype, seiscomp.core.Record.SAVE_RAW) while True: try: record = next(records) except Exception: print("Received invalid or no input", file=sys.stderr) sys.exit() if not record: return yield record tmin = str2time("1970-01-01 00:00:00") tmax = str2time("2500-01-01 00:00:00") ifile = "-" description = ( "Read unsorted and possibly multiplexed miniSEED files. " "Sort data by time (multiplexing) and filter the individual " "records by time and/or streams. Apply this before playbacks " "and waveform archiving." ) epilog = ( "Examples:\n" "Read data from multiple files, extract streams by time, sort records by start " "time, remove duplicate records\n" " cat f1.mseed f2.mseed f3.mseed |\\\n" " scmssort -v -t '2007-03-28 15:48~2007-03-28 16:18' -u > sorted.mseed\n" "\n" "Extract streams by time, stream code and sort records by end time\n" " echo CX.PB01..BH? |\\ \n" " scmssort -v -E -t '2007-03-28 15:48~2007-03-28 16:18' -u -l - test.mseed > " "sorted.mseed" ) # p = MyArgumentParser( # usage="\n %prog [options] [files | < ] > ", description=description, epilog=epilog # ) p = MyArgumentParser( description=description, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter, ) p.add_argument( "-E", "--sort-by-end-time", action="store_true", help="Sort according to record end time; default is start time.", ) p.add_argument( "-r", "--rm", action="store_true", help="Remove all traces in stream list given by --list instead of keeping them.", ) p.add_argument( "-l", "--list", action="store", help="File with stream list to filter the records. " "One stream per line. Instead of a file read the from stdin (-). " "Line format: NET.STA.LOC.CHA - wildcards and regular expressions " "are considered. Example: CX.*..BH?.", ) p.add_argument( "-t", "--time-window", action="store", help="Specify time window (as one -properly quoted- string). Times " "are of course UTC and separated by a tilde '~'.", ) p.add_argument( "-u", "--uniqueness", action="store_true", help="Ensure uniqueness of output, i.e. skip duplicate records.", ) p.add_argument("-v", "--verbose", action="store_true", help="Run in verbose mode.") p.add_argument( "filenames", nargs="+", help="Names of input files in miniSEED format.", ) opt = p.parse_args() filenames = opt.filenames if opt.time_window: tmin, tmax = list(map(str2time, opt.time_window.split("~"))) if opt.verbose: print( "Considered time window: %s~%s" % (time2str(tmin), time2str(tmax)), file=sys.stderr, ) listFile = None removeStreams = False if opt.list: listFile = opt.list print("Considered stream list from: %s" % (listFile), file=sys.stderr) if opt.rm: removeStreams = True print("Removing listed streams", file=sys.stderr) def _time(record): if opt.sort_by_end_time: return seiscomp.core.Time(record.endTime()) return seiscomp.core.Time(record.startTime()) def _in_time_window(record, tMin, tMax): return record.endTime() >= tMin and record.startTime() <= tMax def readStreamList(file): streamList = [] try: if file == "-": f = sys.stdin file = "stdin" else: f = open(listFile, "r", encoding="utf-8") except FileNotFoundError: print("%s: error: unable to open" % listFile, file=sys.stderr) return [] lineNumber = -1 for line in f: lineNumber = lineNumber + 1 line = line.strip() # ignore comments if len(line) > 0 and line[0] == "#": continue if len(line) == 0: continue toks = line.split(".") if len(toks) != 4: f.close() print( "error: %s in line %d has invalid line format, expected " "stream list: NET.STA.LOC.CHA - 1 line per stream including " "regular expressions" % (listFile, lineNumber), file=sys.stderr, ) return [] streamList.append(line) f.close() if len(streamList) == 0: return [] return streamList if not filenames: filenames = ["-"] streams = None if listFile: streams = readStreamList(listFile) if not streams and not removeStreams: print(" + cannot extract data", file=sys.stderr) sys.exit() if opt.verbose: string = " + streams: " for stream in streams: string += stream + " " print("%s" % (string), file=sys.stderr) pattern = re.compile("|".join(streams)) readRecords = 0 networks = set() stations = set() locations = set() channels = set() readStreams = set() outEnd = None outStart = None if filenames: first = None time_raw = [] for fileName in filenames: if opt.verbose: print("Reading file '%s'" % fileName, file=sys.stderr) for rec in recordInput(fileName): if not rec: continue if not _in_time_window(rec, tmin, tmax): continue raw = rec.raw().str() streamCode = "%s.%s.%s.%s" % ( rec.networkCode(), rec.stationCode(), rec.locationCode(), rec.channelCode(), ) if listFile: foundStream = False if pattern.match(streamCode): foundStream = True if removeStreams: foundStream = not foundStream if not foundStream: continue # collect statistics for verbosity mode if opt.verbose: networks.add(rec.networkCode()) stations.add(rec.stationCode()) locations.add(rec.locationCode()) channels.add(rec.channelCode()) readStreams.add(streamCode) readRecords += 1 start = rec.startTime() end = rec.endTime() if (outStart is None) or (start < outStart): outStart = seiscomp.core.Time(start) if (outEnd is None) or (end > outEnd): outEnd = seiscomp.core.Time(end) t = _time(rec) if first is None: first = t t = float(t - first) # float needs less memory time_raw.append((t, raw)) if opt.verbose: print( " + %d networks, %d stations, %d sensor locations, " "%d channel codes, %d streams, %d records" % ( len(networks), len(stations), len(locations), len(channels), len(readStreams), readRecords, ), file=sys.stderr, ) print("Sorting records", file=sys.stderr) time_raw.sort() if opt.verbose: print("Writing output", file=sys.stderr) previous = None out = sys.stdout try: # needed in Python 3, fails in Python 2 out = out.buffer except AttributeError: # assuming this is Python 2, nothing to be done pass duplicates = 0 for item in time_raw: if item == previous: duplicates += 1 if opt.uniqueness: continue t, raw = item out.write(raw) previous = item if opt.verbose: print("Finished", file=sys.stderr) if opt.uniqueness: print( " + found and removed {} duplicate records".format(duplicates), file=sys.stderr, ) else: if duplicates > 0: print( " + found {} duplicate records - remove with: scmssort -u".format( duplicates ), file=sys.stderr, ) else: print(" + found 0 duplicate records", file=sys.stderr) print("Output:", file=sys.stderr) if outStart and outEnd: print( " + time window: %s~%s" % (seiscomp.core.Time(outStart), seiscomp.core.Time(outEnd)), file=sys.stderr, ) else: print("No data found in time window", file=sys.stderr) else: # This is an important hint which should always be printed if duplicates > 0 and not opt.uniqueness: print( "Found {} duplicate records - remove with: scmssort -u".format( duplicates ), file=sys.stderr, )