You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
417 lines
12 KiB
Plaintext
417 lines
12 KiB
Plaintext
2 years ago
|
#!/usr/bin/env seiscomp-python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
############################################################################
|
||
|
# Copyright (C) GFZ Potsdam #
|
||
|
# All rights reserved. #
|
||
|
# #
|
||
|
# GNU Affero General Public License Usage #
|
||
|
# This file may be used under the terms of the GNU Affero #
|
||
|
# Public License version 3.0 as published by the Free Software Foundation #
|
||
|
# and appearing in the file LICENSE included in the packaging of this #
|
||
|
# file. Please review the following information to ensure the GNU Affero #
|
||
|
# Public License version 3.0 requirements will be met: #
|
||
|
# https://www.gnu.org/licenses/agpl-3.0.html. #
|
||
|
############################################################################
|
||
|
|
||
|
from __future__ import absolute_import, division, print_function
|
||
|
|
||
|
import sys
|
||
|
import os
|
||
|
import re
|
||
|
import argparse
|
||
|
import seiscomp.core
|
||
|
import seiscomp.io
|
||
|
|
||
|
|
||
|
class MyArgumentParser(argparse.ArgumentParser):
|
||
|
def format_epilog(self):
|
||
|
return self.epilog
|
||
|
|
||
|
|
||
|
def str2time(timestring):
|
||
|
"""
|
||
|
Liberally accept many time string formats and convert them to a
|
||
|
seiscomp.core.Time
|
||
|
"""
|
||
|
|
||
|
timestring = timestring.strip()
|
||
|
for c in ["-", "/", ":", "T", "Z"]:
|
||
|
timestring = timestring.replace(c, " ")
|
||
|
timestring = timestring.split()
|
||
|
assert 3 <= len(timestring) <= 6
|
||
|
timestring.extend((6 - len(timestring)) * ["0"])
|
||
|
timestring = " ".join(timestring)
|
||
|
timeFormat = "%Y %m %d %H %M %S"
|
||
|
if timestring.find(".") != -1:
|
||
|
timeFormat += ".%f"
|
||
|
|
||
|
time = seiscomp.core.Time()
|
||
|
time.fromString(timestring, timeFormat)
|
||
|
return time
|
||
|
|
||
|
|
||
|
def time2str(time):
|
||
|
"""
|
||
|
Convert a seiscomp.core.Time to a string
|
||
|
"""
|
||
|
return time.toString("%Y-%m-%d %H:%M:%S.%f000000")[:23]
|
||
|
|
||
|
|
||
|
def recordInput(filename=None, datatype=seiscomp.core.Array.INT):
|
||
|
"""
|
||
|
Simple Record iterator that reads from a file (to be specified by
|
||
|
filename) or -- if no filename was specified -- reads from standard input
|
||
|
"""
|
||
|
|
||
|
stream = seiscomp.io.RecordStream.Create("file")
|
||
|
if not stream:
|
||
|
raise IOError("failed to create a RecordStream")
|
||
|
|
||
|
if not filename:
|
||
|
filename = "-"
|
||
|
|
||
|
if filename == "-":
|
||
|
print(
|
||
|
"Waiting for data input from stdin. Use Ctrl + C to interrupt.",
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
else:
|
||
|
if not os.path.exists(filename):
|
||
|
print("Cannot find file {}".format(filename), file=sys.stderr)
|
||
|
sys.exit()
|
||
|
|
||
|
if not stream.setSource(filename):
|
||
|
print(" + failed to assign source file to RecordStream", file=sys.stderr)
|
||
|
sys.exit()
|
||
|
|
||
|
records = seiscomp.io.RecordInput(stream, datatype, seiscomp.core.Record.SAVE_RAW)
|
||
|
|
||
|
while True:
|
||
|
try:
|
||
|
record = next(records)
|
||
|
except Exception:
|
||
|
print("Received invalid or no input", file=sys.stderr)
|
||
|
sys.exit()
|
||
|
|
||
|
if not record:
|
||
|
return
|
||
|
yield record
|
||
|
|
||
|
|
||
|
tmin = str2time("1970-01-01 00:00:00")
|
||
|
tmax = str2time("2500-01-01 00:00:00")
|
||
|
ifile = "-"
|
||
|
|
||
|
description = (
|
||
|
"Read unsorted and possibly multiplexed miniSEED files. "
|
||
|
"Sort data by time (multiplexing) and filter the individual "
|
||
|
"records by time and/or streams. Apply this before playbacks "
|
||
|
"and waveform archiving."
|
||
|
)
|
||
|
|
||
|
epilog = (
|
||
|
"Examples:\n"
|
||
|
"Read data from multiple files, extract streams by time, sort records by start "
|
||
|
"time, remove duplicate records\n"
|
||
|
" cat f1.mseed f2.mseed f3.mseed |\\\n"
|
||
|
" scmssort -v -t '2007-03-28 15:48~2007-03-28 16:18' -u > sorted.mseed\n"
|
||
|
"\n"
|
||
|
"Extract streams by time, stream code and sort records by end time\n"
|
||
|
" echo CX.PB01..BH? |\\ \n"
|
||
|
" scmssort -v -E -t '2007-03-28 15:48~2007-03-28 16:18' -u -l - test.mseed > "
|
||
|
"sorted.mseed"
|
||
|
)
|
||
|
|
||
|
|
||
|
# p = MyArgumentParser(
|
||
|
# usage="\n %prog [options] [files | < ] > ", description=description, epilog=epilog
|
||
|
# )
|
||
|
p = MyArgumentParser(
|
||
|
description=description,
|
||
|
epilog=epilog,
|
||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||
|
)
|
||
|
p.add_argument(
|
||
|
"-E",
|
||
|
"--sort-by-end-time",
|
||
|
action="store_true",
|
||
|
help="Sort according to record end time; default is start time.",
|
||
|
)
|
||
|
p.add_argument(
|
||
|
"-r",
|
||
|
"--rm",
|
||
|
action="store_true",
|
||
|
help="Remove all traces in stream list given by --list instead of keeping them.",
|
||
|
)
|
||
|
p.add_argument(
|
||
|
"-l",
|
||
|
"--list",
|
||
|
action="store",
|
||
|
help="File with stream list to filter the records. "
|
||
|
"One stream per line. Instead of a file read the from stdin (-). "
|
||
|
"Line format: NET.STA.LOC.CHA - wildcards and regular expressions "
|
||
|
"are considered. Example: CX.*..BH?.",
|
||
|
)
|
||
|
p.add_argument(
|
||
|
"-t",
|
||
|
"--time-window",
|
||
|
action="store",
|
||
|
help="Specify time window (as one -properly quoted- string). Times "
|
||
|
"are of course UTC and separated by a tilde '~'.",
|
||
|
)
|
||
|
p.add_argument(
|
||
|
"-u",
|
||
|
"--uniqueness",
|
||
|
action="store_true",
|
||
|
help="Ensure uniqueness of output, i.e. skip duplicate records.",
|
||
|
)
|
||
|
p.add_argument("-v", "--verbose", action="store_true", help="Run in verbose mode.")
|
||
|
|
||
|
p.add_argument(
|
||
|
"filenames",
|
||
|
nargs="+",
|
||
|
help="Names of input files in miniSEED format.",
|
||
|
)
|
||
|
opt = p.parse_args()
|
||
|
filenames = opt.filenames
|
||
|
|
||
|
if opt.time_window:
|
||
|
tmin, tmax = list(map(str2time, opt.time_window.split("~")))
|
||
|
|
||
|
if opt.verbose:
|
||
|
print(
|
||
|
"Considered time window: %s~%s" % (time2str(tmin), time2str(tmax)),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
|
||
|
listFile = None
|
||
|
removeStreams = False
|
||
|
if opt.list:
|
||
|
listFile = opt.list
|
||
|
print("Considered stream list from: %s" % (listFile), file=sys.stderr)
|
||
|
|
||
|
if opt.rm:
|
||
|
removeStreams = True
|
||
|
print("Removing listed streams", file=sys.stderr)
|
||
|
|
||
|
|
||
|
def _time(record):
|
||
|
if opt.sort_by_end_time:
|
||
|
return seiscomp.core.Time(record.endTime())
|
||
|
return seiscomp.core.Time(record.startTime())
|
||
|
|
||
|
|
||
|
def _in_time_window(record, tMin, tMax):
|
||
|
return record.endTime() >= tMin and record.startTime() <= tMax
|
||
|
|
||
|
|
||
|
def readStreamList(file):
|
||
|
streamList = []
|
||
|
|
||
|
try:
|
||
|
if file == "-":
|
||
|
f = sys.stdin
|
||
|
file = "stdin"
|
||
|
else:
|
||
|
f = open(listFile, "r", encoding="utf-8")
|
||
|
except FileNotFoundError:
|
||
|
print("%s: error: unable to open" % listFile, file=sys.stderr)
|
||
|
return []
|
||
|
|
||
|
lineNumber = -1
|
||
|
for line in f:
|
||
|
lineNumber = lineNumber + 1
|
||
|
line = line.strip()
|
||
|
# ignore comments
|
||
|
if len(line) > 0 and line[0] == "#":
|
||
|
continue
|
||
|
|
||
|
if len(line) == 0:
|
||
|
continue
|
||
|
|
||
|
toks = line.split(".")
|
||
|
if len(toks) != 4:
|
||
|
f.close()
|
||
|
print(
|
||
|
"error: %s in line %d has invalid line format, expected "
|
||
|
"stream list: NET.STA.LOC.CHA - 1 line per stream including "
|
||
|
"regular expressions" % (listFile, lineNumber),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
return []
|
||
|
|
||
|
streamList.append(line)
|
||
|
|
||
|
f.close()
|
||
|
|
||
|
if len(streamList) == 0:
|
||
|
return []
|
||
|
|
||
|
return streamList
|
||
|
|
||
|
|
||
|
if not filenames:
|
||
|
filenames = ["-"]
|
||
|
|
||
|
streams = None
|
||
|
if listFile:
|
||
|
streams = readStreamList(listFile)
|
||
|
if not streams and not removeStreams:
|
||
|
print(" + cannot extract data", file=sys.stderr)
|
||
|
sys.exit()
|
||
|
|
||
|
if opt.verbose:
|
||
|
string = " + streams: "
|
||
|
|
||
|
for stream in streams:
|
||
|
string += stream + " "
|
||
|
print("%s" % (string), file=sys.stderr)
|
||
|
|
||
|
pattern = re.compile("|".join(streams))
|
||
|
|
||
|
readRecords = 0
|
||
|
networks = set()
|
||
|
stations = set()
|
||
|
locations = set()
|
||
|
channels = set()
|
||
|
readStreams = set()
|
||
|
outEnd = None
|
||
|
outStart = None
|
||
|
|
||
|
if filenames:
|
||
|
first = None
|
||
|
time_raw = []
|
||
|
for fileName in filenames:
|
||
|
if opt.verbose:
|
||
|
print("Reading file '%s'" % fileName, file=sys.stderr)
|
||
|
|
||
|
for rec in recordInput(fileName):
|
||
|
if not rec:
|
||
|
continue
|
||
|
|
||
|
if not _in_time_window(rec, tmin, tmax):
|
||
|
continue
|
||
|
|
||
|
raw = rec.raw().str()
|
||
|
streamCode = "%s.%s.%s.%s" % (
|
||
|
rec.networkCode(),
|
||
|
rec.stationCode(),
|
||
|
rec.locationCode(),
|
||
|
rec.channelCode(),
|
||
|
)
|
||
|
|
||
|
if listFile:
|
||
|
foundStream = False
|
||
|
|
||
|
if pattern.match(streamCode):
|
||
|
foundStream = True
|
||
|
|
||
|
if removeStreams:
|
||
|
foundStream = not foundStream
|
||
|
|
||
|
if not foundStream:
|
||
|
continue
|
||
|
|
||
|
# collect statistics for verbosity mode
|
||
|
if opt.verbose:
|
||
|
networks.add(rec.networkCode())
|
||
|
stations.add(rec.stationCode())
|
||
|
locations.add(rec.locationCode())
|
||
|
channels.add(rec.channelCode())
|
||
|
readStreams.add(streamCode)
|
||
|
readRecords += 1
|
||
|
|
||
|
start = rec.startTime()
|
||
|
end = rec.endTime()
|
||
|
|
||
|
if (outStart is None) or (start < outStart):
|
||
|
outStart = seiscomp.core.Time(start)
|
||
|
|
||
|
if (outEnd is None) or (end > outEnd):
|
||
|
outEnd = seiscomp.core.Time(end)
|
||
|
|
||
|
t = _time(rec)
|
||
|
if first is None:
|
||
|
first = t
|
||
|
t = float(t - first) # float needs less memory
|
||
|
time_raw.append((t, raw))
|
||
|
|
||
|
if opt.verbose:
|
||
|
print(
|
||
|
" + %d networks, %d stations, %d sensor locations, "
|
||
|
"%d channel codes, %d streams, %d records"
|
||
|
% (
|
||
|
len(networks),
|
||
|
len(stations),
|
||
|
len(locations),
|
||
|
len(channels),
|
||
|
len(readStreams),
|
||
|
readRecords,
|
||
|
),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
print("Sorting records", file=sys.stderr)
|
||
|
time_raw.sort()
|
||
|
|
||
|
if opt.verbose:
|
||
|
print("Writing output", file=sys.stderr)
|
||
|
previous = None
|
||
|
|
||
|
out = sys.stdout
|
||
|
try:
|
||
|
# needed in Python 3, fails in Python 2
|
||
|
out = out.buffer
|
||
|
except AttributeError:
|
||
|
# assuming this is Python 2, nothing to be done
|
||
|
pass
|
||
|
|
||
|
duplicates = 0
|
||
|
for item in time_raw:
|
||
|
if item == previous:
|
||
|
duplicates += 1
|
||
|
if opt.uniqueness:
|
||
|
continue
|
||
|
|
||
|
t, raw = item
|
||
|
out.write(raw)
|
||
|
|
||
|
previous = item
|
||
|
|
||
|
if opt.verbose:
|
||
|
print("Finished", file=sys.stderr)
|
||
|
if opt.uniqueness:
|
||
|
print(
|
||
|
" + found and removed {} duplicate records".format(duplicates),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
else:
|
||
|
if duplicates > 0:
|
||
|
print(
|
||
|
" + found {} duplicate records - remove with: scmssort -u".format(
|
||
|
duplicates
|
||
|
),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
else:
|
||
|
print(" + found 0 duplicate records", file=sys.stderr)
|
||
|
|
||
|
print("Output:", file=sys.stderr)
|
||
|
if outStart and outEnd:
|
||
|
print(
|
||
|
" + time window: %s~%s"
|
||
|
% (seiscomp.core.Time(outStart), seiscomp.core.Time(outEnd)),
|
||
|
file=sys.stderr,
|
||
|
)
|
||
|
else:
|
||
|
print("No data found in time window", file=sys.stderr)
|
||
|
|
||
|
else:
|
||
|
# This is an important hint which should always be printed
|
||
|
if duplicates > 0 and not opt.uniqueness:
|
||
|
print(
|
||
|
"Found {} duplicate records - remove with: scmssort -u".format(
|
||
|
duplicates
|
||
|
),
|
||
|
file=sys.stderr,
|
||
|
)
|