[seiscomp, scanloc] Install, add .gitignore
This commit is contained in:
532
bin/scmssort
Executable file
532
bin/scmssort
Executable file
@ -0,0 +1,532 @@
|
||||
#!/usr/bin/env seiscomp-python
|
||||
# -*- coding: utf-8 -*-
|
||||
############################################################################
|
||||
# Copyright (C) GFZ Potsdam #
|
||||
# All rights reserved. #
|
||||
# #
|
||||
# GNU Affero General Public License Usage #
|
||||
# This file may be used under the terms of the GNU Affero #
|
||||
# Public License version 3.0 as published by the Free Software Foundation #
|
||||
# and appearing in the file LICENSE included in the packaging of this #
|
||||
# file. Please review the following information to ensure the GNU Affero #
|
||||
# Public License version 3.0 requirements will be met: #
|
||||
# https://www.gnu.org/licenses/agpl-3.0.html. #
|
||||
############################################################################
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from seiscomp import core, io
|
||||
|
||||
VERBOSITY = 0
|
||||
|
||||
INFO = 1
|
||||
DEBUG = 2
|
||||
TRACE = 3
|
||||
|
||||
|
||||
def log(level, msg):
    """Write one log line of the form '[level] msg' to stderr."""
    sys.stderr.write(f"[{level}] {msg}\n")
|
||||
|
||||
|
||||
def info_enabled():
    """Tell whether info-level logging is active (VERBOSITY at least INFO)."""
    return INFO <= VERBOSITY
|
||||
|
||||
|
||||
def debug_enabled():
    """Tell whether debug-level logging is active (VERBOSITY at least DEBUG)."""
    return DEBUG <= VERBOSITY
|
||||
|
||||
|
||||
def trace_enabled():
    """Tell whether trace-level logging is active (VERBOSITY at least TRACE)."""
    return TRACE <= VERBOSITY
|
||||
|
||||
|
||||
def error(msg):
    """Log *msg* unconditionally at the 'error' level."""
    log("error", msg)
|
||||
|
||||
|
||||
def warning(msg):
    """Log *msg* unconditionally at the 'warning' level."""
    log("warning", msg)
|
||||
|
||||
|
||||
def info(msg):
    """Log *msg* at the 'info' level when verbosity permits."""
    if not info_enabled():
        return
    log("info", msg)
|
||||
|
||||
|
||||
def debug(msg):
    """Log *msg* at the 'debug' level when verbosity permits."""
    if not debug_enabled():
        return
    log("debug", msg)
|
||||
|
||||
|
||||
def trace(msg):
    """Log *msg* at the 'trace' level when verbosity permits."""
    if not trace_enabled():
        return
    log("trace", msg)
|
||||
|
||||
|
||||
def parse_args():
    """Parse and validate the command line.

    Side effects: increases the module-level VERBOSITY by the number of '-v'
    flags given; exits with status 1 when '--rm' is used without '--list'.

    Returns:
        argparse.Namespace: the parsed options.
    """
    description = (
        "Read unsorted and possibly multiplexed miniSEED files. Sort data by time "
        "(multiplexing) and filter the individual records by time and/or streams. "
        "Apply this before playbacks and waveform archiving."
    )

    # NOTE: trailing backslashes inside the triple-quoted string join the
    # physical lines, so each example renders as one shell command. The
    # first example previously had an unbalanced quote around the -t value.
    epilog = """Examples:
Read data from multiple files, extract streams by time, sort records by start time, \
ignore duplicated and empty records
  cat f1.mseed f2.mseed f3.mseed | \
scmssort -v -t '2007-03-28T15:48~2007-03-28T16:18' -ui > sorted.mseed

Extract streams by time, stream code and sort records by end time
  echo CX.PB01..BH? | \
scmssort -v -E -t '2007-03-28T15:48~2007-03-28T16:18' \
-u -l - test.mseed > sorted.mseed
"""

    p = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument(
        "file",
        nargs="*",
        default="-",
        help="miniSEED file(s) to sort. If no file name or '-' is specified then "
        "standard input is used.",
    )
    p.add_argument(
        "-E",
        "--sort-by-end-time",
        action="store_true",
        help="Sort according to record end time; default is start time.",
    )
    p.add_argument(
        "-i",
        "--ignore",
        action="store_true",
        help="Ignore all records which have no data samples.",
    )
    p.add_argument(
        "-l",
        "--list",
        action="store",
        help="Filter records by a list of stream codes specified in a file or on stdin "
        "(-). One stream per line of format: NET.STA.LOC.CHA - wildcards and regular "
        "expressions are considered. Example: CX.*..BH?.",
    )
    p.add_argument(
        "-o",
        "--output",
        action="store",
        help="Name of output file for miniSEED data (default is stdout).",
    )
    p.add_argument(
        "-r",
        "--rm",
        action="store_true",
        help="Remove all traces in stream list given by '--list' instead of keeping "
        "them.",
    )
    p.add_argument(
        "-t",
        "--time-window",
        action="store",
        help="Time window to filter the records, format: <START TIME> ~ <END TIME>. "
        "Time values are in UTC, must start with an ISO date and may include time "
        "components starting on the hour down to milliseconds. Example: "
        "2023-01-15T12:15",
    )
    p.add_argument(
        "-u",
        "--uniqueness",
        action="store_true",
        help="Ensure uniqueness of output by skipping duplicate records.",
    )
    p.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Run in verbose mode. This option may be repeated several times to "
        "increase the level of verbosity. Example: -vvv.",
    )

    opt = p.parse_args()

    # raise the module-wide verbosity so the log helpers pick it up
    global VERBOSITY
    VERBOSITY += int(opt.verbose)

    # '--rm' merely inverts the stream list into a deny list; alone it is
    # meaningless
    if opt.rm and not opt.list:
        error("The '--rm' option requires the '--list' option to be present as well.")
        sys.exit(1)

    return opt
|
||||
|
||||
|
||||
def rec2id(record):
    """Build the NET.STA.LOC.CHA stream id string of a miniSEED record."""
    parts = (
        record.networkCode(),
        record.stationCode(),
        record.locationCode(),
        record.channelCode(),
    )
    return ".".join(parts)
|
||||
|
||||
|
||||
def str2time(timeString):
    """Parse *timeString* into a seiscomp.core.Time via core.Time.FromString."""
    parsed = core.Time.FromString(timeString)
    return parsed
|
||||
|
||||
|
||||
def time2str(time):
    """Format a seiscomp.core.Time as an ISO string with millisecond precision.

    Returns an empty string for a falsy *time* value.
    """
    if time:
        # keep the first 23 characters: YYYY-MM-DDTHH:MM:SS.mmm
        return time.toString("%Y-%m-%dT%H:%M:%S.%f000")[:23]
    return ""
|
||||
|
||||
|
||||
def read_time_window(opt):
    """Extract the (start, end) time pair from opt.time_window.

    Returns (None, None) when no time window was given. Exits the program
    with status 1 when a time string cannot be parsed.

    Raises:
        ValueError: if the '~' separator is missing or occurs more than once.
    """
    if not opt.time_window:
        return None, None

    toks = opt.time_window.split("~")
    if len(toks) < 2:
        raise ValueError(
            "Time window has wrong format: Use (~) for separating start and end time"
        )
    if len(toks) > 2:
        raise ValueError("Time window has wrong format: Too many tildes (~) found")

    start = core.Time.FromString(toks[0])
    end = core.Time.FromString(toks[1])

    if start is None or end is None:
        error(f"Could not read time window: {toks}")
        if debug_enabled():
            debug(traceback.format_exc())
        sys.exit(1)

    return start, end
|
||||
|
||||
|
||||
def read_lines(file):
    """Yield the lines of *file*; '-' selects standard input."""
    if file == "-":
        # standard input
        for line in sys.stdin:
            yield line
        return

    # regular file on disk
    with open(file, "r", encoding="utf-8") as fp:
        for line in fp:
            yield line
|
||||
|
||||
|
||||
def compile_stream_pattern(opt):
    """Compile the '--list' stream masks into one regular expression.

    Each non-empty, non-comment line of the list (file or stdin for '-')
    must contain a NET.STA.LOC.CHA stream mask; all masks are OR-combined
    into a single pattern. Returns None when no list was given. Exits the
    program with status 1 when the list cannot be read or compiled.
    """
    if not opt.list:
        return None

    streams = []
    pattern = None
    try:
        # report line numbers 1-based, as any text editor shows them
        # (previously the first line was reported as line 0)
        for line_number, line in enumerate(map(str.strip, read_lines(opt.list)), 1):
            # ignore empty lines and comments
            if not line or line.startswith("#"):
                continue

            toks = line.split(".")
            if len(toks) != 4:
                # the 'f' prefix was missing on the last part, so the
                # literal text '{line}' was printed instead of the line
                raise ValueError(
                    f"Invalid stream definition at line {line_number}. Expected the 4 "
                    "stream components NET.STA.LOC.CHA separated by a dot, "
                    f"got: {line}."
                )

            streams.append(line)

        if not streams:
            raise ValueError("No stream definition found.")

        pattern = re.compile("|".join(streams))

    except Exception as e:
        error(f"Could not compile pattern from stream list file '{opt.list}': {e}")
        if debug_enabled():
            debug(traceback.format_exc())
        sys.exit(1)

    info(
        f"Using stream id {'DENY' if opt.rm else 'ALLOW'} list with {len(streams)} "
        "stream masks"
    )

    if debug_enabled():
        masks = "\n + ".join(streams)
        debug(f"Stream masks:\n + {masks}")

    return pattern
|
||||
|
||||
|
||||
def record_input(file, datatype=core.Array.INT):
    """Yield miniSEED records read from *file*, or from stdin for '-'.

    The raw record bytes are preserved (core.Record.SAVE_RAW) so they can
    be written back out unchanged.

    Raises:
        IOError: if the record stream cannot be created.
        FileNotFoundError: if *file* is not '-' and does not exist.
        ValueError: if the stream source cannot be set.
    """
    stream = io.RecordStream.Create("file")
    if not stream:
        raise IOError("Failed to create a RecordStream")

    if file != "-" and not os.path.exists(file):
        raise FileNotFoundError("Could not find file")

    if not stream.setSource(file):
        raise ValueError("Could not set record stream source")

    it = io.RecordInput(stream, datatype, core.Record.SAVE_RAW)

    # hoist the verbosity check out of the loop; tracing every record is
    # only worth the overhead when trace logging is actually on
    tracing = trace_enabled()
    while True:
        record = it.next()
        if not record:
            return

        if tracing:
            trace(
                f" + {time2str(record.startTime())}~{time2str(record.endTime())} "
                f"{rec2id(record)}"
            )
        yield record
|
||||
|
||||
|
||||
def unique(sequence):
    """Return the items of *sequence* with duplicates removed, order kept."""
    seen = set()
    result = []
    for item in sequence:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
|
||||
|
||||
|
||||
def main():
    """Read miniSEED records, filter them by time window and stream list,
    sort them by record time and write them out.

    Returns:
        int: 0 on success, non-zero on error (meant for sys.exit()).
    """
    # parse commandline
    opt = parse_args()

    # time window
    t_min, t_max = read_time_window(opt)
    if t_max and t_min and t_max <= t_min:
        error(
            f"Invalid time window: {time2str(t_min)}~{time2str(t_max)}\n"
            " + end time must be greater than start time"
        )
        # was 'return False', which sys.exit() maps to exit status 0 (success)
        return 1

    # only announce a time filter when one was actually given; previously an
    # empty window '~' was logged unconditionally
    if t_min or t_max:
        info(f"Filtering records by time window: {time2str(t_min)}~{time2str(t_max)}")

    # stream filter
    pattern = compile_stream_pattern(opt)

    outputFile = opt.output if opt.output else None

    # record buffer to be sorted later on, each item is a tuple of
    # (delta_time, raw_binary_record_data)
    rec_buf = []

    # statistics
    records_read = 0
    records_window = 0
    records_empty = 0

    # statistics (info mode)
    networks = set()
    stations = set()
    streams = set()
    buf_min = None
    buf_max = None

    # make sure to read from stdin only once
    files = [x for x in opt.file if x != "-"]
    if len(files) == len(opt.file):
        info(f"Reading data from {len(opt.file)} file(s)")
    elif not files:
        # stdin only; use a proper list (the original bare "-" string only
        # worked because iterating a one-char string yields that same string)
        files = ["-"]
        info("Reading data from stdin. Use Ctrl + C to interrupt.")
    else:
        info(
            f"Reading data from stdin and {len(files)} files. Use Ctrl + C to "
            "interrupt."
        )
        # keep stdin at its original position among the input files
        files.insert(opt.file.index("-"), "-")

    # time of the first valid record is used as reference for sorting
    ref_time = None

    # read records from input file
    for file in files:
        records_file = 0
        records_empty_file = 0

        try:
            for rec in record_input(file):
                records_file += 1
                stream_id = ""

                # skip record if outside time window
                if (t_min and rec.endTime() < t_min) or (
                    t_max and rec.startTime() > t_max
                ):
                    continue

                if pattern or info_enabled():
                    records_window += 1
                    stream_id = rec2id(rec)

                # with '--rm' the pattern is a deny list, otherwise an allow
                # list: skip matches when removing, non-matches when keeping
                if pattern and bool(pattern.match(stream_id)) == bool(opt.rm):
                    continue

                if not rec.sampleCount():
                    trace(
                        f" + found empty record starting at {time2str(rec.startTime())} "
                        f"{rec2id(rec)}"
                    )
                    records_empty_file += 1
                    if opt.ignore:
                        trace(" + ignored")
                        continue

                # record time reference set to start or end time depending on
                # sort option
                t = rec.endTime() if opt.sort_by_end_time else rec.startTime()

                if ref_time is None:
                    ref_time = core.Time(t)
                    t = 0
                else:
                    t = float(t - ref_time)  # float needs less memory

                # buffer tuple of (time delta, binary record data)
                rec_buf.append((t, rec.raw().str()))

                # collect statistics for debug mode
                if info_enabled():
                    networks.add(rec.networkCode())
                    stations.add(f"{rec.networkCode()}.{rec.stationCode()}")
                    streams.add(stream_id)
                    # copy of time object is required because record may be
                    # freed before
                    if not buf_min or rec.startTime() < buf_min:
                        buf_min = core.Time(rec.startTime())
                    if not buf_max or rec.startTime() > buf_max:
                        buf_max = core.Time(rec.endTime())

            name = "<stdin>" if file == "-" else file
            empty = f", empty: {records_empty_file}" if records_empty_file else ""
            debug(f" + {name}: {records_file} records{empty}")

        except Exception as e:
            # the closing quote after the file name was missing originally
            error(f"Could not read file '{file}': {e}")
            if debug_enabled():
                debug(traceback.format_exc())
            return 1

        records_read += records_file
        records_empty += records_empty_file

    # stop if no records have been read
    if not records_read:
        warning("No records found in input file(s).")
        return 0

    buf_len = len(rec_buf)

    # statistics about records read and filtered
    if info_enabled() and buf_len != records_read:
        info(
            f"""{records_read-buf_len}/{records_read} records filtered:
 + by time window: {records_read-records_window}
 + by stream id {'DENY' if opt.rm else 'ALLOW'} list: {records_window-buf_len}"""
        )

    # stop if no record passed the filter
    if not buf_len:
        warning("All records filtered, nothing to write.")
        return 0

    # network, station and stream information
    if info_enabled():
        info(
            f"Found data for {len(networks)} networks, {len(stations)} stations "
            f"and {len(streams)} streams",
        )
        if debug_enabled() and streams:
            streamList = "\n + ".join(streams)
            debug(f"streams:\n + {streamList}")

    # sort records by time only
    if buf_len > 1:
        info(f"Sorting {buf_len} records")
        rec_buf.sort()

    # write sorted records, count duplicates and optionally remove them
    info(f"Writing {buf_len} records")
    prev_rec = None
    duplicates = 0

    if outputFile:
        print(f"Output data to file: {outputFile}", file=sys.stderr)
        try:
            out = open(outputFile, "wb")
        except Exception:
            # the 'f' prefix was missing here, so the literal text
            # '{outputFile}' used to be printed; also use the conventional
            # exit status 1 instead of -1 (which becomes 255)
            print(f"Cannot create output file {outputFile}", file=sys.stderr)
            return 1
    else:
        out = sys.stdout.buffer

    try:
        for _t, rec in rec_buf:
            if rec == prev_rec:
                duplicates += 1
                if opt.uniqueness:
                    continue
            else:
                prev_rec = rec

            out.write(rec)
    finally:
        # close the file we opened; stdout's buffer must stay open
        if outputFile:
            out.close()

    # statistics about records written
    if info_enabled():
        records_written = buf_len - duplicates if opt.uniqueness else buf_len
        msg = f"""Wrote {records_written} records
 + time window: {time2str(buf_min)}~{time2str(buf_max)}"""

        if opt.uniqueness:
            msg += f"""
 + found and removed {duplicates} duplicate records"""
        elif not duplicates:
            msg += """
 + no duplicate records found"""

        if opt.ignore:
            msg += f"""
 + {records_empty} empty records found and ignored"""

        info(msg)

    # additional warning output
    if records_empty and not opt.ignore:
        warning(f"Found {records_empty} empty records - remove with: scmssort -i")

    # This is an important hint which should always be printed
    if duplicates > 0 and not opt.uniqueness:
        warning(f"Found {duplicates} duplicate records - remove with: scmssort -u")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # propagate main()'s return value as the process exit status
    raise SystemExit(main())
|
Reference in New Issue
Block a user