seiscomp-training/lib/python/nettab/convertUtils.py

from __future__ import print_function
import sys
import csv
import re
from datetime import datetime

def getFieldNames(fd):
    """Read one line from ``fd`` and return its comma separated fields.

    Each field has its surrounding whitespace (including the trailing line
    break of the last one) removed.  The line is consumed from ``fd``.
    """
    header = fd.readline()
    return [column.strip() for column in header.split(',')]

def quote(instr):
    """Return ``instr`` enclosed in double quotes (no escaping is done)."""
    return "".join(('"', instr, '"'))

def hummanStr(instr):
    """Return ``instr`` with every underscore turned into a blank."""
    words = instr.split("_")
    return " ".join(words)

def parseDate(val):
    """Convert a date string into a ``datetime``.

    Slashes are treated like dashes.  The layout is selected purely by the
    length of the string; the accepted layouts are:

        YYYY-JJJ               YYYY-MM-DD
        YYYY-JJJ:HHMM          YYYY-JJJTHH:MM
        YYYY-MM-DDTHH:MM       YYYY-JJJTHH:MM:SS
        YYYY-MM-DDTHH:MM:SS

    Returns None for an empty value or None.
    Raises ValueError when the length matches no layout or when the text
    cannot be parsed with the layout selected.
    """
    if not val:
        return None

    date = val.replace("/", "-")
    formats = {len("YYYY-JJJ"): "%Y-%j",
               len("YYYY-MM-DD"): "%Y-%m-%d",
               len("YYYY-JJJ:HHMM"): "%Y-%j:%H%M",
               len("YYYY-JJJTHH:MM"): "%Y-%jT%H:%M",
               len("YYYY-MM-DDTHH:MM"): "%Y-%m-%dT%H:%M",
               len("YYYY-JJJTHH:MM:SS"): "%Y-%jT%H:%M:%S",
               len("YYYY-MM-DDTHH:MM:SS"): "%Y-%m-%dT%H:%M:%S"}

    # Report an unknown length explicitly; a plain dictionary lookup would
    # only contribute the bare length number to the error text.
    layout = formats.get(len(date))
    if layout is None:
        raise ValueError("invalid date: %s (unsupported date format)" % date)

    try:
        return datetime.strptime(date, layout)
    except Exception as e:
        # Keep the offending value and the reason visibly separated.
        raise ValueError("invalid date: %s (%s)" % (date, str(e)))

def formatDate(date):
    """Render ``date`` as ``YYYY/JJJ``, or ``YYYY/JJJ:HHMM`` when it carries
    a non-zero hour or minute.  A false value gives an empty string.

    Seconds are never part of the output.
    """
    if not date:
        return ""

    atMidnight = (date.hour == 0 and date.minute == 0)
    if atMidnight:
        layout = "%Y/%j"
    else:
        layout = "%Y/%j:%H%M"
    return date.strftime(layout)

def isPyVersion(major, minor):
    """Tell whether the running interpreter is exactly ``major.minor``."""
    running = tuple(sys.version_info[:2])
    return running == (major, minor)

class StationMappings:
    """Archive network mappings for the stations of one network.

    The mapping file holds one entry per line of the form
    ``STA_NET=ARCHIVENET [from=N/N] [to=N/N]`` where the optional ``from``
    and ``to`` values are handed to ``parseDate``.  Entries for another
    network, or for a station that is not in ``stationList``, are skipped.
    """

    def __init__(self, networkCode, stationList, filename):
        """Read the mappings for ``networkCode`` from ``filename``.

        Nothing is read when ``filename`` is empty or None.

        Raises Exception with a ``<filename>:<line>: <reason>`` message when
        a line cannot be parsed.
        """
        self.networkCode = networkCode
        self.stationList = stationList
        # station code -> list of (from_date, to_date, archive_net)
        self.stationMapping = {}
        # (station code, start, end) -> list of
        # (network, segment start, segment end, from_date, to_date)
        self.stationBreak = {}

        if not filename:
            return

        _rx_statmap = re.compile(r'\s*([^_]*)_([^=]*)=(\S*)\s*(from=([0-9]+/[0-9]+))?\s*(to=([0-9]+/[0-9]+))?\s*$')
        stationMapping = {}
        lineno = 0
        with open(filename) as fd:
            try:
                # enumerate() keeps the line number right for skipped lines too
                for (lineno, line) in enumerate(fd, 1):
                    m = _rx_statmap.match(line)
                    if m is None:
                        raise Exception("parse error")

                    (sta, net, archive_net, from_def, from_year, to_def, to_year) = m.groups()

                    if net != self.networkCode:
                        continue

                    if sta not in self.stationList:
                        continue

                    from_date = parseDate(from_year) if from_def else None
                    to_date = parseDate(to_year) if to_def else None

                    stationMapping.setdefault(sta, []).append((from_date, to_date, archive_net))

            except Exception as e:
                raise Exception("%s:%d: %s" % (filename, lineno, str(e)))

        if len(stationMapping):
            print("Found %d station mappings" % len(stationMapping), file=sys.stderr)
            self.stationMapping = stationMapping

    def dump(self, fdo, stationCode):
        """Write the ``Sa: ArchiveNetworkCode=...`` lines to ``fdo``.

        With a ``stationCode`` only that station is written, otherwise all
        of them.  Every station written is removed from the mapping table
        afterwards.
        """
        items = []
        for (code, mapping) in self.stationMapping.items():
            if stationCode and stationCode != code:
                continue
            items.append(code)
            for (fromDate, toDate, network) in mapping:
                fdo.write("Sa: ArchiveNetworkCode=%s %s" % (network, code))
                if fromDate:
                    fdo.write(" from=%s" % formatDate(fromDate))
                if toDate:
                    fdo.write(" to=%s" % formatDate(toDate))
                fdo.write("\n")

        # Removal is deferred so the dictionary is not changed while iterating
        for code in items:
            self.stationMapping.pop(code)

    def getMappings(self, code, start, end):
        """Return the ``[start, end]`` pairs recorded for a station line.

        When no break was registered for ``(code, start, end)`` the result
        is the single pair ``[[start, end]]``.
        """
        key = (code, start, end)
        if key not in self.stationBreak:
            return [[start, end]]

        return [[s, e] for (archiveNet, s, e, fr, to) in self.stationBreak[key]]

    def _registerBreak(self, key, segments):
        """Record ``segments`` for ``key``; a key may be registered only once."""
        if key in self.stationBreak:
            raise Exception("Crazy multiple station mapping for the same station line")
        self.stationBreak[key] = list(segments)

    def parseStationLine(self, items):
        """Register the break caused by the first applicable mapping of a
        station line and return the resulting ``[start, end]`` pairs.

        ``items[0]`` is the station code, ``items[10]`` the start date and
        ``items[11]``, when present, the end date.

        A ``from`` or ``to`` date applies when it is not before the start
        and, if the line has an end, not after it; the line is then split
        at that date.  A mapping without dates covers the whole line.

        Raises Exception when a mapping defines both dates or when a break
        was already registered for the same station line.
        """
        stationCode = items[0].strip()
        start = parseDate(items[10])
        end = parseDate(items[11]) if len(items) > 11 else None

        if stationCode not in self.stationMapping:
            return self.getMappings(stationCode, start, end)

        key = (stationCode, start, end)
        for (fDate, tDate, archiveNet) in self.stationMapping[stationCode]:
            if fDate and tDate:
                raise Exception("Not Supported to and from definitions found.")

            if fDate:
                if fDate >= start and (not end or fDate <= end):
                    # Own network up to fDate, archive network afterwards
                    self._registerBreak(key, [
                        (self.networkCode, start, fDate, fDate, tDate),
                        (archiveNet, fDate, end, fDate, tDate)])
                    break
            elif tDate:
                if tDate >= start and (not end or tDate <= end):
                    # Archive network up to tDate, own network afterwards
                    self._registerBreak(key, [
                        (archiveNet, start, tDate, fDate, tDate),
                        (self.networkCode, tDate, end, fDate, tDate)])
                    break
            else:
                # No dates given: the mapping covers the complete line
                self._registerBreak(key, [(archiveNet, start, end, fDate, tDate)])
                break

        return self.getMappings(stationCode, start, end)

class StationAttributes:
    """Attributes of the stations of one network.

    The attributes come from an optional CSV file and are completed, per
    station line, with values derived from the station description.
    """

    def __init__(self, networkCode, stationList, filename):
        """Load the attribute rows of ``networkCode`` from the CSV ``filename``.

        Rows for another network, or for a station that is not in
        ``stationList``, are skipped.  Nothing is read when ``filename`` is
        empty or None.

        Raises Exception when a required column is missing, when a value
        cannot be converted, or when a station shows up more than once.
        """
        self.networkCode = networkCode
        self.stationList = stationList
        # station code -> dictionary of attribute name -> value
        self.stationAttributeList = {}

        if not filename:
            return

        attributes = {}
        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    net_code = row['net_code']
                    if net_code != self.networkCode:
                        continue

                    sta_code = row['sta_code']
                    if sta_code not in self.stationList:
                        continue

                    # The start date is parsed for validation only
                    parseDate(row['start'].strip())

                    if sta_code in attributes:
                        raise Exception("multiple %s found in %s" % (str((net_code, sta_code, row['start'])), filename))

                    del row['net_code']
                    del row['sta_code']
                    del row['start']

                    ## Clean up input: empty columns are dropped
                    for key in ['restricted', 'restricted_exc', 'place', 'country', 'affiliation', 'remark']:
                        row[key] = row[key].strip()
                        if len(row[key]) == 0:
                            del row[key]

                    # Only a true 'restricted' flag is kept
                    if 'restricted' in row:
                        row['restricted'] = bool(int(row['restricted']))
                        if not row['restricted']:
                            del row['restricted']

                    if row:
                        attributes[sta_code] = row

            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))

            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        self.stationAttributeList = self.__build__(attributes)
        print(" loaded attributes for %d stations on network %s (%s)" % (len(self.stationAttributeList), self.networkCode, filename), file=sys.stderr)

    def __build__(self, attributes):
        """Return a copy of ``attributes`` using the capitalized key names.

        Keys without a capitalized counterpart are copied unchanged and
        stations left without any attribute are omitted.
        """
        renamed = {'country': 'Country',
                   'place': 'Place',
                   'affiliation': 'Affiliation',
                   'remark': 'Remark',
                   'restricted': 'Restricted'}
        newat = {}

        if not attributes:
            return newat

        for (code, row) in attributes.items():
            nr = {}
            for (k, v) in row.items():
                nr[renamed.get(k, k)] = v
            if nr:
                newat[code] = nr
        return newat

    def get(self, code):
        """Return the attribute dictionary of station ``code`` or None."""
        if self.stationAttributeList and code in self.stationAttributeList:
            return self.stationAttributeList[code]
        return None

    def __parseDescription__(self, description):
        """Derive 'Affiliation', 'Place' and 'Country' from a description.

        Underscores are read as blanks.  Text in front of the first
        "Station" becomes the affiliation.  The remaining text is split at
        commas: with more than one part the last one is the country and the
        others form the place, otherwise everything is the place.  Only
        non-empty values are returned.
        """
        affiliation = None
        country = None
        description = hummanStr(description)

        marker = description.find("Station")
        if marker >= 0:
            affiliation = description[0:marker].strip()
            parts = description[marker + len("Station"):].strip().split(",")
        else:
            parts = description.split(",")

        if len(parts) > 1:
            country = parts[-1].strip()
            parts = parts[:-1]
        place = ",".join(parts)

        oui = {}
        if country:
            oui['Country'] = country
        if place:
            oui['Place'] = place
        if affiliation:
            oui['Affiliation'] = affiliation
        return oui

    def reorder_station_attr(self):
        """Group the stations by attribute name and attribute value.

        Returns a dictionary ``name -> str(value) -> list of station
        entries`` or None when no attributes are known.  A 'restricted_exc'
        attribute is filed under 'Restricted' with the negated 'Restricted'
        value of the station (True when it has none) and its entry is
        ``<code>,*,<restricted_exc value>``.
        """
        att = {}
        if not self.stationAttributeList:
            return None

        for (code, row) in self.stationAttributeList.items():
            for (k, v) in row.items():
                if k == 'restricted_exc':
                    k = 'Restricted'
                    extra = ',*,' + str(v)
                    v = (not row['Restricted']) if 'Restricted' in row else True
                else:
                    extra = ''

                att.setdefault(k, {}).setdefault(str(v), []).append(code + extra)
        return att

    def parseStationLine(self, items, fStart = None, fEnd = None):
        """Return ``[place, country]`` for a station line.

        ``items[0]`` is the station code, ``items[1]`` the description and
        ``items[10]`` the start date, which is parsed for validation only.
        Attributes that are not known for the station are derived from the
        description and remembered.

        ``fStart`` and ``fEnd`` are accepted for callers that force other
        start and end values; they do not influence the result.

        Raises Exception when the station is not in the station list.
        """
        stationCode = items[0].strip()
        description = items[1]
        parseDate(items[10])
        if stationCode not in self.stationList:
            raise Exception("Station %s not in station list." % stationCode)

        oui = None
        at = self.get(stationCode)
        if not at:
            # Nothing known yet: take everything from the description
            at = self.__parseDescription__(description)
            if at:
                self.stationAttributeList[stationCode] = at
        else:
            # Fill only the attributes that are missing
            for item in ['Affiliation', 'Country', 'Place']:
                if item in at:
                    continue
                if not oui:
                    oui = self.__parseDescription__(description)
                if item in oui:
                    at[item] = oui[item]

        country = at['Country'] if 'Country' in at else None
        place = at['Place'] if 'Place' in at else None
        return [place, country]

    def dump(self, fdo, code):
        """Write the ``Sa:`` attribute lines to ``fdo``.

        Without ``code`` one line per attribute value is written, followed
        by all stations sharing it.  With ``code`` only the attributes of
        that station are written, and only on the first call for it.
        'Country' and 'Place' are never written.
        """
        if not code:
            # reorder_station_attr() gives None when there is nothing to dump
            att = self.reorder_station_attr() or {}
            for (key, v) in att.items():
                if key in ['Country', 'Place']:
                    continue
                for (value, s) in v.items():
                    fdo.write("Sa: %s=%s" % (key, quote(value)))
                    for station in s:
                        fdo.write(" %s" % (station))
                    fdo.write("\n")
        else:
            at = self.get(code)
            if not at:
                return
            if 'done' in at:
                return
            at['done'] = 1 # Mark the item as printed
            for (k, v) in at.items():
                extra = ''
                if k in ['done', 'Place', 'Country']:
                    continue
                if k in ['Affiliation']:
                    v = quote(v)

                if k == 'Restricted':
                    extra = ' %s,*,*' % code

                if k == 'restricted_exc':
                    k = 'Restricted'
                    extra = ',*,' + str(v)
                    v = (not at['Restricted']) if 'Restricted' in at else True

                fdo.write("Sa: %s=%s %s%s\n" % (k, v, code, extra))

class NetworkAttributes:
    """Attributes of one network, read from a CSV file and completed with
    the description found on the network line."""

    def __init__(self, networkCode, filename):
        """Load the first row of ``networkCode`` from the CSV ``filename``.

        Nothing is read when ``filename`` is empty or None.

        Raises Exception when a required column is missing or when a value
        cannot be converted.
        """
        self.networkCode = networkCode
        # attribute name -> value, written by dump()
        self.networkAttributes = {}

        # Formatted start/end (YYYY/JJJ) and the dates they were made from.
        # All of them stay None when the network row supplies no date.
        self.start = None
        self.end = None
        self.startDate = None
        self.endDate = None

        self.hasStart = False
        self.hasEnd = False

        if not filename:
            return

        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    net_code = row['net_code']
                    if net_code != self.networkCode:
                        continue

                    row['start'] = parseDate(row['start'])
                    row['end'] = parseDate(row['end'])
                    row['restricted'] = bool(int(row['restricted']))
                    row['shared'] = bool(int(row['shared']))
                    row['region'] = row['region'].strip()
                    row['remark'] = row['remark'].strip()
                    row['institutions'] = row['institutions'].strip()

                    self.__build__(row)
                    # Only the first matching row is used
                    break

            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))

            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        print(" found %d Attribute for network %s (%s)" % (len(self.networkAttributes), self.networkCode, filename), file=sys.stderr)

    def __build__(self, row):
        """Take start, end and the attributes over from a converted row.

        Expected columns:
        net_code,start,end,restricted,shared,net_class,type,institutions,region,remark

        'Restricted' is stored only when it differs from 0 and 'Shared' only
        when it differs from 1; the text columns only when not empty.
        """
        attList = {}

        if row['start']:
            self.start = row['start'].strftime("%Y/%j")
            self.startDate = row['start']
            self.hasStart = True

        if row['end']:
            self.end = row['end'].strftime("%Y/%j")
            self.endDate = row['end']
            self.hasEnd = True

        if row['restricted'] != 0:
            attList['Restricted'] = row['restricted']

        if row['shared'] != 1:
            attList['Shared'] = row['shared']

        textColumns = (('net_class', 'NetClass'),
                       ('type', 'Type'),
                       ('institutions', 'Institutions'),
                       ('region', 'Region'),
                       ('remark', 'Remark'))
        for (column, name) in textColumns:
            if row[column]:
                attList[name] = row[column].strip()

        self.networkAttributes.update(attList)

    def parseNetworkLine(self, items):
        """Set the 'Description' attribute from a network line.

        ``items[0]`` is the description, with underscores read as blanks;
        ``items[1]`` is appended in parentheses unless it is "none".

        Raises Exception when the line has fewer than 4 or more than 6 items.
        """
        if len(items) < 4 or len(items) > 6:
            raise Exception("Invalid network line")

        if items[1] == "none":
            description = hummanStr(items[0])
        else:
            description = "%s (%s)" % (hummanStr(items[0]), items[1])

        self.networkAttributes.update({'Description': description})

    def dump(self, fdo):
        """Write one ``Na: name=value`` line per attribute to ``fdo``.

        Description, Remark, Region and Institutions are quoted.
        """
        for (k, v) in self.networkAttributes.items():
            if k in ['Description', 'Remark', 'Region', 'Institutions']:
                v = quote(v)
            fdo.write("Na: %s=%s\n" % (k, v))