You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

507 lines
19 KiB
Python

from __future__ import print_function
import sys
import csv
import re
from datetime import datetime
def getFieldNames(fd):
    """Consume one line from fd and return its comma-separated field names.

    The line is split on plain commas (no CSV quoting rules are applied) and
    every piece has its surrounding whitespace, including the trailing
    newline, removed.
    """
    header = fd.readline()
    return [column.strip() for column in header.split(',')]
def quote(instr):
    """Return instr enclosed in a pair of double-quote characters.

    Quote characters already present in instr are left untouched.
    """
    return "".join(('"', instr, '"'))
def hummanStr(instr):
    """Return instr with every underscore turned into a space."""
    return " ".join(instr.split("_"))
# strptime layouts accepted by parseDate, keyed by the length of the date text.
# Every template has a distinct length, so the length alone selects the layout.
_DATE_FORMATS = dict((len(template), layout) for (template, layout) in (
    ("YYYY-JJJ", "%Y-%j"),
    ("YYYY-MM-DD", "%Y-%m-%d"),
    ("YYYY-JJJ:HHMM", "%Y-%j:%H%M"),
    ("YYYY-JJJTHH:MM", "%Y-%jT%H:%M"),
    ("YYYY-MM-DDTHH:MM", "%Y-%m-%dT%H:%M"),
    ("YYYY-JJJTHH:MM:SS", "%Y-%jT%H:%M:%S"),
    ("YYYY-MM-DDTHH:MM:SS", "%Y-%m-%dT%H:%M:%S"),
))


def parseDate(val):
    """Convert a date string into a datetime, or return None for no input.

    Slashes in val are treated as dashes, then the strptime layout is chosen
    purely from the length of the resulting text:

        YYYY-JJJ             YYYY-MM-DD
        YYYY-JJJ:HHMM
        YYYY-JJJTHH:MM       YYYY-MM-DDTHH:MM
        YYYY-JJJTHH:MM:SS    YYYY-MM-DDTHH:MM:SS

    where JJJ is the day of the year.

    Returns:
        A datetime, or None when val is None or an empty string.

    Raises:
        ValueError: the text length matches no known layout, or the text
            does not parse with the layout selected for its length.
    """
    if not val:
        return None

    date = val.replace("/", "-")

    layout = _DATE_FORMATS.get(len(date))
    if layout is None:
        # Report the real problem instead of leaking the lookup key.
        raise ValueError("invalid date: %s (unsupported date format)" % date)

    try:
        return datetime.strptime(date, layout)
    except Exception as e:
        raise ValueError("invalid date: %s (%s)" % (date, e))
def formatDate(date):
    """Render a datetime as "YYYY/JJJ", appending ":HHMM" when a time is set.

    A false value (such as None) yields an empty string.  Only the hour and
    the minute decide whether the time part is written; seconds are never
    part of the output.
    """
    if not date:
        return ""

    at_midnight = date.hour == 0 and date.minute == 0
    layout = "%Y/%j" if at_midnight else "%Y/%j:%H%M"
    return datetime.strftime(date, layout)
def isPyVersion(major, minor):
    """Tell whether the running interpreter is exactly version major.minor."""
    running = tuple(sys.version_info[:2])
    return running == (major, minor)
class StationMappings:
    """Archive-network overrides for the stations of one network.

    The mapping file holds one entry per line in the form

        STA_NET=ARCHIVENET [from=<digits>/<digits>] [to=<digits>/<digits>]

    Entries for other networks, or for stations outside stationList, are
    skipped.  Any line that does not match this form is a parse error.

    Attributes:
        stationMapping: station code -> list of
            (fromDate, toDate, archiveNetwork) tuples read from the file.
        stationBreak: (stationCode, start, end) -> list of
            (network, segmentStart, segmentEnd, fromDate, toDate) tuples
            recorded by parseStationLine.
    """

    _rx_statmap = re.compile(r'\s*([^_]*)_([^=]*)=(\S*)\s*(from=([0-9]+/[0-9]+))?\s*(to=([0-9]+/[0-9]+))?\s*$')

    def __init__(self, networkCode, stationList, filename):
        """Load the mappings of networkCode/stationList from filename.

        A false filename leaves the object empty.

        Raises:
            Exception: "<filename>:<line>: <reason>" for the first line that
                cannot be parsed (bad layout or invalid date).
        """
        self.networkCode = networkCode
        self.stationList = stationList
        self.stationMapping = {}
        self.stationBreak = {}

        if not filename:
            return

        stationMapping = {}
        lineno = 0
        with open(filename) as fd:
            try:
                # enumerate keeps the line number right for skipped lines too.
                for lineno, line in enumerate(fd, 1):
                    m = self._rx_statmap.match(line)
                    if m is None:
                        raise Exception("parse error")

                    (sta, net, archive_net, from_def, from_text, to_def, to_text) = m.groups()

                    if net != self.networkCode or sta not in self.stationList:
                        continue

                    from_date = parseDate(from_text) if from_def else None
                    to_date = parseDate(to_text) if to_def else None

                    stationMapping.setdefault(sta, []).append((from_date, to_date, archive_net))
            except Exception as e:
                raise Exception("%s:%d: %s" % (filename, lineno, str(e)))

        if stationMapping:
            print("Found %d station mappings" % len(stationMapping), file=sys.stderr)
            self.stationMapping = stationMapping

    def dump(self, fdo, stationCode):
        """Write the "Sa: ArchiveNetworkCode=..." lines to fdo.

        With a stationCode only that station is written, otherwise all of
        them.  Every station written is removed from stationMapping, so it is
        emitted only once.
        """
        written = []
        for (code, mapping) in self.stationMapping.items():
            if stationCode and stationCode != code:
                continue
            written.append(code)
            for (fromDate, toDate, network) in mapping:
                fdo.write("Sa: ArchiveNetworkCode=%s %s" % (network, code))
                if fromDate:
                    fdo.write(" from=%s" % formatDate(fromDate))
                if toDate:
                    fdo.write(" to=%s" % formatDate(toDate))
                fdo.write("\n")

        # Removal is deferred so the dict is not modified while iterated.
        for code in written:
            self.stationMapping.pop(code)

    def getMappings(self, code, start, end):
        """Return the [start, end] segments recorded for a station line.

        When no break was recorded for (code, start, end) the single segment
        [start, end] is returned.
        """
        key = (code, start, end)
        if key not in self.stationBreak:
            return [[start, end]]
        return [[segStart, segEnd]
                for (_net, segStart, segEnd, _fr, _to) in self.stationBreak[key]]

    def _registerBreak(self, key, segments):
        """Record the segments of one station line, refusing a second entry."""
        if key in self.stationBreak:
            raise Exception("Crazy multiple station mapping for the same station line")
        self.stationBreak[key] = list(segments)

    def parseStationLine(self, items):
        """Split one station line according to the loaded mappings.

        items[0] is the station code, items[10] the start date and, when
        present, items[11] the end date.  The first mapping that applies to
        the [start, end] interval is recorded in stationBreak:

          * from-date inside the interval: own network up to that date,
            archive network afterwards;
          * to-date inside the interval: archive network up to that date,
            own network afterwards;
          * no dates: archive network for the whole interval.

        Returns:
            The list of [start, end] segments as given by getMappings.

        Raises:
            ValueError: a date field is invalid.
            Exception: a mapping defines both from and to, or the same
                (station, start, end) line was already recorded.
        """
        stationCode = items[0].strip()
        start = parseDate(items[10])
        end = parseDate(items[11]) if len(items) > 11 else None
        key = (stationCode, start, end)

        if stationCode not in self.stationMapping:
            return self.getMappings(stationCode, start, end)

        for (fDate, tDate, archiveNet) in self.stationMapping[stationCode]:
            if fDate and tDate:
                raise Exception("Not Supported to and from definitions found.")

            if fDate:
                if fDate >= start and (not end or fDate <= end):
                    self._registerBreak(key, [
                        (self.networkCode, start, fDate, fDate, tDate),
                        (archiveNet, fDate, end, fDate, tDate),
                    ])
                    break
            elif tDate:
                if tDate >= start and (not end or tDate <= end):
                    self._registerBreak(key, [
                        (archiveNet, start, tDate, fDate, tDate),
                        (self.networkCode, tDate, end, fDate, tDate),
                    ])
                    break
            else:
                self._registerBreak(key, [
                    (archiveNet, start, end, fDate, tDate),
                ])
                break

        return self.getMappings(stationCode, start, end)
class StationAttributes:
    """Per-station attributes of one network, loaded from a CSV file.

    The CSV must provide the columns net_code, sta_code, start, restricted,
    restricted_exc, place, country, affiliation and remark.  Rows of other
    networks or of stations outside stationList are ignored.

    Attributes:
        stationAttributeList: station code -> dict of attributes.  The keys
            country, place, affiliation, remark and restricted are stored
            capitalised; any other column keeps its name.
    """

    # Column name -> attribute name used in stationAttributeList.
    _KEY_NAMES = {
        'country': 'Country',
        'place': 'Place',
        'affiliation': 'Affiliation',
        'remark': 'Remark',
        'restricted': 'Restricted',
    }

    def __init__(self, networkCode, stationList, filename):
        """Load the attributes of networkCode/stationList from filename.

        A false filename leaves the object empty.

        Raises:
            Exception: a required column is missing, a value cannot be
                converted, or a station appears more than once.
        """
        self.networkCode = networkCode
        self.stationList = stationList
        self.stationAttributeList = {}

        if not filename:
            return

        attributes = {}
        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    net_code = row['net_code']
                    if net_code != self.networkCode:
                        continue

                    sta_code = row['sta_code']
                    if sta_code not in self.stationList:
                        continue

                    # Parsed only to validate the value; the result is unused.
                    parseDate(row['start'].strip())

                    if sta_code in attributes:
                        raise Exception("multiple %s found in %s" % (str((net_code, sta_code, row['start'])), filename))

                    del row['net_code']
                    del row['sta_code']
                    del row['start']

                    # Strip the text columns and drop those left empty.
                    for key in ['restricted', 'restricted_exc', 'place', 'country', 'affiliation', 'remark']:
                        value = row[key].strip()
                        if value:
                            row[key] = value
                        else:
                            del row[key]

                    # Keep 'restricted' only when it is actually set.
                    if 'restricted' in row:
                        row['restricted'] = bool(int(row['restricted']))
                        if not row['restricted']:
                            del row['restricted']

                    if row:
                        attributes[sta_code] = row
            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))
            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        self.stationAttributeList = self.__build__(attributes)
        print(" loaded attributes for %d stations on network %s (%s)" % (len(self.stationAttributeList), self.networkCode, filename), file=sys.stderr)

    def __build__(self, attributes):
        """Return a copy of attributes with the known keys capitalised.

        Stations whose attribute dict is empty are left out.
        """
        newat = {}
        if not attributes:
            return newat

        for (code, row) in attributes.items():
            nr = {}
            for (k, v) in row.items():
                nr[self._KEY_NAMES.get(k, k)] = v
            if nr:
                newat[code] = nr
        return newat

    def get(self, code):
        """Return the attribute dict of station code, or None if unknown."""
        if self.stationAttributeList and code in self.stationAttributeList:
            return self.stationAttributeList[code]
        return None

    def __parseDescription__(self, description):
        """Derive Country, Place and Affiliation from a station description.

        Underscores are read as spaces.  Text before the first "Station" is
        the affiliation; of the remaining comma-separated parts the last one
        is the country (only when there is more than one part) and the rest
        is the place.  Only non-empty values are put into the returned dict.
        """
        description = hummanStr(description)

        affiliation = None
        marker = description.find("Station")
        if marker >= 0:
            affiliation = description[:marker].strip()
            parts = description[marker + len("Station"):].strip().split(",")
        else:
            parts = description.split(",")

        country = None
        if len(parts) > 1:
            country = parts.pop().strip()
        place = ",".join(parts)

        oui = {}
        if country:
            oui['Country'] = country
        if place:
            oui['Place'] = place
        if affiliation:
            oui['Affiliation'] = affiliation
        return oui

    def reorder_station_attr(self):
        """Group the stations by attribute name and value.

        Returns:
            attribute -> str(value) -> list of station entries, or None when
            no attributes are loaded.  A 'restricted_exc' value is filed under
            'Restricted' with the inverted restriction flag of the station
            and the entry "<code>,*,<value>".
        """
        if not self.stationAttributeList:
            return None

        att = {}
        for (code, row) in self.stationAttributeList.items():
            for (k, v) in row.items():
                extra = ''
                if k == 'restricted_exc':
                    k = 'Restricted'
                    extra = ',*,' + str(v)
                    v = (not row['Restricted']) if 'Restricted' in row else True
                att.setdefault(k, {}).setdefault(str(v), []).append(code + extra)
        return att

    def parseStationLine(self, items, fStart = None, fEnd = None):
        """Complete the attributes of a station line and return its location.

        items[0] is the station code, items[1] the description and items[10]
        the start date.  Attributes missing for the station (Affiliation,
        Country, Place) are derived from the description and stored.

        fStart and fEnd are accepted for interface compatibility; the current
        implementation does not use them.

        Returns:
            [place, country]; either may be None.

        Raises:
            ValueError: the start date is invalid.
            Exception: the station is not part of stationList.
        """
        stationCode = items[0].strip()
        description = items[1]
        # Parsed only to validate the value; the result is unused.
        parseDate(items[10])

        if stationCode not in self.stationList:
            raise Exception("Station %s not in station list." % stationCode)

        at = self.get(stationCode)
        if not at:
            # Nothing known yet: take everything from the description.
            at = self.__parseDescription__(description)
            if at:
                self.stationAttributeList[stationCode] = at
        else:
            # Fill only the gaps; the description is parsed at most once.
            oui = None
            for item in ['Affiliation', 'Country', 'Place']:
                if item in at:
                    continue
                if oui is None:
                    oui = self.__parseDescription__(description)
                if item in oui:
                    at[item] = oui[item]

        country = at['Country'] if 'Country' in at else None
        place = at['Place'] if 'Place' in at else None
        return [place, country]

    def dump(self, fdo, code):
        """Write the "Sa:" attribute lines to fdo.

        Without a code, one line per attribute value lists all the stations
        sharing it.  With a code, the attributes of that station are written,
        and the station is marked with a 'done' entry so that a second call
        writes nothing.  Country and Place are never written.
        """
        if not code:
            # reorder_station_attr returns None when nothing is loaded.
            att = self.reorder_station_attr() or {}
            for (key, v) in att.items():
                if key in ['Country', 'Place']:
                    continue
                for (value, s) in v.items():
                    fdo.write("Sa: %s=%s" % (key, quote(value)))
                    for station in s:
                        fdo.write(" %s" % (station))
                    fdo.write("\n")
            return

        at = self.get(code)
        if not at:
            return
        if 'done' in at:
            return
        at['done'] = 1  # Mark the item as printed

        for (k, v) in at.items():
            if k in ['done', 'Place', 'Country']:
                continue
            extra = ''
            if k == 'Affiliation':
                v = quote(v)
            elif k == 'Restricted':
                extra = ' %s,*,*' % code
            elif k == 'restricted_exc':
                k = 'Restricted'
                extra = ',*,' + str(v)
                v = (not at['Restricted']) if 'Restricted' in at else True
            fdo.write("Sa: %s=%s %s%s\n" % (k, v, code, extra))
class NetworkAttributes:
    """Attributes of one network, loaded from a CSV file.

    The CSV must provide the columns net_code, start, end, restricted,
    shared, net_class, type, institutions, region and remark.  Only the
    first row whose net_code equals networkCode is used.

    Attributes:
        networkAttributes: attribute name -> value, as written by dump.
        start, end: the dates formatted as "YYYY/JJJ", or None.
        startDate, endDate: the dates as datetime objects, or None.
        hasStart, hasEnd: whether the corresponding date was set.
    """

    def __init__(self, networkCode, filename):
        """Load the attributes of networkCode from filename.

        A false filename leaves the object empty.

        Raises:
            Exception: a required column is missing or a value cannot be
                converted.
        """
        self.networkCode = networkCode
        self.networkAttributes = {}
        self.start = None
        self.end = None
        # Always defined, so readers need not guard with hasStart/hasEnd.
        self.startDate = None
        self.endDate = None
        self.hasStart = False
        self.hasEnd = False

        if not filename:
            return

        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    if row['net_code'] != self.networkCode:
                        continue

                    row['start'] = parseDate(row['start'])
                    row['end'] = parseDate(row['end'])
                    row['restricted'] = bool(int(row['restricted']))
                    row['shared'] = bool(int(row['shared']))
                    row['region'] = row['region'].strip()
                    row['remark'] = row['remark'].strip()
                    row['institutions'] = row['institutions'].strip()

                    self.__build__(row)
                    break  # only the first matching row counts
            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))
            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        print(" found %d Attribute for network %s (%s)" % (len(self.networkAttributes), self.networkCode, filename), file=sys.stderr)

    def __build__(self, row):
        """Take over the values of one converted CSV row.

        row['start'] and row['end'] are datetime objects or None, and
        row['restricted'] and row['shared'] are already converted.
        Restricted is stored only when set, Shared only when not set, and the
        text attributes only when non-empty.

        Raises:
            KeyError: a column is missing from row.
        """
        if row['start']:
            self.start = row['start'].strftime("%Y/%j")
            self.startDate = row['start']
            self.hasStart = True

        if row['end']:
            self.end = row['end'].strftime("%Y/%j")
            self.endDate = row['end']
            self.hasEnd = True

        attList = {}
        if row['restricted'] != 0:
            attList['Restricted'] = row['restricted']
        if row['shared'] != 1:
            attList['Shared'] = row['shared']

        for (column, name) in (('net_class', 'NetClass'),
                               ('type', 'Type'),
                               ('institutions', 'Institutions'),
                               ('region', 'Region'),
                               ('remark', 'Remark')):
            if row[column]:
                attList[name] = row[column].strip()

        self.networkAttributes.update(attList)

    def parseNetworkLine(self, items):
        """Set the Description attribute from a network line.

        items[0] is the network description, with underscores read as
        spaces.  Unless items[1] is "none" it is appended in parentheses.

        Raises:
            Exception: items does not hold between 4 and 6 fields.
        """
        if len(items) < 4 or len(items) > 6:
            raise Exception("Invalid network line")

        if items[1] == "none":
            description = hummanStr(items[0])
        else:
            description = "%s (%s)" % (hummanStr(items[0]), items[1])

        self.networkAttributes.update({'Description': description})

    def dump(self, fdo):
        """Write one "Na: name=value" line per attribute to fdo.

        Description, Remark, Region and Institutions are written quoted.
        """
        for (k, v) in self.networkAttributes.items():
            if k in ['Description', 'Remark', 'Region', 'Institutions']:
                v = quote(v)
            fdo.write("Na: %s=%s\n" % (k, v))