You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
507 lines
19 KiB
Python
507 lines
19 KiB
Python
from __future__ import print_function
|
|
import sys
|
|
import csv
|
|
import re
|
|
from datetime import datetime
|
|
|
|
def getFieldNames(fd):
    """Consume one line from *fd* and return its comma-separated field names.

    Each name has surrounding whitespace (including the trailing newline)
    removed. The file position is left just after the consumed line.
    """
    header = fd.readline()
    return [name.strip() for name in header.split(',')]
|
|
|
|
def quote(instr):
    """Return *instr* wrapped in a pair of double quotes (no escaping is done)."""
    return "".join(('"', instr, '"'))
|
|
|
|
def hummanStr(instr):
    """Return *instr* with every underscore replaced by a single space."""
    return " ".join(instr.split("_"))
|
|
|
|
def parseDate(val):
    """Parse a date/time string into a ``datetime``.

    Slashes in *val* are treated as dashes. The layout is selected purely by
    the length of the resulting string; the supported layouts are::

        YYYY-JJJ            YYYY-MM-DD
        YYYY-JJJ:HHMM       YYYY-JJJTHH:MM       YYYY-MM-DDTHH:MM
        YYYY-JJJTHH:MM:SS   YYYY-MM-DDTHH:MM:SS

    where ``JJJ`` is the day of the year.

    Returns:
        A ``datetime`` instance, or ``None`` when *val* is empty or ``None``.

    Raises:
        ValueError: if the string has an unsupported length or does not match
            the layout selected for its length.
    """
    if not val:
        return None

    date = val.replace("/", "-")

    # The template strings are only used for their length, so each entry
    # documents the layout it stands for.
    formats = {
        len("YYYY-JJJ"): "%Y-%j",
        len("YYYY-MM-DD"): "%Y-%m-%d",
        len("YYYY-JJJ:HHMM"): "%Y-%j:%H%M",
        len("YYYY-JJJTHH:MM"): "%Y-%jT%H:%M",
        len("YYYY-MM-DDTHH:MM"): "%Y-%m-%dT%H:%M",
        len("YYYY-JJJTHH:MM:SS"): "%Y-%jT%H:%M:%S",
        len("YYYY-MM-DDTHH:MM:SS"): "%Y-%m-%dT%H:%M:%S",
    }

    layout = formats.get(len(date))
    if layout is None:
        # Report this explicitly instead of leaking a bare KeyError repr.
        raise ValueError("invalid date: %s (unsupported length %d)" % (date, len(date)))

    try:
        return datetime.strptime(date, layout)
    except Exception as e:
        raise ValueError("invalid date: %s (%s)" % (date, e))
|
|
|
|
def formatDate(date):
    """Render *date* as ``YYYY/JJJ``, or ``YYYY/JJJ:HHMM`` when it has a time.

    A falsy *date* yields an empty string. Only the hour and minute decide
    whether the time part is written; seconds are never emitted.
    """
    if not date:
        return ""

    has_time = date.hour != 0 or date.minute != 0
    layout = "%Y/%j:%H%M" if has_time else "%Y/%j"
    return datetime.strftime(date, layout)
|
|
|
|
def isPyVersion(major, minor):
    """Return True when the running interpreter is exactly version major.minor."""
    running = (sys.version_info[0], sys.version_info[1])
    return running == (major, minor)
|
|
|
|
class StationMappings:
    """Archive-network mappings for the stations of one network.

    The mapping file holds one entry per line in the form::

        <station>_<network>=<archive network> [from=<n>/<n>] [to=<n>/<n>]

    Only lines whose network equals ``networkCode`` and whose station is in
    ``stationList`` are kept. The optional ``from=``/``to=`` values are handed
    to ``parseDate`` (presumably year/day-of-year -- confirm against the data).

    Attributes:
        stationMapping: dict, station code -> list of
            ``(from_date, to_date, archive_network)`` tuples.
        stationBreak: dict, ``(station, start, end)`` -> list of
            ``(network, start, end, from_date, to_date)`` segments recorded by
            ``parseStationLine``.
    """

    def __init__(self, networkCode, stationList, filename):
        """Load the mappings from *filename*; a falsy filename loads nothing.

        Raises:
            Exception: ``"<filename>:<line>: <reason>"`` when a line cannot be
                parsed or one of its dates is invalid.
        """
        self.networkCode = networkCode
        self.stationList = stationList
        self.stationMapping = {}
        self.stationBreak = {}

        if not filename:
            return

        _rx_statmap = re.compile(r'\s*([^_]*)_([^=]*)=(\S*)\s*(from=([0-9]+/[0-9]+))?\s*(to=([0-9]+/[0-9]+))?\s*$')
        stationMapping = {}
        lineno = 0

        with open(filename) as fd:
            try:
                # enumerate() keeps the line number right even for lines that
                # are skipped because they belong to another network/station.
                for lineno, line in enumerate(fd, 1):
                    m = _rx_statmap.match(line)
                    if m is None:
                        raise Exception("parse error")

                    (sta, net, archive_net, from_def, from_year, to_def, to_year) = m.groups()

                    if net != self.networkCode or sta not in self.stationList:
                        continue

                    from_date = parseDate(from_year) if from_def else None
                    to_date = parseDate(to_year) if to_def else None

                    stationMapping.setdefault(sta, []).append((from_date, to_date, archive_net))

            except Exception as e:
                # Prefix the failure with its location in the mapping file.
                raise Exception("%s:%d: %s" % (filename, lineno, str(e)))

        if stationMapping:
            print("Found %d station mappings" % len(stationMapping), file=sys.stderr)
            self.stationMapping = stationMapping

    def dump(self, fdo, stationCode):
        """Write ``Sa: ArchiveNetworkCode=...`` lines to *fdo*.

        When *stationCode* is falsy every station is written, otherwise only
        that one. Every station written is removed from ``stationMapping``
        afterwards, so it is emitted at most once.
        """
        written = []
        for (code, mapping) in self.stationMapping.items():
            if stationCode and stationCode != code:
                continue
            written.append(code)
            for (fromDate, toDate, network) in mapping:
                fdo.write("Sa: ArchiveNetworkCode=%s %s" % (network, code))
                if fromDate:
                    fdo.write(" from=%s" % formatDate(fromDate))
                if toDate:
                    fdo.write(" to=%s" % formatDate(toDate))
                fdo.write("\n")

        # Removal is deferred so the dict is not modified while iterating.
        for code in written:
            self.stationMapping.pop(code)

    def getMappings(self, code, start, end):
        """Return the ``[start, end]`` spans recorded for a station line.

        Without a recorded break for ``(code, start, end)`` the result is the
        single span ``[[start, end]]``.
        """
        key = (code, start, end)
        if key not in self.stationBreak:
            return [[start, end]]
        return [[s, e] for (archiveNet, s, e, fr, to) in self.stationBreak[key]]

    def _addBreak(self, key, segments):
        """Record *segments* for *key*; refuse to overwrite an existing entry."""
        if key in self.stationBreak:
            raise Exception("Crazy multiple station mapping for the same station line")
        self.stationBreak[key] = list(segments)

    def parseStationLine(self, items):
        """Split a station line at the first applicable mapping date.

        *items* is the list of fields of a station line: ``items[0]`` is the
        station code, ``items[10]`` the start date and, when present,
        ``items[11]`` the end date.

        Returns:
            The spans from ``getMappings`` for this station line.

        Raises:
            Exception: if a mapping carries both a from and a to date, or if
                a break was already recorded for the same station line.
            ValueError: from ``parseDate`` on an invalid date.
        """
        stationCode = items[0].strip()
        start = parseDate(items[10])
        end = parseDate(items[11]) if len(items) > 11 else None

        if stationCode not in self.stationMapping:
            return self.getMappings(stationCode, start, end)

        key = (stationCode, start, end)
        for (fDate, tDate, archiveNet) in self.stationMapping[stationCode]:
            if fDate and tDate:
                raise Exception("Not Supported to and from definitions found.")

            if fDate:
                # Mapping starts inside the epoch: own network first, archive after.
                if fDate >= start and (not end or fDate <= end):
                    self._addBreak(key, [
                        (self.networkCode, start, fDate, fDate, tDate),
                        (archiveNet, fDate, end, fDate, tDate),
                    ])
                    break
            elif tDate:
                # Mapping ends inside the epoch: archive first, own network after.
                if tDate >= start and (not end or tDate <= end):
                    self._addBreak(key, [
                        (archiveNet, start, tDate, fDate, tDate),
                        (self.networkCode, tDate, end, fDate, tDate),
                    ])
                    break
            else:
                # Undated mapping: the whole epoch goes to the archive network.
                self._addBreak(key, [(archiveNet, start, end, fDate, tDate)])
                break

        return self.getMappings(stationCode, start, end)
|
|
|
|
class StationAttributes:
    """Per-station attributes of one network, read from a CSV file.

    Rows are kept only when their ``net_code`` equals ``networkCode`` and
    their ``sta_code`` is in ``stationList``. Attributes missing from the file
    can later be derived from a station description by ``parseStationLine``.

    Attributes:
        stationAttributeList: dict, station code -> dict of attributes.
    """

    def __init__(self, networkCode, stationList, filename):
        """Load the attributes from *filename*; a falsy filename loads nothing.

        Raises:
            Exception: when a required column is missing, a value cannot be
                converted, or a station appears more than once.
        """
        self.networkCode = networkCode
        self.stationList = stationList
        self.stationAttributeList = {}

        if not filename:
            return

        attributes = {}
        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    net_code = row['net_code']
                    if net_code != self.networkCode:
                        continue

                    sta_code = row['sta_code']
                    if sta_code not in self.stationList:
                        continue

                    # The parsed value is not needed; this only validates
                    # the start date of the row.
                    parseDate(row['start'].strip())

                    if sta_code in attributes:
                        raise Exception("multiple %s found in %s" % (str((net_code, sta_code, row['start'])), filename))

                    del row['net_code']
                    del row['sta_code']
                    del row['start']

                    # Clean up input: strip text columns and drop empty ones.
                    for key in ['restricted', 'restricted_exc', 'place', 'country', 'affiliation', 'remark']:
                        value = row[key].strip()
                        if value:
                            row[key] = value
                        else:
                            del row[key]

                    # Only a true 'restricted' flag is kept.
                    if 'restricted' in row:
                        row['restricted'] = bool(int(row['restricted']))
                        if not row['restricted']:
                            del row['restricted']

                    if row:
                        attributes[sta_code] = row

            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))

            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        self.stationAttributeList = self.__build__(attributes)
        print(" loaded attributes for %d stations on network %s (%s)" % (len(self.stationAttributeList), self.networkCode, filename), file=sys.stderr)

    def __build__(self, attributes):
        """Return a copy of *attributes* with the known column names capitalized.

        ``country``, ``place``, ``affiliation``, ``remark`` and ``restricted``
        are renamed; any other key is kept as it is. Stations whose attribute
        dict is empty are left out.
        """
        renames = {
            'country': 'Country',
            'place': 'Place',
            'affiliation': 'Affiliation',
            'remark': 'Remark',
            'restricted': 'Restricted',
        }

        newat = {}
        if not attributes:
            return newat

        for (code, row) in attributes.items():
            nr = {}
            for (k, v) in row.items():
                nr[renames.get(k, k)] = v
            if nr:
                newat[code] = nr
        return newat

    def get(self, code):
        """Return the attribute dict stored for *code*, or None if there is none."""
        if not self.stationAttributeList:
            return None
        return self.stationAttributeList.get(code)

    def __parseDescription__(self, description):
        """Derive Country/Place/Affiliation from a station description.

        Underscores are turned into spaces first. Text before the word
        ``Station`` becomes the affiliation. The rest is split on commas: the
        last part is the country (only when there is more than one part), the
        others are joined back into the place. Empty values are omitted from
        the returned dict.
        """
        affiliation = None
        country = None
        description = hummanStr(description)

        marker = description.find("Station")
        if marker >= 0:
            affiliation = description[:marker].strip()
            parts = description[marker + len("Station"):].strip().split(",")
        else:
            parts = description.split(",")

        if len(parts) > 1:
            country = parts[-1].strip()
            parts = parts[:-1]
        place = ",".join(parts)

        oui = {}
        if country:
            oui['Country'] = country
        if place:
            oui['Place'] = place
        if affiliation:
            oui['Affiliation'] = affiliation
        return oui

    def reorder_station_attr(self):
        """Invert the attribute table to ``{attribute: {str(value): [stations]}}``.

        A ``restricted_exc`` entry is filed under ``Restricted`` with the
        negated station restriction (True when the station has none) as value
        and ``<code>,*,<restricted_exc>`` as station entry.

        Returns None when no attributes are loaded.
        """
        if not self.stationAttributeList:
            return None

        att = {}
        for (code, row) in self.stationAttributeList.items():
            for (k, v) in row.items():
                if k == 'restricted_exc':
                    k = 'Restricted'
                    extra = ',*,' + str(v)
                    v = (not row['Restricted']) if 'Restricted' in row else True
                else:
                    extra = ''

                att.setdefault(k, {}).setdefault(str(v), []).append(code + extra)
        return att

    def parseStationLine(self, items, fStart = None, fEnd = None):
        """Return ``[place, country]`` for a station line.

        *items* is the list of fields of a station line: ``items[0]`` is the
        station code, ``items[1]`` the description and ``items[10]`` the start
        date. Attributes not loaded from the file are derived from the
        description and stored in ``stationAttributeList``.

        *fStart* and *fEnd* are accepted for interface compatibility; they do
        not influence the result.

        Raises:
            Exception: if the station is not in ``stationList``.
            ValueError: from ``parseDate`` when the start date is invalid.
        """
        stationCode = items[0].strip()
        description = items[1]

        # Validate the start date before anything else, as callers rely on
        # the ValueError for malformed lines.
        parseDate(items[10])

        if stationCode not in self.stationList:
            raise Exception("Station %s not in station list." % stationCode)

        at = self.get(stationCode)
        if not at:
            # Nothing known yet: take everything from the description.
            at = self.__parseDescription__(description)
            if at:
                self.stationAttributeList[stationCode] = at
        else:
            # Fill only the gaps; the description is parsed at most once.
            oui = None
            for item in ['Affiliation', 'Country', 'Place']:
                if item in at:
                    continue
                if oui is None:
                    oui = self.__parseDescription__(description)
                if item in oui:
                    at[item] = oui[item]

        country = at['Country'] if 'Country' in at else None
        place = at['Place'] if 'Place' in at else None
        return [place, country]

    def dump(self, fdo, code):
        """Write ``Sa:`` attribute lines to *fdo*.

        With a falsy *code* all stations are written grouped by attribute
        value; nothing is written when no attributes are loaded. With a
        station code only that station is written, and it is marked with a
        ``done`` entry so that a second call writes nothing. ``Place`` and
        ``Country`` are never written.
        """
        if not code:
            att = self.reorder_station_attr()
            if not att:
                # reorder_station_attr() returns None for an empty table.
                return
            for (key, v) in att.items():
                if key in ['Country', 'Place']:
                    continue
                for (value, s) in v.items():
                    fdo.write("Sa: %s=%s" % (key, quote(value)))
                    for station in s:
                        fdo.write(" %s" % (station))
                    fdo.write("\n")
            return

        at = self.get(code)
        if not at:
            return
        if 'done' in at:
            return
        at['done'] = 1  # Mark the item as printed

        for (k, v) in at.items():
            extra = ''
            if k in ['done', 'Place', 'Country']:
                continue
            if k in ['Affiliation']:
                v = quote(v)

            if k == 'Restricted':
                extra = ' %s,*,*' % code

            if k == 'restricted_exc':
                k = 'Restricted'
                extra = ',*,' + str(v)
                v = (not at['Restricted']) if 'Restricted' in at else True

            fdo.write("Sa: %s=%s %s%s\n" % (k, v, code, extra))
|
|
|
|
class NetworkAttributes:
    """Attributes of one network, read from a CSV file.

    The first row whose ``net_code`` equals ``networkCode`` is used.

    Attributes:
        networkAttributes: dict of attribute name -> value, written by ``dump``.
        start, end: the network start/end formatted as ``YYYY/JJJ``, or None.
        startDate, endDate: the same instants as ``datetime``, or None.
        hasStart, hasEnd: True when the corresponding date was present.
    """

    def __init__(self, networkCode, filename):
        """Load the attributes from *filename*; a falsy filename loads nothing.

        Raises:
            Exception: when a required column is missing or a value cannot be
                converted.
        """
        self.networkCode = networkCode
        self.networkAttributes = {}

        self.start = None
        self.end = None

        # Always defined, so readers need not guard with hasStart/hasEnd.
        self.startDate = None
        self.endDate = None

        self.hasStart = False
        self.hasEnd = False

        if not filename:
            return

        with open(filename) as fd:
            try:
                fieldNames = None
                if isPyVersion(2, 3):
                    fieldNames = getFieldNames(fd)

                for row in csv.DictReader(fd, fieldNames):
                    net_code = row['net_code']
                    if net_code != self.networkCode:
                        continue

                    row['start'] = parseDate(row['start'])
                    row['end'] = parseDate(row['end'])
                    row['restricted'] = bool(int(row['restricted']))
                    row['shared'] = bool(int(row['shared']))
                    row['region'] = row['region'].strip()
                    row['remark'] = row['remark'].strip()
                    row['institutions'] = row['institutions'].strip()

                    self.__build__(row)
                    # Only the first matching row is considered.
                    break

            except KeyError as e:
                raise Exception("column %s missing in %s" % (str(e), filename))

            except (TypeError, ValueError) as e:
                raise Exception("error reading %s: %s" % (filename, str(e)))

        print(" found %d Attribute for network %s (%s)" % (len(self.networkAttributes), self.networkCode, filename), file=sys.stderr)

    def __build__(self, row):
        """Fill the instance from one converted CSV row.

        *row* must provide the keys ``start``, ``end``, ``restricted``,
        ``shared``, ``net_class``, ``type``, ``institutions``, ``region`` and
        ``remark``. ``start``/``end`` are expected to be ``datetime`` objects
        or falsy. ``Restricted`` is recorded only when it differs from 0 and
        ``Shared`` only when it differs from 1; text columns only when
        non-empty.
        """
        attList = {}

        if row['start']:
            self.start = row['start'].strftime("%Y/%j")
            self.startDate = row['start']
            self.hasStart = True

        if row['end']:
            self.end = row['end'].strftime("%Y/%j")
            self.endDate = row['end']
            self.hasEnd = True

        if row['restricted'] != 0:
            attList['Restricted'] = row['restricted']

        if row['shared'] != 1:
            attList['Shared'] = row['shared']

        text_columns = (
            ('net_class', 'NetClass'),
            ('type', 'Type'),
            ('institutions', 'Institutions'),
            ('region', 'Region'),
            ('remark', 'Remark'),
        )
        for (column, name) in text_columns:
            if row[column]:
                attList[name] = row[column].strip()

        self.networkAttributes.update(attList)

    def parseNetworkLine(self, items):
        """Set the ``Description`` attribute from the fields of a network line.

        ``items[0]`` is the description (underscores become spaces). Unless
        ``items[1]`` is ``"none"`` it is appended in parentheses.

        Raises:
            Exception: if *items* has fewer than 4 or more than 6 fields.
        """
        if len(items) < 4 or len(items) > 6:
            raise Exception("Invalid network line")

        if items[1] == "none":
            description = hummanStr(items[0])
        else:
            description = "%s (%s)" % (hummanStr(items[0]), items[1])

        self.networkAttributes.update({'Description': description})

    def dump(self, fdo):
        """Write one ``Na: <name>=<value>`` line per attribute to *fdo*.

        Description, Remark, Region and Institutions values are quoted.
        """
        for (k, v) in self.networkAttributes.items():
            if k in ['Description', 'Remark', 'Region', 'Institutions']:
                v = quote(v)
            fdo.write("Na: %s=%s\n" % (k, v))
|