# A set of tools to verify the operation of the j2ms2 and tConvert programs
#!/usr/bin/env python3
#######################/opt/anaconda3/bin/python3
# compare-ms-idi.py
#
# Analyse the antenna-, source- and frequency tables to see
# if they represent the same information
from __future__ import print_function
from six import with_metaclass # Grrrrr http://python-future.org/compatible_idioms.html#metaclasses
from functools import partial, reduce, total_ordering
import sys, re, collections, glob, os, operator, itertools, astropy.io.fits, numpy, argparse
import pyrap.tables as pt
# everybody should love themselves some function composition. really.
# compose(f, g, h)(x) == f(g(h(x)))  - right-to-left (mathematical) order
compose = lambda *fns : lambda x: reduce(lambda acc, f: f(acc), reversed(fns), x)
pam = lambda *fns : lambda x: tuple(map(lambda fn: fn(x), fns)) # fns= [f0, f1, ...] => map(x, fns) => (f0(x), f1(x), ...)
Apply = lambda *args : args[0](*args[1:]) # args= (f0, arg0, arg1,...) => f0(arg0, arg1, ...)
identity = lambda x : x
# functional if-p(x)-then-t(x)-else-f(x)
choice = lambda p, t, f : lambda x: t(x) if p(x) else f(x)
# a callable that ignores its arguments and always returns c
const = lambda c : lambda *_: c
# wrap a callable so it can act as an (unbound) method in a type() class dict
method = lambda f : lambda *args, **kwargs: f(*args, **kwargs)
wrap = lambda f, n=1 : lambda *args, **kwargs: f(*args[0:n]) # transform f to allow many parameters but pass only first n
is_none = partial(operator.is_, None)
# curried shorthands for the stdlib iteration tools
Map = lambda fn : partial(map, fn)
Group = lambda n : operator.methodcaller('group', n) # regex match object -> named/numbered group
GroupBy = lambda keyfn : partial(itertools.groupby, key=keyfn)
Sort = lambda keyfn : partial(sorted, key=keyfn)
Filter = lambda pred : partial(filter, pred)
Reduce = lambda f, i=None : lambda l: reduce(f, l, i) if i is not None else reduce(f, l)
ZipStar = lambda x : zip(*x)
Star = lambda f : lambda args: f(*args) # call f with a single tuple unpacked as arguments
StarMap = lambda f : partial(itertools.starmap, f)
# debug helpers: print (part of) the value, then pass the value on unmodified
D = lambda x : print(x) or x
DD = lambda pfx : lambda x: print(pfx,":",x) or x
D_i = lambda item : lambda x: print(x[item]) or x
D_a = lambda attr : lambda x: print(getattr(x, attr)) or x
DD_i = lambda pfx, item : lambda x: print(pfx,":",x[item]) or x
DD_a = lambda pfx, attr : lambda x: print(pfx,":",getattr(x, attr)) or x
# anonymous type/instance factories
Type = lambda **kwargs : type('', (), kwargs)
Derived = lambda b, **kw : type('', (b,), kw) # Create easy derived type so attributes can be set/added
Obj = lambda **kwargs : Type(**kwargs)()
# mutate-and-return-the-object helpers (list.append()/setattr() themselves return None)
Append = lambda l, v : l.append(v) or l
SetAttr = lambda o, a, v : setattr(o, a, v) or o
# replace attribute's current value by f(current, *args, **kwargs); returns the object
XFormAttr= lambda attr, f, missing=None: lambda obj, *args, **kwargs: SetAttr(obj, attr, f(getattr(obj, attr, missing), *args, **kwargs))
# Thanks to Py3 one must sometimes drain an iterable for its side effects. Thanks guys!
# From https://docs.python.org/2/library/itertools.html#recipes
# consume(), all_equal()
consume = partial(collections.deque, maxlen=0)
all_equal= compose(lambda g: next(g, True) and not next(g, False), itertools.groupby)
# shorthands
GetN = operator.itemgetter
GetA = operator.attrgetter
Repeat = itertools.repeat
Call = operator.methodcaller
# other generic stuff (own invention)
# GetA_xform(attr=f, ...)(o) -> generator yielding f(o.attr) per keyword
GetA_xform = lambda **kwargs: lambda o: ( kwargs[k](getattr(o,k)) for k in kwargs.keys() )
# GetN_xform(key=f, ...)(o) -> generator yielding f(o[key], o) per keyword
GetN_xform = lambda **kwargs: lambda o: ( kwargs[k](o[k], o) for k in kwargs.keys() )
# Python3 comparison is much more strict; None cannot be compared anymore.
# To Py2/Py3-compatibly sort a list that may contain None's we substitute a
# singleton sentinel that sorts below everything else.
# Based on https://stackoverflow.com/a/26348624
@total_ordering
class NietsNut(object):
    """Sentinel: sorts less than everything, equal only to itself."""
    def __lt__(self, other):
        return True
    def __eq__(self, other):
        return self is other
    def __repr__(self):
        return "Niets"
Niets = NietsNut()
# Use this as keyfn in "sorted(key=...)" to transparently replace a None key
# with Niets, but leaving the sorted objects intact - i.e. leaving the None where it was
sorteable_none = lambda key: Niets if key is None else key
##################################################################################
#
# this is the information we accumulate per primary key (whatever that may turn
# out to be)
#
##################################################################################
# If you want to figure out if an element in a tuple/list happens to be None
# and there is a risk that one of the elements is a numpy.array() you're up
# Sh*t Creek - without a paddle:
#
# >>> import numpy
# >>> a1 = numpy.arange(3)
# >>> None in (a1, a1)
# Traceback (most recent call last):
# File "<stdin>", line 1, in <module>
# ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
#
# This behaviour is numpy version dependent - older versions will work, newer will give this
# exception FFS.
#
# So we just have to roll our own "is the value None present in this list/tuple?"
# But that is not enough - if all operands to the operation are None they should compare equal as well
# but without having to call the operator
# index into the comparison table is (any, all).
# we only need to check for conditions where
# any == True and discriminate between all/some.
# If any == False this implies all == False (all == True is impossible in this case)
comp_table_ = {(True, True) : const(True), # all = True -> all operands None -> .EQ.
               (True, False): const(False)} # all = False -> some operands None -> .NE.
# any_all(operands) -> (any(op is None), all(op is None))
any_all = compose(pam(any, all), Map(is_none))
# Guard comparison function f against None operands: f is only invoked when no
# operand is None, otherwise the outcome comes from comp_table_ above
SafeCompare = lambda f: lambda *args: comp_table_.get( any_all(args), f)(*args)
# Fold attribute 'a' of o1 into the same attribute of o0 (both return None).
SumAttr = lambda a: lambda o0, o1: setattr(o0, a, getattr(o0, a)+getattr(o1, a))
# NB: list.append() returns None, so the original
#   setattr(o0, a, getattr(o0, a).append(...))
# clobbered the attribute with None. Appending in place is sufficient: the
# attribute already refers to the (mutated) list.
AppendAttr = lambda a: lambda o0, o1: getattr(o0, a).append(getattr(o1, a))
# Tolerant value comparisons, all guarded against None operands by SafeCompare
FloatEq = SafeCompare(lambda a, b: abs(a-b) < 1e-3)
ArrayEq = SafeCompare(lambda a, b: numpy.allclose(a, b, atol=1e-3))
StringEq = SafeCompare(lambda a, b: a.lower() == b.lower())
###################################3
# For meta-data comparison we need:
# - antenna info
# position, diameter, mount, offset
# - source info
# name, ra, dec
# - frequency info
# start, end freq, #-of-channels (spectral channel width?)
#
# Creating keys for the antenna, sources can be simple:
# ('antenna', <name>), ('source', <name>)
# for the spectral windows it's going to be difficult to have unique key
# ('frequency', "<lowest channel frequency>") with 6 digits behind the decimal point?
# input: attribute1=function1, attribute2=function2
# Will return function that replaces the value of attributeN with functionN( current value of attributeN )
Replace = lambda **kwargs: lambda o: o._replace( **{k:f(getattr(o, k)) for k,f in kwargs.items()} )
# A named tuple holding the concatenated field differences.
# NB: the truth value is inverted w.r.t. the string content: diffbool applies
# operator.not_, so bool(Diff('')) is True (no differences) and a non-empty
# diff string yields False.
diffbool = method(compose(operator.not_, GetA('diff')))
Diff = Derived(collections.namedtuple('Diff', ['diff']), __repr__=method("DIFF: {0.diff}".format),
               __bool__ = diffbool, __nonzero__ = diffbool,  # __nonzero__ is the Py2 spelling of __bool__
               __eq__ = lambda l, r: StringEq(l.diff, r.diff),
               __ne__ = lambda l, r: not (l==r),
               __hash__ = method(compose(Call('__hash__'), GetA('diff'))))  # hashable: usable as dict key
# kwargs = { attribute:compare-function, attribute:compare-function, ...}
# returns a function which returns an object that returns True if it's "empty" and not True if it has elements
# the contents are the fields that are different
# render one differing attribute as "attr: left vs right"
diff_fmt = "{0}: {1} vs {2}".format
def diff_reductor(l, r):
    """Return a reducer folding per-attribute comparators into one ', '-joined diff string."""
    def accumulate(acc, comparator):
        fragment = comparator(l, r)
        if not fragment:
            # this attribute compared equal - nothing to add
            return acc
        return acc + ", " + fragment if acc else fragment
    return accumulate
def mk_comp(attr, f):
    """Build a comparator for one attribute: '' when f() deems the values equal,
    a formatted diff string otherwise."""
    def compare(l, r):
        left, right = getattr(l, attr), getattr(r, attr)
        return "" if f(left, right) else diff_fmt(attr, left, right)
    return compare
def mk_differ(**kwargs):
    """kwargs = attribute -> compare-function. Returns f(l, r) -> Diff whose
    string content lists the attributes of l and r that compare different
    (an empty Diff means: no differences)."""
    comparators = [mk_comp(attr, fn) for attr, fn in kwargs.items()]
    def differ(l, r):
        return Diff( reduce(diff_reductor(l, r), comparators, "") )
    return differ
# Per-antenna statistics; equality and __diff__ use the tolerant, None-safe
# float/array/string comparisons defined above. 'source' records provenance.
AntennaStats = Derived(collections.namedtuple('AntennaStats', ['name', 'position', 'diameter', 'mount', 'offset']), source=None,
                       __repr__ = method("ANT {0.name}: xyz={0.position} d={0.diameter} offset={0.offset} mount={0.mount}".format),
                       __eq__ = lambda l,r: FloatEq(l.diameter, r.diameter) and ArrayEq(l.offset, r.offset)
                                and ArrayEq(l.position, r.position) and StringEq(l.mount, r.mount),
                       __ne__ = lambda l,r: not (l==r),
                       __diff__ = mk_differ(diameter=FloatEq, offset=ArrayEq, position=ArrayEq, mount=StringEq))
# Per-source statistics: upper-cased name plus RA/DEC position
SourceStats = Derived(collections.namedtuple('SourceStats', ['name', 'position']), source=None,
                      __repr__ = method("SRC {0.name}: RA/DEC {0.position}".format),
                      __eq__ = lambda l,r: ArrayEq(l.position, r.position),
                      __ne__ = lambda l,r: not (l==r),
                      __diff__ = mk_differ(position=ArrayEq))
# Per-spectral-window statistics.
# NB: channel counts must be compared for equality; the previous
# 'l.nchan and r.nchan' only tested truthiness of both counts, so e.g.
# nchan 16 vs 32 compared equal. __diff__ already used operator.__eq__.
FrequencyStats = Derived(collections.namedtuple('FrequencyStats', ['key', 'low', 'high', 'nchan']), source=None,
                         __repr__ = method("SPW {0.key}: {0.low}-{0.high} #{0.nchan}".format),
                         __eq__ = lambda l,r: FloatEq(l.low, r.low) and FloatEq(l.high, r.high) and l.nchan == r.nchan,
                         __ne__ = lambda l,r: not (l==r),
                         __diff__ = mk_differ(low=FloatEq, high=FloatEq, nchan=operator.__eq__))
# defaultdict subclass that allows extra attributes to be set on it (e.g. .source)
DefaultDict = Derived(collections.defaultdict)
# Lightweight data-source descriptor: file format + location, hashed via its repr
Source = lambda **kwargs: Obj(format=kwargs.get('format', 'Missing format='),
                              location=kwargs.get('location', 'Missing location='),
                              __repr__=method(compose("{0.format:>4s}: {0.location}".format, XFormAttr('format', str.upper))),
                              __hash__=method(compose(Call('__hash__'), Call('__repr__'))))
SetSource = XFormAttr('source', lambda _, tp: tp) # just overwrite the attribute's value
#############################################################################################################################
#
# MeasurementSet specific handling
#
#############################################################################################################################
# Transform row of MS Antenna table into AntennaStats.
# note: order of fields in GetN() should match those to AntennaStats c'tor above)
# spectral-window keys are the lowest channel frequency, 3 decimal places
frequency_key = "{0:.3f}".format
ms_antenna_props = compose(Replace(name=str.capitalize), Star(AntennaStats), GetN('NAME', 'POSITION', 'DISH_DIAMETER', 'MOUNT', 'OFFSET'))
ms_source_props = compose(Replace(position=compose(numpy.rad2deg, GetN(0))), Replace(name=str.upper), Star(SourceStats), GetN('NAME', 'REFERENCE_DIR'))
ms_freq_props = compose(Replace(key=compose(frequency_key, numpy.min), low=numpy.min, high=numpy.max),
                        Star(FrequencyStats), GetN('CHAN_FREQ', 'CHAN_FREQ', 'CHAN_FREQ', 'NUM_CHAN'))
open_ms = partial(pt.table, readonly=True, ack=True)
# open the subtable referenced by keyword t of an already opened MS
mssubtable = lambda t: compose(partial(pt.table, readonly=True, ack=False), Call('getkeyword', t))
ms_antenna, ms_field, ms_spectralwindow = list(map(mssubtable, ['ANTENNA', 'FIELD', 'SPECTRAL_WINDOW']))
def process_ms(path_to_ms):
    """Collect AntennaStats/SourceStats/FrequencyStats from one MeasurementSet.

    Returns a DefaultDict(list) mapping (category, key) -> [stats, ...], itself
    tagged with a Source object describing where the data came from."""
    # tag any object with the data source it was extracted from
    def tag(obj):
        return SetSource(obj, Source(format='ms', location=path_to_ms))
    # build a reduction function turning one table row into tagged statistics
    def accumulator(category, make_stats, key_attr):
        def accumulate(acc, row):
            stats = make_stats(row)
            acc[ (category, getattr(stats, key_attr)) ].append( tag(stats) )
            return acc
        return accumulate
    rv = DefaultDict(list)
    with open_ms(path_to_ms) as ms:
        with ms_antenna(ms) as at:
            rv = reduce(accumulator('antenna', ms_antenna_props, 'name'), at, rv)
        with ms_field(ms) as ft:
            rv = reduce(accumulator('source', ms_source_props, 'name'), ft, rv)
        with ms_spectralwindow(ms) as spwt:
            rv = reduce(accumulator('frequency', ms_freq_props, 'key'), spwt, rv)
    return tag(rv)
#############################################################################################################################
#
# FITS-IDI specific handling
#
#############################################################################################################################
# input: list of (absolute) path names, output: string of grouped-by-containing-directory of the file names
summarize = compose("; ".join, Map("{0[0]}: {0[1]}*".format), Map(pam(GetN(0),
                    compose(os.path.commonprefix, list, Map(GetN(1)), GetN(1)))),
                    GroupBy(GetN(0)), Sort(GetN(0)), Map(pam(os.path.dirname, os.path.basename)))
# look for paths that have "<stem>[0-9]+" and group by <stem> and compile list of suffixes
idi_chunk = re.compile(r"^(?P<stem>.*\.IDI)(?P<suffix>[0-9]+)$").match
# from a single groupby result:
# (key, [(i, x), (i, x), ...])
# extract all x'es, and return a tuple:
# (x[0], x[-1])
# i.e. the first + last values of the sequence
# handle the case where the groupby returns an empty list (allegedly this can happen)
ranger = compose(choice(operator.truth, pam(GetN(0),GetN(-1)), const(None)), list, Map(GetN(1)), GetN(1))
## Finds consecutive ranges in the sequence.
## Returns a list of (start, end) values.
## Single, non-consecutive, numbers are returned as (start, end) with end==start
##
## Based on
## http://stackoverflow.com/questions/2361945/detecting-consecutive-integers-in-a-list
## (after enumerate, consecutive values share a constant index-value difference,
##  which is what the GroupBy key exploits)
consecutive_ranges = compose(Filter(operator.truth), Map(ranger), GroupBy(lambda ix: ix[0]-ix[1]), enumerate, sorted)
# input: list of consecutive ranges, output:
# "[start-end, start-end, start, start-end, ...]"
# (a single "start" means a non-consecutive, single, number)
n_fmt_ = "{0[0]}".format
r_fmt_ = "{0[0]}-{0[1]}".format
compressed_suffixes= compose("[{0}]".format, ",".join, Map(lambda se: (n_fmt_ if se[0]==se[1] else r_fmt_)(se)))
# Take a list of items, item = (Source, stem, 'suffix')
# return two things: Source and a nicely formatted compressed
# string of consecutive ranges: "[start-end, start, start-end, ...]" found in the suffixes
# (All Source and stem values should be equal in this one since they were grouped by 'stem', i.e.
# the (unique) file name.)
decompose_idilist = compose(pam(compose(GetN(0), GetN(0)), # Source[0]
                                compose(compressed_suffixes, consecutive_ranges, GetN(2))), # "[start-end, start, ...]"
                            list, ZipStar, Map(pam(GetN(0), GetN(1), compose(int, GetN(2)))))
# item = (key, list-of-items-sharing-key)
# The items sharing key are tuples: (Source, stem, 'suffix')
# and the key by which the items are grouped happens to be stem
def xform_gb(acc, item):
    """Fold one groupby-by-stem result into the accumulator list of Sources."""
    stem, members = item
    members = list(members)
    if stem is None:
        # the file names carried no numeric suffix, so the original Source
        # objects are good enough - pass them on unchanged
        acc.extend(member[0] for member in members)
    else:
        # we have a list of numeric suffixes for this stem: transform to int,
        # sort and compress into consecutive ranges; keep one of the original
        # Source objects because we need its format. The new Source reads:
        # <format>:<stem>[1-3,6,12-14, ...]
        src, ranges = decompose_idilist( members )
        acc.append( Source(format=src.format, location=stem + ranges) )
    return acc
sortkey = compose(sorteable_none, GetN(1)) # defines the key to sort/groupby below
# (Source, match-or-None) -> (Source, stem-or-None, suffix-or-None)
xform_item = choice(compose(partial(operator.is_, None), GetN(1)), pam(GetN(0), const(None), const(None)),
                    pam(GetN(0), compose(Group('stem'), GetN(1)), compose(Group('suffix'), GetN(1))))
# pair each Source with an idi_chunk match on its location, then group by stem
summarize3_= compose(GroupBy(GetN(1)), Sort(sortkey), Map(xform_item), Map(pam(identity, compose(idi_chunk, GetA('location')))))
summarize3 = lambda l: reduce(xform_gb, summarize3_(l), list())
# Helpers for dealing with FITS-IDI files
# - opening an IDI file in a specific mode, adding Debug output prints the
# actual file name, which is nice feedback for the user :-)
# - building antenna, source lookup tables
# - Given an opened FITS-IDI object, returning the accumulated statistics from the UV_DATA table
open_idi = compose(partial(astropy.io.fits.open, memmap=True, mode='readonly'), D)
# idi_tbl('A', 'B')(idi) -> (idi['A'].data, idi['B'].data)
idi_tbl = lambda *tbl: pam(*map(lambda c: compose(GetA('data'), GetN(c)), tbl))
# normalize antenna/source names into upper-cased lookup keys
anname_key = compose(str.upper, str)
anname_key_l = compose(list, Map(anname_key))
# From AIPSMEM114r, integer codes matched to MeasurementSet strings
idi_mntsta_ = { 0:"ALT-AZ", 1:"EQUATORIAL", 2:"ORBITING", 3:"X-Y", 4:"ALT-AZ-NASMYTH-RH", 5:"ALT-AZ-NASMYTH-LH" }
idi_mntsta = lambda mntsta: idi_mntsta_.get(mntsta, "UnknownMNTSTA#{0}".format(mntsta))
# in FITS-IDI antenna info is spread over two tables: ANTENNA and ARRAY_GEOMETRY
def acc_idi_antenna_table(acc, idi):
    """Accumulate one AntennaStats per ANTENNA-table station into dict acc."""
    an, ag = idi_tbl('ANTENNA', 'ARRAY_GEOMETRY')(idi)
    # array geometry table has anname column to link to antenna table
    agidx = anname_key_l(ag['ANNAME'])
    # DIAMETER is an optional column in ARRAY_GEOMETRY - None when absent
    diameter = lambda i: None if 'DIAMETER' not in ag.columns.names else ag['DIAMETER'][i]
    stabxyz, mntsta, staxof = GetN('STABXYZ', 'MNTSTA', 'STAXOF')(ag)
    def mk_idi_antenna(acc, station):
        name = station.capitalize()
        key = ('antenna', name)
        # find row in array_geometry describing this station
        idx = agidx.index( station )
        acc[ key ].append( SetSource(AntennaStats(name, stabxyz[idx], diameter(idx), idi_mntsta(mntsta[idx]), staxof[idx]),
                           Source(format='idi', location=idi.filename())) )
        return acc
    return reduce(mk_idi_antenna, anname_key_l(an['ANNAME']), acc)
def acc_idi_source_table(acc, idi):
    """Accumulate one SourceStats per row of the FITS-IDI SOURCE table into dict acc."""
    rows = zip(*GetN('SOURCE', 'RAEPO', 'DECEPO')(idi['SOURCE'].data))
    def add_row(acc, row):
        name, ra, dec = row
        name = anname_key(name)
        stats = SourceStats(name, numpy.array([float(ra), float(dec)]))
        acc[ ('source', name) ].append(
            SetSource(stats, Source(format='idi', location=idi.filename())) )
        return acc
    return reduce(add_row, rows, acc)
# From AIPSMEM114r:
# "Band frequency offsets. The BANDFREQ column shall contain a one-dimensional
# array of band-specific frequency offsets. There shall be one element for each
# band in the file. The offset for the first band in the frequency setup with
# FREQID value 1 should be 0 Hz. Frequency offsets may be of either sign."
#
# The offset is measured w.r.t. the value of the "REF_FREQ" mandatory header keyword.
# Number of spectral channels is encoded in the value of the "NO_CHAN" mandatory header keyword.
#
# extract (REF_FREQ, NO_CHAN) from the FREQUENCY HDU header; wrap() drops the
# extra object argument that GetN_xform passes to each transform
idi_freq_hdr_values = compose(GetN_xform(REF_FREQ=wrap(float), NO_CHAN=wrap(int)), GetA('header'))
# iterate FREQUENCY-table rows as (BANDFREQ, TOTAL_BANDWIDTH, CH_WIDTH) column triples
idi_freq_rows = compose(ZipStar, GetN('BANDFREQ', 'TOTAL_BANDWIDTH', 'CH_WIDTH'), GetA('data'))
def acc_idi_frequency_table(acc, idi):
    """Accumulate one FrequencyStats per band of every setup in the FREQUENCY table."""
    frequency = idi['FREQUENCY']
    ref_freq, no_chan = idi_freq_hdr_values(frequency)
    def add_band(acc, band):
        offset, total_bw, ch_width = band
        # band frequencies are offsets w.r.t. the REF_FREQ header keyword
        low = ref_freq + offset
        key = frequency_key(low)
        stats = FrequencyStats(key, low, low + (no_chan-1)*ch_width, no_chan)
        acc[ ('frequency', key) ].append(
            SetSource(stats, Source(format='idi', location=idi.filename())) )
        return acc
    def add_setup(acc, setup):
        # One row in the frequency table describes a number of spectral windows, a setup
        return reduce(add_band, zip(*setup), acc)
    return reduce(add_setup, idi_freq_rows(frequency), acc)
# Actually process a series of FITS-IDI file(s)
def process_idi(list_of_idis):
    """Accumulate statistics from every FITS-IDI file in the list; the result
    is tagged with a Source summarizing all input files."""
    def process_one_idi(acc, path_to_idi):
        # each table accumulator mutates acc in place
        idi = open_idi(path_to_idi)
        for accumulate in (acc_idi_antenna_table, acc_idi_source_table, acc_idi_frequency_table):
            accumulate(acc, idi)
        return acc
    stats = reduce(process_one_idi, list_of_idis, DefaultDict(list))
    return SetSource(stats, Source(format='idi', location=summarize(list_of_idis)))
##########################################################################################################################################
#
#
# The analysis part starts here
#
#
##########################################################################################################################################
# extract the source attribute from a list of items
get_src = compose(list, Map(GetA('source')))
# expected input: (set(keys), Stats)
# output: the sorted keys in set(keys)
sorted_keys = compose(sorted, GetN(0))
def split_equal(values):
    """Collapse runs of equal values into [(value, [source, ...]), ...]."""
    return [ (value, get_src(group)) for value, group in itertools.groupby(values) ]
# input: entry from split_equal: (value, [source, source, ...])
# output: formatted string
# \t<value> found in:
# \t\tsrc#0
# \t\tsrc#1
# ...
source_fmt = compose("\n\t\t".join, Map(Call('__repr__')))
# print one (value, sources) group; summarize3 first compresses multi-part
# FITS-IDI file lists into "<stem>[ranges]" form
print_group = compose(D, "\t{0[0]} found in:\n\t\t{0[1]}".format, pam(GetN(0), compose(source_fmt, summarize3, GetN(1))))
## helper functions to group lists-of-datasources by identical diff
## in order to make the output as compact as possible
## (so if there are values with identical diffs are found across different
## file formats the actual diff will only be reported once)
def reduce_diff(acc, item):
    # acc = (reference value or None, dict: Diff -> set(sources))
    ref_val, group = acc
    value, sources = item
    if ref_val is None:
        # first item: print it and promote its value to the reference
        # that all subsequent values are diffed against
        print_group(item)
        return (value, group)
    # now collect the data sources by actual diff
    group[ value.__diff__(ref_val) ].update( sources )
    return (ref_val, group)
# values is [(value, [source, source, ...]), (value, [source, source, ...]), ... ]
# where value is a value that was found equal in all accompanying sources
# these are assumed to be values associated with the same key (source, antenna or spectral window)
# so we want to highlight the differences.
# Note: we know values is at least a list of length 2 because if length == 1 there are
# no differing values and so the code doesn't bother to report those
def print_diffs(key, values):
    print(key,":")
    # fold all values into (reference, {diff: sources}), then print one group per distinct diff
    compose(consume, Map(print_group), Call('items'), GetN(1), Reduce(reduce_diff, (None, collections.defaultdict(set))))(values)
    return None
# take a list of dicts with dict(key=>statistics) and compare them
def report(list_of_stats):
    """Cross-compare the accumulated statistics dicts; print all differences
    and non-common keys. Returns the number of problems found."""
    # transform [Stats, Stats, ...] into [(set(keys), Stats), (set(keys), Stats), ...]
    # Yes. I know. The keys of a dict ARE unique by definition so the "set()" doesn't seem to
    # add anything. However, when finding out common or extra or missing keys across multiple data sets
    # the set() operations like intersect and difference are exactly what we need.
    # So by creating them as set() objects once makes them reusable.
    list_of_stats = compose(list, Map(lambda s: (set(s.keys()), s)))(list_of_stats)
    # can only report on common keys
    common_keys = reduce(set.intersection, map(GetN(0), list_of_stats))
    # warn about non-matching keys
    def count_extra_keys(acc, item): # item = (set(keys), Statistics())
        print("="*4, "Problem report", "="*4+"\n", item[1].source, "\n", "Extra keys:\n",
              "\n".join(map(compose("\t{0[0]} found {0[1]} times".format, pam(identity, lambda k: len(item[1][k]))), sorted_keys(item))), "\n"+"="*25)
        return acc.append( len(item[0]) ) or acc
    # per data set: keys minus common_keys (via __rsub__); only report non-empty leftovers
    nExtra = reduce(count_extra_keys,
                    filter(compose(operator.truth, GetN(0)),
                           Map(pam(compose(common_keys.__rsub__, GetN(0)), GetN(1)))(list_of_stats)),
                    list())
    # For each common key check all collected values are the same by counting the number of keys
    # that don't fulfil this predicate. Also print all values+source whence they came in case of such a mismatch
    get_all = lambda k: compose(split_equal, Reduce(operator.__add__), Map(compose(GetN(k), GetN(1))))
    def check_key(acc, key):
        values = get_all(key)(list_of_stats)
        # values is [(value, [source, source, ...]), (value, [source, source, ...]), ... ]
        # i.e. the unique values for the current key and the sources in which those values were found
        # if that list is of length 1 all values are equal and thus no problems
        return acc if len(values)==1 else print_diffs(key, values) or acc+1
    nProb = reduce(check_key, sorted(common_keys), 0)
    # And one line feedback about what we found in total
    print("Checked", len(list_of_stats), "data sets,", len(common_keys), "common keys",
          choice(partial(operator.eq, 0), const(""), "with {0} problems identified".format)(nProb),
          choice(operator.truth, compose("and {0[1]} non-common keys in {0[0]} formats".format, pam(len, sum)), const(""))(nExtra))
    return nProb
# If *all* keys have exactly one value that means there is absolutely nothing to check/compare
# e.g. if multipart IDI fits with N IDI files, each IDI file has an antenna-, source- and frequency table.
# I.e.: one data set but still multiple values!
# nothing_to_compare() analyzes the list of accumulated Stats and returns True if every key has only one value associated.
# (a Counter tallies how often each key occurs over all data sets; True iff every count == 1,
#  and once a count != 1 is seen the folded value stays False)
nothing_to_compare = compose(Reduce(lambda tf, kv: kv[1]==1 if tf is True else tf, True), Call('items'),
                             Reduce(lambda a, s: reduce(lambda b, kv: b.update([kv[0]]*len(kv[1])) or b, s.items(), a),
                                    collections.Counter()))
# pretty_print() will only be called if all keys found across all datasets have exactly one value
# - i.e. there is nothing to compare: print the source, then each key's single value sorted by key
pretty_print = compose(consume, Map(compose(D, GetN(0), GetN(1))), Sort(GetN(0)), Call('items'), D_a('source'))
# This is the main function what to do: execute all statistics-gathering functions
# (the command line parsing stored them as zero-argument closures; Apply runs each)
# and if there are things to compare report the differences.
# The process exit code is report()'s problem count, or 0 when nothing to compare.
main = compose(sys.exit, choice(nothing_to_compare, compose(const(0), consume, Map(pretty_print)), report), list, Map(Apply))
############################################################################
#
# Let argparse do some work - such that we don't have to repeat it
#
############################################################################
# This metaclass extracts the 'process_fn' from a Process()'s __init__'s
# keyword arguments and turns it into an argparse.Action __call__() function
# which appends a lambda() to the Action's destination, which, when called,
# executes the processing function
class ProcessMeta(type):
    def __call__(cls, *args, **kwargs):
        # Note: pop the 'process_fn=' keyword from kwargs such that
        # when Process.__init__() calls argparse.Action.__init__(), the
        # latter won't barf; it doesn't like unrecognized keyword arguments ...
        process_fn = kwargs.pop('process_fn', None)
        if process_fn is None:
            raise RuntimeError("Missing process_fn= keyword argument in __init__() for ", kwargs.get('help', "Unknown Process action"))
        # Need to do two things: add the __call__() special method to the class
        # which looks up the actual method-to-call in the instance
        # (special methods are looked up on the type, not the instance)
        # See: https://stackoverflow.com/a/33824320/26083
        setattr(cls, '__call__', lambda *args, **kwargs: args[0].__call__(*args, **kwargs))
        # and now decorate the instance with a call method consistent with
        # https://docs.python.org/3/library/argparse.html#action-classes
        # "Action instances should be callable, so subclasses must override
        # the __call__ method, which should accept four parameters: ..."
        # net effect: namespace.<dest> accumulates zero-argument closures running process_fn(values)
        return SetAttr(type.__call__(cls, *args, **kwargs), '__call__',
                       lambda self, _parser, namespace, values, _opt: XFormAttr(self.dest, Append)(namespace, lambda: process_fn(values)))
# Create a custom argparse.Action to add statistics gathering functions
Process = Derived(with_metaclass(ProcessMeta, argparse.Action))
if __name__ == '__main__':
    ###################################################################################################################
    #
    # Here's where the only interesting stuff happens: parsing the command line & executing stuff!
    #
    ###################################################################################################################
    parsert = argparse.ArgumentParser(description="Compare meta data contents between MeasurementSet(s) and/or FITS-IDI files")
    # All options share dest='path': each adds a deferred processing closure there
    parsert.add_argument('--ms', action=Process, dest='path', default=list(), required=False, help="Specify the source MeasurementSet", process_fn=process_ms)
    parsert.add_argument('--idi', action=Process, dest='path', default=list(), nargs='*', help="The FITS-IDI file(s) produced from 'ms='", process_fn=process_idi)
    parsert.add_argument('--lis', action=Process, dest='path', default=list(), required=False, help="The .lis file that was used to construct 'ms='",
                         process_fn=compose(lambda lis: SetSource(DefaultDict(list), Source(format='lis', location=lis)), DD("lis-file processing not yet implemented")))
    # 'path' now holds zero-argument closures; main() Apply's each and reports
    main(parsert.parse_args().path)