Collection of scripts and small programs used by the EVN Support Scientists at JIVE during the regular data processing of EVN observations.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 

167 líneas
6.4 KiB

  1. #!/usr/bin/env python3
  2. description = """
  3. Given an ANTAB file, it creates a new one with more Tsys values that are interpolated from the
  4. given ones. This will fill gaps when the time separation between Tsys measurements is too long
  5. (and e.g. AIPS starts flagging scans because there is not Tsys information for them).
  6. IMPORTANT CONSIDERATIONS:
  7. Note that it interpolates data with a smooth function, avoiding outliers or zero values.
  8. However, it does not consider scan boundaries, so if Tsys are recorded in different sources
  9. it can introduce biases. Therefore, it assumes that the Tsys should not change quickly
  10. (e.g. no change of sources).
  11. Version: 2.1
  12. Date: Nov 2020
  13. Written by Benito Marcote (marcote@jive.eu)
  14. version 2.1 changes (Nov 2020)
  15. - Bug fix: particular lines with INDEX were breaking the index recognition.
  16. version 2.0 changes (Aug 2018)
  17. - (MAJOR) Now it does an actual interpolation of the data (linear spline with smoothing).
  18. - Keeps comment lines in the output file (except the ones within the data).
  19. - Allows you to specify a custom time range for the final antab file.
  20. """
  21. # import pdb
  22. import sys
  23. import os
  24. import argparse
  25. import datetime as dt
  26. import numpy as np
  27. from scipy import interpolate
  28. usage = "%(prog)s [-h] [-v] [-p] [-o OUTPUTFILE] [-tini STARTIME] [-tend ENDTIME] antabfile int"
  29. help_tini = 'Starttime of the Tsys measurements. In case you want to modify it from the original file. It will extrapolate the earliest Tsys original values. The format must be as DOY/HH:MM:SS.'
  30. help_tend = 'Ending time of the Tsys measurements. In case you want to modify it from the original file. It will extrapolate the latest Tsys original values. The format must be as DOY/HH:MM:SS.'
  31. help_plot = 'Produce plots (per column) with the original values and the interpolation.'
  32. parser = argparse.ArgumentParser(description=description, prog='antabfs_interpolate.py', usage=usage,
  33. formatter_class=argparse.RawTextHelpFormatter)
  34. parser.add_argument('antabfile', type=str, help='The antabfs file to be read.')
  35. parser.add_argument('int', type=float, help='The interval (in seconds) between the final Tsys measurements')
  36. parser.add_argument('-v', '--version', action='version', version='%(prog)s 2.0')
  37. parser.add_argument('-o', '--output', type=str, default=None, help='Output filename. By default same as antabfile.')
  38. parser.add_argument('-p', '--plot', default=False, action='store_true', help=help_plot)
  39. parser.add_argument('-tini', type=str, default=None, help=help_tini)
  40. parser.add_argument('-tend', type=str, default=None, help=help_tend)
  41. args = parser.parse_args()
  42. antab = open(args.antabfile, 'r+').readlines()
  43. # Reads the current antab file and loads the Tsys values and all the data
  44. antab_times = []
  45. antab_data = []
  46. indexes = []
  47. for aline in antab:
  48. if aline.lstrip()[0].isdigit():
  49. # Then this line is a Tsys input
  50. temp = [i.strip() for i in aline.split()]
  51. # First column is DOY, second one is HH:MM.MM or HH:MM:SS
  52. if temp[1].count(':') == 1:
  53. temp2 = temp[1].split('.')
  54. temp2[1] = '{:02.0f}'.format(60*float('0.'+temp2[1]))
  55. temp[1] = ':'.join(temp2)
  56. elif temp[1].count(':') == 2:
  57. # Nothing to do
  58. if temp[1].count('.') != 0:
  59. temp[1] = temp[1].split('.')[0]
  60. pass
  61. else:
  62. raise ValueError('Time format not supported: {}'.format(temp[1]))
  63. antab_times.append(dt.datetime.strptime(' '.join(temp[0:2]), '%j %H:%M:%S').timestamp())
  64. antab_data.append([float(i) for i in temp[2:]])
  65. else:
  66. if 'INDEX' in aline:
  67. # It must be only one entry
  68. tmp = aline.split('INDEX')[1].replace('/', '').replace("'", "").replace('\n', '').strip()
  69. indexes = [i.strip() for i in tmp.split('=')[1].split(',')]
  70. antab_data = np.array(antab_data)
  71. n_columns = antab_data.shape[1]
  72. assert n_columns == len(indexes)
  73. # For each column, do a spline fit, with the data weighted proportionally to the square of
  74. # their deviation from the median value.
  75. fits = [None]*n_columns
  76. for i in np.arange(n_columns):
  77. weights = np.abs((antab_data[:,i] - np.median(antab_data[:,i]))/np.median(antab_data[:,i]))
  78. # For zero values, consider a value of 1e-3, which would imply an uncertainty in the weight of 0.1%
  79. # This is done to avoid division by zero in the interpolation
  80. weights[np.where(weights == 0.0)] = 1e-3
  81. # s = 1e4 looks optimal to remove large outliers in the ANTAB information
  82. # to be less drastic, you could use 1e3.
  83. # Lower values will produce peaks to outliers
  84. fits[i] = interpolate.splrep(antab_times, antab_data[:,i], w=1/weights**2, k=1, s=1e4)
  85. tsys_timestamps = None
  86. tsys = None
  87. with open(args.antabfile+'.tmp', 'wt') as newfile:
  88. # Write all the header
  89. for aline in antab:
  90. if not aline[0].isdigit():
  91. newfile.write(aline)
  92. else:
  93. break
  94. if args.tini is None:
  95. tsys_times_ini = dt.datetime.fromtimestamp(antab_times[0])
  96. else:
  97. tsys_times_ini = dt.datetime.strptime(args.tini, '%j/%H:%M:%S')
  98. if args.tend is None:
  99. tsys_times_end = dt.datetime.fromtimestamp(antab_times[-1])
  100. else:
  101. tsys_times_end = dt.datetime.strptime(args.tend, '%j/%H:%M:%S')
  102. tsys_times = np.arange(tsys_times_ini, tsys_times_end, dt.timedelta(seconds=args.int))
  103. tsys_timestamps = np.array([i.tolist().timestamp() for i in tsys_times])
  104. plot_times = tsys_timestamps
  105. tsys = np.empty((len(tsys_times), n_columns))
  106. for acol in range(n_columns):
  107. tsys[:,acol] = interpolate.splev(tsys_timestamps, fits[acol], der=0)
  108. for a_time,a_entry in zip(tsys_timestamps, tsys):
  109. temp = ['{:6.1f}'.format(i) for i in a_entry]
  110. newfile.write('{} {}\n'.format(dt.datetime.fromtimestamp(a_time).strftime('%j %H:%M:%S'), ' '.join(temp)))
  111. newfile.write('/\n')
  112. if args.output is None:
  113. os.rename(args.antabfile+'.tmp', args.antabfile)
  114. print('The antab file {} has been updated.'.format(args.antabfile))
  115. else:
  116. os.rename(args.antabfile+'.tmp', args.output)
  117. print('The antab file {} has been created.'.format(args.output))
  118. # Testing purposes: plot the original data and the final one
  119. if args.plot:
  120. import matplotlib.pyplot as plt
  121. for i in range(n_columns):
  122. plt.figure()
  123. plt.plot(antab_times, antab_data[:,i], 'oC0')
  124. plt.plot(tsys_timestamps, tsys[:,i], '-C1')
  125. plt.xlabel(r'Timestamp')
  126. plt.ylabel(r'Tsys')
  127. plt.title('Column: {}'.format(indexes[i]))
  128. plt.show()