Source code for desisim.quickcat

'''
desisim.quickcat
================

Code for quickly generating an output zcatalog given fiber assignment tiles,
a truth catalog, and optionally a previous zcatalog.
'''
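
#- Illustrative usage sketch (comments only, not executed here); the tile file
#- names and the targets/truth tables below are hypothetical placeholders:
#-
#-     from desisim.quickcat import quickcat
#-     # targets, truth: astropy Tables; tilefiles: fiberassign FITS files
#-     tilefiles = ['fiberassign-001122.fits', 'fiberassign-001123.fits']
#-     zcat = quickcat(tilefiles, targets, truth, perfect=False)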

from __future__ import absolute_import, division, print_function

import os
import yaml
from collections import Counter
from pkg_resources import resource_filename
from time import asctime

import numpy as np
from astropy.io import fits
from astropy.table import Table, Column, vstack
import sys
import scipy.special as sp
import desisim
from desisim.targets import get_simtype

import astropy.constants
c = astropy.constants.c.to('km/s').value

from desitarget.targetmask import desi_mask, bgs_mask, mws_mask

from desiutil.log import get_logger
log = get_logger()

#- redshift errors, zwarn, cata fail rate fractions from
#- /project/projectdirs/desi/datachallenge/redwood/spectro/redux/redwood/
#- sigmav = c sigmaz / (1+z)
_sigma_v = {
#    'ELG': 38.03,
#    'LRG': 67.38,
    'BGS': 37.70,
#    'QSO': 182.16,
    'STAR': 51.51,
    'WD': 54.35,
    'SKY': 9999,      #- meaningless
    'UNKNOWN': 9999,  #- meaningless
}

_zwarn_fraction = {
#    'ELG': 0.087,
#    'LRG': 0.007,
#    'QSO': 0.020,
    'BGS': 0.024,
    'STAR': 0.345,
    'WD': 0.094,
    'SKY': 1.0,
    'UNKNOWN': 1.0,
}

_cata_fail_fraction = {
#    'ELG': 0.020,
#    'LRG': 0.002,
#    'QSO': 0.012,
    'BGS': 0.003,
    'STAR': 0.050,
    'WD': 0.0,
    'SKY': 0.,
    'UNKNOWN': 0.,
}
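
#- Example of how _sigma_v and _cata_fail_fraction are used in
#- get_observed_redshifts() below: a BGS target at z=0.2 gets
#- zerr = 37.70 * (1 + 0.2) / c ~= 1.5e-4 and roughly a 0.3% chance of being
#- assigned a catastrophic (uniformly redrawn) redshift.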

def get_zeff_obs(simtype, obsconditions):
    '''
    Return a per-tile redshift-efficiency correction factor derived from the
    observing conditions (airmass, E(B-V), seeing, transparency, moon).
    '''
    if(simtype=='LRG'):
        p_v = [1.0, 0.15, -0.5]
        p_w = [1.0, 0.4, 0.0]
        p_x = [1.0, 0.06, 0.05]
        p_y = [1.0, 0.0, 0.08]
        p_z = [1.0, 0.0, 0.0]
        sigma_r = 0.02
    elif(simtype=='QSO'):
        p_v = [1.0, -0.2, 0.3]
        p_w = [1.0, -0.5, 0.6]
        p_x = [1.0, -0.1, -0.075]
        p_y = [1.0, -0.08, -0.04]
        p_z = [1.0, 0.0, 0.0]
        sigma_r = 0.05
    elif(simtype=='ELG'):
        p_v = [1.0, -0.1, -0.2]
        p_w = [1.0, 0.25, -0.75]
        p_x = [1.0, 0.0, 0.05]
        p_y = [1.0, 0.2, 0.1]
        p_z = [1.0, -10.0, 300.0]
        sigma_r = 0.075
    else:
        log.warning('No model for how observing conditions impact {} redshift efficiency'.format(simtype))
        return np.ones(len(obsconditions))

    ncond = len(np.atleast_1d(obsconditions['AIRMASS']))

    # airmass
    v = obsconditions['AIRMASS'] - np.mean(obsconditions['AIRMASS'])
    pv = p_v[0] + p_v[1] * v + p_v[2] * (v**2. - np.mean(v**2))

    # ebmv
    # KeyError if dict or Table is missing EBMV
    # ValueError if ndarray is missing EBMV
    try:
        w = obsconditions['EBMV'] - np.mean(obsconditions['EBMV'])
        pw = p_w[0] + p_w[1] * w + p_w[2] * (w**2 - np.mean(w**2))
    except (KeyError, ValueError):
        pw = np.ones(ncond)

    # seeing
    x = obsconditions['SEEING'] - np.mean(obsconditions['SEEING'])
    px = p_x[0] + p_x[1]*x + p_x[2] * (x**2 - np.mean(x**2))

    # transparency
    try:
        y = obsconditions['LINTRANS'] - np.mean(obsconditions['LINTRANS'])
        py = p_y[0] + p_y[1]*y + p_y[2] * (y**2 - np.mean(y**2))
    except (KeyError, ValueError):
        py = np.ones(ncond)

    # moon illumination fraction
    z = obsconditions['MOONFRAC'] - np.mean(obsconditions['MOONFRAC'])
    pz = p_z[0] + p_z[1]*z + p_z[2] * (z**2 - np.mean(z**2))

    #- if moon is down phase doesn't matter
    #- (as written, this resets the moon-fraction polynomial above for all tiles)
    pz = np.ones(ncond)
    pz[obsconditions['MOONALT'] < 0] = 1.0

    pr = 1.0 + np.random.normal(size=ncond, scale=sigma_r)

    #- this correction factor can be greater than 1, but not less than 0
    pobs = (pv * pw * px * py * pz * pr).clip(min=0.0)

    return pobs
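
#- Worked example for get_zeff_obs(): when every tile sits at the sample-mean
#- airmass, E(B-V), seeing, and transparency, and the moon is below the
#- horizon, every polynomial term reduces to 1 and pobs is simply
#- 1 + Gaussian scatter of width sigma_r, clipped at zero.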
def get_redshift_efficiency(simtype, targets, truth, targets_in_tile, obsconditions, params, ignore_obscondition=False):
    """
    Simple model to get the redshift efficiency from the observational conditions or observed magnitudes+redshift

    Args:
        simtype: ELG, LRG, QSO, MWS, BGS
        targets: target catalog table; currently used only for TARGETID
        truth: truth table with OIIFLUX, TRUEZ
        targets_in_tile: dictionary. Keys correspond to tileids, its values are the
            arrays of targetids observed in that tile.
        obsconditions: table observing conditions with columns
            'TILEID': array of tile IDs
            'AIRMASS': array of airmass values on a tile
            'EBMV': array of E(B-V) values on a tile
            'LINTRANS': array of atmospheric transparency during spectro obs; floats [0-1]
            'MOONFRAC': array of moonfraction values on a tile.
            'SEEING': array of FWHM seeing during spectroscopic observation on a tile.
        params: dictionary of quickcat parameters (loaded from the yaml file)
        ignore_obscondition: if True, no variation of efficiency with obs. conditions
            (adjustment of exposure time should correct for mean change of S/N)

    Returns:
        tuple of arrays (observed, p) both with same length as targets

        observed: boolean array of whether the target was observed in these tiles

        p: probability to get this redshift right
    """
    targetid = targets['TARGETID']
    n = len(targetid)

    try:
        if 'DECAM_FLUX' in targets.dtype.names:
            true_gflux = targets['DECAM_FLUX'][:, 1]
            true_rflux = targets['DECAM_FLUX'][:, 2]
        else:
            true_gflux = targets['FLUX_G']
            true_rflux = targets['FLUX_R']
    except:
        raise Exception('Missing photometry needed to estimate redshift efficiency!')

    a_small_flux = 1e-40
    true_gflux[true_gflux < a_small_flux] = a_small_flux
    true_rflux[true_rflux < a_small_flux] = a_small_flux

    if (obsconditions is None) or ('OIIFLUX' not in truth.dtype.names):
        raise Exception('Missing obsconditions and flux information to estimate redshift efficiency')

    if (simtype == 'ELG'):
        # Read the model OII flux threshold (FDR fig 7.12 modified to fit redmonster efficiency on OAK)
        # filename = resource_filename('desisim', 'data/quickcat_elg_oii_flux_threshold.txt')

        # Read the model OII flux threshold (FDR fig 7.12)
        filename = resource_filename('desisim', 'data/elg_oii_flux_threshold_fdr.txt')
        fdr_z, modified_fdr_oii_flux_threshold = np.loadtxt(filename, unpack=True)

        # Compute OII flux thresholds for truez
        oii_flux_limit = np.interp(truth['TRUEZ'], fdr_z, modified_fdr_oii_flux_threshold)
        oii_flux_limit[oii_flux_limit < 1e-20] = 1e-20

        # efficiency is modeled as a function of flux_OII/f_OII_threshold(z) and an arbitrary sigma_fudge
        snr_in_lines = params["ELG"]["EFFICIENCY"]["SNR_LINES_SCALE"] * 7 * truth['OIIFLUX'] / oii_flux_limit
        snr_in_continuum = params["ELG"]["EFFICIENCY"]["SNR_CONTINUUM_SCALE"] * true_rflux
        snr_tot = np.sqrt(snr_in_lines**2 + snr_in_continuum**2)
        sigma_fudge = params["ELG"]["EFFICIENCY"]["SIGMA_FUDGE"]
        nsigma = 3.
        simulated_eff = eff_model(snr_tot, nsigma, sigma_fudge)

    elif(simtype == 'LRG'):
        r_mag = 22.5 - 2.5*np.log10(true_rflux)

        sigmoid_cutoff = params["LRG"]["EFFICIENCY"]["SIGMOID_CUTOFF"]
        sigmoid_fudge = params["LRG"]["EFFICIENCY"]["SIGMOID_FUDGE"]
        simulated_eff = 1./(1.+np.exp((r_mag-sigmoid_cutoff)/sigmoid_fudge))

        log.info("{} eff = sigmoid with cutoff = {:4.3f} fudge = {:4.3f}".format(simtype, sigmoid_cutoff, sigmoid_fudge))

    elif(simtype == 'QSO'):
        zsplit = params['QSO_ZSPLIT']
        r_mag = 22.5 - 2.5*np.log10(true_rflux)
        simulated_eff = np.ones(r_mag.shape)

        # lowz tracer qsos
        sigmoid_cutoff = params["LOWZ_QSO"]["EFFICIENCY"]["SIGMOID_CUTOFF"]
        sigmoid_fudge = params["LOWZ_QSO"]["EFFICIENCY"]["SIGMOID_FUDGE"]
        ii = (truth['TRUEZ'] <= zsplit)
        simulated_eff[ii] = 1./(1.+np.exp((r_mag[ii]-sigmoid_cutoff)/sigmoid_fudge))

        log.info("{} eff = sigmoid with cutoff = {:4.3f} fudge = {:4.3f}".format("LOWZ QSO", sigmoid_cutoff, sigmoid_fudge))

        # highz lya qsos
        sigmoid_cutoff = params["LYA_QSO"]["EFFICIENCY"]["SIGMOID_CUTOFF"]
        sigmoid_fudge = params["LYA_QSO"]["EFFICIENCY"]["SIGMOID_FUDGE"]
        ii = (truth['TRUEZ'] > zsplit)
        simulated_eff[ii] = 1./(1.+np.exp((r_mag[ii]-sigmoid_cutoff)/sigmoid_fudge))

        log.info("{} eff = sigmoid with cutoff = {:4.3f} fudge = {:4.3f}".format("LYA QSO", sigmoid_cutoff, sigmoid_fudge))

    elif simtype == 'BGS':
        simulated_eff = 0.98 * np.ones(n)

    elif simtype == 'MWS':
        simulated_eff = 0.98 * np.ones(n)

    else:
        default_zeff = 0.98
        log.warning('using default redshift efficiency of {} for {}'.format(default_zeff, simtype))
        simulated_eff = default_zeff * np.ones(n)

    #- Get the corrections for observing conditions per tile, then
    #- correct targets on those tiles.  Parameterize in terms of failure
    #- rate instead of success rate to handle bookkeeping of targets that
    #- are observed on more than one tile.
    #- NOTE: this still isn't quite right since multiple observations will
    #- be simultaneously fit instead of just taking whichever individual one
    #- succeeds.

    if ignore_obscondition:
        ncond = len(np.atleast_1d(obsconditions['AIRMASS']))
        zeff_obs = np.ones(ncond)
    else:
        zeff_obs = get_zeff_obs(simtype, obsconditions)
    pfail = np.ones(n)
    observed = np.zeros(n, dtype=bool)

    # More efficient alternative for large numbers of tiles + large target
    # list, but requires pre-computing the sort order of targetids.
    # Assume targets['TARGETID'] is unique, so not checking this.
    sort_targetid = np.argsort(targetid)

    # Extract the targets-per-tile lists into one huge list.
    concat_targets_in_tile = np.concatenate([targets_in_tile[tileid] for tileid in obsconditions['TILEID']])
    ntargets_per_tile = np.array([len(targets_in_tile[tileid]) for tileid in obsconditions['TILEID']])

    # Match entries in each tile list against sorted target list.
    target_idx = targetid[sort_targetid].searchsorted(concat_targets_in_tile, side='left')
    target_idx_r = targetid[sort_targetid].searchsorted(concat_targets_in_tile, side='right')
    del(concat_targets_in_tile)

    # Flag targets in tiles that do not appear in the target list (sky,
    # standards).
    not_matched = target_idx_r - target_idx == 0
    target_idx[not_matched] = -1
    del(target_idx_r, not_matched)

    # Not every tile has 5000 targets, so use individual counts to
    # construct offset of each tile in target_idx.
    offset = np.concatenate([[0], np.cumsum(ntargets_per_tile[:-1])])

    # For each tile, process targets.
    for i, tileid in enumerate(obsconditions['TILEID']):
        if ntargets_per_tile[i] > 0:
            # Quickly get all the matched targets on this tile.
            targets_this_tile = target_idx[offset[i]:offset[i]+ntargets_per_tile[i]]
            targets_this_tile = targets_this_tile[targets_this_tile > 0]

            # List of indices into sorted target list for each observed
            # source.
            ii = sort_targetid[targets_this_tile]
            tmp = (simulated_eff[ii]*zeff_obs[i]).clip(0, 1)
            pfail[ii] *= (1-tmp)
            observed[ii] = True

    simulated_eff = (1-pfail)

    return observed, simulated_eff
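
#- The per-target success probability returned above combines repeat
#- observations through the failure probability,
#-     p_success = 1 - prod_over_tiles(1 - simulated_eff * zeff_obs[tile]),
#- so, for example, a target observed on two tiles with per-tile efficiency 0.9
#- ends up with p_success = 1 - 0.1*0.1 = 0.99.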
# Efficiency model
def eff_model(x, nsigma, sigma, max_efficiency=1):
    return 0.5*max_efficiency*(1.+sp.erf((x-nsigma)/(np.sqrt(2.)*sigma)))
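
#- eff_model is a smoothed step function: at x = nsigma the efficiency is
#- 0.5*max_efficiency (since erf(0) = 0), rising toward max_efficiency for
#- x >> nsigma and falling toward 0 for x << nsigma, with the transition
#- width set by sigma.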
def reverse_dictionary(a):
    """Inverts a dictionary mapping.

    Args:
        a: input dictionary.

    Returns:
        b: output reversed dictionary.
    """
    b = {}
    for i in a.items():
        try:
            for k in i[1]:
                if k not in b.keys():
                    b[k] = [i[0]]
                else:
                    b[k].append(i[0])
        except:
            k = i[1]
            if k not in b.keys():
                b[k] = [i[0]]
            else:
                b[k].append(i[0])
    return b
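
#- Example with hypothetical data:
#-     reverse_dictionary({'tile1': [101, 102], 'tile2': [102]})
#-     # -> {101: ['tile1'], 102: ['tile1', 'tile2']}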
def get_observed_redshifts(targets, truth, targets_in_tile, obsconditions, parameter_filename=None, ignore_obscondition=False):
    """
    Returns observed z, zerr, zwarn arrays given true object types and redshifts

    Args:
        targets: target catalog table; currently used only for target mask bits
        truth: truth table with OIIFLUX, TRUEZ
        targets_in_tile: dictionary. Keys correspond to tileids, its values are the
            arrays of targetids observed in that tile.
        obsconditions: table observing conditions with columns
            'TILEID': array of tile IDs
            'AIRMASS': array of airmass values on a tile
            'EBMV': array of E(B-V) values on a tile
            'LINTRANS': array of atmospheric transparency during spectro obs; floats [0-1]
            'MOONFRAC': array of moonfraction values on a tile.
            'SEEING': array of FWHM seeing during spectroscopic observation on a tile.
        parameter_filename: yaml file with quickcat parameters
        ignore_obscondition: if True, no variation of efficiency with obs. conditions
            (adjustment of exposure time should correct for mean change of S/N)

    Returns:
        tuple of (zout, zerr, zwarn)
    """
    if parameter_filename is None:
        # Load efficiency parameters yaml file
        parameter_filename = resource_filename('desisim', 'data/quickcat.yaml')

    params = None
    with open(parameter_filename, "r") as file:
        params = yaml.safe_load(file)

    simtype = get_simtype(np.char.strip(truth['TRUESPECTYPE']), targets['DESI_TARGET'], targets['BGS_TARGET'], targets['MWS_TARGET'])
    #simtype = get_simtype(np.char.strip(truth['TEMPLATETYPE']), targets['DESI_TARGET'], targets['BGS_TARGET'], targets['MWS_TARGET'])
    truez = truth['TRUEZ']
    targetid = truth['TARGETID']

    try:
        if 'DECAM_FLUX' in targets.dtype.names:
            true_gflux = targets['DECAM_FLUX'][:, 1]
            true_rflux = targets['DECAM_FLUX'][:, 2]
        else:
            true_gflux = targets['FLUX_G']
            true_rflux = targets['FLUX_R']
    except:
        raise Exception('Missing photometry needed to estimate redshift efficiency!')

    a_small_flux = 1e-40
    true_gflux[true_gflux < a_small_flux] = a_small_flux
    true_rflux[true_rflux < a_small_flux] = a_small_flux

    zout = truez.copy()
    zerr = np.zeros(len(truez), dtype=np.float32)
    zwarn = np.zeros(len(truez), dtype=np.int32)

    objtypes = list(set(simtype))

    n_tiles = len(np.unique(obsconditions['TILEID']))

    if (n_tiles != len(targets_in_tile)):
        raise ValueError('Number of obsconditions {} != len(targets_in_tile) {}'.format(n_tiles, len(targets_in_tile)))

    for objtype in objtypes:

        ii = (simtype == objtype)

        ###################################
        # redshift errors
        ###################################
        if objtype == 'ELG':
            sigma = params["ELG"]["UNCERTAINTY"]["SIGMA_17"]
            powerlawindex = params["ELG"]["UNCERTAINTY"]["POWER_LAW_INDEX"]
            oiiflux = truth['OIIFLUX'][ii]*1e17
            zerr[ii] = sigma/(1.e-9+oiiflux**powerlawindex)*(1.+truez[ii])
            zout[ii] += np.random.normal(scale=zerr[ii])
            log.info("ELG sigma={:6.5f} index={:3.2f} median zerr={:6.5f}".format(sigma, powerlawindex, np.median(zerr[ii])))

        elif objtype == 'LRG':
            sigma = params["LRG"]["UNCERTAINTY"]["SIGMA_17"]
            powerlawindex = params["LRG"]["UNCERTAINTY"]["POWER_LAW_INDEX"]
            zerr[ii] = sigma/(1.e-9+true_rflux[ii]**powerlawindex)*(1.+truez[ii])
            zout[ii] += np.random.normal(scale=zerr[ii])
            log.info("LRG sigma={:6.5f} index={:3.2f} median zerr={:6.5f}".format(sigma, powerlawindex, np.median(zerr[ii])))

        elif objtype == 'QSO':
            zsplit = params['QSO_ZSPLIT']
            sigma = params["LOWZ_QSO"]["UNCERTAINTY"]["SIGMA_17"]
            powerlawindex = params["LOWZ_QSO"]["UNCERTAINTY"]["POWER_LAW_INDEX"]
            jj = ii & (truth['TRUEZ'] <= zsplit)
            zerr[jj] = sigma/(1.e-9+(true_rflux[jj])**powerlawindex)*(1.+truez[jj])
            log.info("LOWZ QSO sigma={:6.5f} index={:3.2f} median zerr={:6.5f}".format(sigma, powerlawindex, np.median(zerr[jj])))

            sigma = params["LYA_QSO"]["UNCERTAINTY"]["SIGMA_17"]
            powerlawindex = params["LYA_QSO"]["UNCERTAINTY"]["POWER_LAW_INDEX"]
            jj = ii & (truth['TRUEZ'] > zsplit)
            zerr[jj] = sigma/(1.e-9+(true_rflux[jj])**powerlawindex)*(1.+truez[jj])

            if np.count_nonzero(jj) > 0:
                log.info("LYA QSO sigma={:6.5f} index={:3.2f} median zerr={:6.5f}".format(
                    sigma, powerlawindex, np.median(zerr[jj])))
            else:
                log.warning("No LyA QSO generated")

            zout[ii] += np.random.normal(scale=zerr[ii])

        elif objtype in _sigma_v.keys():
            log.info("{} use constant sigmav = {} km/s".format(objtype, _sigma_v[objtype]))
            ii = (simtype == objtype)
            zerr[ii] = _sigma_v[objtype] * (1+truez[ii]) / c
            zout[ii] += np.random.normal(scale=zerr[ii])

        else:
            log.info("{} no redshift error model, will use truth".format(objtype))

        ###################################
        # redshift efficiencies
        ###################################
        # Set ZWARN flags for some targets
        # the redshift efficiency only sets warning, but does not impact
        # the redshift value and its error.
        was_observed, goodz_prob = get_redshift_efficiency(
            objtype, targets[ii], truth[ii], targets_in_tile,
            obsconditions=obsconditions, params=params,
            ignore_obscondition=ignore_obscondition)

        n = np.sum(ii)
        assert len(was_observed) == n
        assert len(goodz_prob) == n
        r = np.random.random(len(was_observed))
        zwarn[ii] = 4 * (r > goodz_prob) * was_observed

        ###################################
        # catastrophic failures
        ###################################
        zlim = [0., 3.5]
        cata_fail_fraction = np.zeros(n)
        if objtype == "ELG":
            cata_fail_fraction[:] = params["ELG"]["FAILURE_RATE"]
            zlim = [0.6, 1.7]
        elif objtype == "LRG":
            cata_fail_fraction[:] = params["LRG"]["FAILURE_RATE"]
            zlim = [0.5, 1.1]
        elif objtype == "QSO":
            zsplit = params["QSO_ZSPLIT"]
            cata_fail_fraction[truth['TRUEZ'][ii] <= zsplit] = params["LOWZ_QSO"]["FAILURE_RATE"]
            cata_fail_fraction[truth['TRUEZ'][ii] > zsplit] = params["LYA_QSO"]["FAILURE_RATE"]
            zlim = [0.5, 3.5]
        elif objtype in _cata_fail_fraction:
            cata_fail_fraction[:] = _cata_fail_fraction[objtype]

        failed = (np.random.uniform(size=n) < cata_fail_fraction) & (zwarn[ii] == 0)
        failed_indices = np.where(ii)[0][failed]
        log.info("{} n_failed/n_tot={}/{}={:4.3f}".format(objtype, failed_indices.size, n, failed_indices.size/float(n)))
        zout[failed_indices] = np.random.uniform(zlim[0], zlim[1], failed_indices.size)

    return zout, zerr, zwarn
def get_median_obsconditions(tileids):
    """Gets the observational conditions for a set of tiles.

    Args:
        tileids : list of tileids that were observed

    Returns:
        Table with the observational conditions for every tile.

        It includes at least the following columns::

           'TILEID': array of tile IDs
           'AIRMASS': array of airmass values on a tile
           'EBMV': array of E(B-V) values on a tile
           'LINTRANS': array of atmospheric transparency during spectro obs; floats [0-1]
           'MOONFRAC': array of moonfraction values on a tile.
           'SEEING': array of FWHM seeing during spectroscopic observation on a tile.
    """
    #- Load standard DESI tiles and trim to this list of tileids
    import desimodel.io
    tiles = desimodel.io.load_tiles()
    tileids = np.asarray(tileids)
    ii = np.in1d(tiles['TILEID'], tileids)

    tiles = tiles[ii]
    assert len(tiles) == len(tileids)

    #- Sort tiles to match order of tileids
    i = np.argsort(tileids)
    j = np.argsort(tiles['TILEID'])
    k = np.argsort(i)

    tiles = tiles[j[k]]
    assert np.all(tiles['TILEID'] == tileids)

    #- fix type bug after reading desi-tiles.fits
    if tiles['OBSCONDITIONS'].dtype == np.float64:
        tiles = Table(tiles)
        tiles.replace_column('OBSCONDITIONS', tiles['OBSCONDITIONS'].astype(int))

    n = len(tileids)

    obsconditions = Table()
    obsconditions['TILEID'] = tileids
    obsconditions['AIRMASS'] = tiles['AIRMASS']
    obsconditions['EBMV'] = tiles['EBV_MED']
    obsconditions['LINTRANS'] = np.ones(n)
    obsconditions['SEEING'] = np.ones(n) * 1.1

    #- Add lunar conditions, defaulting to dark time
    from desitarget.targetmask import obsconditions as obsbits
    obsconditions['MOONFRAC'] = np.zeros(n)
    obsconditions['MOONALT'] = -20.0 * np.ones(n)
    obsconditions['MOONSEP'] = 180.0 * np.ones(n)

    ii = (tiles['OBSCONDITIONS'] & obsbits.GRAY) != 0
    obsconditions['MOONFRAC'][ii] = 0.1
    obsconditions['MOONALT'][ii] = 10.0
    obsconditions['MOONSEP'][ii] = 60.0

    ii = (tiles['OBSCONDITIONS'] & obsbits.BRIGHT) != 0
    obsconditions['MOONFRAC'][ii] = 0.7
    obsconditions['MOONALT'][ii] = 60.0
    obsconditions['MOONSEP'][ii] = 50.0

    return obsconditions
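
#- Example (hypothetical tile IDs, which must exist in desi-tiles.fits):
#-     obsconditions = get_median_obsconditions([1000, 1001])
#- returns a 2-row Table with dark-time defaults (MOONFRAC=0, MOONALT=-20,
#- MOONSEP=180, SEEING=1.1, LINTRANS=1) unless a tile is flagged GRAY or BRIGHT.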
def quickcat(tilefiles, targets, truth, fassignhdu='FIBERASSIGN', zcat=None, obsconditions=None, perfect=False):
    """
    Generates quick output zcatalog

    Args:
        tilefiles : list of fiberassign tile files that were observed
        targets : astropy Table of targets
        truth : astropy Table of input truth with columns TARGETID, TRUEZ, and TRUETYPE
        zcat (optional): input zcatalog Table from previous observations
        obsconditions (optional): Table or ndarray with observing conditions from surveysim
        perfect (optional): if True, treat spectro pipeline as perfect with input=output,
            otherwise add noise and zwarn!=0 flags

    Returns:
        zcatalog astropy Table based upon input truth, plus ZERR, ZWARN,
        NUMOBS, and TYPE columns
    """
    #- convert to Table for easier manipulation
    if not isinstance(truth, Table):
        truth = Table(truth)

    #- Count how many times each target was observed for this set of tiles
    log.info('{} QC Reading {} tiles'.format(asctime(), len(tilefiles)))
    nobs = Counter()
    targets_in_tile = {}
    tileids = list()
    for infile in tilefiles:
        fibassign, header = fits.getdata(infile, fassignhdu, header=True)

        # hack needed here rnc 7/26/18
        if 'TILEID' in header:
            tileidnew = header['TILEID']
        else:
            fnew = infile.split('/')[-1]
            tileidnew = fnew.split("_")[-1]
            tileidnew = int(tileidnew[:-5])
            log.error('TILEID missing from {} header'.format(fnew))
            log.error('{} -> TILEID {}'.format(infile, tileidnew))

        tileids.append(tileidnew)

        ii = (fibassign['TARGETID'] != -1)  #- targets with assignments
        nobs.update(fibassign['TARGETID'][ii])
        targets_in_tile[tileidnew] = fibassign['TARGETID'][ii]

    #- Trim obsconditions to just the tiles that were observed
    if obsconditions is not None:
        ii = np.in1d(obsconditions['TILEID'], tileids)
        if np.any(ii == False):
            obsconditions = obsconditions[ii]
        assert len(obsconditions) > 0

    #- Sort obsconditions to match order of tiles
    #- This might not be needed, but is fast for O(20k) tiles and may
    #- prevent future surprises if code expects them to be row aligned
    tileids = np.array(tileids)
    if (obsconditions is not None) and \
       (np.any(tileids != obsconditions['TILEID'])):
        i = np.argsort(tileids)
        j = np.argsort(obsconditions['TILEID'])
        k = np.argsort(i)
        obsconditions = obsconditions[j[k]]
        assert np.all(tileids == obsconditions['TILEID'])

    #- Trim truth down to just ones that have already been observed
    log.info('{} QC Trimming truth to just observed targets'.format(asctime()))
    obs_targetids = np.array(list(nobs.keys()))
    iiobs = np.in1d(truth['TARGETID'], obs_targetids)
    truth = truth[iiobs]
    targets = targets[iiobs]

    #- Construct initial new z catalog
    log.info('{} QC Constructing new redshift catalog'.format(asctime()))
    newzcat = Table()
    newzcat['TARGETID'] = truth['TARGETID']
    if 'BRICKNAME' in truth.dtype.names:
        newzcat['BRICKNAME'] = truth['BRICKNAME']
    else:
        newzcat['BRICKNAME'] = np.zeros(len(truth), dtype=(str, 8))

    #- Copy TRUETYPE -> SPECTYPE so that we can change without altering original
    newzcat['SPECTYPE'] = truth['TRUESPECTYPE'].copy()

    #- Add ZERR and ZWARN
    log.info('{} QC Adding ZERR and ZWARN'.format(asctime()))
    nz = len(newzcat)
    if perfect:
        newzcat['Z'] = truth['TRUEZ'].copy()
        newzcat['ZERR'] = np.zeros(nz, dtype=np.float32)
        newzcat['ZWARN'] = np.zeros(nz, dtype=np.int32)
    else:
        # get the observational conditions for the current tilefiles
        if obsconditions is None:
            obsconditions = get_median_obsconditions(tileids)

        # get the redshifts
        z, zerr, zwarn = get_observed_redshifts(targets, truth, targets_in_tile, obsconditions)
        newzcat['Z'] = z  #- update with noisy redshift
        newzcat['ZERR'] = zerr
        newzcat['ZWARN'] = zwarn

    #- Add numobs column
    log.info('{} QC Adding NUMOBS column'.format(asctime()))
    newzcat.add_column(Column(name='NUMOBS', length=nz, dtype=np.int32))
    for i in range(nz):
        newzcat['NUMOBS'][i] = nobs[newzcat['TARGETID'][i]]

    #- Merge previous zcat with newzcat
    log.info('{} QC Merging previous zcat'.format(asctime()))
    if zcat is not None:
        #- don't modify original
        #- Note: this uses copy on write for the columns to be memory
        #- efficient while still letting us modify a column if needed
        zcat = zcat.copy()

        # needed to have the same ordering both in zcat and newzcat
        # to ensure consistent use of masks from np.in1d()
        zcat.sort(keys='TARGETID')
        newzcat.sort(keys='TARGETID')

        #- targets that are in both zcat and newzcat
        repeats = np.in1d(zcat['TARGETID'], newzcat['TARGETID'])

        #- update numobs in both zcat and newzcat
        ii = np.in1d(newzcat['TARGETID'], zcat['TARGETID'][repeats])
        orig_numobs = zcat['NUMOBS'][repeats].copy()
        new_numobs = newzcat['NUMOBS'][ii].copy()
        zcat['NUMOBS'][repeats] += new_numobs
        newzcat['NUMOBS'][ii] += orig_numobs

        #- replace only repeats that had ZWARN flags in original zcat
        #- replace in new
        replace = repeats & (zcat['ZWARN'] != 0)
        jj = np.in1d(newzcat['TARGETID'], zcat['TARGETID'][replace])
        zcat[replace] = newzcat[jj]

        #- trim newzcat to ones that shouldn't override original zcat
        discard = np.in1d(newzcat['TARGETID'], zcat['TARGETID'])
        newzcat = newzcat[~discard]

        #- Should be non-overlapping now
        assert np.all(np.in1d(zcat['TARGETID'], newzcat['TARGETID']) == False)

        #- merge them
        newzcat = vstack([zcat, newzcat])

        #- check for duplicates
        targetids = newzcat['TARGETID']
        assert len(np.unique(targetids)) == len(targetids)

    #- Metadata for header
    newzcat.meta['EXTNAME'] = 'ZCATALOG'

    #newzcat.sort(keys='TARGETID')

    log.info('{} QC done'.format(asctime()))
    return newzcat
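
#- Illustrative incremental usage (hypothetical variables): passing the catalog
#- from a previous pass as zcat accumulates NUMOBS and replaces rows whose
#- previous fit had ZWARN != 0 with the new results:
#-
#-     zcat2 = quickcat(new_tilefiles, targets, truth, zcat=zcat1)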