Source code for desisim.qso_template.fit_boss_qsos

"""
desisim.qso_template.fit_boss_qsos
==================================

Module for Fitting PCA to the BOSS QSOs

01-Dec-2014 by JXP
"""
from __future__ import print_function, absolute_import, division

import numpy as np
import os
import multiprocessing as mp

from astropy.io import fits

flg_xdb = True
try:
    from xastropy.xutils import xdebug as xdb
except ImportError:
    flg_xdb = False

#
[docs]def read_qso_eigen(eigen_fil=None): ''' Input the QSO Eigenspectra ''' # File if eigen_fil is None: eigen_fil = os.environ.get('IDLSPEC2D_DIR')+'/templates/spEigenQSO-55732.fits' print('Using these eigen spectra: {:s}'.format(eigen_fil)) hdu = fits.open(eigen_fil) eigen = hdu[0].data head = hdu[0].header # Rest wavelength eigen_wave = 10.**(head['COEFF0'] + np.arange(head['NAXIS1'])*head['COEFF1']) # Return return eigen, eigen_wave
##
[docs]def fit_eigen(flux,ivar,eigen_flux): ''' Fit the spectrum with the eigenvectors. Pass back the coefficients ''' #C = np.diag(1./ivar) Cinv = np.diag(ivar) A = eigen_flux.T #alpha = np.dot(A.T, np.linalg.solve(C, A)) # Numerical Recipe notation alpha = np.dot(A.T, np.dot(Cinv,A)) cov = np.linalg.inv(alpha) #beta = np.dot(A.T, np.linalg.solve(C, y)) beta = np.dot(A.T, np.dot(Cinv, flux)) acoeff = np.dot(cov, beta) # Return return acoeff
##
[docs]def do_boss_lya_parallel(istart, iend, cut_Lya, output, debug=False): ''' Generate PCA coeff for the BOSS Lya DR10 dataset, v2.1 Parameters ---------- cut_Lya: boolean (True) Avoid using the Lya forest in the analysis ''' # Eigen eigen, eigen_wave = read_qso_eigen() # Open the BOSS catalog file boss_cat_fil = os.environ.get('BOSSPATH')+'/DR10/BOSSLyaDR10_cat_v2.1.fits.gz' bcat_hdu = fits.open(boss_cat_fil) t_boss = bcat_hdu[1].data nqso = len(t_boss) pca_val = np.zeros((iend-istart, 4)) if cut_Lya is False: print('do_boss: Not cutting the Lya Forest in the fit') # Loop us -- Should spawn on multiple CPU #for ii in range(nqso): datdir = os.environ.get('BOSSPATH')+'/DR10/BOSSLyaDR10_spectra_v2.1/' jj = 0 print('istart = {:d}'.format(istart)) for ii in range(istart,iend): if (ii % 100) == 0: print('ii = {:d}'.format(ii)) #print('ii = {:d}'.format(ii)) # Spectrum file pnm = str(t_boss['PLATE'][ii]) fnm = str(t_boss['FIBERID'][ii]).rjust(4,str('0')) mjd = str(t_boss['MJD'][ii]) sfil = datdir+pnm+'/speclya-' sfil = sfil+pnm+'-'+mjd+'-'+fnm+'.fits.gz' # Read spectrum spec_hdu = fits.open(sfil) t = spec_hdu[1].data flux = t['flux'] wave = 10.**t['loglam'] ivar = t['ivar'] zqso = t_boss['z_pipe'][ii] wrest = wave / (1+zqso) wlya = 1215.67 # Cut Lya forest? if cut_Lya is True: Ly_imn = np.argmin(np.abs(wrest-wlya)) else: Ly_imn = 0 # Pack imn = np.argmin(np.abs(wrest[Ly_imn]-eigen_wave)) npix = len(wrest[Ly_imn:]) imx = npix+imn eigen_flux = eigen[:,imn:imx] # FIT tflux = flux[Ly_imn:] tivar = ivar[Ly_imn:] acoeff = fit_eigen(tflux, ivar, eigen_flux) pca_val[jj,:] = acoeff jj += 1 # Check if debug is True: model = np.dot(eigen.T,acoeff) if flg_xdb is True: xdb.xplot(wrest, flux, xtwo=eigen_wave, ytwo=model) xdb.set_trace() #xdb.set_trace() print('Done with my subset {:d}, {:d}'.format(istart,iend)) if output is not None: output.put((istart,iend,pca_val)) #output.put(None) else: return pca_val
##
[docs]def do_sdss_lya_parallel(istart, iend, cut_Lya, output, debug=False): ''' Generate PCA coeff for the SDSS DR7 dataset, 0.5<z<2 Parameters ---------- cut_Lya: boolean (True) Avoid using the Lya forest in the analysis ''' # Eigen eigen, eigen_wave = read_qso_eigen() # Open the BOSS catalog file sdss_cat_fil = os.environ.get('SDSSPATH')+'/DR7_QSO/dr7_qso.fits.gz' bcat_hdu = fits.open(sdss_cat_fil) t_sdss = bcat_hdu[1].data nqso = len(t_sdss) pca_val = np.zeros((iend-istart, 4)) if cut_Lya is False: print('do_sdss: Not cutting the Lya Forest in the fit') # Loop us -- Should spawn on multiple CPU #for ii in range(nqso): datdir = os.environ.get('SDSSPATH')+'/DR7_QSO/spectro/1d_26/' jj = 0 for ii in range(istart,iend): if (ii % 1000) == 0: print('SDSS ii = {:d}'.format(ii)) # Spectrum file pnm = str(t_sdss['PLATE'][ii]).rjust(4,str('0')) fnm = str(t_sdss['FIBERID'][ii]).rjust(3,str('0')) mjd = str(t_sdss['MJD'][ii]) sfil = datdir+pnm+'/1d/spSpec-' sfil = sfil+mjd+'-'+pnm+'-'+fnm+'.fit.gz' # Read spectrum spec_hdu = fits.open(sfil) head = spec_hdu[0].header iwave = head['CRVAL1'] cdelt = head['CD1_1'] t = spec_hdu[0].data flux = t[0,:] sig = t[2,:] npix = len(flux) wave = 10.**(iwave + np.arange(npix)*cdelt) ivar = np.zeros(npix) gd = np.where(sig>0.)[0] ivar[gd] = 1./sig[gd]**2 zqso = t_sdss['z'][ii] wrest = wave / (1+zqso) wlya = 1215.67 # Cut Lya forest? if cut_Lya is True: Ly_imn = np.argmin(np.abs(wrest-wlya)) else: Ly_imn = 0 # Pack imn = np.argmin(np.abs(wrest[Ly_imn]-eigen_wave)) npix = len(wrest[Ly_imn:]) imx = npix+imn eigen_flux = eigen[:,imn:imx] # FIT acoeff = fit_eigen(flux[Ly_imn:], ivar[Ly_imn:], eigen_flux) pca_val[jj,:] = acoeff jj += 1 # Check if debug is True: model = np.dot(eigen.T,acoeff) if flg_xdb is True: xdb.xplot(wrest, flux, xtwo=eigen_wave, ytwo=model) xdb.set_trace() #xdb.set_trace() print('Done with my subset {:d}, {:d}'.format(istart,iend)) if output is not None: output.put((istart,iend,pca_val)) #output.put(None) else: return pca_val
[docs]def failed_parallel(): ''' Collision with np.dot Might fix with OPENBLAS_NUM_THREADS=1 ''' flg = 0 # 0=BOSS, 1=SDSS ## ############################ # Parallel if flg == 0: boss_cat_fil = os.environ.get('BOSSPATH')+'/DR10/BOSSLyaDR10_cat_v2.1.fits.gz' bcat_hdu = fits.open(boss_cat_fil) t_boss = bcat_hdu[1].data nqso = len(t_boss) elif flg == 1: sdss_cat_fil = os.environ.get('SDSSPATH')+'/DR7_QSO/dr7_qso.fits.gz' scat_hdu = fits.open(sdss_cat_fil) t_sdss = scat_hdu[1].data nqso = len(t_sdss) outfil = 'SDSS_DR7Lya_PCA_values_nocut.fits' nqso = 40 # Testing #do_boss_lya_parallel(0,nqso, False, None,debug=False) output = mp.Queue() processes = [] nproc = 1 nsub = nqso // nproc cut_Lya = False # Setup the Processes for ii in range(nproc): # Generate istrt = ii * nsub if ii == (nproc-1): iend = nqso else: iend = (ii+1)*nsub #xdb.set_trace() if flg == 0: process = mp.Process(target=do_boss_lya_parallel, args=(istrt,iend,cut_Lya, output)) elif flg == 1: process = mp.Process(target=do_sdss_lya_parallel, args=(istrt,iend,cut_Lya, output)) processes.append(process) # Run processes for p in processes: p.start() print('Grabbing Output') results = [output.get() for p in processes] # Get process results from the output queue # Exit the completed processes print('Joining') for p in processes: p.join() xdb.set_trace() # Bring together #sorted(results, key=lambda result: result[0]) #all_is = [ir[0] for ir in results] pca_val = np.zeros((nqso, 4)) for ir in results: pca_val[ir[0]:ir[1],:] = ir[2] # Write to disk as a binary FITS table col0 = fits.Column(name='PCA0',format='E',array=pca_val[:,0]) col1 = fits.Column(name='PCA1',format='E',array=pca_val[:,1]) col2 = fits.Column(name='PCA2',format='E',array=pca_val[:,2]) col3 = fits.Column(name='PCA3',format='E',array=pca_val[:,3]) cols = fits.ColDefs([col0, col1, col2, col3]) tbhdu = fits.BinTableHDU.from_columns(cols) prihdr = fits.Header() prihdr['OBSERVER'] = 'Edwin Hubble' prihdr['COMMENT'] = "Here's some commentary about this FITS file." prihdu = fits.PrimaryHDU(header=prihdr) thdulist = fits.HDUList([prihdu, tbhdu]) if not (outfil in locals()): if cut_Lya is False: outfil = 'BOSS_DR10Lya_PCA_values_nocut.fits' else: outfil = 'BOSS_DR10Lya_PCA_values.fits' thdulist.writeto(outfil, overwrite=True) # Done #xdb.set_trace() print('All done')
# ########
[docs]def splice_fits(flg=0): ''' Splices together the various PCA fits for SDSS or BOSS flg: int (0) 0=BOSS, 1=SDSS ''' import glob from astropy.table.table import Table if flg == 0: outroot = 'Output/BOSS_DR10Lya_PCA_values_nocut' outfil = 'BOSS_DR10Lya_PCA_values_nocut.fits' elif flg ==1: outroot = 'Output/SDSS_DR7Lya_PCA_values_nocut' outfil = 'SDSS_DR7Lya_PCA_values_nocut.fits' # Get all the files files = glob.glob(outroot+'*') for ifil in files: print('Reading {:s}'.format(ifil)) hdu = fits.open(ifil) tab = Table(hdu[1].data) # if not 'full_tab' in locals(): full_tab = tab else: #xdb.set_trace() for row in tab: full_tab.add_row(row) # Write prihdr = fits.Header() if flg == 0: prihdr['PROJECT'] = 'BOSS: z>2 quasars' elif flg == 1: prihdr['PROJECT'] = 'SDSS: Meant for z<2 quasars' prihdr['COMMENT'] = 'PCA fits to the quasars' prihdu = fits.PrimaryHDU(header=prihdr) table_hdu = fits.BinTableHDU.from_columns(np.array(full_tab.filled())) thdulist = fits.HDUList([prihdu, table_hdu]) print('Writing {:s} table, with {:d} rows'.format(outfil,len(full_tab))) thdulist.writeto(outfil, overwrite=True)
## ################ if __name__ == '__main__': ''' flg: int (0) 0=BOSS, 1=SDSS ''' import sys flg = int(sys.argv[1]) istrt = int(sys.argv[2]) iend = int(sys.argv[3]) outfil = str(sys.argv[4]) cut_Lya = False # Run #do_boss_lya_parallel(0,10, False, None,debug=True) #xdb.set_trace() #do_sdss_lya_parallel(0, 10, False, None, debug=True) ## ############################ if flg == 0: pca_val = do_boss_lya_parallel(istrt,iend,cut_Lya, None) elif flg == 1: pca_val = do_sdss_lya_parallel(istrt,iend,cut_Lya, None) # Write to disk as a binary FITS table col0 = fits.Column(name='PCA0',format='E',array=pca_val[:,0]) col1 = fits.Column(name='PCA1',format='E',array=pca_val[:,1]) col2 = fits.Column(name='PCA2',format='E',array=pca_val[:,2]) col3 = fits.Column(name='PCA3',format='E',array=pca_val[:,3]) cols = fits.ColDefs([col0, col1, col2, col3]) tbhdu = fits.BinTableHDU.from_columns(cols) prihdr = fits.Header() prihdr['OBSERVER'] = 'Edwin Hubble' prihdr['COMMENT'] = "Here's some commentary about this FITS file." prihdu = fits.PrimaryHDU(header=prihdr) thdulist = fits.HDUList([prihdu, tbhdu]) thdulist.writeto(outfil, overwrite=True) # Done #xdb.set_trace() print('All done')