Source code for radiocatalogs.radcatdb

"""Tables corresponding to each sky survey catalog are 
stored in a separate schema called "radcat" within the 
same PostgreSQL database as the VLITE data. This
module contains the functions to create the schema
and tables.

"""
import os
import sys
import logging
import psycopg2
from psycopg2 import sql
import catalogio

# create logger
radcatdb_logger = logging.getLogger('vdp.radiocatalogs.radcatdb')


[docs]def index(tblname, conn): """Indexes and clusters the table using Q3C. Parameters ---------- tblname : str Name of the database table. conn : ``psycopg2.extensions.connect`` instance The PostgreSQL database connection object. """ cur = conn.cursor() # Just in case... tblname = tblname.lower() cur.execute(psycopg2.sql.SQL('''CREATE INDEX ON radcat.{} (q3c_ang2ipix(ra, dec))''').format(psycopg2.sql.Identifier(tblname))) cur.execute(psycopg2.sql.SQL('CLUSTER {} ON radcat.{}').format( psycopg2.sql.Identifier(tblname + '_q3c_ang2ipix_idx'), psycopg2.sql.Identifier(tblname))) cur.execute(psycopg2.sql.SQL('ANALYZE radcat.{}').format( psycopg2.sql.Identifier(tblname))) conn.commit() cur.close()
[docs]def add_table(tblname, conn): """Copies sky survey sources into a new table in the "radcat" schema from a text file. If the text file does not yet exist, the sources are read from the original catalog, initialized as CatalogSource objects, and written to the text file. Parameters ---------- tblname : str Name of the table/catalog from which the source originated. conn : ``psycopg2.extensions.connect`` instance The PostgreSQL database connection object. """ cur = conn.cursor() # force to all lowercase, just in case tblname = tblname.lower() # Create the table cur.execute(psycopg2.sql.SQL( ''' CREATE TABLE IF NOT EXISTS radcat.{} ( id INTEGER NOT NULL, name TEXT, ra REAL, e_ra REAL, dec REAL, e_dec REAL, total_flux REAL, e_total_flux REAL, peak_flux REAL, e_peak_flux REAL, maj REAL, e_maj REAL, min REAL, e_min REAL, pa REAL, e_pa REAL, rms REAL, field TEXT, catalog_id INTEGER, PRIMARY KEY (id), FOREIGN KEY (catalog_id) REFERENCES radcat.catalogs (id) ON UPDATE CASCADE ON DELETE CASCADE ); ''').format(psycopg2.sql.Identifier(tblname))) # Read the sky catalog if tblname == 'tgss': sources = catalogio.read_tgss() elif tblname == 'nvss': sources = catalogio.read_nvss() elif tblname == 'first': sources = catalogio.read_first() elif tblname == 'sumss': sources = catalogio.read_sumss() elif tblname == 'wenss': sources = catalogio.read_wenss() elif tblname == 'gleam': sources = catalogio.read_gleam() elif tblname == 'cosmos': sources = catalogio.read_cosmos() elif tblname == 'vlssr': sources = catalogio.read_vlssr() elif tblname == 'txs': sources = catalogio.read_txs() elif tblname == 'sevenc': sources = catalogio.read_sevenc() elif tblname == 'gpsr5': sources = catalogio.read_gpsr5() elif tblname == 'gpsr1': sources = catalogio.read_gpsr1() elif tblname == 'nordgc': sources = catalogio.read_nordgc() elif tblname == 'lazio04': sources = catalogio.read_lazio04() elif tblname == 'lotss': sources = catalogio.read_lotss() elif tblname == 'm31_glg04': sources = catalogio.read_m31_glg04() elif tblname == 'lofar_hba': sources = catalogio.read_lofar_hba() elif tblname == 'lofar_lba': sources = catalogio.read_lofar_lba() elif tblname == 'nrl_nvss': sources = catalogio.read_nrl_nvss() else: radcatdb_logger.error('ERROR: No function to read catalog {}.'. format(tblname)) sys.exit(0) radcatdb_logger.info(' -- reading/inserting {} sources into database'. format(tblname)) # Add catalog to catalogs table sql = '''INSERT INTO radcat.catalogs ( id, name, telescope, frequency, resolution, reference) VALUES ( %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING''' cur.execute(sql, (catalogio.catalog_dict[tblname]['id'], tblname, catalogio.catalog_dict[tblname]['telescope'], catalogio.catalog_dict[tblname]['frequency'], catalogio.catalog_dict[tblname]['resolution'], catalogio.catalog_dict[tblname]['reference'])) # OLD WAY: this way took too long """ # Add sources to table for src in sources: src.catalog_id = catid sql = '''INSERT INTO radcat.{} ( name, ra, e_ra, dec, e_dec, total_flux, e_total_flux, peak_flux, e_peak_flux, maj, e_maj, min, e_min, pa, e_pa, rms, field, catalog_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''' vals = (src.name, src.ra, src.e_ra, src.dec, src.e_dec, src.total_flux, src.e_total_flux, src.peak_flux, src.e_peak_flux, src.maj, src.e_maj, src.min, src.e_min, src.pa, src.e_pa, src.rms, src.field, src.catalog_id) cur.execute(psycopg2.sql.SQL(sql).format( psycopg2.sql.Identifier(tblname)), vals) """ # This is waaaaaaay faster (~10x) fname = tblname + '_psql.txt' psqlf = os.path.join(catalogio.catalogdir, fname) fullname = 'radcat.' + tblname with open(psqlf, 'r') as f: cur.copy_from(f, fullname, sep=' ', null='None') conn.commit() cur.close()
[docs]def create(conn): """Creates the "radcat" schema and catalogs table within that schema. Calls the functions ``add_table`` and ``index`` for each catalog in the list from ``catalogio.catalog_dict.keys()``. Parameters ---------- conn : ``psycopg2.extensions.connect`` instance The PostgreSQL database connection object. """ cur = conn.cursor() sql = ( ''' CREATE SCHEMA IF NOT EXISTS radcat; CREATE TABLE IF NOT EXISTS radcat.catalogs ( id INTEGER NOT NULL, name TEXT, telescope TEXT, frequency REAL, resolution REAL, reference TEXT, PRIMARY KEY (id) ); ''') cur.execute(sql) tables = catalogio.catalog_dict.keys() atleastone = False for table in tables: cur.execute('''SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = 'radcat' AND table_name = %s)''', (table, )) if not cur.fetchone()[0]: atleastone = True radcatdb_logger.info('Adding sources from {}'.format(table)) add_table(table, conn) index(table, conn) else: continue if not atleastone: print radcatdb_logger.info('No new sky catalogs to add.') cur.close