Source code for radiocatalogs.radcatdb

"""Tables corresponding to each sky survey catalog are 
stored in a separate schema called "radcat" within the 
same PostgreSQL database as the VLITE data. This
module contains the functions to create the schema
and tables.

"""
import os
import sys
import logging
import psycopg2
from psycopg2 import sql
import catalogio

# create logger
radcatdb_logger = logging.getLogger('vdp.radiocatalogs.radcatdb')


[docs]def index(tblname, conn):
    """Indexes and clusters the table using Q3C.

    Parameters
    ----------
    tblname : str
        Name of the database table.
    conn : ``psycopg2.extensions.connect`` instance
        The PostgreSQL database connection object.
    """
    cur = conn.cursor()

    # Just in case...
    tblname = tblname.lower()
        
    cur.execute(psycopg2.sql.SQL('''CREATE INDEX ON radcat.{} 
        (q3c_ang2ipix(ra, dec))''').format(psycopg2.sql.Identifier(tblname)))
    cur.execute(psycopg2.sql.SQL('CLUSTER {} ON radcat.{}').format(
        psycopg2.sql.Identifier(tblname + '_q3c_ang2ipix_idx'),
        psycopg2.sql.Identifier(tblname)))
    cur.execute(psycopg2.sql.SQL('ANALYZE radcat.{}').format(
        psycopg2.sql.Identifier(tblname)))

    conn.commit()
    cur.close()


[docs]def add_table(tblname, conn):
    """Copies sky survey sources into a new table
    in the "radcat" schema from a text file. If the
    text file does not yet exist, the sources are
    read from the original catalog, initialized as
    CatalogSource objects, and written to the text file.

    Parameters
    ----------
    tblname : str
        Name of the table/catalog from which the source
        originated.
    conn : ``psycopg2.extensions.connect`` instance
        The PostgreSQL database connection object.
    """
    cur = conn.cursor()

    # force to all lowercase, just in case
    tblname = tblname.lower()
   
    # Create the table
    cur.execute(psycopg2.sql.SQL(
        '''
        CREATE TABLE IF NOT EXISTS radcat.{} (
            id INTEGER NOT NULL,
            name TEXT,
            ra REAL,
            e_ra REAL,
            dec REAL,
            e_dec REAL,
            total_flux REAL,
            e_total_flux REAL,
            peak_flux REAL,
            e_peak_flux REAL,
            maj REAL,
            e_maj REAL,
            min REAL,
            e_min REAL,
            pa REAL,
            e_pa REAL,
            rms REAL,
            field TEXT,
            catalog_id INTEGER,
            PRIMARY KEY (id),
            FOREIGN KEY (catalog_id)
              REFERENCES radcat.catalogs (id)
              ON UPDATE CASCADE
              ON DELETE CASCADE
        );
        ''').format(psycopg2.sql.Identifier(tblname)))

    # Read the sky catalog
    if tblname == 'tgss':
        sources = catalogio.read_tgss()
    elif tblname == 'nvss':
        sources = catalogio.read_nvss()
    elif tblname == 'first':
        sources = catalogio.read_first()
    elif tblname == 'sumss':
        sources = catalogio.read_sumss()
    elif tblname == 'wenss':
        sources = catalogio.read_wenss()
    elif tblname == 'gleam':
        sources = catalogio.read_gleam()
    elif tblname == 'cosmos':
        sources = catalogio.read_cosmos()
    elif tblname == 'vlssr':
        sources = catalogio.read_vlssr()
    elif tblname == 'txs':
        sources = catalogio.read_txs()
    elif tblname == 'sevenc':
        sources = catalogio.read_sevenc()
    elif tblname == 'gpsr5':
        sources = catalogio.read_gpsr5()
    elif tblname == 'gpsr1':
        sources = catalogio.read_gpsr1()
    elif tblname == 'nordgc':
        sources = catalogio.read_nordgc()
    elif tblname == 'lazio04':
        sources = catalogio.read_lazio04()
    elif tblname == 'lotss':
        sources = catalogio.read_lotss()
    elif tblname == 'm31_glg04':
        sources = catalogio.read_m31_glg04()
    elif tblname == 'lofar_hba':
        sources = catalogio.read_lofar_hba()
    elif tblname == 'lofar_lba':
        sources = catalogio.read_lofar_lba()
    elif tblname == 'nrl_nvss':
        sources = catalogio.read_nrl_nvss()
    else:
        radcatdb_logger.error('ERROR: No function to read catalog {}.'.
                              format(tblname))
        sys.exit(0)

    radcatdb_logger.info(' -- reading/inserting {} sources into database'.
                         format(tblname))
        
    # Add catalog to catalogs table
    sql = '''INSERT INTO radcat.catalogs (
        id, name, telescope, frequency, resolution, reference) VALUES (
        %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING'''
    cur.execute(sql, (catalogio.catalog_dict[tblname]['id'], tblname,
                      catalogio.catalog_dict[tblname]['telescope'],
                      catalogio.catalog_dict[tblname]['frequency'],
                      catalogio.catalog_dict[tblname]['resolution'],
                      catalogio.catalog_dict[tblname]['reference']))

    # OLD WAY: this way took too long
    """
    # Add sources to table
    for src in sources:
        src.catalog_id = catid
        sql = '''INSERT INTO radcat.{} (
            name, ra, e_ra, dec, e_dec, total_flux, e_total_flux,
            peak_flux, e_peak_flux, maj, e_maj, min, e_min, pa, e_pa,
            rms, field, catalog_id) 
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
            %s, %s, %s, %s, %s)'''
        vals = (src.name, src.ra, src.e_ra, src.dec, src.e_dec,
                src.total_flux, src.e_total_flux, src.peak_flux,
                src.e_peak_flux, src.maj, src.e_maj, src.min, src.e_min,
                src.pa, src.e_pa, src.rms, src.field, src.catalog_id)
        cur.execute(psycopg2.sql.SQL(sql).format(
            psycopg2.sql.Identifier(tblname)), vals)
    """

    # This is waaaaaaay faster (~10x)
    fname = tblname + '_psql.txt'
    psqlf = os.path.join(catalogio.catalogdir, fname)
    fullname = 'radcat.' + tblname
    with open(psqlf, 'r') as f:
        cur.copy_from(f, fullname, sep=' ', null='None')

    conn.commit()
    cur.close()
    

[docs]def create(conn):
    """Creates the "radcat" schema and catalogs 
    table within that schema. Calls the functions
    ``add_table`` and ``index`` for each catalog in the
    list from ``catalogio.catalog_dict.keys()``.

    Parameters
    ----------
    conn : ``psycopg2.extensions.connect`` instance
        The PostgreSQL database connection object.    
    """
    cur = conn.cursor()
    sql = (
        '''
        CREATE SCHEMA IF NOT EXISTS radcat;
        
        CREATE TABLE IF NOT EXISTS radcat.catalogs (
            id INTEGER NOT NULL,
            name TEXT,
            telescope TEXT,
            frequency REAL,
            resolution REAL,
            reference TEXT,
            PRIMARY KEY (id)
        );
        ''')
    cur.execute(sql)

    tables = catalogio.catalog_dict.keys()
    atleastone = False
    for table in tables:
        cur.execute('''SELECT EXISTS(SELECT 1 FROM information_schema.tables
            WHERE table_schema = 'radcat' AND table_name = %s)''',
                    (table, ))
        if not cur.fetchone()[0]:
            atleastone = True
            radcatdb_logger.info('Adding sources from {}'.format(table))
            add_table(table, conn)
            index(table, conn)
        else:
            continue

    if not atleastone:
        print
        radcatdb_logger.info('No new sky catalogs to add.')

    cur.close