Postgres Integration for Plots

Connect PostgreSQL databases to Plots. Create graphical dashboards that allow scientists to manipulate, visualize and integrate data.

1/ Configure and Create a PostgresConnection

Create a dedicated plot transform cell that constructs configuration and connection objects.

from sqlalchemy import create_engine
from dataclasses import dataclass
from contextlib import contextmanager
import pandas as pd
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class PostgresConfig:
    host: str
    database: str
    user: str
    password: str
    port: int = 5432
    
    @classmethod
    def build(cls) -> 'PostgresConfig':
        """Create config from environment variables"""
        return cls(
            host="localhost",
            database="some_db",
            user="postgres_user",
            password="secret",
            port=5432
        )

@contextmanager
def postgres_connection(config: PostgresConfig):
    engine = None
    try:
        connection_string = (
            f"postgresql://{config.user}:{config.password}@"
            f"{config.host}:{config.port}/{config.database}"
        )
        engine = create_engine(connection_string)
        conn = engine.connect()
        yield conn
    except Exception as e:
        logger.error(f"Error connecting to PostgreSQL: {str(e)}")
        raise
    finally:
        if engine:
            engine.dispose()

2/ Allow scientists to query data with graphical widgets

Once you have established a connection, you can allow scientists to query, manipulate and visualize their own tables of data from PostgreSQL using graphical query builders.

See an example below with a table of synthetic barcode counts. Scientists can retrieve counts and barcodes based on date ranges and sample IDs:

from lplots.widgets.text import w_text_input
from lplots.widgets.multiselect import w_multi_select
from datetime import datetime

start_date = w_text_input(
    label="Start Date",
    default="2023-01-01",
    appearance={"help_text": "Enter start date in YYYY-MM-DD format"}
)

end_date = w_text_input(
    label="End Date",
    default="2023-01-31",
    appearance={"help_text": "Enter end date in YYYY-MM-DD format"}
)

sample_ids = w_multi_select(
    label="Sample IDs",
    options=['SAMPLE_001_A1', 'SAMPLE_001_A2', 'SAMPLE_001_A3'],
    appearance={"help_text": "Select one or more sample IDs"}
)

barcode_ids = w_multi_select(
    label="Barcode IDs",
    options=['BC000001', 'BC000002', 'BC000003'],
    appearance={"help_text": "Select one or more barcode IDs"}
)

limit = w_text_input(
    label="Query Limit",
    default="1000",
    appearance={"help_text": "Enter the maximum number of rows to return"}
)

# PostgreSQL uses different parameter style (%s instead of %(name)s)
base_query = """
SELECT 
    sample_id,
    barcode_id,
    count,
    run_id,
    experiment_date
FROM barcode_counts
WHERE experiment_date BETWEEN %s AND %s
"""

params = [start_date.value, end_date.value]

if sample_ids.value:
    base_query += " AND sample_id = ANY(%s)"
    params.append(sample_ids.value)
    
if barcode_ids.value:
    base_query += " AND barcode_id = ANY(%s)"
    params.append(barcode_ids.value)

base_query += """
ORDER BY experiment_date, sample_id
LIMIT %s
"""
params.append(int(limit.value))

with postgres_connection(PostgresConfig.build()) as conn:
    df = pd.read_sql(base_query, conn, params=params)

df