Postgres Integration for Plots
Connect PostgreSQL databases to Plots. Create graphical dashboards that allow scientists to manipulate, visualize and integrate data.
1/ Configure and Create a PostgresConnection
Create a dedicated plot transform cell that constructs configuration and connection objects.
from sqlalchemy import create_engine
from dataclasses import dataclass
from contextlib import contextmanager
import pandas as pd
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class PostgresConfig:
host: str
database: str
user: str
password: str
port: int = 5432
@classmethod
def build(cls) -> 'PostgresConfig':
"""Create config from environment variables"""
return cls(
host="localhost",
database="some_db",
user="postgres_user",
password="secret",
port=5432
)
@contextmanager
def postgres_connection(config: PostgresConfig):
engine = None
try:
connection_string = (
f"postgresql://{config.user}:{config.password}@"
f"{config.host}:{config.port}/{config.database}"
)
engine = create_engine(connection_string)
conn = engine.connect()
yield conn
except Exception as e:
logger.error(f"Error connecting to PostgreSQL: {str(e)}")
raise
finally:
if engine:
engine.dispose()
Once you have established a connection, you can allow scientists to query, manipulate and visualize their own tables of data from PostgreSQL using graphical query builders.
See an example below with a table of synthetic barcode counts. Scientists can retrieve counts and barcodes based on date ranges and sample IDs:
from lplots.widgets.text import w_text_input
from lplots.widgets.multiselect import w_multi_select
from datetime import datetime
start_date = w_text_input(
label="Start Date",
default="2023-01-01",
appearance={"help_text": "Enter start date in YYYY-MM-DD format"}
)
end_date = w_text_input(
label="End Date",
default="2023-01-31",
appearance={"help_text": "Enter end date in YYYY-MM-DD format"}
)
sample_ids = w_multi_select(
label="Sample IDs",
options=['SAMPLE_001_A1', 'SAMPLE_001_A2', 'SAMPLE_001_A3'],
appearance={"help_text": "Select one or more sample IDs"}
)
barcode_ids = w_multi_select(
label="Barcode IDs",
options=['BC000001', 'BC000002', 'BC000003'],
appearance={"help_text": "Select one or more barcode IDs"}
)
limit = w_text_input(
label="Query Limit",
default="1000",
appearance={"help_text": "Enter the maximum number of rows to return"}
)
# PostgreSQL uses different parameter style (%s instead of %(name)s)
base_query = """
SELECT
sample_id,
barcode_id,
count,
run_id,
experiment_date
FROM barcode_counts
WHERE experiment_date BETWEEN %s AND %s
"""
params = [start_date.value, end_date.value]
if sample_ids.value:
base_query += " AND sample_id = ANY(%s)"
params.append(sample_ids.value)
if barcode_ids.value:
base_query += " AND barcode_id = ANY(%s)"
params.append(barcode_ids.value)
base_query += """
ORDER BY experiment_date, sample_id
LIMIT %s
"""
params.append(int(limit.value))
with postgres_connection(PostgresConfig.build()) as conn:
df = pd.read_sql(base_query, conn, params=params)
df