Skip to content

Synthetic Biology Design Checks with TypedLogic

This gallery tutorial turns a small synthetic biology design problem into composable logical checks. The same TypedLogic theory covers assembly structure, GO-driven pathway feasibility, and GO-CAM curation consistency.

The running example is sucrose utilization in E. coli: a design needs a transporter (cscB) to bring sucrose into the cell and an invertase (cscA) to convert intracellular sucrose into glucose and fructose.

Installation

This notebook uses the Clingo solver because the pathway rule is recursive and naturally evaluated as a Datalog-style fixpoint.

pip install 'typedlogic[clingo]'

Setup

The formal predicates and axioms live in synbio_theory.py next to this notebook. Keeping the theory in a normal Python module makes it reusable from tests, scripts, and notebooks.

from __future__ import annotations

from contextlib import contextmanager
from pathlib import Path
from typing import Iterable, Sequence
import os
import sys

from IPython.display import Markdown, display
from IPython.lib.display import Code
from typedlogic.integrations.solvers.clingo import ClingoSolver

EXAMPLE_DIR = Path.cwd()
if not (EXAMPLE_DIR / "synbio_theory.py").exists():
    EXAMPLE_DIR = Path.cwd() / "docs" / "examples"

THEORY_PATH = EXAMPLE_DIR / "synbio_theory.py"
sys.path.insert(0, str(EXAMPLE_DIR))

from synbio_theory import (
    AvailableMetabolite,
    CausalEdge,
    CausalRelation,
    DesignContains,
    EncodesProtein,
    FunctionCatalyzes,
    GOAspect,
    GOCAMIndividual,
    HasMolecularFunction,
    Part,
    RequiredMetabolite,
)


@contextmanager
def silence_clingo_stderr():
    """Suppress harmless Clingo messages about predicates that only appear as facts."""
    saved = os.dup(2)
    devnull = os.open(os.devnull, os.O_WRONLY)
    os.dup2(devnull, 2)
    try:
        yield
    finally:
        os.dup2(saved, 2)
        os.close(saved)
        os.close(devnull)


def infer(facts: Iterable[object], *predicate_names: str) -> dict[str, list[tuple[object, ...]]]:
    """Load the synbio theory, add facts, and return selected derived predicates."""
    solver = ClingoSolver()
    solver.load(THEORY_PATH)
    for fact in facts:
        solver.add_fact(fact)

    with silence_clingo_stderr():
        model = solver.model()

    rows = {predicate_name: [] for predicate_name in predicate_names}
    for term in model.ground_terms:
        if term.predicate in rows:
            rows[term.predicate].append(term.values)
    return {predicate: sorted(values) for predicate, values in rows.items()}


def markdown_table(headers: Sequence[str], rows: Sequence[Sequence[object]]) -> str:
    """Render a small Markdown table without adding a pandas dependency."""
    lines = ["| " + " | ".join(headers) + " |"]
    lines.append("| " + " | ".join(["---"] * len(headers)) + " |")
    for row in rows:
        lines.append("| " + " | ".join(str(value) for value in row) + " |")
    return "\n".join(lines)


def pairs(values: Sequence[tuple[object, ...]]) -> str:
    """Format binary or assembly-scoped predicate values as compact edges."""
    formatted = []
    for value in values:
        if len(value) == 2:
            left, right = value
            formatted.append(f"{left}->{right}")
        elif len(value) == 3:
            assembly, left, right = value
            formatted.append(f"{assembly}:{left}->{right}")
        else:
            formatted.append("->".join(str(part) for part in value))
    return ", ".join(formatted) or "-"


def csv(values: Sequence[object]) -> str:
    """Format a short sequence for a Markdown table cell."""
    return ", ".join(str(value) for value in values) or "-"
Code(filename=str(THEORY_PATH), language="python")
"""Synthetic biology design checks for the gallery notebook."""

# ruff: noqa: S101
#
# The theory is intentionally small, but it spans three practical layers:
# 1. Assembly structure checks for compatible Golden Gate-style overhangs.
# 2. GO-driven pathway reachability for engineered metabolic functions.
# 3. GO-CAM curation checks for causal edges between molecular functions.

from dataclasses import dataclass

from typedlogic import Fact, axiom


@dataclass(frozen=True)
class Part(Fact):
    """A DNA part in an ordered assembly."""

    assembly: str
    name: str
    part_type: str
    five_oh: str
    three_oh: str
    position: int


@dataclass(frozen=True)
class CanLigate(Fact):
    """Two parts have compatible overhangs and can ligate."""

    assembly: str
    upstream: str
    downstream: str


@dataclass(frozen=True)
class IntendedAdjacent(Fact):
    """Two parts are adjacent in the intended design order."""

    assembly: str
    upstream: str
    downstream: str


@dataclass(frozen=True)
class IntendedLigationGap(Fact):
    """Two intended adjacent parts have incompatible overhangs."""

    assembly: str
    upstream: str
    downstream: str


@dataclass(frozen=True)
class MisligationRisk(Fact):
    """A compatible ligation exists outside the intended adjacency order."""

    assembly: str
    upstream: str
    downstream: str


@axiom
def ligation_compatibility(
    assembly1: str,
    n1: str,
    t1: str,
    fo1: str,
    to1: str,
    pos1: int,
    assembly2: str,
    n2: str,
    t2: str,
    fo2: str,
    to2: str,
    pos2: int,
):
    """Derive ligation compatibility when the upstream 3' overhang matches the downstream 5' overhang."""
    if Part(assembly1, n1, t1, fo1, to1, pos1) and Part(assembly2, n2, t2, fo2, to2, pos2):
        if assembly1 == assembly2 and to1 == fo2 and n1 != n2:
            assert CanLigate(assembly1, n1, n2)


@axiom
def intended_adjacency(
    assembly1: str,
    n1: str,
    t1: str,
    fo1: str,
    to1: str,
    pos1: int,
    assembly2: str,
    n2: str,
    t2: str,
    fo2: str,
    to2: str,
    pos2: int,
):
    """Derive intended adjacency from the declared part positions."""
    if Part(assembly1, n1, t1, fo1, to1, pos1) and Part(assembly2, n2, t2, fo2, to2, pos2):
        if assembly1 == assembly2 and pos2 == pos1 + 1:
            assert IntendedAdjacent(assembly1, n1, n2)


@axiom
def intended_ligation_gap(
    assembly1: str,
    n1: str,
    t1: str,
    fo1: str,
    to1: str,
    pos1: int,
    assembly2: str,
    n2: str,
    t2: str,
    fo2: str,
    to2: str,
    pos2: int,
):
    """Flag intended adjacent parts whose overhangs do not match."""
    if Part(assembly1, n1, t1, fo1, to1, pos1) and Part(assembly2, n2, t2, fo2, to2, pos2):
        if assembly1 == assembly2 and pos2 == pos1 + 1 and to1 != fo2:
            assert IntendedLigationGap(assembly1, n1, n2)


@axiom
def misligation_detection(
    assembly: str,
    upstream: str,
    downstream: str,
    n1: str,
    t1: str,
    fo1: str,
    to1: str,
    pos1: int,
    n2: str,
    t2: str,
    fo2: str,
    to2: str,
    pos2: int,
):
    """Flag compatible ligations that skip over the intended next position."""
    if (
        CanLigate(assembly, upstream, downstream)
        and Part(assembly, n1, t1, fo1, to1, pos1)
        and Part(assembly, n2, t2, fo2, to2, pos2)
        and upstream == n1
        and downstream == n2
    ):
        if pos2 != pos1 + 1:
            assert MisligationRisk(assembly, upstream, downstream)


@dataclass(frozen=True)
class EncodesProtein(Fact):
    """A CDS part encodes a protein."""

    part: str
    protein: str


@dataclass(frozen=True)
class HasMolecularFunction(Fact):
    """A protein has a GO molecular function."""

    protein: str
    go_mf: str


@dataclass(frozen=True)
class FunctionCatalyzes(Fact):
    """A molecular function converts a substrate metabolite to a product metabolite."""

    go_mf: str
    substrate: str
    product: str


@dataclass(frozen=True)
class DesignContains(Fact):
    """A named design contains a part."""

    design: str
    part: str


@dataclass(frozen=True)
class AvailableMetabolite(Fact):
    """A metabolite is available to a design from the environment or an encoded function."""

    design: str
    metabolite: str


@dataclass(frozen=True)
class RequiredMetabolite(Fact):
    """A metabolite that the engineered cell needs for the design intent."""

    design: str
    metabolite: str


@axiom
def metabolite_produced_by_design(
    design: str,
    part: str,
    protein: str,
    go_mf: str,
    substrate: str,
    product: str,
):
    """Propagate metabolite availability through functions encoded by the design."""
    if (
        DesignContains(design, part)
        and EncodesProtein(part, protein)
        and HasMolecularFunction(protein, go_mf)
        and FunctionCatalyzes(go_mf, substrate, product)
        and AvailableMetabolite(design, substrate)
    ):
        assert AvailableMetabolite(design, product)


@dataclass(frozen=True)
class GOAspect(Fact):
    """The aspect of a GO term: molecular function, biological process, or cellular component."""

    go_term: str
    aspect: str


@dataclass(frozen=True)
class GOCAMIndividual(Fact):
    """An individual in a GO-CAM model instantiating a GO class."""

    iri: str
    go_class: str


@dataclass(frozen=True)
class CausalRelation(Fact):
    """A relation that should connect molecular function individuals in GO-CAM."""

    relation: str


@dataclass(frozen=True)
class CausalEdge(Fact):
    """A causal relation between two GO-CAM individuals."""

    upstream_iri: str
    downstream_iri: str
    relation: str


@dataclass(frozen=True)
class GOCAMViolation(Fact):
    """A derived GO-CAM curation rule violation."""

    individual: str
    rule: str


@axiom
def causal_edge_upstream_must_be_mf(
    up: str,
    down: str,
    rel: str,
    upstream_class: str,
    aspect: str,
):
    """Require the upstream node of a causal edge to instantiate a molecular function."""
    if (
        CausalEdge(up, down, rel)
        and CausalRelation(rel)
        and GOCAMIndividual(up, upstream_class)
        and GOAspect(upstream_class, aspect)
    ):
        if aspect != "MF":
            assert GOCAMViolation(up, "causal_upstream_not_MF")


@axiom
def causal_edge_downstream_must_be_mf(
    up: str,
    down: str,
    rel: str,
    downstream_class: str,
    aspect: str,
):
    """Require the downstream node of a causal edge to instantiate a molecular function."""
    if (
        CausalEdge(up, down, rel)
        and CausalRelation(rel)
        and GOCAMIndividual(down, downstream_class)
        and GOAspect(downstream_class, aspect)
    ):
        if aspect != "MF":
            assert GOCAMViolation(down, "causal_downstream_not_MF")

Layer 1: Assembly Structure

The structural layer treats overhangs as assembly-scoped typed facts. From those facts it derives which parts can ligate, which parts are intended neighbors, whether an intended neighboring pair fails to ligate, and whether any compatible ligation skips the intended next position.

safe_assembly = [
    Part("clean", "p_lac", "promoter", "GGAG", "TACT", 0),
    Part("clean", "rbs_b0034", "rbs", "TACT", "AATG", 1),
    Part("clean", "cscB", "cds", "AATG", "GCTT", 2),
    Part("clean", "term_b0015", "terminator", "GCTT", "CGCT", 3),
]

risky_assembly = [
    Part("risky", "p_lac", "promoter", "GGAG", "TACT", 0),
    Part("risky", "rbs_b0034", "rbs", "TACT", "AATG", 1),
    Part("risky", "cscB_skip_rbs", "cds", "TACT", "GCTT", 2),
    Part("risky", "term_b0015", "terminator", "GCTT", "CGCT", 3),
]


def assembly_report(label: str, facts: Sequence[Part]) -> tuple[str, str, str, str, str]:
    results = infer(facts, "CanLigate", "IntendedAdjacent", "IntendedLigationGap", "MisligationRisk")
    gaps = results["IntendedLigationGap"]
    risks = results["MisligationRisk"]
    return (
        label,
        pairs(results["CanLigate"]),
        pairs(results["IntendedAdjacent"]),
        pairs(gaps) if gaps else "none",
        pairs(risks) if risks else "none",
    )


assembly_rows = [
    assembly_report("clean overhang grammar", safe_assembly),
    assembly_report("CDS can skip RBS", risky_assembly),
]

display(
    Markdown(
        markdown_table(
            ["Assembly", "Can ligate", "Intended adjacency", "Intended gap", "Skip risk"],
            assembly_rows,
        )
    )
)
Assembly Can ligate Intended adjacency Intended gap Skip risk
clean overhang grammar clean:cscB->term_b0015, clean:p_lac->rbs_b0034, clean:rbs_b0034->cscB clean:cscB->term_b0015, clean:p_lac->rbs_b0034, clean:rbs_b0034->cscB none none
CDS can skip RBS risky:cscB_skip_rbs->term_b0015, risky:p_lac->cscB_skip_rbs, risky:p_lac->rbs_b0034 risky:cscB_skip_rbs->term_b0015, risky:p_lac->rbs_b0034, risky:rbs_b0034->cscB_skip_rbs risky:rbs_b0034->cscB_skip_rbs risky:p_lac->cscB_skip_rbs

In the second assembly, rbs_b0034 and cscB_skip_rbs are intended neighbors but have incompatible overhangs. The same assembly also lets p_lac ligate directly into cscB_skip_rbs, so the theory reports both the intended ligation gap and the skip risk before considering pathway function.

Layer 2: GO-Driven Pathway Feasibility

The functional layer connects parts to proteins, proteins to GO molecular functions, and GO functions to metabolite transformations. A recursive rule derives every metabolite reachable from the growth medium.

BIOLOGY = [
    EncodesProtein("cscA", "P10000_cscA"),
    EncodesProtein("cscB", "P30000_cscB"),
    HasMolecularFunction("P10000_cscA", "GO:0004575"),  # sucrose alpha-glucosidase
    HasMolecularFunction("P30000_cscB", "GO:0008515"),  # sucrose:H+ symporter
    FunctionCatalyzes("GO:0008515", "sucrose_out", "sucrose_in"),
    FunctionCatalyzes("GO:0004575", "sucrose_in", "glucose_in"),
    FunctionCatalyzes("GO:0004575", "sucrose_in", "fructose_in"),
]


def pathway_report(label: str, design_parts: Sequence[str]) -> tuple[str, str, str, str]:
    facts = [
        *BIOLOGY,
        *(DesignContains("sucrose_design", part) for part in design_parts),
        AvailableMetabolite("sucrose_design", "sucrose_out"),
        RequiredMetabolite("sucrose_design", "glucose_in"),
    ]
    results = infer(facts, "AvailableMetabolite", "RequiredMetabolite")
    available = sorted({metabolite for _, metabolite in results["AvailableMetabolite"]})
    required = sorted({metabolite for _, metabolite in results["RequiredMetabolite"]})
    missing = [metabolite for metabolite in required if metabolite not in available]
    status = "complete" if not missing else f"gap: {csv(missing)}"
    return label, csv(design_parts), csv(available), status
pathway_rows = [
    pathway_report("full sucrose pathway", ["cscA", "cscB"]),
    pathway_report("permease only", ["cscB"]),
    pathway_report("invertase only", ["cscA"]),
    pathway_report("empty design", []),
]

display(Markdown(markdown_table(["Design", "Parts", "Reachable metabolites", "Verdict"], pathway_rows)))
Design Parts Reachable metabolites Verdict
full sucrose pathway cscA, cscB fructose_in, glucose_in, sucrose_in, sucrose_out complete
permease only cscB sucrose_in, sucrose_out gap: glucose_in
invertase only cscA sucrose_out gap: glucose_in
empty design - sucrose_out gap: glucose_in

The full design reaches intracellular glucose from extracellular sucrose. The permease-only design imports sucrose but cannot cleave it. The invertase-only design has the enzyme, but no route for sucrose to enter the cell.

Layer 3: GO-CAM Curation Consistency

The curation layer checks a common GO-CAM modeling rule: declared causal relations such as RO:0002413 should connect molecular function individuals, not biological process individuals.

GO_ASPECTS = [
    GOAspect("GO:0004575", "MF"),  # sucrose alpha-glucosidase activity
    GOAspect("GO:0008515", "MF"),  # sucrose:H+ symporter activity
    GOAspect("GO:0005992", "BP"),  # trehalose biosynthetic process
    GOAspect("GO:0006012", "BP"),  # galactose metabolic process
]

CAUSAL_RELATIONS = [
    CausalRelation("RO:0002411"),  # causally upstream of
    CausalRelation("RO:0002413"),  # provides direct input for
]


def gocam_report(
    label: str,
    individuals: Sequence[tuple[str, str]],
    edges: Sequence[tuple[str, str, str]],
) -> tuple[str, str, str, str]:
    facts = [
        *GO_ASPECTS,
        *CAUSAL_RELATIONS,
        *(GOCAMIndividual(iri, go_class) for iri, go_class in individuals),
        *(CausalEdge(upstream, downstream, relation) for upstream, downstream, relation in edges),
    ]
    violations = infer(facts, "GOCAMViolation")["GOCAMViolation"]
    violation_text = ", ".join(f"{iri}: {rule}" for iri, rule in violations) or "none"
    return (
        label,
        csv([f"{iri}={go_class}" for iri, go_class in individuals]),
        pairs([(upstream, downstream) for upstream, downstream, _ in edges]),
        violation_text,
    )
gocam_rows = [
    gocam_report(
        "valid MF to MF causal edge",
        individuals=[("ind:1", "GO:0008515"), ("ind:2", "GO:0004575")],
        edges=[("ind:1", "ind:2", "RO:0002413")],
    ),
    gocam_report(
        "invalid MF to BP causal edge",
        individuals=[("ind:3", "GO:0008515"), ("ind:4", "GO:0005992")],
        edges=[("ind:3", "ind:4", "RO:0002413")],
    ),
    gocam_report(
        "invalid BP to BP causal edge",
        individuals=[("ind:5", "GO:0006012"), ("ind:6", "GO:0005992")],
        edges=[("ind:5", "ind:6", "RO:0002413")],
    ),
    gocam_report(
        "non-causal relation to BP",
        individuals=[("ind:7", "GO:0008515"), ("ind:8", "GO:0005992")],
        edges=[("ind:7", "ind:8", "BFO:0000050")],
    ),
]

display(Markdown(markdown_table(["GO-CAM model", "Individuals", "Causal edge", "Violations"], gocam_rows)))
GO-CAM model Individuals Causal edge Violations
valid MF to MF causal edge ind:1=GO:0008515, ind:2=GO:0004575 ind:1->ind:2 none
invalid MF to BP causal edge ind:3=GO:0008515, ind:4=GO:0005992 ind:3->ind:4 ind:4: causal_downstream_not_MF
invalid BP to BP causal edge ind:5=GO:0006012, ind:6=GO:0005992 ind:5->ind:6 ind:5: causal_upstream_not_MF, ind:6: causal_downstream_not_MF
non-causal relation to BP ind:7=GO:0008515, ind:8=GO:0005992 ind:7->ind:8 none

The same solver workflow handles design-time checks and curation-time checks. In a production pipeline, the facts could come from a parts registry, GO annotations, Rhea mappings, and GO-CAM RDF exports rather than hand-authored lists.

Inspect the Compiled Rules

TypedLogic parses the Python axioms into a logical theory and the Clingo backend renders that theory as ASP. The recursive pathway rule is the key fixpoint rule.

solver = ClingoSolver()
solver.load(THEORY_PATH)
compiled_rules = solver.dump().splitlines()
for rule in compiled_rules:
    if rule.startswith(("availablemetabolite", "intendedligationgap", "misligationrisk", "gocamviolation")):
        print(rule)
intendedligationgap(Assembly1, N1, N2) :- part(Assembly1, N1, T1, Fo1, To1, Pos1), part(Assembly2, N2, T2, Fo2, To2, Pos2), Assembly1 == Assembly2, Pos2 == Pos1 + 1, To1 != Fo2.
misligationrisk(Assembly, Upstream, Downstream) :- canligate(Assembly, Upstream, Downstream), part(Assembly, N1, T1, Fo1, To1, Pos1), part(Assembly, N2, T2, Fo2, To2, Pos2), Upstream == N1, Downstream == N2, Pos2 != Pos1 + 1.
availablemetabolite(Design, Product) :- designcontains(Design, Part), encodesprotein(Part, Protein), hasmolecularfunction(Protein, Go_mf), functioncatalyzes(Go_mf, Substrate, Product), availablemetabolite(Design, Substrate).
gocamviolation(Up, "causal_upstream_not_MF") :- causaledge(Up, Down, Rel), causalrelation(Rel), gocamindividual(Up, Upstream_class), goaspect(Upstream_class, Aspect), Aspect != "MF".
gocamviolation(Down, "causal_downstream_not_MF") :- causaledge(Up, Down, Rel), causalrelation(Rel), gocamindividual(Down, Downstream_class), goaspect(Downstream_class, Aspect), Aspect != "MF".

Next Steps

This pattern scales by swapping the toy facts for real sources: a parts library for assembly facts, GO and Rhea mappings for function-to-reaction facts, and GO-CAM exports for curation facts. Violations can then be reported back as design review comments or curator feedback.