Trying in vain to fix filtering.

This commit is contained in:
Yaro Kasear 2025-10-06 14:28:24 -05:00
parent 5a1f978aa2
commit 5ad652d372
2 changed files with 134 additions and 47 deletions

View file

@ -9,6 +9,7 @@ from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, contains_eag
from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.attributes import InstrumentedAttribute
from sqlalchemy.sql import operators from sqlalchemy.sql import operators
from sqlalchemy.sql.elements import UnaryExpression, ColumnElement from sqlalchemy.sql.elements import UnaryExpression, ColumnElement
from sqlalchemy.sql.visitors import Visitable
from crudkit.core import to_jsonable, deep_diff, diff_to_patch, filter_to_columns, normalize_payload from crudkit.core import to_jsonable, deep_diff, diff_to_patch, filter_to_columns, normalize_payload
from crudkit.core.base import Version from crudkit.core.base import Version
@ -42,6 +43,45 @@ class _CRUDModelProto(_HasID, _HasTable, _HasADict, Protocol):
T = TypeVar("T", bound=_CRUDModelProto) T = TypeVar("T", bound=_CRUDModelProto)
def _collect_tables_from_filters(filters) -> set:
"""
Walk SQLAlchemy filter expressions and collect the Table/Alias objects
that appear, so we can detect which relationships are *actually used*
by filters and must be JOINed (not just selectinloaded).
"""
seen = set()
def visit(node):
if node is None:
return
# record table / selectable if present
tbl = getattr(node, "table", None)
if tbl is not None:
# include the selectable and its base element (alias -> base table)
cur = tbl
while cur is not None:
seen.add(cur)
cur = getattr(cur, "element", None)
# generic children walker
try:
children = list(node.get_children())
except Exception:
children = []
for ch in children:
visit(ch)
# also inspect common attributes if present
for attr in ("left", "right", "element", "clause", "clauses"):
val = getattr(node, attr, None)
if val is None:
continue
if isinstance(val, (list, tuple)):
for v in val:
visit(v)
else:
visit(val)
for f in (filters or []):
visit(f)
return seen
def _unwrap_ob(ob): def _unwrap_ob(ob):
"""Return (col, is_desc) from an ORDER BY element (handles .asc()/.desc()).""" """Return (col, is_desc) from an ORDER BY element (handles .asc()/.desc())."""
col = getattr(ob, "element", None) col = getattr(ob, "element", None)
@ -239,6 +279,9 @@ class CRUDService(Generic[T]):
# Soft delete # Soft delete
query = self._apply_not_deleted(query, root_alias, params) query = self._apply_not_deleted(query, root_alias, params)
# Which related tables are referenced by filters?
filter_tables = _collect_tables_from_filters(filters)
# Root column projection (load_only) # Root column projection (load_only)
only_cols = [c for c in root_fields if isinstance(c, InstrumentedAttribute)] only_cols = [c for c in root_fields if isinstance(c, InstrumentedAttribute)]
if only_cols: if only_cols:
@ -247,21 +290,29 @@ class CRUDService(Generic[T]):
# Detect first hops that have deeper, nested tails requested (e.g. "contact.supervisor") # Detect first hops that have deeper, nested tails requested (e.g. "contact.supervisor")
nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 } nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 }
# IMPORTANT: # First-hop strategy:
# - Only attach loader options for first-hop relations from the root. # - If a non-collection relation's target table is used in filters -> plain JOIN (no explicit alias)
# - Always use selectinload here (avoid contains_eager joins). # - Otherwise -> selectinload (keeps row counts sane)
# - Let compile_projections() supply deep chained options. used_contains_eager = False # we purposely avoid contains_eager here
for base_alias, rel_attr, target_alias in join_paths: for base_alias, rel_attr, target_alias in join_paths:
is_firsthop_from_root = (base_alias is root_alias) is_firsthop_from_root = (base_alias is root_alias)
if not is_firsthop_from_root: if not is_firsthop_from_root:
# Deeper hops are handled by proj_opts below # deeper hops handled separately (via proj_opts)
continue continue
prop = getattr(rel_attr, "property", None) prop = getattr(rel_attr, "property", None)
is_collection = bool(getattr(prop, "uselist", False)) is_collection = bool(getattr(prop, "uselist", False))
is_nested_firsthop = rel_attr.key in nested_first_hops _is_nested_firsthop = rel_attr.key in nested_first_hops
target_selectable = getattr(target_alias, "selectable", None)
target_base = getattr(target_selectable, "element", None) or target_selectable
needed_for_filter = (target_selectable in filter_tables) or (target_base in filter_tables)
if needed_for_filter and not is_collection:
# Join via the relationship attribute so SA reuses the same FROM
# that filter expressions reference (avoids cartesian products).
query = query.join(rel_attr, isouter=True)
else:
opt = selectinload(rel_attr) opt = selectinload(rel_attr)
# Optional narrowng for collections
if is_collection: if is_collection:
child_names = (collection_field_names or {}).get(rel_attr.key, []) child_names = (collection_field_names or {}).get(rel_attr.key, [])
if child_names: if child_names:
@ -272,10 +323,14 @@ class CRUDService(Generic[T]):
opt = opt.load_only(*cols) opt = opt.load_only(*cols)
query = query.options(opt) query = query.options(opt)
# Filters AFTER joins → no cartesian products # Filters AFTER relations wiring → no cartesian products
if filters: if filters:
query = query.filter(*filters) query = query.filter(*filters)
# Apply deep projection loader options (only when we didn't use contains_eager)
if proj_opts and not used_contains_eager:
query = query.options(*proj_opts)
# Order spec (with PK tie-breakers for stability) # Order spec (with PK tie-breakers for stability)
order_spec = self._extract_order_spec(root_alias, order_by) order_spec = self._extract_order_spec(root_alias, order_by)
limit, _ = spec.parse_pagination() limit, _ = spec.parse_pagination()
@ -363,10 +418,6 @@ class CRUDService(Generic[T]):
last_key = None last_key = None
# Count DISTINCT ids with mirrored joins # Count DISTINCT ids with mirrored joins
# Apply deep projection loader options (safe: we avoided contains_eager)
if proj_opts:
query = query.options(*proj_opts)
total = None total = None
if include_total: if include_total:
base = session.query(getattr(root_alias, "id")) base = session.query(getattr(root_alias, "id"))
@ -375,7 +426,7 @@ class CRUDService(Generic[T]):
for base_alias, rel_attr, target_alias in join_paths: for base_alias, rel_attr, target_alias in join_paths:
# do not join collections for COUNT mirror # do not join collections for COUNT mirror
if not bool(getattr(getattr(rel_attr, "property", None), "uselist", False)): if not bool(getattr(getattr(rel_attr, "property", None), "uselist", False)):
base = base.join(target_alias, rel_attr.of_type(target_alias), isouter=True) base = base.join(rel_attr, isouter=True)
if filters: if filters:
base = base.filter(*filters) base = base.filter(*filters)
total = session.query(func.count()).select_from( total = session.query(func.count()).select_from(
@ -468,7 +519,8 @@ class CRUDService(Generic[T]):
nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 } nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 }
# First-hop only; use selectinload (no contains_eager) # First-hop strategy (same as peek_window): join if filter needs it, else selectinload
use_contains_eager = False # we avoid contains_eager here, too
for base_alias, rel_attr, target_alias in join_paths: for base_alias, rel_attr, target_alias in join_paths:
is_firsthop_from_root = (base_alias is root_alias) is_firsthop_from_root = (base_alias is root_alias)
if not is_firsthop_from_root: if not is_firsthop_from_root:
@ -477,6 +529,13 @@ class CRUDService(Generic[T]):
is_collection = bool(getattr(prop, "uselist", False)) is_collection = bool(getattr(prop, "uselist", False))
_is_nested_firsthop = rel_attr.key in nested_first_hops _is_nested_firsthop = rel_attr.key in nested_first_hops
target_selectable = getattr(target_alias, "selectable", None)
target_base = getattr(target_selectable, "element", None) or target_selectable
needed_for_filter = target_selectable in _collect_tables_from_filters(filters) or (target_base in _collect_tables_from_filters(filters))
if needed_for_filter and not is_collection:
query = query.join(rel_attr, isouter=True)
else:
opt = selectinload(rel_attr) opt = selectinload(rel_attr)
if is_collection: if is_collection:
child_names = (collection_field_names or {}).get(rel_attr.key, []) child_names = (collection_field_names or {}).get(rel_attr.key, [])
@ -498,7 +557,7 @@ class CRUDService(Generic[T]):
# Projection loader options compiled from requested fields. # Projection loader options compiled from requested fields.
# Skip if we used contains_eager to avoid loader-strategy conflicts. # Skip if we used contains_eager to avoid loader-strategy conflicts.
expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], []) expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
if proj_opts: if proj_opts and not use_contains_eager:
query = query.options(*proj_opts) query = query.options(*proj_opts)
obj = query.first() obj = query.first()
@ -566,7 +625,8 @@ class CRUDService(Generic[T]):
nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 } nested_first_hops = { path[0] for path in (rel_field_names or {}).keys() if len(path) > 1 }
# First-hop only; use selectinload # First-hop strategy (same as seek_window): join if filter needs it, else selectinload
used_contains_eager = False # We avoid contains_eager here as well
for base_alias, rel_attr, target_alias in join_paths: for base_alias, rel_attr, target_alias in join_paths:
is_firsthop_from_root = (base_alias is root_alias) is_firsthop_from_root = (base_alias is root_alias)
if not is_firsthop_from_root: if not is_firsthop_from_root:
@ -575,6 +635,14 @@ class CRUDService(Generic[T]):
is_collection = bool(getattr(prop, "uselist", False)) is_collection = bool(getattr(prop, "uselist", False))
_is_nested_firsthop = rel_attr.key in nested_first_hops _is_nested_firsthop = rel_attr.key in nested_first_hops
target_selectable = getattr(target_alias, "selectable", None)
target_base = getattr(target_selectable, "element", None) or target_selectable
ftables = _collect_tables_from_filters(filters)
needed_for_filter = (target_selectable in ftables) or (target_base in ftables)
if needed_for_filter and not is_collection:
query = query.join(rel_attr, isouter=True)
else:
opt = selectinload(rel_attr) opt = selectinload(rel_attr)
if is_collection: if is_collection:
child_names = (collection_field_names or {}).get(rel_attr.key, []) child_names = (collection_field_names or {}).get(rel_attr.key, [])
@ -608,7 +676,7 @@ class CRUDService(Generic[T]):
# Projection loaders only if we didnt use contains_eager # Projection loaders only if we didnt use contains_eager
expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], []) expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
if proj_opts: if proj_opts and not used_contains_eager:
query = query.options(*proj_opts) query = query.options(*proj_opts)
else: else:

View file

@ -25,6 +25,15 @@
{% endfor %} {% endfor %}
</ul> </ul>
<div class="mt-3">
<button type="button" class="btn btn-outline-primary btn-sm" onclick="addNewUpdate()">
Add update
</button>
</div>
<input type="hidden" name="updates" id="updatesPayload">
<input type="hidden" name="delete_update_ids" id="deleteUpdatesPayload">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/github-markdown-css@5/github-markdown.min.css"> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/github-markdown-css@5/github-markdown.min.css">
<style> <style>
textarea.auto-md { textarea.auto-md {
@ -45,6 +54,16 @@
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
<script> <script>
// Track deletions for existing notes
const deletedUpdateIds = new Set();
function addNewUpdate() {
// Create a temporary client-only id so we can manage the DOM before saving
const tempId = `new_${Date.now()}`;
const li = document.createElement('li');
li.className = 'list-group-item';
}
// Initial render // Initial render
document.addEventListener('DOMContentLoaded', () => { document.addEventListener('DOMContentLoaded', () => {
const ids = [ {% for n in items %} {{ n.id }}{% if not loop.last %}, {% endif %}{% endfor %} ]; const ids = [ {% for n in items %} {{ n.id }}{% if not loop.last %}, {% endif %}{% endfor %} ];