Fixed two of the most annoying bugs in the universe: Extra sessions and malfunctioning params.

2025-09-25 13:13:27 -05:00 · 2025-09-25 13:13:27 -05:00 · 803eb83ca5
commit 803eb83ca5
parent 981d3ea933
2 changed files with 110 additions and 198 deletions
--- a/crudkit/core/service.py
+++ b/crudkit/core/service.py
@ -239,84 +239,46 @@ class CRUDService(Generic[T]):
        include_total: bool = True,
    ) -> "SeekWindow[T]":
        """
-        Transport-agnostic keyset pagination that preserves all the goodies from `list()`:
-          - filters, includes, joins, field projection, eager loading, soft-delete
-          - deterministic ordering (user sort + PK tiebreakers)
-          - forward/backward seek via `key` and `backward`
-        Returns a SeekWindow with items, first/last keys, order spec, limit, and optional total.
+        Keyset pagination with relationship-safe filtering/sorting.
+        Always JOIN all CRUDSpec-discovered paths first; then apply filters, sort, seek.
        """
        session = self.session
        query, root_alias = self.get_query()

-        # Normalize requested fields and compile projection (may skip later to avoid conflicts)
+        # Requested fields → projection + optional loaders
        fields = _normalize_fields_param(params)
        expanded_fields, proj_opts = compile_projection(self.model, fields) if fields else ([], [])

        spec = CRUDSpec(self.model, params or {}, root_alias)

+        # Parse all inputs so join_paths are populated
        filters = spec.parse_filters()
        order_by = spec.parse_sort()
-
-        # Field parsing for root load_only fallback
        root_fields, rel_field_names, root_field_names = spec.parse_fields()
+        spec.parse_includes()
+        join_paths = tuple(spec.get_join_paths())

-        # Soft delete filter
+        # Soft delete
        query = self._apply_not_deleted(query, root_alias, params)

-        # Apply filters first
-        if filters:
-            query = query.filter(*filters)
-
-        # Includes + join paths (dotted fields etc.)
-        spec.parse_includes()
-        join_paths = tuple(spec.get_join_paths())  # iterable of (path, relationship_attr, target_alias)
-
-        # Relationship names required by ORDER BY / WHERE
-        sql_hops: set[str] = _paths_needed_for_sql(order_by, filters, join_paths)
-        # Also include relationships mentioned directly in the sort spec
-        sql_hops |= _hops_from_sort(params)
-
-        # First-hop relationship names implied by dotted projection fields
-        proj_hops: set[str] = _paths_from_fields(fields)
-
-        # Root column projection
+        # Root column projection (load_only)
        only_cols = [c for c in root_fields if isinstance(c, InstrumentedAttribute)]
        if only_cols:
            query = query.options(Load(root_alias).load_only(*only_cols))

-        # Relationship handling per path (avoid loader strategy conflicts)
+        # JOIN all resolved paths, hydrate from the join
        used_contains_eager = False
-        joined_names: set[str] = set()
-
-        for _path, relationship_attr, target_alias in join_paths:
-            rel_attr = cast(InstrumentedAttribute, relationship_attr)
-            name = relationship_attr.key
-            if name in sql_hops:
-                # Needed for WHERE/ORDER BY: join + hydrate from that join
-                query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
-                query = query.options(contains_eager(rel_attr, alias=target_alias))
-                used_contains_eager = True
-                joined_names.add(name)
-            elif name in proj_hops:
-                # Display-only: bulk-load efficiently, no join
-                query = query.options(selectinload(rel_attr))
-                joined_names.add(name)
-
-        # Force-join any SQL-needed relationships that weren't in join_paths
-        missing_sql = sql_hops - joined_names
-        for name in missing_sql:
-            rel_attr = cast(InstrumentedAttribute, getattr(root_alias, name))
-            query = query.join(rel_attr, isouter=True)
-            query = query.options(contains_eager(rel_attr))
+        for _base_alias, rel_attr, target_alias in join_paths:
+            query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
+            query = query.options(contains_eager(rel_attr, alias=target_alias))
            used_contains_eager = True
-            joined_names.add(name)

-        # Apply projection loader options only if they won't conflict with contains_eager
-        if proj_opts and not used_contains_eager:
-            query = query.options(*proj_opts)
+        # Filters AFTER joins → no cartesian products
+        if filters:
+            query = query.filter(*filters)

-        # Order + limit
-        order_spec = self._extract_order_spec(root_alias, order_by)  # SA 2.x helper
+        # Order spec (with PK tie-breakers for stability)
+        order_spec = self._extract_order_spec(root_alias, order_by)
        limit, _ = spec.parse_pagination()
        if limit is None:
            effective_limit = 50
@ -325,13 +287,13 @@ class CRUDService(Generic[T]):
        else:
            effective_limit = limit

-        # Keyset predicate
+        # Seek predicate from cursor key (if any)
        if key:
            pred = self._key_predicate(order_spec, key, backward)
            if pred is not None:
                query = query.filter(pred)

-        # Apply ordering. For backward, invert SQL order then reverse in-memory for display.
+        # Apply ORDER and LIMIT. Backward is SQL-inverted + reverse in-memory.
        if not backward:
            clauses = [(c.desc() if is_desc else c.asc()) for c, is_desc in zip(order_spec.cols, order_spec.desc)]
            query = query.order_by(*clauses)
@ -345,9 +307,9 @@ class CRUDService(Generic[T]):
                query = query.limit(effective_limit)
            items = list(reversed(query.all()))

-        # Tag projection so your renderer knows what fields were requested
+        # Projection meta tag for renderers
        if fields:
-            proj = list(dict.fromkeys(fields))  # dedupe, preserve order
+            proj = list(dict.fromkeys(fields))
            if "id" not in proj and hasattr(self.model, "id"):
                proj.insert(0, "id")
        else:
@ -370,12 +332,9 @@ class CRUDService(Generic[T]):
                except Exception:
                    pass

-        # Boundary keys for cursor encoding in the API layer
-        # When ORDER BY includes related columns (e.g., owner.first_name),
-        # pluck values from the related object we hydrated with contains_eager/selectinload.
+        # Cursor key pluck: support related columns we hydrated via contains_eager
        def _pluck_key_from_obj(obj: Any) -> list[Any]:
            vals: list[Any] = []
-            # Build a quick map: selectable -> relationship name
            alias_to_rel: dict[Any, str] = {}
            for _p, relationship_attr, target_alias in join_paths:
                sel = getattr(target_alias, "selectable", None)
@ -383,20 +342,17 @@ class CRUDService(Generic[T]):
                    alias_to_rel[sel] = relationship_attr.key

            for col in order_spec.cols:
-                key = getattr(col, "key", None) or getattr(col, "name", None)
-                # Try root attribute first
-                if key and hasattr(obj, key):
-                    vals.append(getattr(obj, key))
+                keyname = getattr(col, "key", None) or getattr(col, "name", None)
+                if keyname and hasattr(obj, keyname):
+                    vals.append(getattr(obj, keyname))
                    continue
-                # Try relationship hop by matching the column's table/selectable
                table = getattr(col, "table", None)
                relname = alias_to_rel.get(table)
-                if relname and key:
+                if relname and keyname:
                    relobj = getattr(obj, relname, None)
-                    if relobj is not None and hasattr(relobj, key):
-                        vals.append(getattr(relobj, key))
+                    if relobj is not None and hasattr(relobj, keyname):
+                        vals.append(getattr(relobj, keyname))
                        continue
-                # Give up: unsupported expression for cursor purposes
                raise ValueError("unpluckable")
            return vals

@ -404,33 +360,24 @@ class CRUDService(Generic[T]):
            first_key = _pluck_key_from_obj(items[0]) if items else None
            last_key  = _pluck_key_from_obj(items[-1]) if items else None
        except Exception:
-            # If we can't derive cursor keys (e.g., ORDER BY expression/aggregate),
-            # disable cursors for this response rather than exploding.
            first_key = None
            last_key = None

-        # Optional total that’s safe under JOINs (COUNT DISTINCT ids)
+        # Count DISTINCT ids with mirrored joins
        total = None
        if include_total:
            base = session.query(getattr(root_alias, "id"))
            base = self._apply_not_deleted(base, root_alias, params)
+            # same joins as above for correctness
+            for _base_alias, rel_attr, target_alias in join_paths:
+                base = base.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
            if filters:
                base = base.filter(*filters)
-            # Mirror join structure for any SQL-needed relationships
-            for _path, relationship_attr, target_alias in join_paths:
-                if relationship_attr.key in sql_hops:
-                    rel_attr = cast(InstrumentedAttribute, relationship_attr)
-                    base = base.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
-            # Also mirror any forced joins
-            for name in (sql_hops - {ra.key for _p, ra, _a in join_paths}):
-                rel_attr = cast(InstrumentedAttribute, getattr(root_alias, name))
-                base = base.join(rel_attr, isouter=True)
-
            total = session.query(func.count()).select_from(
                base.order_by(None).distinct().subquery()
            ).scalar() or 0

-        window_limit_for_body = 0 if effective_limit is None and limit == 0 else (effective_limit or 50)
+        window_limit_for_body = 0 if effective_limit is None and (limit == 0) else (effective_limit or 50)

        if log.isEnabledFor(logging.DEBUG):
            log.debug("QUERY: %s", str(query))
@ -475,64 +422,53 @@ class CRUDService(Generic[T]):
        return [*order_by, *pk_cols]

    def get(self, id: int, params=None) -> T | None:
-        """Fetch a single row by id with conflict-free eager loading and clean projection."""
+        """
+        Fetch a single row by id with conflict-free eager loading and clean projection.
+        Always JOIN any paths that CRUDSpec resolved for filters/fields/includes so
+        related-column filters never create cartesian products.
+        """
        query, root_alias = self.get_query()

        # Defaults so we can build a projection even if params is None
+        req_fields: list[str] = _normalize_fields_param(params)
        root_fields: list[Any] = []
        root_field_names: dict[str, str] = {}
        rel_field_names: dict[tuple[str, ...], list[str]] = {}
-        req_fields: list[str] = _normalize_fields_param(params)

-        # Soft-delete guard
+        # Soft-delete guard first
        query = self._apply_not_deleted(query, root_alias, params)

        spec = CRUDSpec(self.model, params or {}, root_alias)

-        # Optional extra filters (in addition to id); keep parity with list()
+        # Parse everything so CRUDSpec records any join paths it needed to resolve
        filters = spec.parse_filters()
-        if filters:
-            query = query.filter(*filters)
-
-        # Always filter by id
-        query = query.filter(getattr(root_alias, "id") == id)
-
-        # Includes + join paths we may need
+        # no ORDER BY for get()
+        if params:
+            root_fields, rel_field_names, root_field_names = spec.parse_fields()
        spec.parse_includes()
        join_paths = tuple(spec.get_join_paths())

-        # Field parsing to enable root load_only
-        if params:
-            root_fields, rel_field_names, root_field_names = spec.parse_fields()
-
-        # Decide which relationship paths are needed for SQL vs display-only
-        # For get(), there is no ORDER BY; only filters might force SQL use.
-        sql_hops = _paths_needed_for_sql(order_by=None, filters=filters, join_paths=join_paths)
-        proj_hops = _paths_from_fields(req_fields)
-
-        # Root column projection
+        # Root-column projection (load_only)
        only_cols = [c for c in root_fields if isinstance(c, InstrumentedAttribute)]
        if only_cols:
            query = query.options(Load(root_alias).load_only(*only_cols))

-        # Relationship handling per path: avoid loader strategy conflicts
+        # JOIN all discovered paths up front; hydrate via contains_eager
        used_contains_eager = False
-        for _path, relationship_attr, target_alias in join_paths:
-            rel_attr = cast(InstrumentedAttribute, relationship_attr)
-            name = relationship_attr.key
-            if name in sql_hops:
-                # Needed in WHERE: join + hydrate from the join
-                query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
-                query = query.options(contains_eager(rel_attr, alias=target_alias))
-                used_contains_eager = True
-            elif name in proj_hops:
-                # Display-only: bulk-load efficiently
-                query = query.options(selectinload(rel_attr))
-            else:
-                pass
+        for _base_alias, rel_attr, target_alias in join_paths:
+            query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
+            query = query.options(contains_eager(rel_attr, alias=target_alias))
+            used_contains_eager = True
+
+        # Apply filters (joins are in place → no cartesian products)
+        if filters:
+            query = query.filter(*filters)
+
+        # And the id filter
+        query = query.filter(getattr(root_alias, "id") == id)

        # Projection loader options compiled from requested fields.
-        # Skip if we used contains_eager to avoid strategy conflicts.
+        # Skip if we used contains_eager to avoid loader-strategy conflicts.
        expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
        if proj_opts and not used_contains_eager:
            query = query.options(*proj_opts)
@ -541,7 +477,7 @@ class CRUDService(Generic[T]):

        # Emit exactly what the client requested (plus id), or a reasonable fallback
        if req_fields:
-            proj = list(dict.fromkeys(req_fields))  # dedupe, preserve order
+            proj = list(dict.fromkeys(req_fields))
            if "id" not in proj and hasattr(self.model, "id"):
                proj.insert(0, "id")
        else:
@ -569,16 +505,20 @@ class CRUDService(Generic[T]):
        return obj or None

    def list(self, params=None) -> list[T]:
-        """Offset/limit listing with smart relationship loading and clean projection."""
+        """
+        Offset/limit listing with relationship-safe filtering.
+        We always JOIN every CRUDSpec-discovered path before applying filters/sorts.
+        """
        query, root_alias = self.get_query()

        # Defaults so we can reference them later even if params is None
+        req_fields: list[str] = _normalize_fields_param(params)
        root_fields: list[Any] = []
        root_field_names: dict[str, str] = {}
        rel_field_names: dict[tuple[str, ...], list[str]] = {}
-        req_fields: list[str] = _normalize_fields_param(params)

        if params:
+            # Soft delete
            query = self._apply_not_deleted(query, root_alias, params)

            spec = CRUDSpec(self.model, params or {}, root_alias)
@ -586,84 +526,56 @@ class CRUDService(Generic[T]):
            order_by = spec.parse_sort()
            limit, offset = spec.parse_pagination()

-            # Includes + join paths we might need
+            # Includes / fields (populates join_paths)
+            root_fields, rel_field_names, root_field_names = spec.parse_fields()
            spec.parse_includes()
            join_paths = tuple(spec.get_join_paths())

-            # Field parsing for load_only on root columns
-            root_fields, rel_field_names, root_field_names = spec.parse_fields()
-
-            if filters:
-                query = query.filter(*filters)
-
-            # Determine which relationship paths are needed for SQL vs display-only
-            sql_hops = _paths_needed_for_sql(order_by, filters, join_paths)
-            sql_hops |= _hops_from_sort(params)  # ensure sort-driven joins exist
-            proj_hops = _paths_from_fields(req_fields)
-
-            # Root column projection
+            # Root column projection (load_only)
            only_cols = [c for c in root_fields if isinstance(c, InstrumentedAttribute)]
            if only_cols:
                query = query.options(Load(root_alias).load_only(*only_cols))

-            # Relationship handling per path
+            # JOIN all paths we resolved and hydrate them from the join
            used_contains_eager = False
-            joined_names: set[str] = set()
-
-            for _path, relationship_attr, target_alias in join_paths:
-                rel_attr = cast(InstrumentedAttribute, relationship_attr)
-                name = relationship_attr.key
-                if name in sql_hops:
-                    # Needed for WHERE/ORDER BY: join + hydrate from the join
-                    query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
-                    query = query.options(contains_eager(rel_attr, alias=target_alias))
-                    used_contains_eager = True
-                    joined_names.add(name)
-                elif name in proj_hops:
-                    # Display-only: no join, bulk-load efficiently
-                    query = query.options(selectinload(rel_attr))
-                    joined_names.add(name)
-
-            # Force-join any SQL-needed relationships that weren't in join_paths
-            missing_sql = sql_hops - joined_names
-            for name in missing_sql:
-                rel_attr = cast(InstrumentedAttribute, getattr(root_alias, name))
-                query = query.join(rel_attr, isouter=True)
-                query = query.options(contains_eager(rel_attr))
+            for _base_alias, rel_attr, target_alias in join_paths:
+                query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
+                query = query.options(contains_eager(rel_attr, alias=target_alias))
                used_contains_eager = True
-                joined_names.add(name)

-            # MSSQL requires ORDER BY when OFFSET is used (SQLA uses OFFSET for limit/offset)
+            # Filters AFTER joins → no cartesian products
+            if filters:
+                query = query.filter(*filters)
+
+            # MSSQL requires ORDER BY when OFFSET is used; ensure stable PK tie-breakers
            paginating = (limit is not None) or (offset is not None and offset != 0)
            if paginating and not order_by and self.backend.requires_order_by_for_offset:
                order_by = self._default_order_by(root_alias)
-
            if order_by:
-                query = query.order_by(*order_by)
+                query = query.order_by(*self._stable_order_by(root_alias, order_by))

-            # Only apply offset/limit when not None and not zero
+            # Offset/limit
            if offset is not None and offset != 0:
                query = query.offset(offset)
            if limit is not None and limit > 0:
                query = query.limit(limit)

-            # Projection loader options compiled from requested fields.
-            # Skip if we used contains_eager to avoid loader-strategy conflicts.
+            # Projection loaders only if we didn’t use contains_eager
            expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
            if proj_opts and not used_contains_eager:
                query = query.options(*proj_opts)

        else:
-            # No params means no filters/sorts/limits; still honor projection loaders if any
+            # No params; still honor projection loaders if any
            expanded_fields, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
            if proj_opts:
                query = query.options(*proj_opts)

        rows = query.all()

-        # Emit exactly what the client requested (plus id), or a reasonable fallback
+        # Build projection meta for renderers
        if req_fields:
-            proj = list(dict.fromkeys(req_fields))  # dedupe while preserving order
+            proj = list(dict.fromkeys(req_fields))
            if "id" not in proj and hasattr(self.model, "id"):
                proj.insert(0, "id")
        else:
@ -691,7 +603,6 @@ class CRUDService(Generic[T]):

        return rows

-
    def create(self, data: dict, actor=None) -> T:
        session = self.session
        obj = self.model(**data)