Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
Yaro Kasear
dd863dba99 More attempts to fix a bug. 2025-10-21 08:54:15 -05:00
Yaro Kasear
bd2daf921a This introduces an ugly regression. 2025-10-20 15:20:52 -05:00
5 changed files with 219 additions and 130 deletions

View file

@ -7,7 +7,7 @@ from hashlib import md5
from urllib.parse import urlencode
from werkzeug.exceptions import HTTPException
from crudkit.core.service import _is_truthy
from crudkit.core.params import is_truthy
MAX_JSON = 1_000_000
@ -39,7 +39,7 @@ def _json_error(e: Exception, status: int = 400):
return jsonify({"status": "error", "error": msg}), status
def _bool_param(d: dict[str, str], key: str, default: bool) -> bool:
return _is_truthy(d.get(key, "1" if default else "0"))
return is_truthy(d.get(key, "1" if default else "0"))
def generate_crud_blueprint(model, service, *, base_prefix: str | None = None, rest: bool = True, rpc: bool = True):
"""

View file

@ -1,22 +1,11 @@
from functools import lru_cache
from typing import Any, Dict, Iterable, List, Optional, Tuple, Set, cast
from sqlalchemy import Column, Integer, DateTime, Boolean, String, JSON, func, inspect
from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, RelationshipProperty, Mapper
from sqlalchemy import Column, Integer, DateTime, String, JSON, func, inspect
from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, Mapper
from sqlalchemy.orm.state import InstanceState
Base = declarative_base()
from crudkit.core.meta import column_names_for_model
@lru_cache(maxsize=512)
def _column_names_for_model(cls: type) -> tuple[str, ...]:
try:
mapper = inspect(cls)
return tuple(prop.key for prop in mapper.column_attrs)
except Exception:
names: list[str] = []
for c in cls.__mro__:
if hasattr(c, "__table__"):
names.extend(col.name for col in c.__table__.columns)
return tuple(dict.fromkeys(names))
Base = declarative_base()
def _sa_state(obj: Any) -> Optional[InstanceState[Any]]:
"""Safely get SQLAlchemy InstanceState (or None)."""
@ -60,7 +49,16 @@ def _safe_get_loaded_attr(obj, name):
if attr is not None:
val = attr.loaded_value
return None if val is NO_VALUE else val
try:
# In rare cases, state.dict may be stale; reject descriptors
got = getattr(obj, name, None)
from sqlalchemy.orm.attributes import InstrumentedAttribute as _Instr
if got is not None and not isinstance(got, _Instr):
# Do not trigger load; only return if it was already present in __dict__
if hasattr(obj, "__dict__") and name in obj.__dict__:
return got
except Exception:
pass
return None
except Exception:
return None
@ -72,16 +70,10 @@ def _identity_key(obj) -> Tuple[type, Any]:
except Exception:
return (type(obj), id(obj))
def _is_collection_rel(prop: RelationshipProperty) -> bool:
try:
return prop.uselist is True
except Exception:
return False
def _serialize_simple_obj(obj) -> Dict[str, Any]:
"""Columns only (no relationships)."""
out: Dict[str, Any] = {}
for name in _column_names_for_model(type(obj)):
for name in column_names_for_model(type(obj)):
try:
out[name] = getattr(obj, name)
except Exception:
@ -153,9 +145,11 @@ def _split_field_tokens(fields: Iterable[str]) -> Tuple[List[str], Dict[str, Lis
def _deep_get_loaded(obj: Any, dotted: str) -> Any:
"""
Deep get with no lazy loads:
- For all but the final hop, use _safe_get_loaded_attr (mapped-only, no getattr).
- For the final hop, try _safe_get_loaded_attr first; if None, fall back to getattr()
to allow computed properties/hybrids that rely on already-loaded columns.
- Intermediate hops via _safe_get_loaded_attr only.
- Final hop:
* If relationship and not loaded: return None
* else allow getattr for non-relationship attrs (hybrids/properties) that compute from already-loaded data.
* Serialize ORM objects at the leaf into dicts
"""
parts = dotted.split(".")
if not parts:
@ -171,30 +165,57 @@ def _deep_get_loaded(obj: Any, dotted: str) -> Any:
return None
last = parts[-1]
# Try safe fetch on the last hop first
# If last is an ORM relationship, only return it if already loaded.
m = _sa_mapper(cur)
if m is not None and last in m.relationships:
val = _safe_get_loaded_attr(cur, last)
if val is not None:
return val
# Fall back to getattr for computed/hybrid attributes on an already-loaded object
try:
return getattr(cur, last, None)
except Exception:
if val is None:
return None
# serialize relationship value
if _sa_mapper(val) is not None:
return _serialize_simple_obj(val)
if isinstance(val, (list, tuple)):
out = []
for v in val:
out.append(_serialize_simple_obj(v) if _sa_mapper(v) is not None else v)
return out
return val
# Not a relationship: try loaded value, else safe getattr
val = _safe_get_loaded_attr(cur, last)
if val is None:
try:
# getattr here will not lazy-load relationships because we already gated those
val = getattr(cur, last, None)
except Exception:
val = None
if _sa_mapper(val) is not None:
return _serialize_simple_obj(val)
if isinstance(val, (list, tuple)):
return [_serialize_simple_obj(v) if _sa_mapper(v) is not None else v for v in val]
return val
def _serialize_leaf(obj: Any) -> Any:
"""
Lead serialization for values we put into as_dict():
- If object has as_dict(), call as_dict() with no args (caller controls field shapes).
- Else return value as-is (Flask/JSON encoder will handle datetimes, etc., via app config).
Leaf serialization for non-dotted scalar fields:
- If it's an ORM object with as_dict(), use it.
- Else if it's an ORM object, serialize columns only.
- Else return the value as-is.
"""
if obj is None:
return None
if _sa_mapper(obj) is not None:
ad = getattr(obj, "as_dict", None)
if callable(ad):
try:
return ad(None)
return ad() # no args, your default handles fields selection
except Exception:
return str(obj)
pass
return _serialize_simple_obj(obj)
return obj
def _serialize_collection(items: Iterable[Any], requested_tails: List[str]) -> List[Dict[str, Any]]:
@ -319,8 +340,6 @@ class CRUDMixin:
if mapper is not None:
out[name] = _serialize_simple_obj(val)
continue
# If it's a collection and no subfields were requested, emit a light list
if isinstance(val, (list, tuple)):
out[name] = [_serialize_leaf(v) for v in val]
else:

26
crudkit/core/meta.py Normal file
View file

@ -0,0 +1,26 @@
from __future__ import annotations
from functools import lru_cache
from typing import NamedTuple, Any, Dict, Tuple
from sqlalchemy import inspect
from sqlalchemy.orm import Mapper
class RelInfo(NamedTuple):
    """Lightweight, hashable description of one mapped relationship."""
    key: str                 # relationship attribute name on the parent model
    uselist: bool            # True for collections (one-to-many / many-to-many)
    target_cls: type | None  # mapped class on the far side, if resolvable
@lru_cache(maxsize=512)
def mapper_info(model: type) -> Dict[str, Any]:
    """Return cached mapper metadata for *model*.

    The result dict carries:
      - "cols":  tuple of mapped column attribute keys, in mapper order
      - "rels":  {relationship name: RelInfo}
      - "pks":   tuple of primary-key column keys
      - "table": the model's __table__ (or None if not table-mapped)

    Raises whatever sqlalchemy.inspect() raises for a non-mapped class.
    """
    mapper: Mapper = inspect(model)

    relationships: Dict[str, RelInfo] = {}
    for rel in mapper.relationships:
        # rel.mapper.class_ may be absent for exotic mappings; fall back to None.
        target = getattr(rel.mapper, "class_", None)
        relationships[rel.key] = RelInfo(rel.key, bool(rel.uselist), target)

    return {
        "cols": tuple(attr.key for attr in mapper.column_attrs),
        "rels": relationships,
        "pks": tuple(col.key for col in mapper.primary_key),
        "table": getattr(model, "__table__", None),
    }
def rel_map(model: type) -> Dict[str, RelInfo]:
    """Relationship-name -> RelInfo mapping for *model* (cached via mapper_info)."""
    meta = mapper_info(model)
    return meta["rels"]
def column_names_for_model(model: type) -> Tuple[str, ...]:
    """Mapped column attribute names for *model*, in mapper order (cached)."""
    meta = mapper_info(model)
    return meta["cols"]

16
crudkit/core/params.py Normal file
View file

@ -0,0 +1,16 @@
def is_truthy(val) -> bool:
    """Interpret *val* as a boolean flag from a query/form parameter.

    Accepts the usual spellings ("1", "true", "yes", "on") case-insensitively
    and ignores surrounding whitespace — URL-decoded parameters frequently
    arrive as " true" or "1 ", which the strict comparison would misread as
    false. Non-strings are stringified first, so True and 1 are truthy.
    Anything else (including None, "", "0", "no") is falsy.
    """
    return str(val).strip().lower() in ('1', 'true', 'yes', 'on')
def normalize_fields_param(params: dict | None) -> list[str]:
if not params:
return []
raw = params.get("fields")
if isinstance(raw, (list, tuple)):
out: list[str] = []
for item in raw:
if isinstance(item, str):
out.extend([p for p in (s.strip() for s in item.split(",")) if p])
return out
if isinstance(raw, str):
return [p for p in (s.strip() for s in raw.split(",")) if p]
return []

View file

@ -1,18 +1,19 @@
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from flask import current_app
from typing import Any, Callable, Type, TypeVar, Generic, Optional, Protocol, runtime_checkable, cast
from sqlalchemy import and_, func, inspect, or_, text, select, literal
from sqlalchemy import and_, func, inspect, or_, text
from sqlalchemy.engine import Engine, Connection
from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria, aliased, with_parent
from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria
from sqlalchemy.orm.attributes import InstrumentedAttribute
from sqlalchemy.sql import operators, visitors
from sqlalchemy.sql.elements import UnaryExpression, ColumnElement
from crudkit.core import to_jsonable, deep_diff, diff_to_patch, filter_to_columns, normalize_payload
from crudkit.core.base import Version
from crudkit.core.meta import rel_map, column_names_for_model
from crudkit.core.params import is_truthy, normalize_fields_param
from crudkit.core.spec import CRUDSpec, CollPred
from crudkit.core.types import OrderSpec, SeekWindow
from crudkit.backend import BackendInfo, make_backend_info
@ -139,23 +140,6 @@ def _dedupe_order_by(order_by):
out.append(ob)
return out
def _is_truthy(val):
return str(val).lower() in ('1', 'true', 'yes', 'on')
def _normalize_fields_param(params: dict | None) -> list[str]:
if not params:
return []
raw = params.get("fields")
if isinstance(raw, (list, tuple)):
out: list[str] = []
for item in raw:
if isinstance(item, str):
out.extend([p for p in (s.strip() for s in item.split(",")) if p])
return out
if isinstance(raw, str):
return [p for p in (s.strip() for s in raw.split(",")) if p]
return []
# ---------------------------- CRUD service ----------------------------
class CRUDService(Generic[T]):
@ -200,7 +184,7 @@ class CRUDService(Generic[T]):
def _apply_soft_delete_criteria_for_children(self, query, plan: "CRUDService._Plan", params):
# Skip if caller explicitly asked for deleted
if _is_truthy((params or {}).get("include_deleted")):
if is_truthy((params or {}).get("include_deleted")):
return query
seen = set()
@ -218,7 +202,8 @@ class CRUDService(Generic[T]):
with_loader_criteria(
target_cls,
lambda cls: cls.is_deleted == False,
include_aliases=True
include_aliases=True,
propagate_to_loaders=True,
)
)
return query
@ -248,21 +233,18 @@ class CRUDService(Generic[T]):
order_spec = self._extract_order_spec(root_alias, plan.order_by)
# Inner subquery must be ordered exactly like the real query
# Inner subquery ordered exactly like the real query
inner = query.order_by(*self._order_clauses(order_spec, invert=False))
# IMPORTANT: Build subquery that actually exposes the order-by columns
# under predictable names, then select FROM that and reference subq.c[...]
subq = inner.with_entities(*order_spec.cols).subquery()
# Label each order-by column with deterministic, unique names
labeled_cols = []
for idx, col in enumerate(order_spec.cols):
base = getattr(col, "key", None) or getattr(col, "name", None)
name = f"ord_{idx}_{base}" if base else f"ord_{idx}"
labeled_cols.append(col.label(name))
# Map the order columns to the subquery columns by key/name
cols_on_subq = []
for col in order_spec.cols:
key = getattr(col, "key", None) or getattr(col, "name", None)
if not key:
# Fallback, but frankly your order cols should have names
raise ValueError("Order-by column is missing a key/name")
cols_on_subq.append(getattr(subq.c, key))
subq = inner.with_entities(*labeled_cols).subquery()
cols_on_subq = [getattr(subq.c, c.key) for c in labeled_cols]
# Now the outer anchor query orders and offsets on the subquery columns
anchor_q = (
@ -282,7 +264,7 @@ class CRUDService(Generic[T]):
return list(row) # tuple-like -> list for _key_predicate
def _apply_not_deleted(self, query, root_alias, params):
if self.supports_soft_delete and not _is_truthy((params or {}).get("include_deleted")):
if self.supports_soft_delete and not is_truthy((params or {}).get("include_deleted")):
return query.filter(getattr(root_alias, "is_deleted") == False)
return query
@ -299,7 +281,10 @@ class CRUDService(Generic[T]):
def _stable_order_by(self, root_alias, given_order_by):
order_by = list(given_order_by or [])
if not order_by:
# Safe default: primary key(s) only. No unlabeled expressions.
return _dedupe_order_by(self._default_order_by(root_alias))
# Dedupe what the user gave us, then ensure PK tie-breakers exist
order_by = _dedupe_order_by(order_by)
mapper: Mapper[Any] = cast(Mapper[Any], inspect(self.model))
present = {_order_identity(_unwrap_ob(ob)[0]) for ob in order_by}
@ -362,7 +347,7 @@ class CRUDService(Generic[T]):
proj_opts: Any
def _plan(self, params, root_alias) -> _Plan:
req_fields = _normalize_fields_param(params)
req_fields = normalize_fields_param(params)
spec = CRUDSpec(self.model, params or {}, root_alias)
filters = spec.parse_filters()
@ -373,7 +358,16 @@ class CRUDService(Generic[T]):
join_paths = tuple(spec.get_join_paths())
filter_tables = _collect_tables_from_filters(filters)
fkeys = set()
_, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
# Build projection opts only if there are true scalar columns requested.
# Bare relationship fields like "owner" should not force root column pruning.
column_names = set(column_names_for_model(self.model))
has_scalar_column_tokens = any(
(("." not in f) and (f in column_names))
for f in (req_fields or [])
)
_, proj_opts = (compile_projection(self.model, req_fields)
if (req_fields and has_scalar_column_tokens)
else ([], []))
# filter_tables = ()
# fkeys = set()
@ -391,22 +385,53 @@ class CRUDService(Generic[T]):
return query.options(Load(root_alias).load_only(*only_cols)) if only_cols else query
def _apply_firsthop_strategies(self, query, root_alias, plan: _Plan):
nested_first_hops = { p[0] for p in (plan.rel_field_names or {}).keys() if len(p) > 1 }
joined_rel_keys = set()
joined_rel_keys: set[str] = set()
rels = rel_map(self.model) # {name: RelInfo}
# Existing behavior: join everything in join_paths (to-one), selectinload collections
# Eager join to-one relationships requested as bare fields (e.g., fields=owner)
requested_scalars = set(plan.root_field_names or [])
for key in requested_scalars:
info = rels.get(key)
if info and not info.uselist and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 1) Join to-one relationships explicitly requested as bare fields
requested_scalars = set(plan.root_field_names or []) # names like "owner", "supervisor"
for key in requested_scalars:
info = rels.get(key)
if info and not info.uselist and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 2) Join to-one relationships from parsed join_paths
for base_alias, rel_attr, target_alias in plan.join_paths:
if base_alias is not root_alias:
continue
prop = getattr(rel_attr, "property", None)
is_collection = bool(getattr(prop, "uselist", False))
if not is_collection:
if prop and not prop.uselist:
query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
joined_rel_keys.add(prop.key if prop is not None else rel_attr.key)
else:
# 3) Ensure to-one touched by filters is joined
if plan.filter_tables:
for key, info in rels.items():
if info.uselist or not info.target_cls:
continue
target_tbl = getattr(info.target_cls, "__table__", None)
if target_tbl is not None and target_tbl in plan.filter_tables and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 4) Collections via selectinload, optionally load_only for requested child columns
for base_alias, rel_attr, _target_alias in plan.join_paths:
if base_alias is not root_alias:
continue
prop = getattr(rel_attr, "property", None)
if not prop or not prop.uselist:
continue
opt = selectinload(rel_attr)
child_names = (plan.collection_field_names or {}).get(rel_attr.key, [])
child_names = (plan.collection_field_names or {}).get(prop.key, [])
if child_names:
target_cls = prop.mapper.class_
cols = [getattr(target_cls, n, None) for n in child_names]
@ -415,42 +440,45 @@ class CRUDService(Generic[T]):
opt = opt.load_only(*cols)
query = query.options(opt)
# NEW: if a first-hop to-one relationships target table is present in filter expressions,
# make sure we actually JOIN it (outer) so filters dont create a cartesian product.
if plan.filter_tables:
mapper: Mapper[Any] = cast(Mapper[Any], inspect(self.model))
for rel in mapper.relationships:
if rel.uselist:
continue # only first-hop to-one here
target_tbl = getattr(rel.mapper.class_, "__table__", None)
if target_tbl is None:
for path, _names in (plan.rel_field_names or {}).items():
if not path:
continue
if target_tbl in plan.filter_tables:
if rel.key in joined_rel_keys:
continue # already joined via join_paths
query = query.join(getattr(root_alias, rel.key), isouter=True)
joined_rel_keys.add(rel.key)
if log.isEnabledFor(logging.DEBUG):
info = []
for base_alias, rel_attr, target_alias in plan.join_paths:
if base_alias is not root_alias:
# Build a chained selectinload for each relationship segment in the path
first = path[0]
info = rels.get(first)
if not info or info.target_cls is None:
continue
prop = getattr(rel_attr, "property", None)
sel = getattr(target_alias, "selectable", None)
info.append({
"rel": (getattr(prop, "key", getattr(rel_attr, "key", "?"))),
"collection": bool(getattr(prop, "uselist", False)),
"target_keys": list(_selectable_keys(sel)) if sel is not None else [],
"joined": (getattr(prop, "key", None) in joined_rel_keys),
})
log.debug("FIRSTHOP: %s.%s first-hop paths: %s",
self.model.__name__, getattr(root_alias, "__table__", type(root_alias)).key,
info)
# Start with selectinload on the first hop
opt = selectinload(getattr(root_alias, first))
# Walk deeper segments
cur_cls = info.target_cls
for seg in path[1:]:
sub = rel_map(cur_cls).get(seg)
if not sub or sub.target_cls is None:
# if segment isn't a relationship, we stop the chain
break
opt = opt.selectinload(getattr(cur_cls, seg))
cur_cls = sub.target_cls
query = query.options(opt)
return query
def _apply_proj_opts(self, query, plan: _Plan):
return query.options(*plan.proj_opts) if plan.proj_opts else query
if not plan.proj_opts:
return query
try:
return query.options(*plan.proj_opts)
except KeyError as e:
# Seen "KeyError: 'col'" when alias-column remapping meets unlabeled exprs.
log.debug("Projection options disabled due to %r; proceeding without them.", e)
return query
except Exception as e:
log.debug("Projection options failed (%r); proceeding without them.", e)
return query
def _projection_meta(self, plan: _Plan):
if plan.req_fields: