From bd2daf921a490c87bc2f31ad113ff53a4df460a3 Mon Sep 17 00:00:00 2001 From: Yaro Kasear Date: Mon, 20 Oct 2025 15:20:52 -0500 Subject: [PATCH] This introduces an uglt regression. --- crudkit/api/flask_api.py | 4 +- crudkit/core/base.py | 95 ++++++++++++++++-------------- crudkit/core/meta.py | 26 +++++++++ crudkit/core/params.py | 16 +++++ crudkit/core/service.py | 123 +++++++++++++-------------------------- 5 files changed, 136 insertions(+), 128 deletions(-) create mode 100644 crudkit/core/meta.py create mode 100644 crudkit/core/params.py diff --git a/crudkit/api/flask_api.py b/crudkit/api/flask_api.py index 39e7c49..29672f8 100644 --- a/crudkit/api/flask_api.py +++ b/crudkit/api/flask_api.py @@ -7,7 +7,7 @@ from hashlib import md5 from urllib.parse import urlencode from werkzeug.exceptions import HTTPException -from crudkit.core.service import _is_truthy +from crudkit.core.params import is_truthy MAX_JSON = 1_000_000 @@ -39,7 +39,7 @@ def _json_error(e: Exception, status: int = 400): return jsonify({"status": "error", "error": msg}), status def _bool_param(d: dict[str, str], key: str, default: bool) -> bool: - return _is_truthy(d.get(key, "1" if default else "0")) + return is_truthy(d.get(key, "1" if default else "0")) def generate_crud_blueprint(model, service, *, base_prefix: str | None = None, rest: bool = True, rpc: bool = True): """ diff --git a/crudkit/core/base.py b/crudkit/core/base.py index d73a04f..43a605d 100644 --- a/crudkit/core/base.py +++ b/crudkit/core/base.py @@ -1,22 +1,11 @@ -from functools import lru_cache from typing import Any, Dict, Iterable, List, Optional, Tuple, Set, cast -from sqlalchemy import Column, Integer, DateTime, Boolean, String, JSON, func, inspect -from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, RelationshipProperty, Mapper +from sqlalchemy import Column, Integer, DateTime, String, JSON, func, inspect +from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, Mapper from sqlalchemy.orm.state import InstanceState -Base = declarative_base() +from crudkit.core.meta import column_names_for_model -@lru_cache(maxsize=512) -def _column_names_for_model(cls: type) -> tuple[str, ...]: - try: - mapper = inspect(cls) - return tuple(prop.key for prop in mapper.column_attrs) - except Exception: - names: list[str] = [] - for c in cls.__mro__: - if hasattr(c, "__table__"): - names.extend(col.name for col in c.__table__.columns) - return tuple(dict.fromkeys(names)) +Base = declarative_base() def _sa_state(obj: Any) -> Optional[InstanceState[Any]]: """Safely get SQLAlchemy InstanceState (or None).""" @@ -60,7 +49,16 @@ def _safe_get_loaded_attr(obj, name): if attr is not None: val = attr.loaded_value return None if val is NO_VALUE else val - + try: + # In rare cases, state.dict may be stale; reject descriptors + got = getattr(obj, name, None) + from sqlalchemy.orm.attributes import InstrumentedAttribute as _Instr + if got is not None and not isinstance(got, _Instr): + # Do not trigger load; only return if it was already present in __dict__ + if hasattr(obj, "__dict__") and name in obj.__dict__: + return got + except Exception: + pass return None except Exception: return None @@ -72,16 +70,10 @@ def _identity_key(obj) -> Tuple[type, Any]: except Exception: return (type(obj), id(obj)) -def _is_collection_rel(prop: RelationshipProperty) -> bool: - try: - return prop.uselist is True - except Exception: - return False - def _serialize_simple_obj(obj) -> Dict[str, Any]: """Columns only (no relationships).""" out: Dict[str, Any] = {} - for name in _column_names_for_model(type(obj)): + for name in column_names_for_model(type(obj)): try: out[name] = getattr(obj, name) except Exception: @@ -153,16 +145,16 @@ def _split_field_tokens(fields: Iterable[str]) -> Tuple[List[str], Dict[str, Lis def _deep_get_loaded(obj: Any, dotted: str) -> Any: """ Deep get with no lazy loads: - - For all but the final hop, use _safe_get_loaded_attr (mapped-only, no getattr). - - For the final hop, try _safe_get_loaded_attr first; if None, fall back to getattr() - to allow computed properties/hybrids that rely on already-loaded columns. + - Walk intermediate hops via _safe_get_loaded_attr only. + - Final hop: prefer _safe_get_loaded_attr; if that returns an ORM object or a collection of ORM objects, + serialize to simple dicts; else return the plain value. """ parts = dotted.split(".") if not parts: return None cur = obj - # Traverse up to the parent of the last token safely + # Traverse up to parent of last token without lazy-loads for part in parts[:-1]: if cur is None: return None @@ -171,30 +163,45 @@ def _deep_get_loaded(obj: Any, dotted: str) -> Any: return None last = parts[-1] - # Try safe fetch on the last hop first val = _safe_get_loaded_attr(cur, last) - if val is not None: - return val - # Fall back to getattr for computed/hybrid attributes on an already-loaded object - try: - return getattr(cur, last, None) - except Exception: + if val is None: + # Do NOT lazy load. If it isn't loaded, treat as absent. return None + # If the final hop is an ORM object or collection, serialize it + if _sa_mapper(val) is not None: + return _serialize_simple_obj(val) + if isinstance(val, (list, tuple)): + out = [] + for v in val: + if _sa_mapper(v) is not None: + out.append(_serialize_simple_obj(v)) + else: + out.append(v) + return out + + # Plain scalar/computed value + return val + def _serialize_leaf(obj: Any) -> Any: """ - Lead serialization for values we put into as_dict(): - - If object has as_dict(), call as_dict() with no args (caller controls field shapes). - - Else return value as-is (Flask/JSON encoder will handle datetimes, etc., via app config). + Leaf serialization for non-dotted scalar fields: + - If it's an ORM object with as_dict(), use it. + - Else if it's an ORM object, serialize columns only. + - Else return the value as-is. """ if obj is None: return None - ad = getattr(obj, "as_dict", None) - if callable(ad): - try: - return ad(None) - except Exception: - return str(obj) + + if _sa_mapper(obj) is not None: + ad = getattr(obj, "as_dict", None) + if callable(ad): + try: + return ad() # no args, your default handles fields selection + except Exception: + pass + return _serialize_simple_obj(obj) + return obj def _serialize_collection(items: Iterable[Any], requested_tails: List[str]) -> List[Dict[str, Any]]: @@ -319,8 +326,6 @@ class CRUDMixin: if mapper is not None: out[name] = _serialize_simple_obj(val) continue - - # If it's a collection and no subfields were requested, emit a light list if isinstance(val, (list, tuple)): out[name] = [_serialize_leaf(v) for v in val] else: diff --git a/crudkit/core/meta.py b/crudkit/core/meta.py new file mode 100644 index 0000000..f6dabbc --- /dev/null +++ b/crudkit/core/meta.py @@ -0,0 +1,26 @@ +from __future__ import annotations +from functools import lru_cache +from typing import NamedTuple, Any, Dict, Tuple +from sqlalchemy import inspect +from sqlalchemy.orm import Mapper + +class RelInfo(NamedTuple): + key: str + uselist: bool + target_cls: type | None + +@lru_cache(maxsize=512) +def mapper_info(model: type) -> Dict[str, Any]: + m: Mapper = inspect(model) + cols = tuple(prop.key for prop in m.column_attrs) + rels = { r.key: RelInfo(r.key, bool(r.uselist), getattr(r.mapper, "class_", None)) + for r in m.relationships } + pks = tuple(c.key for c in m.primary_key) + table = getattr(model, "__table__", None) + return {"cols": cols, "rels": rels, "pks": pks, "table": table} + +def rel_map(model: type) -> Dict[str, RelInfo]: + return mapper_info(model)["rels"] + +def column_names_for_model(model: type) -> Tuple[str, ...]: + return mapper_info(model)["col"] diff --git a/crudkit/core/params.py b/crudkit/core/params.py new file mode 100644 index 0000000..2919af3 --- /dev/null +++ b/crudkit/core/params.py @@ -0,0 +1,16 @@ +def is_truthy(val) -> bool: + return str(val).lower() in ('1', 'true', 'yes', 'on') + +def normalize_fields_param(params: dict | None) -> list[str]: + if not params: + return [] + raw = params.get("fields") + if isinstance(raw, (list, tuple)): + out: list[str] = [] + for item in raw: + if isinstance(item, str): + out.extend([p for p in (s.strip() for s in item.split(",")) if p]) + return out + if isinstance(raw, str): + return [p for p in (s.strip() for s in raw.split(",")) if p] + return [] diff --git a/crudkit/core/service.py b/crudkit/core/service.py index fbad3a1..6fce1a4 100644 --- a/crudkit/core/service.py +++ b/crudkit/core/service.py @@ -1,18 +1,19 @@ from __future__ import annotations -from collections.abc import Iterable from dataclasses import dataclass from flask import current_app from typing import Any, Callable, Type, TypeVar, Generic, Optional, Protocol, runtime_checkable, cast -from sqlalchemy import and_, func, inspect, or_, text, select, literal +from sqlalchemy import and_, func, inspect, or_, text from sqlalchemy.engine import Engine, Connection -from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria, aliased, with_parent +from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.sql import operators, visitors from sqlalchemy.sql.elements import UnaryExpression, ColumnElement from crudkit.core import to_jsonable, deep_diff, diff_to_patch, filter_to_columns, normalize_payload from crudkit.core.base import Version +from crudkit.core.meta import rel_map +from crudkit.core.params import is_truthy, normalize_fields_param from crudkit.core.spec import CRUDSpec, CollPred from crudkit.core.types import OrderSpec, SeekWindow from crudkit.backend import BackendInfo, make_backend_info @@ -139,23 +140,6 @@ def _dedupe_order_by(order_by): out.append(ob) return out -def _is_truthy(val): - return str(val).lower() in ('1', 'true', 'yes', 'on') - -def _normalize_fields_param(params: dict | None) -> list[str]: - if not params: - return [] - raw = params.get("fields") - if isinstance(raw, (list, tuple)): - out: list[str] = [] - for item in raw: - if isinstance(item, str): - out.extend([p for p in (s.strip() for s in item.split(",")) if p]) - return out - if isinstance(raw, str): - return [p for p in (s.strip() for s in raw.split(",")) if p] - return [] - # ---------------------------- CRUD service ---------------------------- class CRUDService(Generic[T]): @@ -200,7 +184,7 @@ class CRUDService(Generic[T]): def _apply_soft_delete_criteria_for_children(self, query, plan: "CRUDService._Plan", params): # Skip if caller explicitly asked for deleted - if _is_truthy((params or {}).get("include_deleted")): + if is_truthy((params or {}).get("include_deleted")): return query seen = set() @@ -218,7 +202,8 @@ class CRUDService(Generic[T]): with_loader_criteria( target_cls, lambda cls: cls.is_deleted == False, - include_aliases=True + include_aliases=True, + propagate_to_loaders=True, ) ) return query @@ -248,21 +233,18 @@ class CRUDService(Generic[T]): order_spec = self._extract_order_spec(root_alias, plan.order_by) - # Inner subquery must be ordered exactly like the real query + # Inner subquery ordered exactly like the real query inner = query.order_by(*self._order_clauses(order_spec, invert=False)) - # IMPORTANT: Build subquery that actually exposes the order-by columns - # under predictable names, then select FROM that and reference subq.c[...] - subq = inner.with_entities(*order_spec.cols).subquery() + # Label each order-by column with deterministic, unique names + labeled_cols = [] + for idx, col in enumerate(order_spec.cols): + base = getattr(col, "key", None) or getattr(col, "name", None) + name = f"ord_{idx}_{base}" if base else f"ord_{idx}" + labeled_cols.append(col.label(name)) - # Map the order columns to the subquery columns by key/name - cols_on_subq = [] - for col in order_spec.cols: - key = getattr(col, "key", None) or getattr(col, "name", None) - if not key: - # Fallback, but frankly your order cols should have names - raise ValueError("Order-by column is missing a key/name") - cols_on_subq.append(getattr(subq.c, key)) + subq = inner.with_entities(*labeled_cols).subquery() + cols_on_subq = [getattr(subq.c, c.key) for c in labeled_cols] # Now the outer anchor query orders and offsets on the subquery columns anchor_q = ( @@ -282,7 +264,7 @@ class CRUDService(Generic[T]): return list(row) # tuple-like -> list for _key_predicate def _apply_not_deleted(self, query, root_alias, params): - if self.supports_soft_delete and not _is_truthy((params or {}).get("include_deleted")): + if self.supports_soft_delete and not is_truthy((params or {}).get("include_deleted")): return query.filter(getattr(root_alias, "is_deleted") == False) return query @@ -362,7 +344,7 @@ class CRUDService(Generic[T]): proj_opts: Any def _plan(self, params, root_alias) -> _Plan: - req_fields = _normalize_fields_param(params) + req_fields = normalize_fields_param(params) spec = CRUDSpec(self.model, params or {}, root_alias) filters = spec.parse_filters() @@ -391,62 +373,41 @@ class CRUDService(Generic[T]): return query.options(Load(root_alias).load_only(*only_cols)) if only_cols else query def _apply_firsthop_strategies(self, query, root_alias, plan: _Plan): - nested_first_hops = { p[0] for p in (plan.rel_field_names or {}).keys() if len(p) > 1 } - joined_rel_keys = set() + joined_rel_keys: set[str] = set() + rels = rel_map(self.model) - # Existing behavior: join everything in join_paths (to-one), selectinload collections for base_alias, rel_attr, target_alias in plan.join_paths: if base_alias is not root_alias: continue prop = getattr(rel_attr, "property", None) - is_collection = bool(getattr(prop, "uselist", False)) - - if not is_collection: + if prop and not prop.uselist: query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True) joined_rel_keys.add(prop.key if prop is not None else rel_attr.key) - else: - opt = selectinload(rel_attr) - child_names = (plan.collection_field_names or {}).get(rel_attr.key, []) - if child_names: - target_cls = prop.mapper.class_ - cols = [getattr(target_cls, n, None) for n in child_names] - cols = [c for c in cols if isinstance(c, InstrumentedAttribute)] - if cols: - opt = opt.load_only(*cols) - query = query.options(opt) - # NEW: if a first-hop to-one relationship’s target table is present in filter expressions, - # make sure we actually JOIN it (outer) so filters don’t create a cartesian product. if plan.filter_tables: - mapper: Mapper[Any] = cast(Mapper[Any], inspect(self.model)) - for rel in mapper.relationships: - if rel.uselist: - continue # only first-hop to-one here - target_tbl = getattr(rel.mapper.class_, "__table__", None) - if target_tbl is None: + for key, info in rels.items(): + if info.uselist or not info.target_cls: continue - if target_tbl in plan.filter_tables: - if rel.key in joined_rel_keys: - continue # already joined via join_paths - query = query.join(getattr(root_alias, rel.key), isouter=True) - joined_rel_keys.add(rel.key) - if log.isEnabledFor(logging.DEBUG): - info = [] - for base_alias, rel_attr, target_alias in plan.join_paths: - if base_alias is not root_alias: - continue - prop = getattr(rel_attr, "property", None) - sel = getattr(target_alias, "selectable", None) - info.append({ - "rel": (getattr(prop, "key", getattr(rel_attr, "key", "?"))), - "collection": bool(getattr(prop, "uselist", False)), - "target_keys": list(_selectable_keys(sel)) if sel is not None else [], - "joined": (getattr(prop, "key", None) in joined_rel_keys), - }) - log.debug("FIRSTHOP: %s.%s first-hop paths: %s", - self.model.__name__, getattr(root_alias, "__table__", type(root_alias)).key, - info) + target_tbl = getattr(info.target_cls, "__table__", None) + if target_tbl is not None and target_tbl in plan.filter_tables and key not in joined_rel_keys: + query = query.join(getattr(root_alias, key), isouter=True) + joined_rel_keys.add(key) + for base_alias, rel_attr, _target_alias in plan.join_paths: + if base_alias is not root_alias: + continue + prop = getattr(rel_attr, "property", None) + if not prop or not prop.uselist: + continue + opt = selectinload(rel_attr) + child_names = (plan.collection_field_names or {}).get(prop.key, []) + if child_names: + target_cls = prop.mapper.class_ + cols = [getattr(target_cls, n, None) for n in child_names] + cols = [c for c in cols if isinstance(c, InstrumentedAttribute)] + if cols: + opt = opt.load_only(*cols) + query = query.options(opt) return query def _apply_proj_opts(self, query, plan: _Plan):