Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
Yaro Kasear
dd863dba99 More attempts to fix a bug. 2025-10-21 08:54:15 -05:00
Yaro Kasear
bd2daf921a This introduces an ugly regression. 2025-10-20 15:20:52 -05:00
5 changed files with 219 additions and 130 deletions

View file

@ -7,7 +7,7 @@ from hashlib import md5
from urllib.parse import urlencode
from werkzeug.exceptions import HTTPException
from crudkit.core.service import _is_truthy
from crudkit.core.params import is_truthy
MAX_JSON = 1_000_000
@ -39,7 +39,7 @@ def _json_error(e: Exception, status: int = 400):
return jsonify({"status": "error", "error": msg}), status
def _bool_param(d: dict[str, str], key: str, default: bool) -> bool:
return _is_truthy(d.get(key, "1" if default else "0"))
return is_truthy(d.get(key, "1" if default else "0"))
def generate_crud_blueprint(model, service, *, base_prefix: str | None = None, rest: bool = True, rpc: bool = True):
"""

View file

@ -1,22 +1,11 @@
from functools import lru_cache
from typing import Any, Dict, Iterable, List, Optional, Tuple, Set, cast
from sqlalchemy import Column, Integer, DateTime, Boolean, String, JSON, func, inspect
from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, RelationshipProperty, Mapper
from sqlalchemy import Column, Integer, DateTime, String, JSON, func, inspect
from sqlalchemy.orm import declarative_mixin, declarative_base, NO_VALUE, Mapper
from sqlalchemy.orm.state import InstanceState
Base = declarative_base()
from crudkit.core.meta import column_names_for_model
@lru_cache(maxsize=512)
def _column_names_for_model(cls: type) -> tuple[str, ...]:
try:
mapper = inspect(cls)
return tuple(prop.key for prop in mapper.column_attrs)
except Exception:
names: list[str] = []
for c in cls.__mro__:
if hasattr(c, "__table__"):
names.extend(col.name for col in c.__table__.columns)
return tuple(dict.fromkeys(names))
Base = declarative_base()
def _sa_state(obj: Any) -> Optional[InstanceState[Any]]:
"""Safely get SQLAlchemy InstanceState (or None)."""
@ -60,7 +49,16 @@ def _safe_get_loaded_attr(obj, name):
if attr is not None:
val = attr.loaded_value
return None if val is NO_VALUE else val
try:
# In rare cases, state.dict may be stale; reject descriptors
got = getattr(obj, name, None)
from sqlalchemy.orm.attributes import InstrumentedAttribute as _Instr
if got is not None and not isinstance(got, _Instr):
# Do not trigger load; only return if it was already present in __dict__
if hasattr(obj, "__dict__") and name in obj.__dict__:
return got
except Exception:
pass
return None
except Exception:
return None
@ -72,16 +70,10 @@ def _identity_key(obj) -> Tuple[type, Any]:
except Exception:
return (type(obj), id(obj))
def _is_collection_rel(prop: RelationshipProperty) -> bool:
try:
return prop.uselist is True
except Exception:
return False
def _serialize_simple_obj(obj) -> Dict[str, Any]:
"""Columns only (no relationships)."""
out: Dict[str, Any] = {}
for name in _column_names_for_model(type(obj)):
for name in column_names_for_model(type(obj)):
try:
out[name] = getattr(obj, name)
except Exception:
@ -153,9 +145,11 @@ def _split_field_tokens(fields: Iterable[str]) -> Tuple[List[str], Dict[str, Lis
def _deep_get_loaded(obj: Any, dotted: str) -> Any:
"""
Deep get with no lazy loads:
- For all but the final hop, use _safe_get_loaded_attr (mapped-only, no getattr).
- For the final hop, try _safe_get_loaded_attr first; if None, fall back to getattr()
to allow computed properties/hybrids that rely on already-loaded columns.
- Intermediate hops via _safe_get_loaded_attr only.
- Final hop:
* If relationship and not loaded: return None
* else allow getattr for non-relationship attrs (hybrids/properties) that compute from already-loaded data.
* Serialize ORM objects at the leaf into dicts
"""
parts = dotted.split(".")
if not parts:
@ -171,30 +165,57 @@ def _deep_get_loaded(obj: Any, dotted: str) -> Any:
return None
last = parts[-1]
# Try safe fetch on the last hop first
# If last is an ORM relationship, only return it if already loaded.
m = _sa_mapper(cur)
if m is not None and last in m.relationships:
val = _safe_get_loaded_attr(cur, last)
if val is not None:
return val
# Fall back to getattr for computed/hybrid attributes on an already-loaded object
try:
return getattr(cur, last, None)
except Exception:
if val is None:
return None
# serialize relationship value
if _sa_mapper(val) is not None:
return _serialize_simple_obj(val)
if isinstance(val, (list, tuple)):
out = []
for v in val:
out.append(_serialize_simple_obj(v) if _sa_mapper(v) is not None else v)
return out
return val
# Not a relationship: try loaded value, else safe getattr
val = _safe_get_loaded_attr(cur, last)
if val is None:
try:
# getattr here will not lazy-load relationships because we already gated those
val = getattr(cur, last, None)
except Exception:
val = None
if _sa_mapper(val) is not None:
return _serialize_simple_obj(val)
if isinstance(val, (list, tuple)):
return [_serialize_simple_obj(v) if _sa_mapper(v) is not None else v for v in val]
return val
def _serialize_leaf(obj: Any) -> Any:
"""
Lead serialization for values we put into as_dict():
- If object has as_dict(), call as_dict() with no args (caller controls field shapes).
- Else return value as-is (Flask/JSON encoder will handle datetimes, etc., via app config).
Leaf serialization for non-dotted scalar fields:
- If it's an ORM object with as_dict(), use it.
- Else if it's an ORM object, serialize columns only.
- Else return the value as-is.
"""
if obj is None:
return None
if _sa_mapper(obj) is not None:
ad = getattr(obj, "as_dict", None)
if callable(ad):
try:
return ad(None)
return ad() # no args, your default handles fields selection
except Exception:
return str(obj)
pass
return _serialize_simple_obj(obj)
return obj
def _serialize_collection(items: Iterable[Any], requested_tails: List[str]) -> List[Dict[str, Any]]:
@ -319,8 +340,6 @@ class CRUDMixin:
if mapper is not None:
out[name] = _serialize_simple_obj(val)
continue
# If it's a collection and no subfields were requested, emit a light list
if isinstance(val, (list, tuple)):
out[name] = [_serialize_leaf(v) for v in val]
else:

26
crudkit/core/meta.py Normal file
View file

@ -0,0 +1,26 @@
from __future__ import annotations
from functools import lru_cache
from typing import NamedTuple, Any, Dict, Tuple
from sqlalchemy import inspect
from sqlalchemy.orm import Mapper
class RelInfo(NamedTuple):
    """Lightweight, hashable description of one mapped relationship."""
    key: str                 # relationship attribute name on the parent model
    uselist: bool            # True for collections (one-to-many / many-to-many)
    target_cls: type | None  # mapped class on the far side, if resolvable
@lru_cache(maxsize=512)
def mapper_info(model: type) -> Dict[str, Any]:
    """Return cached mapper metadata for *model*.

    The result dict carries:
      - "cols":  tuple of mapped column attribute keys, in mapper order
      - "rels":  {relationship name: RelInfo}
      - "pks":   tuple of primary-key column keys
      - "table": the model's __table__ (or None if not table-mapped)

    Raises whatever sqlalchemy.inspect() raises for a non-mapped class.
    """
    mapper: Mapper = inspect(model)

    relationships: Dict[str, RelInfo] = {}
    for rel in mapper.relationships:
        # rel.mapper.class_ may be absent for exotic mappings; fall back to None.
        target = getattr(rel.mapper, "class_", None)
        relationships[rel.key] = RelInfo(rel.key, bool(rel.uselist), target)

    return {
        "cols": tuple(attr.key for attr in mapper.column_attrs),
        "rels": relationships,
        "pks": tuple(col.key for col in mapper.primary_key),
        "table": getattr(model, "__table__", None),
    }
def rel_map(model: type) -> Dict[str, RelInfo]:
    """Relationship-name -> RelInfo mapping for *model* (cached via mapper_info)."""
    meta = mapper_info(model)
    return meta["rels"]
def column_names_for_model(model: type) -> Tuple[str, ...]:
    """Mapped column attribute names for *model*, in mapper order (cached)."""
    meta = mapper_info(model)
    return meta["cols"]

16
crudkit/core/params.py Normal file
View file

@ -0,0 +1,16 @@
def is_truthy(val) -> bool:
    """Interpret *val* as a boolean flag from a query/form parameter.

    Accepts the usual spellings ("1", "true", "yes", "on") case-insensitively
    and ignores surrounding whitespace — URL-decoded parameters frequently
    arrive as " true" or "1 ", which the strict comparison would misread as
    false. Non-strings are stringified first, so True and 1 are truthy.
    Anything else (including None, "", "0", "no") is falsy.
    """
    return str(val).strip().lower() in ('1', 'true', 'yes', 'on')
def normalize_fields_param(params: dict | None) -> list[str]:
if not params:
return []
raw = params.get("fields")
if isinstance(raw, (list, tuple)):
out: list[str] = []
for item in raw:
if isinstance(item, str):
out.extend([p for p in (s.strip() for s in item.split(",")) if p])
return out
if isinstance(raw, str):
return [p for p in (s.strip() for s in raw.split(",")) if p]
return []

View file

@ -1,18 +1,19 @@
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from flask import current_app
from typing import Any, Callable, Type, TypeVar, Generic, Optional, Protocol, runtime_checkable, cast
from sqlalchemy import and_, func, inspect, or_, text, select, literal
from sqlalchemy import and_, func, inspect, or_, text
from sqlalchemy.engine import Engine, Connection
from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria, aliased, with_parent
from sqlalchemy.orm import Load, Session, with_polymorphic, Mapper, selectinload, with_loader_criteria
from sqlalchemy.orm.attributes import InstrumentedAttribute
from sqlalchemy.sql import operators, visitors
from sqlalchemy.sql.elements import UnaryExpression, ColumnElement
from crudkit.core import to_jsonable, deep_diff, diff_to_patch, filter_to_columns, normalize_payload
from crudkit.core.base import Version
from crudkit.core.meta import rel_map, column_names_for_model
from crudkit.core.params import is_truthy, normalize_fields_param
from crudkit.core.spec import CRUDSpec, CollPred
from crudkit.core.types import OrderSpec, SeekWindow
from crudkit.backend import BackendInfo, make_backend_info
@ -139,23 +140,6 @@ def _dedupe_order_by(order_by):
out.append(ob)
return out
def _is_truthy(val):
return str(val).lower() in ('1', 'true', 'yes', 'on')
def _normalize_fields_param(params: dict | None) -> list[str]:
if not params:
return []
raw = params.get("fields")
if isinstance(raw, (list, tuple)):
out: list[str] = []
for item in raw:
if isinstance(item, str):
out.extend([p for p in (s.strip() for s in item.split(",")) if p])
return out
if isinstance(raw, str):
return [p for p in (s.strip() for s in raw.split(",")) if p]
return []
# ---------------------------- CRUD service ----------------------------
class CRUDService(Generic[T]):
@ -200,7 +184,7 @@ class CRUDService(Generic[T]):
def _apply_soft_delete_criteria_for_children(self, query, plan: "CRUDService._Plan", params):
# Skip if caller explicitly asked for deleted
if _is_truthy((params or {}).get("include_deleted")):
if is_truthy((params or {}).get("include_deleted")):
return query
seen = set()
@ -218,7 +202,8 @@ class CRUDService(Generic[T]):
with_loader_criteria(
target_cls,
lambda cls: cls.is_deleted == False,
include_aliases=True
include_aliases=True,
propagate_to_loaders=True,
)
)
return query
@ -248,21 +233,18 @@ class CRUDService(Generic[T]):
order_spec = self._extract_order_spec(root_alias, plan.order_by)
# Inner subquery must be ordered exactly like the real query
# Inner subquery ordered exactly like the real query
inner = query.order_by(*self._order_clauses(order_spec, invert=False))
# IMPORTANT: Build subquery that actually exposes the order-by columns
# under predictable names, then select FROM that and reference subq.c[...]
subq = inner.with_entities(*order_spec.cols).subquery()
# Label each order-by column with deterministic, unique names
labeled_cols = []
for idx, col in enumerate(order_spec.cols):
base = getattr(col, "key", None) or getattr(col, "name", None)
name = f"ord_{idx}_{base}" if base else f"ord_{idx}"
labeled_cols.append(col.label(name))
# Map the order columns to the subquery columns by key/name
cols_on_subq = []
for col in order_spec.cols:
key = getattr(col, "key", None) or getattr(col, "name", None)
if not key:
# Fallback, but frankly your order cols should have names
raise ValueError("Order-by column is missing a key/name")
cols_on_subq.append(getattr(subq.c, key))
subq = inner.with_entities(*labeled_cols).subquery()
cols_on_subq = [getattr(subq.c, c.key) for c in labeled_cols]
# Now the outer anchor query orders and offsets on the subquery columns
anchor_q = (
@ -282,7 +264,7 @@ class CRUDService(Generic[T]):
return list(row) # tuple-like -> list for _key_predicate
def _apply_not_deleted(self, query, root_alias, params):
if self.supports_soft_delete and not _is_truthy((params or {}).get("include_deleted")):
if self.supports_soft_delete and not is_truthy((params or {}).get("include_deleted")):
return query.filter(getattr(root_alias, "is_deleted") == False)
return query
@ -299,7 +281,10 @@ class CRUDService(Generic[T]):
def _stable_order_by(self, root_alias, given_order_by):
order_by = list(given_order_by or [])
if not order_by:
# Safe default: primary key(s) only. No unlabeled expressions.
return _dedupe_order_by(self._default_order_by(root_alias))
# Dedupe what the user gave us, then ensure PK tie-breakers exist
order_by = _dedupe_order_by(order_by)
mapper: Mapper[Any] = cast(Mapper[Any], inspect(self.model))
present = {_order_identity(_unwrap_ob(ob)[0]) for ob in order_by}
@ -362,7 +347,7 @@ class CRUDService(Generic[T]):
proj_opts: Any
def _plan(self, params, root_alias) -> _Plan:
req_fields = _normalize_fields_param(params)
req_fields = normalize_fields_param(params)
spec = CRUDSpec(self.model, params or {}, root_alias)
filters = spec.parse_filters()
@ -373,7 +358,16 @@ class CRUDService(Generic[T]):
join_paths = tuple(spec.get_join_paths())
filter_tables = _collect_tables_from_filters(filters)
fkeys = set()
_, proj_opts = compile_projection(self.model, req_fields) if req_fields else ([], [])
# Build projection opts only if there are true scalar columns requested.
# Bare relationship fields like "owner" should not force root column pruning.
column_names = set(column_names_for_model(self.model))
has_scalar_column_tokens = any(
(("." not in f) and (f in column_names))
for f in (req_fields or [])
)
_, proj_opts = (compile_projection(self.model, req_fields)
if (req_fields and has_scalar_column_tokens)
else ([], []))
# filter_tables = ()
# fkeys = set()
@ -391,22 +385,53 @@ class CRUDService(Generic[T]):
return query.options(Load(root_alias).load_only(*only_cols)) if only_cols else query
def _apply_firsthop_strategies(self, query, root_alias, plan: _Plan):
nested_first_hops = { p[0] for p in (plan.rel_field_names or {}).keys() if len(p) > 1 }
joined_rel_keys = set()
joined_rel_keys: set[str] = set()
rels = rel_map(self.model) # {name: RelInfo}
# Existing behavior: join everything in join_paths (to-one), selectinload collections
# Eager join to-one relationships requested as bare fields (e.g., fields=owner)
requested_scalars = set(plan.root_field_names or [])
for key in requested_scalars:
info = rels.get(key)
if info and not info.uselist and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 1) Join to-one relationships explicitly requested as bare fields
requested_scalars = set(plan.root_field_names or []) # names like "owner", "supervisor"
for key in requested_scalars:
info = rels.get(key)
if info and not info.uselist and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 2) Join to-one relationships from parsed join_paths
for base_alias, rel_attr, target_alias in plan.join_paths:
if base_alias is not root_alias:
continue
prop = getattr(rel_attr, "property", None)
is_collection = bool(getattr(prop, "uselist", False))
if not is_collection:
if prop and not prop.uselist:
query = query.join(target_alias, rel_attr.of_type(target_alias), isouter=True)
joined_rel_keys.add(prop.key if prop is not None else rel_attr.key)
else:
# 3) Ensure to-one touched by filters is joined
if plan.filter_tables:
for key, info in rels.items():
if info.uselist or not info.target_cls:
continue
target_tbl = getattr(info.target_cls, "__table__", None)
if target_tbl is not None and target_tbl in plan.filter_tables and key not in joined_rel_keys:
query = query.join(getattr(root_alias, key), isouter=True)
joined_rel_keys.add(key)
# 4) Collections via selectinload, optionally load_only for requested child columns
for base_alias, rel_attr, _target_alias in plan.join_paths:
if base_alias is not root_alias:
continue
prop = getattr(rel_attr, "property", None)
if not prop or not prop.uselist:
continue
opt = selectinload(rel_attr)
child_names = (plan.collection_field_names or {}).get(rel_attr.key, [])
child_names = (plan.collection_field_names or {}).get(prop.key, [])
if child_names:
target_cls = prop.mapper.class_
cols = [getattr(target_cls, n, None) for n in child_names]
@ -415,42 +440,45 @@ class CRUDService(Generic[T]):
opt = opt.load_only(*cols)
query = query.options(opt)
# NEW: if a first-hop to-one relationships target table is present in filter expressions,
# make sure we actually JOIN it (outer) so filters dont create a cartesian product.
if plan.filter_tables:
mapper: Mapper[Any] = cast(Mapper[Any], inspect(self.model))
for rel in mapper.relationships:
if rel.uselist:
continue # only first-hop to-one here
target_tbl = getattr(rel.mapper.class_, "__table__", None)
if target_tbl is None:
for path, _names in (plan.rel_field_names or {}).items():
if not path:
continue
if target_tbl in plan.filter_tables:
if rel.key in joined_rel_keys:
continue # already joined via join_paths
query = query.join(getattr(root_alias, rel.key), isouter=True)
joined_rel_keys.add(rel.key)
if log.isEnabledFor(logging.DEBUG):
info = []
for base_alias, rel_attr, target_alias in plan.join_paths:
if base_alias is not root_alias:
# Build a chained selectinload for each relationship segment in the path
first = path[0]
info = rels.get(first)
if not info or info.target_cls is None:
continue
prop = getattr(rel_attr, "property", None)
sel = getattr(target_alias, "selectable", None)
info.append({
"rel": (getattr(prop, "key", getattr(rel_attr, "key", "?"))),
"collection": bool(getattr(prop, "uselist", False)),
"target_keys": list(_selectable_keys(sel)) if sel is not None else [],
"joined": (getattr(prop, "key", None) in joined_rel_keys),
})
log.debug("FIRSTHOP: %s.%s first-hop paths: %s",
self.model.__name__, getattr(root_alias, "__table__", type(root_alias)).key,
info)
# Start with selectinload on the first hop
opt = selectinload(getattr(root_alias, first))
# Walk deeper segments
cur_cls = info.target_cls
for seg in path[1:]:
sub = rel_map(cur_cls).get(seg)
if not sub or sub.target_cls is None:
# if segment isn't a relationship, we stop the chain
break
opt = opt.selectinload(getattr(cur_cls, seg))
cur_cls = sub.target_cls
query = query.options(opt)
return query
def _apply_proj_opts(self, query, plan: _Plan):
return query.options(*plan.proj_opts) if plan.proj_opts else query
if not plan.proj_opts:
return query
try:
return query.options(*plan.proj_opts)
except KeyError as e:
# Seen "KeyError: 'col'" when alias-column remapping meets unlabeled exprs.
log.debug("Projection options disabled due to %r; proceeding without them.", e)
return query
except Exception as e:
log.debug("Projection options failed (%r); proceeding without them.", e)
return query
def _projection_meta(self, plan: _Plan):
if plan.req_fields: