Source code for restgdf.utils._pagination

"""Pure pagination planner for ArcGIS REST ``/query`` traversal.

Private submodule. Public symbols (:class:`PaginationPlan`,
:func:`build_pagination_plan`) are re-exported via
:mod:`restgdf.utils.getinfo` to preserve the patch-seam convention used
elsewhere in ``restgdf.utils``.

The planner is pure math: given a feature count and a server-advertised
``maxRecordCount`` (plus an optional caller-supplied
``maxRecordCountFactor``), it produces the ``(resultOffset,
resultRecordCount)`` tuples that drive paged ``/query`` calls. It does
not know about HTTP, authentication, or metadata fetches; call sites
(today :func:`restgdf.utils.getgdf.get_query_data_batches`) wrap the
tuples into request bodies.

Clamp semantics for ``maxRecordCountFactor`` match ArcGIS convention:
the advertised factor is an upper bound published by the service;
requesting a larger factor is silently clamped down server-side, so the
planner clamps proactively and emits a single ``WARNING`` via
``restgdf.pagination`` for observability.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Final

from restgdf._logging import get_logger

# Module-level logger requested under the "pagination" name; the module
# docstring refers to this channel as ``restgdf.pagination``.
_LOG = get_logger("pagination")
# Default value of the ``factor`` keyword of ``build_pagination_plan``.
_DEFAULT_FACTOR: Final[float] = 1.0


@dataclass(frozen=True)
class PaginationPlan:
    """Immutable pagination schedule returned by :func:`build_pagination_plan`.

    Attributes
    ----------
    total_records : int
        Number of features, across the whole layer, that the plan covers.
    max_record_count : int
        Per-page cap advertised by the server.
    max_record_count_factor : float
        Factor in effect once any clamp against the advertised upper bound
        has been applied. Identical to the caller-supplied ``factor`` when
        nothing was clamped.
    effective_page_size : int
        Page size the batches were laid out with, i.e.
        ``max(1, int(max_record_count * max_record_count_factor))``.
    batches : tuple
        ``(resultOffset, resultRecordCount)`` pairs. The tuple is empty
        when ``total_records == 0``; the final pair may carry a count
        smaller than ``effective_page_size`` (partial tail page).
    """

    total_records: int
    max_record_count: int
    max_record_count_factor: float
    effective_page_size: int
    batches: tuple[tuple[int, int], ...]
def build_pagination_plan(
    total_records: int,
    max_record_count: int,
    *,
    factor: float = _DEFAULT_FACTOR,
    advertised_factor: float | None = None,
) -> PaginationPlan:
    """Compute a :class:`PaginationPlan` for ``total_records`` rows.

    Parameters
    ----------
    total_records : int
        Non-negative total row count (typically the result of
        ``get_feature_count``).
    max_record_count : int
        Positive server-advertised per-page cap.
    factor : float, optional
        Caller-supplied multiplier on ``max_record_count``. Defaults to
        1.0 (pure ``max_record_count`` pagination).
    advertised_factor : float or None, optional
        Server-advertised
        ``advancedQueryCapabilities.maxRecordCountFactor`` upper bound.
        When provided and ``factor > advertised_factor``, the factor is
        clamped down and a single warning is logged under
        ``restgdf.pagination``.

    Returns
    -------
    PaginationPlan
        Plan carrying the effective (post-clamp) factor, the resulting
        page size ``max(1, int(max_record_count * effective_factor))``
        and the ``(resultOffset, resultRecordCount)`` batches covering
        ``total_records`` rows.

    Raises
    ------
    ValueError
        If ``total_records < 0``, ``max_record_count <= 0``,
        ``factor <= 0``, or the factor left after clamping is not a
        finite number (``nan`` / ``inf``) and therefore cannot be turned
        into a page size.
    """
    if total_records < 0:
        raise ValueError("total_records must be >= 0")
    if max_record_count <= 0:
        raise ValueError("max_record_count must be > 0")
    if factor <= 0:
        raise ValueError("factor must be > 0")

    effective_factor = factor
    if advertised_factor is not None and factor > advertised_factor:
        _LOG.warning(
            "maxRecordCountFactor clamp",
            extra={
                "requested_factor": factor,
                "advertised_factor": advertised_factor,
            },
        )
        effective_factor = advertised_factor

    # Finiteness is checked on the *effective* factor, after clamping, so an
    # infinite request against a finite advertised bound still clamps. Only a
    # product that cannot become an integer is rejected: int() raises
    # OverflowError for +/-inf and ValueError for nan, and both are reported
    # as the documented ValueError.
    try:
        raw_page_size = int(max_record_count * effective_factor)
    except (OverflowError, ValueError) as exc:
        raise ValueError("effective factor must be a finite number") from exc

    # Never plan zero-sized pages, even when the product truncates to 0.
    effective_page_size = max(1, raw_page_size)

    # An empty range (total_records == 0) naturally yields an empty tuple.
    batches: tuple[tuple[int, int], ...] = tuple(
        (offset, min(effective_page_size, total_records - offset))
        for offset in range(0, total_records, effective_page_size)
    )

    return PaginationPlan(
        total_records=total_records,
        max_record_count=max_record_count,
        max_record_count_factor=effective_factor,
        effective_page_size=effective_page_size,
        batches=batches,
    )
# Names exported by a star-import of this module.
__all__ = ["PaginationPlan", "build_pagination_plan"]