"""HTTP defaults for ArcGIS REST helpers.
Private submodule; all public names are re-exported by
``restgdf.utils.getinfo`` to preserve import paths.
"""
from __future__ import annotations
from collections.abc import Mapping
import inspect
from typing import Any, Literal
from urllib.parse import urlencode
import aiohttp
from restgdf._config import get_config
# Header set advertising a JSON-first ``Accept`` preference list and a
# generic browser-style ``User-Agent``.
# NOTE(review): presumably attached to metadata requests (per the name);
# the consumers live outside this module -- confirm before changing values.
DEFAULT_METADATA_HEADERS = {
    "Accept": "application/json,text/plain,*/*",
    "User-Agent": "Mozilla/5.0",
}
# Fallback request parameters used by ``default_data`` when the caller does
# not provide its own defaults. Caller-supplied keys override these.
DEFAULTDICT: dict = {
    "where": "1=1",  # tautological filter expression
    "outFields": "*",  # wildcard field selection
    "returnGeometry": True,
    "returnCountOnly": False,
    "f": "json",  # response format
}
def default_data(
    data: dict | None = None,
    default_dict: dict | None = None,
) -> dict:
    """Return request parameters for an ArcGIS REST call with defaults applied.

    Args:
        data: Caller-supplied parameters. Each key here overrides the same
            key in the defaults. ``None`` (or an empty dict) contributes
            nothing.
        default_dict: Base defaults to start from. ``None`` selects the
            module-level ``DEFAULTDICT``. Any mapping that is explicitly
            supplied -- including an empty one -- is used exactly as given,
            so ``default_dict={}`` means "no defaults at all".

    Returns:
        A newly built dict; neither ``data`` nor the defaults are mutated.
    """
    # Compare against None rather than relying on truthiness: an empty dict
    # is a legitimate, explicit choice and must not be replaced by DEFAULTDICT.
    base = DEFAULTDICT if default_dict is None else default_dict
    return {**base, **(data or {})}
# T8 (R-74): practical ceiling for an ArcGIS REST GET request, measured as
# URL length plus URL-encoded body length. Beyond this size, servers (and
# intermediaries such as IIS/WAFs) routinely reject the GET with
# 414 URI Too Long, so restgdf falls back to POST.
# ``_choose_verb`` compares ``len(url) + separator + encoded body length``
# against this value with ``<=``, i.e. a request of exactly this size is
# still sent as GET.
_ARCGIS_URL_BODY_LIMIT: int = 8192
def _encoded_body_length(body: Mapping[str, object] | None) -> int:
"""Return the byte length of ``body`` as it would be URL-encoded."""
if not body:
return 0
return len(urlencode(dict(body), doseq=True))
def _choose_verb(
    url: str,
    body: Mapping[str, object] | None = None,
) -> Literal["POST", "GET"]:
    """Pick the HTTP verb for ``url`` + ``body`` based on total request size.

    The measured size is ``len(url)`` plus the URL-encoded length of
    ``body`` (via :func:`_encoded_body_length`), plus one extra character
    for the ``"?"`` separator whenever the encoded body is non-empty.

    * size ``<=`` :data:`_ARCGIS_URL_BODY_LIMIT` -> ``"GET"``
    * anything larger -> ``"POST"``

    ``body`` may be any mapping or ``None``; it is only measured, never
    mutated. A ``None`` or empty body adds nothing to the size, so the
    outcome then depends on ``len(url)`` alone.
    """
    body_size = _encoded_body_length(body)
    total = len(url) + body_size
    if body_size:
        # Room for the "?" that joins the path and the query string in a GET.
        total += 1
    return "GET" if total <= _ARCGIS_URL_BODY_LIMIT else "POST"
async def _arcgis_request(
    session: Any,
    url: str,
    body: Mapping[str, object] | None,
    **kwargs: Any,
) -> Any:
    """Send an ArcGIS request, choosing GET or POST in one central place.

    Routing rules, evaluated in order:

    1. If :func:`_session_requires_body_transport` reports that the session
       (or a session it wraps) uses ``"body"``/``"query"`` token transport,
       the request is always ``session.post(url, data=body, **kwargs)``.
       Such sessions place the auth token in the payload, and a GET would
       serialize that payload -- token included -- into the URL.
    2. Otherwise :func:`_choose_verb` decides by size. ``"GET"`` becomes
       ``session.get(url, params=..., **kwargs)``; anything else becomes
       ``session.post(url, data=body, **kwargs)``.

    On the GET path a non-empty ``body`` is passed through
    :func:`_coerce_params_for_get` first; an empty or ``None`` body is
    forwarded as-is. On the POST path ``body`` is forwarded untouched.
    ``kwargs`` are handed to the session call unchanged in both cases.

    Returns whatever the awaited session call returns.
    """
    # ``or`` short-circuits, so the size-based check only runs when the
    # session itself does not already force POST.
    must_post = _session_requires_body_transport(session) or (
        _choose_verb(url, body=body) != "GET"
    )
    if must_post:
        return await session.post(url, data=body, **kwargs)

    query = _coerce_params_for_get(body) if body else body
    return await session.get(url, params=query, **kwargs)
def _session_requires_body_transport(session: Any) -> bool:
    """Tell whether ``session`` or any session it wraps uses payload auth.

    Starting at ``session``, the function follows the ``_inner`` attribute
    from layer to layer. It returns ``True`` as soon as a layer's
    ``_transport`` attribute equals ``"body"`` or ``"query"``. It returns
    ``False`` when the chain ends (``_inner`` missing or ``None``) or loops
    back onto a layer that was already visited.

    Layers whose ``_transport`` is anything else -- including layers with
    no ``_transport`` attribute at all -- do not affect the result.
    Attributes are read through :func:`_safe_static_attr_value`.
    """
    visited: set[int] = set()
    layer: Any = session
    while layer is not None:
        marker = id(layer)
        if marker in visited:
            # Cycle in the wrapper chain; nothing new left to inspect.
            break
        visited.add(marker)
        if _safe_static_attr_value(layer, "_transport") in ("body", "query"):
            return True
        layer = _safe_static_attr_value(layer, "_inner")
    return False
def _safe_static_attr_value(obj: Any, name: str) -> Any:
"""Read an attribute without fabricating mock children.
``inspect.getattr_static`` protects against ``AsyncMock`` creating
synthetic attributes on demand, but returns raw descriptors for
property-backed attributes. Resolve descriptors only after confirming
the attribute exists statically.
"""
value = inspect.getattr_static(obj, name, None)
if value is None:
return None
if hasattr(value, "__get__"):
try:
return object.__getattribute__(obj, name)
except AttributeError:
return None
return value
def _coerce_params_for_get(
body: Mapping[str, object],
) -> dict[str, Any]:
"""Return ``body`` with values normalized for ``session.get(params=...)``.
``yarl`` (aiohttp's URL builder) rejects :class:`bool` and ``None``
values in query params with :class:`TypeError`, while the equivalent
``session.post(data=...)`` path serializes them happily via
:func:`urllib.parse.urlencode`. T8 (R-74) routes the same ArcGIS
payloads through either verb, so this helper keeps the GET path
byte-for-byte equivalent to what ArcGIS Server sees today:
booleans become lowercase ``"true"``/``"false"`` (ArcGIS's own wire
convention) and ``None`` becomes an empty string.
"""
coerced: dict[str, Any] = {}
for key, value in body.items():
if isinstance(value, bool):
coerced[key] = "true" if value else "false"
elif value is None:
coerced[key] = ""
else:
coerced[key] = value
return coerced
def default_timeout(settings: Any | None = None) -> aiohttp.ClientTimeout:
    """Build an :class:`aiohttp.ClientTimeout` from configuration.

    Args:
        settings: Optional duck-typed object exposing a ``timeout_seconds``
            attribute. When given, that attribute supplies the total
            timeout. When ``None``, the value comes from
            ``get_config().timeout.total_s`` instead.

    Returns:
        A fresh ``ClientTimeout`` whose ``total`` is the selected value
        converted with ``float``. Nothing is cached here and the inputs
        are only read, so each call reflects whatever the configuration
        returns at that moment.
    """
    if settings is None:
        seconds = get_config().timeout.total_s
    else:
        seconds = settings.timeout_seconds
    return aiohttp.ClientTimeout(total=float(seconds))