# Source code for restgdf.adapters.pandas

"""Pandas-gated tabular adapters.

Materialize row-shaped dict iterables into a :class:`pandas.DataFrame`. The
module itself is safe to import on a base restgdf install — pandas is loaded
lazily via :func:`restgdf.utils._optional.require_pandas` **inside** each
adapter function, so importing this module on a pandas-free install does not
raise. Calling an adapter function on such an install raises
:class:`restgdf.errors.OptionalDependencyError` with the canonical
``restgdf[geo]`` guidance.
"""

from __future__ import annotations

from collections.abc import AsyncIterable, Iterable, Sequence
from typing import TYPE_CHECKING, Any

from restgdf.utils._optional import require_pandas

if TYPE_CHECKING:  # pragma: no cover - import-time only
    from pandas import DataFrame

# Names exported by ``from restgdf.adapters.pandas import *``; the private
# ``_field_as_dict`` helper is deliberately left out.
__all__ = ["arows_to_dataframe", "resolve_domains", "rows_to_dataframe"]


def _field_as_dict(field: Any) -> dict[str, Any]:
    """Normalize a ``FieldSpec`` or raw-dict field descriptor to a dict."""
    if isinstance(field, dict):
        return field
    # pydantic v2 model → dict (includes ``model_extra`` keys).
    dump = getattr(field, "model_dump", None)
    if callable(dump):
        return dump()
    return dict(getattr(field, "__dict__", {}))


def resolve_domains(
    df: DataFrame,
    fields: Sequence[Any] | None,
) -> DataFrame:
    """Replace coded-value domain codes with their human-readable names.

    Post-processes an already-materialized ``pandas.DataFrame`` using ArcGIS
    layer field metadata:

    * **Coded-value domains** — values present in the domain's
      ``codedValues`` table are substituted for their ``name``. Codes absent
      from the table pass through unchanged. ``codedValues`` entries that
      lack either a ``code`` or a ``name`` key are ignored.
    * **Range domains** — values are left as-is. Out-of-range values are not
      flagged or coerced (callers who need strict validation should check
      ``[min, max]`` themselves using the layer's field metadata).

    The input DataFrame is **not** mutated. When at least one column has an
    applicable coded-value domain, the result is a copy made with
    ``df.copy()`` (a deep copy by default), returned even if none of the
    column's values happen to match a code. The original object is returned
    unchanged when ``fields`` is empty / ``None`` or carries no applicable
    domains.

    Parameters
    ----------
    df:
        DataFrame produced by :func:`rows_to_dataframe` /
        :func:`arows_to_dataframe`.
    fields:
        Sequence of field descriptors (either
        :class:`restgdf._models.responses.FieldSpec` instances or raw dicts)
        as found on :attr:`restgdf.FeatureLayer.metadata.fields`.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with applicable coded-value columns resolved.

    Examples
    --------
    >>> import pandas as pd
    >>> from restgdf.adapters.pandas import resolve_domains
    >>> df = pd.DataFrame({"STATUS": [1, 2, 99]})
    >>> fields = [{
    ...     "name": "STATUS",
    ...     "domain": {
    ...         "type": "codedValue",
    ...         "codedValues": [
    ...             {"name": "Active", "code": 1},
    ...             {"name": "Inactive", "code": 2},
    ...         ],
    ...     },
    ... }]
    >>> resolve_domains(df, fields)["STATUS"].tolist()
    ['Active', 'Inactive', 99]
    """
    if not fields:
        return df

    # Column name -> {code: display name} for every usable coded-value domain.
    coded_maps: dict[str, dict[Any, Any]] = {}
    for raw_field in fields:
        field = _field_as_dict(raw_field)
        name = field.get("name")
        domain = field.get("domain")
        if not name or not domain or name not in df.columns:
            continue
        if domain.get("type") != "codedValue":
            # Range domains (and any unknown variants) are intentionally
            # pass-through; see the docstring.
            continue
        coded_values = domain.get("codedValues") or []
        # Skip malformed entries instead of raising ``KeyError``: an entry is
        # only usable when it carries both the code and its display name.
        mapping = {
            cv["code"]: cv["name"]
            for cv in coded_values
            if "code" in cv and "name" in cv
        }
        if mapping:
            coded_maps[name] = mapping

    if not coded_maps:
        return df

    # Work on a copy so the caller's frame is never mutated.
    out = df.copy()
    for col, mapping in coded_maps.items():
        # ``Series.map`` with a dict leaves unmapped values as NaN; we
        # instead use ``replace`` so unknown codes pass through unchanged.
        out[col] = out[col].replace(mapping)
    return out


def rows_to_dataframe(rows: Iterable[dict[str, Any]]) -> DataFrame:
    """Build a ``pandas.DataFrame`` from an iterable of row-shaped dicts.

    pandas is resolved lazily, before ``rows`` is consumed, so a missing
    optional dependency is reported without touching the iterable.

    Parameters
    ----------
    rows:
        Any iterable of row-shaped dicts — typically produced by
        :func:`restgdf.adapters.dict.features_to_rows` or collected from
        :meth:`restgdf.FeatureLayer.stream_rows`. It is fully materialized
        into a list before the frame is built.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    restgdf.errors.OptionalDependencyError
        When ``pandas`` is not installed. Install the optional extra via
        ``pip install "restgdf[geo]"`` (which ships pandas alongside the
        geo stack) or install ``pandas`` directly.

    Examples
    --------
    >>> from restgdf.adapters.pandas import rows_to_dataframe
    >>> rows_to_dataframe([{"OBJECTID": 1, "NAME": "A"}])  # doctest: +SKIP
       OBJECTID NAME
    0         1    A

    See Also
    --------
    :meth:`restgdf.FeatureLayer.get_df`
        Async pandas-first tabular accessor that wraps this adapter over a
        live layer.
    """
    pandas_module = require_pandas("restgdf.adapters.pandas.rows_to_dataframe()")
    records = [*rows]
    return pandas_module.DataFrame(records)


async def arows_to_dataframe(rows: AsyncIterable[dict[str, Any]]) -> DataFrame:
    """Async counterpart of :func:`rows_to_dataframe`.

    Checks that pandas is available, consumes the async iterable to
    completion, then delegates to :func:`rows_to_dataframe`. The dependency
    check runs **before** the iterable is touched, so a pandas-free install
    fails immediately instead of after the whole stream has been drained.

    Parameters
    ----------
    rows:
        Async iterable of row-shaped dicts — typically
        :meth:`restgdf.FeatureLayer.stream_rows` or
        :func:`restgdf.adapters.stream.iter_rows`.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    restgdf.errors.OptionalDependencyError
        When ``pandas`` is not installed. Install via
        ``pip install "restgdf[geo]"`` (geo extra bundles pandas) or
        ``pip install pandas``.

    Examples
    --------
    >>> import asyncio
    >>> from restgdf.adapters.pandas import arows_to_dataframe
    >>> async def demo():
    ...     async def rows():
    ...         yield {"OBJECTID": 1}
    ...     return await arows_to_dataframe(rows())
    >>> asyncio.run(demo())  # doctest: +SKIP
       OBJECTID
    0         1

    See Also
    --------
    :meth:`restgdf.FeatureLayer.get_df`
        Convenience accessor equivalent to
        ``await arows_to_dataframe(layer.stream_rows())``.
    """
    # Fail fast: without this the missing-pandas error would only surface
    # inside ``rows_to_dataframe``, i.e. after every row had been awaited.
    require_pandas("restgdf.adapters.pandas.arows_to_dataframe()")
    materialized: list[dict[str, Any]] = [row async for row in rows]
    return rows_to_dataframe(materialized)