Source code for restgdf.utils._stats

"""Async statistics/aggregation helpers for ArcGIS REST endpoints.

Private submodule; all public names are re-exported by
``restgdf.utils.getinfo`` to preserve import paths.
"""

from __future__ import annotations

from aiohttp import ClientSession
from pandas import DataFrame, concat

from restgdf._client.request import build_conservative_query_data
from restgdf._models._drift import _parse_response
from restgdf._models.responses import FeaturesResponse
from restgdf.utils._deprecations import deprecated_alias
from restgdf.utils._http import default_headers
from restgdf.utils.token import ArcGISTokenSession


async def get_unique_values(
    url: str,
    fields: tuple | str,
    session: ClientSession | ArcGISTokenSession,
    sortby: str | None = None,
    **kwargs,
) -> list | DataFrame:
    """Get the unique values for one or more fields.

    Posts a ``returnDistinctValues`` query to ``{url}/query`` and extracts the
    ``attributes`` of every returned feature.

    Args:
        url: Layer URL; ``/query`` is appended to it.
        fields: A single field name, or a sequence of field names.
        session: Session object whose ``post`` coroutine is awaited.
        sortby: Optional field name to sort by. For a single-field query the
            values are sorted only when ``sortby`` equals that field; for a
            multi-field query it is passed to ``DataFrame.sort_values``.
        **kwargs: ``data`` is handed, together with the default query
            parameters, to ``build_conservative_query_data``; ``headers`` is
            passed through ``default_headers``; everything else is forwarded
            to ``session.post``.

    Returns:
        A list of values when ``fields`` is a string or a one-element
        sequence (an empty list when the service returns no features);
        otherwise a DataFrame with one row per feature (an empty DataFrame
        with the requested columns when there are no features).
    """
    datadict = build_conservative_query_data(
        {
            "where": "1=1",
            "f": "json",
            "returnGeometry": False,
            "returnDistinctValues": True,
            "outFields": fields if isinstance(fields, str) else ",".join(fields),
        },
        kwargs.get("data"),
    )
    # "data" has been consumed above; it must not reach session.post twice.
    xkwargs: dict = {k: v for k, v in kwargs.items() if k != "data"}
    response = await session.post(
        f"{url}/query",
        data=datadict,
        headers=default_headers(xkwargs.pop("headers", None)),
        **xkwargs,
    )
    raw = await response.json(content_type=None)
    envelope = _parse_response(FeaturesResponse, raw, context=f"{url}/query")
    features = envelope.features or []

    # Resolve the single-field case once so both spellings share one path.
    single_field: str | None = None
    if isinstance(fields, str):
        single_field = fields
    elif len(fields) == 1:
        single_field = fields[0]

    if single_field is not None:
        values = [feature["attributes"][single_field] for feature in features]
        if sortby and sortby == single_field:
            values = sorted(values)
        # Return the list explicitly: `values or frame` would turn an empty
        # result into None because an empty list is falsy.
        return values

    if not features:
        # pandas.concat raises ValueError on an empty sequence, so build the
        # empty result directly with the requested columns.
        return DataFrame(columns=list(fields))

    frame = concat(
        [DataFrame(feature).T.reset_index(drop=True) for feature in features],
        ignore_index=True,
    )
    if sortby:
        frame = frame.sort_values(sortby).reset_index(drop=True)
    return frame
async def get_value_counts(
    url: str,
    field: str,
    session: ClientSession | ArcGISTokenSession,
    **kwargs,
) -> DataFrame:
    """Get the value counts for a field.

    Posts a grouped ``count`` statistics query to ``{url}/query`` and returns
    one row per distinct value.

    Args:
        url: Layer URL; ``/query`` is appended to it.
        field: Name of the field to group by and count.
        session: Session object whose ``post`` coroutine is awaited.
        **kwargs: ``data`` entries override the default query parameters;
            ``headers`` is passed through ``default_headers``; everything
            else is forwarded to ``session.post``.

    Returns:
        A DataFrame built from each feature's ``attributes``, sorted by
        ``"{field}_count"`` in descending order. When the service returns no
        features, an empty DataFrame with the columns ``field`` and
        ``"{field}_count"`` is returned.
    """
    count_column = f"{field}_count"
    statstr = f'[{{"statisticType":"count","onStatisticField":"{field}","outStatisticFieldName":"{count_column}"}}]'
    overrides = kwargs.pop("data", None) or {}
    data = {
        "where": "1=1",
        "f": "json",
        "returnGeometry": False,
        "outFields": field,
        "outStatistics": statstr,
        "groupByFieldsForStatistics": field,
        # Caller-supplied entries are spread last so they win over defaults.
        **overrides,
    }
    response = await session.post(
        f"{url}/query",
        data=data,
        headers=default_headers(kwargs.pop("headers", None)),
        **kwargs,
    )
    raw = await response.json(content_type=None)
    envelope = _parse_response(FeaturesResponse, raw, context=f"{url}/query")
    features = envelope.features or []
    if not features:
        # pandas.concat raises ValueError on an empty sequence; an empty
        # result is a valid answer, so return an empty frame instead.
        return DataFrame(columns=[field, count_column])

    counts = concat(
        [DataFrame(feature["attributes"], index=[0]) for feature in features],
        ignore_index=True,
    )
    return counts.sort_values(count_column, ascending=False).reset_index(drop=True)
async def nested_count(
    url: str,
    fields,
    session: ClientSession | ArcGISTokenSession,
    **kwargs,
) -> DataFrame:
    """Get the nested value counts for a group of fields.

    Posts a ``count`` statistics query grouped by ``fields`` to
    ``{url}/query``. The result keeps the count column of ``fields[1]``
    (renamed to ``"Count"``) and drops the count column of ``fields[0]``.

    Args:
        url: Layer URL; ``/query`` is appended to it.
        fields: Sequence of field names. ``fields[0]`` and ``fields[1]`` are
            both indexed, so at least two names are required.
        session: Session object whose ``post`` coroutine is awaited.
        **kwargs: ``data`` entries override the default query parameters;
            ``headers`` is passed through ``default_headers``; everything
            else is forwarded to ``session.post``.

    Returns:
        A DataFrame sorted by ``fields[0]`` ascending, then ``"Count"``
        descending. When the service returns no features, an empty DataFrame
        with the columns ``[*fields, "Count"]`` is returned.

    Raises:
        IndexError: If the response has no column starting with
            ``"{fields[0]}_count"`` or ``"{fields[1]}_count"``.
    """
    stat_items = ",".join(
        f'{{"statisticType":"count","onStatisticField":"{f}","outStatisticFieldName":"{f}_count"}}'
        for f in fields
    )
    statstr = f"[{stat_items}]"
    overrides = kwargs.pop("data", None) or {}
    data = {
        "where": "1=1",
        "f": "json",
        "returnGeometry": False,
        "outFields": ",".join(fields),
        "outStatistics": statstr,
        "groupByFieldsForStatistics": ",".join(fields),
        # Caller-supplied entries are spread last so they win over defaults.
        **overrides,
    }
    response = await session.post(
        f"{url}/query",
        data=data,
        headers=default_headers(kwargs.pop("headers", None)),
        **kwargs,
    )
    raw = await response.json(content_type=None)
    envelope = _parse_response(FeaturesResponse, raw, context=f"{url}/query")
    features = envelope.features or []
    if not features:
        # pandas.concat raises ValueError on an empty sequence; an empty
        # result is a valid answer, so return an empty frame instead.
        return DataFrame(columns=[*fields, "Count"])

    cc = concat(
        [DataFrame(feature).T.reset_index(drop=True) for feature in features],
        ignore_index=True,
    )
    # Count columns are matched by prefix rather than by exact name.
    dropcol = [c for c in cc.columns if c.startswith(f"{fields[0]}_count")][0]
    rencol = [c for c in cc.columns if c.startswith(f"{fields[1]}_count")][0]
    return (
        cc.drop(columns=dropcol)
        .rename(columns={rencol: "Count"})
        .sort_values([fields[0], "Count"], ascending=[True, False])
        .reset_index(drop=True)
    )
# Legacy spellings kept for backward compatibility (deprecated in Phase 6).
# Each alias is built from the current function, the old name and the new
# name; see `_deprecations.deprecated_alias` for how the wrappers behave.
getuniquevalues = deprecated_alias(
    get_unique_values,
    "getuniquevalues",
    "get_unique_values",
)
getvaluecounts = deprecated_alias(
    get_value_counts,
    "getvaluecounts",
    "get_value_counts",
)
nestedcount = deprecated_alias(
    nested_count,
    "nestedcount",
    "nested_count",
)