Source code for restgdf._models.crawl

"""S-4: Pydantic models for :func:`restgdf.utils.crawl.safe_crawl` output.

``safe_crawl`` aggregates results of a directory crawl and, unlike
``fetch_all_data``, never short-circuits on the first failure. Its return
value is a :class:`CrawlReport` containing:

* :attr:`CrawlReport.services` — a list of :class:`CrawlServiceEntry`
  (one per successfully-discovered service).
* :attr:`CrawlReport.errors` — a list of :class:`CrawlError` capturing
  every recoverable failure, tagged by ``stage``.
* :attr:`CrawlReport.metadata` — the parsed root
  :class:`~restgdf._models.responses.LayerMetadata` (absent when the root
  ``get_metadata`` call itself failed).

All three models are :class:`~restgdf._models._drift.PermissiveModel`
subclasses: ArcGIS servers may emit extra keys at any of these levels,
and missing fields must never raise. :class:`CrawlError` declares
``arbitrary_types_allowed=True`` so that the original
:class:`BaseException` that caused the failure can be preserved under
``exception`` for callers that want to re-raise; the default
:meth:`~pydantic.BaseModel.model_dump` output excludes it so the report
stays JSON-serializable.
"""

from __future__ import annotations


from pydantic import ConfigDict, Field

from restgdf._models._drift import PermissiveModel
from restgdf._models.responses import LayerMetadata


[docs] class CrawlError(PermissiveModel): """A single failure captured during :func:`safe_crawl`. ``stage`` identifies where the failure occurred. Standard stages emitted by ``safe_crawl`` are: * ``"base_metadata"`` — the root ``get_metadata`` call failed. * ``"folder_metadata"`` — a per-folder ``get_metadata`` call failed. * ``"service_metadata"`` — a per-service ``service_metadata`` call failed. ``exception`` preserves the original :class:`BaseException` so callers can re-raise; it is excluded from the default :meth:`~pydantic.BaseModel.model_dump` output for JSON safety. """ model_config = ConfigDict( extra="allow", populate_by_name=True, arbitrary_types_allowed=True, ) stage: str | None = None url: str | None = None message: str | None = None exception: BaseException | None = Field(default=None, exclude=True)
[docs] class CrawlServiceEntry(PermissiveModel): """A service entry in :attr:`CrawlReport.services`. ``metadata`` is the :class:`~restgdf._models.responses.LayerMetadata` returned by ``service_metadata`` for this service. It is ``None`` when the ``service_metadata`` call failed; in that case a corresponding :class:`CrawlError` is recorded in :attr:`CrawlReport.errors`. """ name: str | None = None url: str | None = None type: str | None = None metadata: LayerMetadata | None = None
[docs] class CrawlReport(PermissiveModel): """Aggregated result of a directory crawl. Unlike the legacy ``fetch_all_data`` return shape (which short-circuits to ``{"error": exc}`` on the first failure), :class:`CrawlReport` always returns partial successes alongside captured errors. """ services: list[CrawlServiceEntry] = Field(default_factory=list) errors: list[CrawlError] = Field(default_factory=list) metadata: LayerMetadata | None = None
__all__ = ["CrawlError", "CrawlReport", "CrawlServiceEntry"]