Skip to content

Feature

BaseFeature is the most important class in Metaxy. Features are defined by extending it.

metaxy.BaseFeature pydantic-model

Bases: BaseModel

Show JSON schema:
{
  "additionalProperties": false,
  "properties": {
    "metaxy_provenance_by_field": {
      "additionalProperties": {
        "type": "string"
      },
      "description": "Field-level provenance hashes (maps field names to hashes)",
      "title": "Metaxy Provenance By Field",
      "type": "object"
    },
    "metaxy_provenance": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Hash of metaxy_provenance_by_field",
      "title": "Metaxy Provenance"
    },
    "metaxy_feature_version": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Hash of the feature definition (dependencies + fields + code_versions)",
      "title": "Metaxy Feature Version"
    },
    "metaxy_project_version": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Hash of the entire feature graph project version",
      "title": "Metaxy Project Version"
    },
    "metaxy_data_version_by_field": {
      "anyOf": [
        {
          "additionalProperties": {
            "type": "string"
          },
          "type": "object"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Field-level data version hashes (maps field names to version hashes)",
      "title": "Metaxy Data Version By Field"
    },
    "metaxy_data_version": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Hash of metaxy_data_version_by_field",
      "title": "Metaxy Data Version"
    },
    "metaxy_created_at": {
      "anyOf": [
        {
          "format": "date-time",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Timestamp when the metadata row was created (UTC)",
      "title": "Metaxy Created At"
    },
    "metaxy_materialization_id": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "External orchestration run ID (e.g., Dagster Run ID)",
      "title": "Metaxy Materialization Id"
    }
  },
  "title": "BaseFeature",
  "type": "object"
}

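Fields:

  • metaxy_provenance_by_field (dict[str, str])
  • metaxy_provenance (str | None)
  • metaxy_feature_version (str | None)
  • metaxy_project_version (str | None)
  • metaxy_data_version_by_field (dict[str, str] | None)
  • metaxy_data_version (str | None)
  • metaxy_created_at (AwareDatetime | None)
  • metaxy_materialization_id (str | None)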

Validators:

  • _validate_id_columns_exist

Attributes

metaxy.BaseFeature.metaxy_provenance_by_field pydantic-field

metaxy_provenance_by_field: dict[str, str]

Field-level provenance hashes (maps field names to hashes)

metaxy.BaseFeature.metaxy_provenance pydantic-field

metaxy_provenance: str | None = None

Hash of metaxy_provenance_by_field

metaxy.BaseFeature.metaxy_feature_version pydantic-field

metaxy_feature_version: str | None = None

Hash of the feature definition (dependencies + fields + code_versions)

metaxy.BaseFeature.metaxy_project_version pydantic-field

metaxy_project_version: str | None = None

Hash of the entire feature graph project version

metaxy.BaseFeature.metaxy_data_version_by_field pydantic-field

metaxy_data_version_by_field: dict[str, str] | None = None

Field-level data version hashes (maps field names to version hashes)

metaxy.BaseFeature.metaxy_data_version pydantic-field

metaxy_data_version: str | None = None

Hash of metaxy_data_version_by_field

metaxy.BaseFeature.metaxy_created_at pydantic-field

metaxy_created_at: AwareDatetime | None = None

Timestamp when the metadata row was created (UTC)

metaxy.BaseFeature.metaxy_materialization_id pydantic-field

metaxy_materialization_id: str | None = None

External orchestration run ID (e.g., Dagster Run ID)

Functions

metaxy.BaseFeature.metaxy_project classmethod

metaxy_project() -> str

Return the project this feature belongs to.

Source code in src/metaxy/models/feature.py
@classmethod
def metaxy_project(cls) -> str:
    """Return the name of the Metaxy project this feature class belongs to.

    Returns:
        The value of the class-level ``__metaxy_project__`` attribute.
    """
    project_name: str = cls.__metaxy_project__
    return project_name

metaxy.BaseFeature.table_name classmethod

table_name() -> str

Get SQL-like table name for this feature.

Converts feature key to SQL-compatible table name by joining parts with double underscores, consistent with IbisMetadataStore.

Returns:

  • str –

    Table name string (e.g., "my_namespace__my_feature")

Example
class VideoFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="video/processing", id_columns=["id"])):
    id: str


VideoFeature.table_name()
# 'video__processing'
Source code in src/metaxy/models/feature.py
@classmethod
def table_name(cls) -> str:
    """Return the SQL-style table name for this feature.

    The name is produced by the feature spec, which turns the feature key
    into a SQL-compatible identifier by joining its parts with double
    underscores (the same convention IbisMetadataStore uses).

    Returns:
        Table name string (e.g., "my_namespace__my_feature")

    Example:
        ```py
        class VideoFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="video/processing", id_columns=["id"])):
            id: str


        VideoFeature.table_name()
        # 'video__processing'
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.table_name()

metaxy.BaseFeature.feature_version classmethod

feature_version() -> str

Get hash of feature specification.

Returns a hash representing the feature's complete configuration:

  • Feature key
  • Field definitions and code versions
  • Dependencies (feature-level and field-level)

This hash changes when you modify:

  • Field code versions
  • Dependencies
  • Field definitions

Used to distinguish current vs historical metafield provenance hashes. Stored in the 'metaxy_feature_version' column of metadata DataFrames.

Returns:

  • str –

    SHA256 hex digest (like git short hashes)

Example
class MyFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="my/feature", id_columns=["id"])):
    id: str


MyFeature.feature_version()
# 'a3f8b2c1...'
Source code in src/metaxy/models/feature.py
@classmethod
def feature_version(cls) -> str:
    """Return the version hash of this feature's specification.

    The hash represents the feature's complete configuration:

    - Feature key
    - Field definitions and code versions
    - Dependencies (feature-level and field-level)

    It changes whenever any of the following is modified:

    - Field code versions
    - Dependencies
    - Field definitions

    The value is stored in the 'metaxy_feature_version' column of metadata
    DataFrames and is used to tell current provenance hashes apart from
    historical ones.

    Returns:
        SHA256 hex digest (like git short hashes)

    Example:
        ```py
        class MyFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="my/feature", id_columns=["id"])):
            id: str


        MyFeature.feature_version()
        # 'a3f8b2c1...'
        ```
    """
    # The feature graph owns the version computation; it is looked up by key.
    lookup_version = cls.graph.get_feature_version
    return lookup_version(cls.spec().key)

metaxy.BaseFeature.feature_spec_version classmethod

feature_spec_version() -> str

Get hash of the complete feature specification.

Returns a hash representing ALL specification properties including:

  • Feature key
  • Dependencies
  • Fields
  • Code versions
  • Any future metadata, tags, or other properties

Unlike feature_version which only hashes computational properties (for migration triggering), feature_spec_version captures the entire specification for reproducibility and audit purposes.

Returns:

  • str –

    SHA256 hex digest of the complete specification

Example
class MyFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="my/feature2", id_columns=["id"])):
    id: str


MyFeature.feature_spec_version()
# 'def456...'  # Different from feature_version
Source code in src/metaxy/models/feature.py
@classmethod
def feature_spec_version(cls) -> str:
    """Return the hash of the complete feature specification.

    The hash covers ALL specification properties, including:

    - Feature key
    - Dependencies
    - Fields
    - Code versions
    - Any future metadata, tags, or other properties

    This differs from feature_version, which only hashes computational
    properties (for migration triggering): feature_spec_version captures the
    entire specification for reproducibility and audit purposes.

    Returns:
        SHA256 hex digest of the complete specification

    Example:
        ```py
        class MyFeature(mx.BaseFeature, spec=mx.FeatureSpec(key="my/feature2", id_columns=["id"])):
            id: str


        MyFeature.feature_spec_version()
        # 'def456...'  # Different from feature_version
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.feature_spec_version

metaxy.BaseFeature.provenance_by_field classmethod

provenance_by_field() -> dict[str, str]

Get the code-level field provenance for this feature.

This returns a static hash based on code versions and dependencies, not sample-level field provenance computed from upstream data.

Returns:

  • dict[str, str] –

    Dictionary mapping field keys to their provenance hashes.

Source code in src/metaxy/models/feature.py
@classmethod
def provenance_by_field(cls) -> dict[str, str]:
    """Return the code-level provenance hash of every field of this feature.

    These are static hashes derived from code versions and dependencies;
    they are not the sample-level field provenance computed from upstream
    data.

    Returns:
        Dictionary mapping field keys to their provenance hashes.
    """
    # The feature graph computes the per-field hashes, looked up by feature key.
    lookup_by_field = cls.graph.get_feature_version_by_field
    return lookup_by_field(cls.spec().key)

metaxy.BaseFeature.load_input classmethod

load_input(
    joiner: Any, upstream_refs: dict[str, LazyFrame[Any]]
) -> tuple[LazyFrame[Any], dict[str, str]]

Join upstream feature metadata.

Override for custom join logic (1:many, different keys, filtering, etc.).

Parameters:

  • joiner (Any) –

    UpstreamJoiner from MetadataStore

  • upstream_refs (dict[str, LazyFrame[Any]]) –

    Upstream feature metadata references (lazy where possible)

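Returns:

  • tuple[LazyFrame[Any], dict[str, str]] –

    (joined_upstream, upstream_column_mapping), where joined_upstream is all upstream data joined together and upstream_column_mapping maps upstream_key -> column name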

Source code in src/metaxy/models/feature.py
@classmethod
def load_input(
    cls,
    joiner: Any,
    upstream_refs: dict[str, "nw.LazyFrame[Any]"],
) -> tuple["nw.LazyFrame[Any]", dict[str, str]]:
    """Join the metadata of all upstream features.

    Subclasses can override this to implement custom join logic
    (1:many, different keys, filtering, etc.).

    Args:
        joiner: UpstreamJoiner from MetadataStore
        upstream_refs: Upstream feature metadata references (lazy where possible)

    Returns:
        (joined_upstream, upstream_column_mapping)
        - joined_upstream: All upstream data joined together
        - upstream_column_mapping: Maps upstream_key -> column name
    """
    from metaxy.models.feature_spec import FeatureDep

    # Column selections and renames requested by each dependency, keyed by
    # the string form of the upstream feature key.
    upstream_columns: dict[str, tuple[str, ...] | None] = {}
    upstream_renames: dict[str, dict[str, str] | None] = {}

    for dependency in cls.spec().deps or ():
        # Only FeatureDep entries carry select/rename information.
        if not isinstance(dependency, FeatureDep):
            continue
        upstream_name = dependency.feature.to_string()
        upstream_columns[upstream_name] = dependency.select
        upstream_renames[upstream_name] = dependency.rename

    return joiner.join_upstream(
        upstream_refs=upstream_refs,
        feature_spec=cls.spec(),
        feature_plan=cls.graph.get_feature_plan(cls.spec().key),
        upstream_columns=upstream_columns,
        upstream_renames=upstream_renames,
    )

Code Version Access

Retrieve a feature's code version from its spec: MyFeature.spec().code_version.

metaxy.get_feature_by_key

get_feature_by_key(
    key: CoercibleToFeatureKey,
) -> FeatureDefinition

Get a FeatureDefinition by its key from the current graph.

Parameters:

  • key (CoercibleToFeatureKey) –

    Feature key to look up (can be FeatureKey, list of strings, slash-separated string, etc.)

Returns:

  • FeatureDefinition –

    FeatureDefinition for the feature

Raises:

  • KeyError –

    If no feature with the given key is registered

Source code in src/metaxy/__init__.py
@public
def get_feature_by_key(key: CoercibleToFeatureKey) -> FeatureDefinition:
    """Look up a FeatureDefinition in the current graph by its key.

    Args:
        key: Feature key to look up (can be FeatureKey, list of strings, slash-separated string, etc.)

    Returns:
        FeatureDefinition for the feature

    Raises:
        KeyError: If no feature with the given key is registered
    """
    active_graph = current_graph()
    return active_graph.get_feature_definition(key)

metaxy.FeatureDefinition pydantic-model

Bases: FrozenBaseModel

Complete feature definition wrapping all feature information.

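Attributes:

  • spec –

    The complete feature specification

  • feature_schema –

    Pydantic JSON schema dict for the feature model

  • feature_class_path –

    Python import path (e.g., 'myapp.features.VideoFeature')

  • project –

    The metaxy project this feature belongs to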

Show JSON schema:
{
  "$defs": {
    "AggregationRelationship": {
      "description": "Many-to-one relationship where multiple parent rows aggregate to one child row.\n\nParent features have more granular ID columns than the child. The child aggregates\nmultiple parent rows by grouping on a subset of the parent's ID columns.\n\nConstruct this relationship via [`LineageRelationship.aggregation`][metaxy.models.lineage.LineageRelationship.aggregation] classmethod.\n\nAttributes:\n    on: Columns to group by for aggregation. These should be a subset of the\n        target feature's ID columns. If not specified, uses all target ID columns.\n\nExample:\n    ```python\n    mx.LineageRelationship.aggregation(on=[\"sensor_id\", \"hour\"])\n    ```",
      "properties": {
        "type": {
          "const": "N:1",
          "default": "N:1",
          "title": "Type",
          "type": "string"
        },
        "on": {
          "anyOf": [
            {
              "items": {
                "type": "string"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Columns to group by for aggregation. Defaults to all target ID columns.",
          "title": "On"
        }
      },
      "title": "AggregationRelationship",
      "type": "object"
    },
    "AllFieldsMapping": {
      "description": "Field mapping that explicitly depends on all upstream fields.",
      "properties": {
        "type": {
          "const": "all",
          "default": "all",
          "title": "Type",
          "type": "string"
        }
      },
      "title": "AllFieldsMapping",
      "type": "object"
    },
    "DefaultFieldsMapping": {
      "description": "Default automatic field mapping configuration.\n\nWhen used, automatically maps fields to matching upstream fields based on field keys.\n\nAttributes:\n    match_suffix: If True, allows suffix matching (e.g., \"french\" matches \"audio/french\")\n    exclude_fields: List of field keys to exclude from auto-mapping",
      "properties": {
        "type": {
          "const": "default",
          "default": "default",
          "title": "Type",
          "type": "string"
        },
        "match_suffix": {
          "default": false,
          "title": "Match Suffix",
          "type": "boolean"
        },
        "exclude_fields": {
          "items": {
            "$ref": "#/$defs/FieldKey"
          },
          "title": "Exclude Fields",
          "type": "array"
        }
      },
      "title": "DefaultFieldsMapping",
      "type": "object"
    },
    "ExpansionRelationship": {
      "description": "One-to-many relationship where one parent row expands to multiple child rows.\n\nChild features have more granular ID columns than the parent. Each parent row\ngenerates multiple child rows with additional ID columns.\n\nConstruct this relationship via [`LineageRelationship.expansion`][metaxy.models.lineage.LineageRelationship.expansion] classmethod.\n\nAttributes:\n    on: Parent ID columns that identify the parent record. Child records with\n        the same parent IDs will share the same upstream provenance.\n        If not specified, will be inferred from the available columns.\n    id_generation_pattern: Optional pattern for generating child IDs.\n        Can be \"sequential\", \"hash\", or a custom pattern. If not specified,\n        the feature's load_input() method is responsible for ID generation.\n\nExample:\n    ```python\n    mx.LineageRelationship.expansion(on=[\"video_id\"], id_generation_pattern=\"sequential\")\n    ```",
      "properties": {
        "type": {
          "const": "1:N",
          "default": "1:N",
          "title": "Type",
          "type": "string"
        },
        "on": {
          "description": "Parent ID columns for grouping. Child records with same parent IDs share provenance. Required for expansion relationships.",
          "items": {
            "type": "string"
          },
          "title": "On",
          "type": "array"
        },
        "id_generation_pattern": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Pattern for generating child IDs. If None, handled by load_input().",
          "title": "Id Generation Pattern"
        }
      },
      "required": [
        "on"
      ],
      "title": "ExpansionRelationship",
      "type": "object"
    },
    "FeatureDep": {
      "additionalProperties": false,
      "description": "Feature dependency specification with optional column selection, renaming, and lineage.\n\nAttributes:\n    feature: The feature key to depend on. Accepts string (\"a/b/c\"), list ([\"a\", \"b\", \"c\"]),\n        FeatureKey instance, or BaseFeature class.\n    select: Optional sequence of column names to select from the upstream feature.\n        By default, all columns are selected. System columns are always selected.\n        Uses post-rename names when `rename` is also specified.\n    rename: Optional mapping of old column names to new names.\n        Applied before column selection.\n    fields_mapping: Optional field mapping configuration for automatic field dependency resolution.\n        When provided, fields without explicit deps will automatically map to matching upstream fields.\n        Defaults to using `[FieldsMapping.default()][metaxy.models.fields_mapping.DefaultFieldsMapping]`.\n    filters: Optional SQL-like filter strings applied to this dependency. Automatically parsed into\n        Narwhals expressions (accessible via the `filters` property). Filters are automatically\n        applied by FeatureDepTransformer after renames during all FeatureDep operations (including\n        resolve_update and version computation).\n    lineage: The lineage relationship between this upstream dependency and the downstream feature.\n        - `LineageRelationship.identity()` (default): 1:1 relationship, same cardinality\n        - `LineageRelationship.aggregation(on=...)`: N:1, multiple upstream rows aggregate to one downstream\n        - `LineageRelationship.expansion(on=...)`: 1:N, one upstream row expands to multiple downstream rows\n    optional: Whether individual samples of the downstream feature can be computed without\n        the corresponding samples of the upstream feature. 
If upstream samples are missing,\n        they are going to be represented as NULL values in the joined upstream metadata.\n        Defaults to False (required dependency).\n\nExample: Basic Usage\n    ```py\n    # Keep all columns with default field mapping (1:1 lineage)\n    mx.FeatureDep(feature=\"upstream\")\n\n    # Keep only specific columns\n    mx.FeatureDep(feature=\"upstream/feature\", select=(\"col1\", \"col2\"))\n\n    # Rename columns to avoid conflicts\n    mx.FeatureDep(feature=\"upstream/feature\", rename={\"old_name\": \"new_name\"})\n\n    # Combined rename + select: select uses post-rename names\n    mx.FeatureDep(\n        feature=\"upstream/feature\",\n        rename={\"old_name\": \"new_name\"},\n        select=(\"new_name\", \"other_col\"),\n    )\n\n    # SQL filters\n    mx.FeatureDep(feature=\"upstream\", filters=[\"age >= 25\", \"status = 'active'\"])\n\n    # Optional dependency (left join - samples preserved even if no match)\n    mx.FeatureDep(feature=\"enrichment/data\", optional=True)\n    ```\n\nExample: Lineage Relationships\n    ```py\n    from metaxy.models.lineage import LineageRelationship\n\n    # Aggregation: many sensor readings aggregate to one hourly stat\n    mx.FeatureDep(feature=\"sensor_readings\", lineage=LineageRelationship.aggregation(on=[\"sensor_id\", \"hour\"]))\n\n    # Expansion: one video expands to many frames\n    mx.FeatureDep(feature=\"video\", lineage=LineageRelationship.expansion(on=[\"video_id\"]))\n\n    # Mixed lineage: aggregate from one parent, identity from another\n    # In FeatureSpec:\n    deps = [\n        mx.FeatureDep(feature=\"readings\", lineage=LineageRelationship.aggregation(on=[\"sensor_id\"])),\n        mx.FeatureDep(feature=\"sensor_info\", lineage=LineageRelationship.identity()),\n    ]\n    ```",
      "properties": {
        "feature": {
          "$ref": "#/$defs/FeatureKey",
          "description": "Feature key. Accepts a slashed string ('a/b/c'), a sequence of strings, a FeatureKey instance, or a child class of BaseFeature"
        },
        "select": {
          "anyOf": [
            {
              "items": {
                "type": "string"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Select"
        },
        "rename": {
          "anyOf": [
            {
              "additionalProperties": {
                "type": "string"
              },
              "type": "object"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Rename"
        },
        "fields_mapping": {
          "$ref": "#/$defs/FieldsMapping"
        },
        "filters": {
          "anyOf": [
            {
              "items": {
                "type": "string"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "SQL-like filter strings applied to this dependency.",
          "title": "Filters"
        },
        "lineage": {
          "$ref": "#/$defs/LineageRelationship",
          "description": "Lineage relationship between this upstream dependency and the downstream feature."
        },
        "optional": {
          "default": false,
          "description": "Whether individual samples of the downstream feature can be computed without the corresponding samples of the upstream feature. If upstream samples are missing, they are going to be represented as NULL values in the joined upstream metadata.",
          "title": "Optional",
          "type": "boolean"
        }
      },
      "required": [
        "feature"
      ],
      "title": "FeatureDep",
      "type": "object"
    },
    "FeatureKey": {
      "description": "Feature key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExample:\n\n    ```py\n    FeatureKey(\"a/b/c\")  # String format\n    # FeatureKey(parts=['a', 'b', 'c'])\n\n    FeatureKey([\"a\", \"b\", \"c\"])  # List format\n    # FeatureKey(parts=['a', 'b', 'c'])\n\n    FeatureKey(FeatureKey([\"a\", \"b\", \"c\"]))  # FeatureKey copy\n    # FeatureKey(parts=['a', 'b', 'c'])\n    ```",
      "items": {
        "type": "string"
      },
      "title": "FeatureKey",
      "type": "array"
    },
    "FeatureSpec": {
      "additionalProperties": false,
      "properties": {
        "key": {
          "$ref": "#/$defs/FeatureKey"
        },
        "id_columns": {
          "description": "Columns that uniquely identify a sample in this feature.",
          "items": {
            "type": "string"
          },
          "title": "Id Columns",
          "type": "array"
        },
        "deps": {
          "items": {
            "$ref": "#/$defs/FeatureDep"
          },
          "title": "Deps",
          "type": "array"
        },
        "fields": {
          "items": {
            "additionalProperties": false,
            "properties": {
              "key": {
                "$ref": "#/$defs/FieldKey"
              },
              "code_version": {
                "default": "__metaxy_initial__",
                "title": "Code Version",
                "type": "string"
              },
              "deps": {
                "anyOf": [
                  {
                    "$ref": "#/$defs/SpecialFieldDep"
                  },
                  {
                    "items": {
                      "$ref": "#/$defs/FieldDep"
                    },
                    "type": "array"
                  }
                ],
                "title": "Deps"
              }
            },
            "title": "FieldSpec",
            "type": "object"
          },
          "title": "Fields",
          "type": "array"
        },
        "metadata": {
          "additionalProperties": true,
          "description": "Metadata attached to this feature.",
          "title": "Metadata",
          "type": "object"
        },
        "description": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Human-readable description of this feature.",
          "title": "Description"
        }
      },
      "required": [
        "key",
        "id_columns"
      ],
      "title": "FeatureSpec",
      "type": "object"
    },
    "FieldDep": {
      "additionalProperties": false,
      "properties": {
        "feature": {
          "$ref": "#/$defs/FeatureKey"
        },
        "fields": {
          "anyOf": [
            {
              "items": {
                "$ref": "#/$defs/FieldKey"
              },
              "type": "array"
            },
            {
              "const": "__METAXY_ALL_DEP__",
              "type": "string"
            }
          ],
          "default": "__METAXY_ALL_DEP__",
          "title": "Fields"
        }
      },
      "required": [
        "feature"
      ],
      "title": "FieldDep",
      "type": "object"
    },
    "FieldKey": {
      "description": "Field key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExample:\n\n    ```py\n    FieldKey(\"a/b/c\")  # String format\n    # FieldKey(parts=['a', 'b', 'c'])\n\n    FieldKey([\"a\", \"b\", \"c\"])  # List format\n    # FieldKey(parts=['a', 'b', 'c'])\n\n    FieldKey(FieldKey([\"a\", \"b\", \"c\"]))  # FieldKey copy\n    # FieldKey(parts=['a', 'b', 'c'])\n    ```",
      "items": {
        "type": "string"
      },
      "title": "FieldKey",
      "type": "array"
    },
    "FieldsMapping": {
      "description": "Base class for field mapping configurations.\n\nField mappings define how a field automatically resolves its dependencies\nbased on upstream feature fields. This is separate from explicit field\ndependencies which are defined directly.",
      "properties": {
        "mapping": {
          "discriminator": {
            "mapping": {
              "all": "#/$defs/AllFieldsMapping",
              "default": "#/$defs/DefaultFieldsMapping",
              "none": "#/$defs/NoneFieldsMapping",
              "specific": "#/$defs/SpecificFieldsMapping"
            },
            "propertyName": "type"
          },
          "oneOf": [
            {
              "$ref": "#/$defs/AllFieldsMapping"
            },
            {
              "$ref": "#/$defs/SpecificFieldsMapping"
            },
            {
              "$ref": "#/$defs/NoneFieldsMapping"
            },
            {
              "$ref": "#/$defs/DefaultFieldsMapping"
            }
          ],
          "title": "Mapping"
        }
      },
      "required": [
        "mapping"
      ],
      "title": "FieldsMapping",
      "type": "object"
    },
    "IdentityRelationship": {
      "description": "One-to-one relationship where each child row maps to exactly one parent row.\n\nThis is the default relationship type. Parent and child features share the same\nID columns and have the same cardinality.\n\nConstruct this relationship via [`LineageRelationship.identity`][metaxy.models.lineage.LineageRelationship.identity] classmethod.\n\nExample:\n    ```python\n    mx.LineageRelationship.identity()\n    ```",
      "properties": {
        "type": {
          "const": "1:1",
          "default": "1:1",
          "title": "Type",
          "type": "string"
        }
      },
      "title": "IdentityRelationship",
      "type": "object"
    },
    "LineageRelationship": {
      "description": "Wrapper class for lineage relationship configurations with convenient constructors.\n\nThis provides a cleaner API for creating lineage relationships while maintaining\ntype safety through discriminated unions.",
      "properties": {
        "relationship": {
          "discriminator": {
            "mapping": {
              "1:1": "#/$defs/IdentityRelationship",
              "1:N": "#/$defs/ExpansionRelationship",
              "N:1": "#/$defs/AggregationRelationship"
            },
            "propertyName": "type"
          },
          "oneOf": [
            {
              "$ref": "#/$defs/IdentityRelationship"
            },
            {
              "$ref": "#/$defs/AggregationRelationship"
            },
            {
              "$ref": "#/$defs/ExpansionRelationship"
            }
          ],
          "title": "Relationship"
        }
      },
      "required": [
        "relationship"
      ],
      "title": "LineageRelationship",
      "type": "object"
    },
    "NoneFieldsMapping": {
      "description": "Field mapping that never matches any upstream fields.",
      "properties": {
        "type": {
          "const": "none",
          "default": "none",
          "title": "Type",
          "type": "string"
        }
      },
      "title": "NoneFieldsMapping",
      "type": "object"
    },
    "SpecialFieldDep": {
      "enum": [
        "__METAXY_ALL_DEP__"
      ],
      "title": "SpecialFieldDep",
      "type": "string"
    },
    "SpecificFieldsMapping": {
      "description": "Field mapping that explicitly depends on specific upstream fields.",
      "properties": {
        "type": {
          "const": "specific",
          "default": "specific",
          "title": "Type",
          "type": "string"
        },
        "mapping": {
          "additionalProperties": {
            "items": {
              "$ref": "#/$defs/FieldKey"
            },
            "type": "array",
            "uniqueItems": true
          },
          "propertyNames": {
            "$ref": "#/$defs/FieldKey"
          },
          "title": "Mapping",
          "type": "object"
        }
      },
      "required": [
        "mapping"
      ],
      "title": "SpecificFieldsMapping",
      "type": "object"
    }
  },
  "additionalProperties": false,
  "description": "Complete feature definition wrapping all feature information.\n\nAttributes:\n    spec: The complete feature specification\n    feature_schema: Pydantic JSON schema dict for the feature model\n    feature_class_path: Python import path (e.g., 'myapp.features.VideoFeature')\n    project: The metaxy project this feature belongs to",
  "properties": {
    "spec": {
      "$ref": "#/$defs/FeatureSpec",
      "description": "Complete feature specification"
    },
    "feature_schema": {
      "contentMediaType": "application/json",
      "contentSchema": {
        "additionalProperties": true,
        "type": "object"
      },
      "description": "Pydantic JSON schema dict",
      "title": "Feature Schema",
      "type": "string"
    },
    "feature_class_path": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Python import path",
      "title": "Feature Class Path"
    },
    "project": {
      "description": "The metaxy project this feature belongs to",
      "minLength": 1,
      "title": "Project",
      "type": "string"
    }
  },
  "required": [
    "spec",
    "feature_schema",
    "project"
  ],
  "title": "FeatureDefinition",
  "type": "object"
}

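Fields:

  • spec (FeatureSpec)
  • feature_schema (Json[dict[str, Any]])
  • feature_class_path (str | None)
  • project (str)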

Validators:

Attributes

metaxy.FeatureDefinition.spec pydantic-field

Complete feature specification

metaxy.FeatureDefinition.feature_schema pydantic-field

feature_schema: Json[dict[str, Any]]

Pydantic JSON schema dict

metaxy.FeatureDefinition.feature_class_path pydantic-field

feature_class_path: str | None = None

Python import path

metaxy.FeatureDefinition.project pydantic-field

project: str

The metaxy project this feature belongs to

metaxy.FeatureDefinition.feature_definition_version cached property

feature_definition_version: str

Hash of spec + schema (excludes project).

metaxy.FeatureDefinition.key property

Get the feature key from the spec.

metaxy.FeatureDefinition.table_name property

table_name: str

Get SQL-like table name for this feature.

metaxy.FeatureDefinition.id_columns property

id_columns: tuple[str, ...]

Get ID columns from the spec.

metaxy.FeatureDefinition.columns cached property

columns: Sequence[str]

Get column names from the feature schema.

metaxy.FeatureDefinition.is_external property

is_external: bool

Check if this is an external feature definition.

metaxy.FeatureDefinition.provenance_by_field_override property

provenance_by_field_override: dict[str, str]

The manually-specified field provenance map.

Raises:

metaxy.FeatureDefinition.has_provenance_override property

has_provenance_override: bool

True if this external feature has a provenance override.

metaxy.FeatureDefinition.on_version_mismatch property

on_version_mismatch: Literal['warn', 'error']

What to do when actual feature version differs from expected.

metaxy.FeatureDefinition.source property

source: str

Human-readable string describing where this definition came from.

Functions

metaxy.FeatureDefinition.from_feature_class classmethod

from_feature_class(
    feature_cls: type[BaseFeature],
) -> FeatureDefinition

Create a FeatureDefinition from a Feature class.

Source code in src/metaxy/models/feature_definition.py
@classmethod
def from_feature_class(cls, feature_cls: type[BaseFeature]) -> FeatureDefinition:
    """Build a FeatureDefinition out of a Feature class.

    The class' spec, its pydantic JSON schema, its import path
    (``<module>.<name>``) and its metaxy project are collected into a new
    definition. When the spec carries no description, the cleaned-up class
    docstring is used as one.

    Args:
        feature_cls: The Feature class to describe.

    Returns:
        A new FeatureDefinition that also keeps a reference to ``feature_cls``.
    """
    feature_spec = feature_cls.spec()

    # Fall back to the class docstring only when the spec has no description.
    lacks_description = feature_spec.description is None
    class_doc = feature_cls.__doc__
    if lacks_description and class_doc:
        description = inspect.cleandoc(class_doc)
        feature_spec = feature_spec.model_copy(update={"description": description})

    # Keyword arguments are evaluated in order: schema, class path, project.
    result = cls(
        spec=feature_spec,
        feature_schema=feature_cls.model_json_schema(),
        feature_class_path=f"{feature_cls.__module__}.{feature_cls.__name__}",
        project=feature_cls.metaxy_project(),
    )
    # Remember the originating class on the definition.
    result._feature_class = feature_cls
    return result

metaxy.FeatureDefinition.from_stored_data classmethod

from_stored_data(
    feature_spec: dict[str, Any] | str,
    feature_schema: dict[str, Any] | str,
    feature_class_path: str,
    project: str,
    source: str | None = None,
) -> FeatureDefinition

Create a FeatureDefinition from stored data.

Handles JSON string or dict inputs for spec and schema fields.

Parameters:

  • feature_spec (dict[str, Any] | str) –

    Feature specification as dict or JSON string.

  • feature_schema (dict[str, Any] | str) –

    Pydantic JSON schema as dict or JSON string.

  • feature_class_path (str) –

    Python import path of the feature class.

  • project (str) –

    The metaxy project name.

  • source (str | None, default: None ) –

    Human-readable string describing where this definition came from.

Returns:

  • FeatureDefinition –

    A new FeatureDefinition instance.

Source code in src/metaxy/models/feature_definition.py
@classmethod
def from_stored_data(
    cls,
    feature_spec: dict[str, Any] | str,
    feature_schema: dict[str, Any] | str,
    feature_class_path: str,
    project: str,
    source: str | None = None,
) -> FeatureDefinition:
    """Rebuild a FeatureDefinition from previously stored values.

    The spec and the schema may each be given either as an already decoded
    dict or as a JSON string; strings are decoded first.

    Args:
        feature_spec: Feature specification as dict or JSON string.
        feature_schema: Pydantic JSON schema as dict or JSON string.
        feature_class_path: Python import path of the feature class.
        project: The metaxy project name.
        source: Human-readable string describing where this definition came from.

    Returns:
        A new FeatureDefinition instance.
    """
    import json

    # Decode JSON text; dict inputs are passed through untouched.
    spec_payload = json.loads(feature_spec) if isinstance(feature_spec, str) else feature_spec
    schema_payload = json.loads(feature_schema) if isinstance(feature_schema, str) else feature_schema

    result = cls(
        spec=FeatureSpec.model_validate(spec_payload),
        feature_schema=schema_payload,
        feature_class_path=feature_class_path,
        project=project,
    )
    result._source = source
    return result

metaxy.FeatureDefinition.external classmethod

external(
    *,
    spec: FeatureSpec,
    project: str,
    feature_schema: dict[str, Any] | None = None,
    provenance_by_field: dict[CoercibleToFieldKey, str]
    | None = None,
    on_version_mismatch: Literal["warn", "error"] = "warn",
    source: str | None = None,
) -> FeatureDefinition

Create an external FeatureDefinition without a Feature class.

External features are definitions loaded from another project or system that don't have corresponding Python Feature classes in the current codebase.

Parameters:

  • spec (FeatureSpec) –

    The feature specification.

  • project (str) –

    The metaxy project this feature belongs to.

  • feature_schema (dict[str, Any] | None, default: None ) –

    Pydantic JSON schema dict describing the feature's fields. Typically doesn't have to be provided, unless some user code attempts to use it before the real feature definition is loaded from the metadata store. This argument is experimental and may be changed in the future.

  • provenance_by_field (dict[CoercibleToFieldKey, str] | None, default: None ) –

    Optional manually-specified field provenance map. Use this argument to avoid providing too many upstream external features. Make sure to provide the actual values from the real external feature.

  • on_version_mismatch (Literal['warn', 'error'], default: 'warn' ) –

    How to handle a version mismatch if the actual feature loaded from the metadata store has a different version than the version specified in the corresponding external feature.

  • source (str | None, default: None ) –

    Human-readable string describing where this definition came from. If not provided, captures the call site location automatically.

Returns:

  • FeatureDefinition –

    A new FeatureDefinition marked as external.

Source code in src/metaxy/models/feature_definition.py
@classmethod
def external(
    cls,
    *,
    spec: FeatureSpec,
    project: str,
    feature_schema: dict[str, Any] | None = None,
    provenance_by_field: dict[CoercibleToFieldKey, str] | None = None,
    on_version_mismatch: Literal["warn", "error"] = "warn",
    source: str | None = None,
) -> FeatureDefinition:
    """Build a FeatureDefinition that has no Feature class behind it.

    An external feature is a definition coming from another project or system,
    for which the current codebase holds no Python Feature class.

    Args:
        spec: The feature specification.
        project: The metaxy project this feature belongs to.
        feature_schema: Pydantic JSON schema dict describing the feature's fields.
            Usually unnecessary, unless user code needs it before the real
            feature definition is loaded from the metadata store. An empty dict
            is stored when omitted (or falsy).
            This argument is experimental and may be changed in the future.
        provenance_by_field: Optional manually-specified field provenance map.
            Lets callers avoid declaring many upstream external features.
            The values must be the actual ones of the real external feature.
            Keys are validated and stored in their string form.
        on_version_mismatch: How to handle a version mismatch if the actual feature
            loaded from the metadata store has a different version than the one
            specified in the corresponding external feature.
        source: Human-readable string describing where this definition came from.
            If not provided, captures the call site location automatically.

    Returns:
        A new FeatureDefinition marked as external.
    """
    normalized_provenance: dict[str, str] | None
    if provenance_by_field is None:
        normalized_provenance = None
    else:
        # Validate every key and index the override by its string form.
        normalized_provenance = {}
        for raw_key, provenance in provenance_by_field.items():
            field_key = ValidatedFieldKeyAdapter.validate_python(raw_key)
            normalized_provenance[field_key.to_string()] = provenance

    # Without an explicit source, fall back to the call site. The helper is
    # invoked directly from this frame on purpose (it may inspect the stack).
    if source is None:
        source = cls._capture_call_site()

    result = cls(
        spec=spec,
        feature_schema=feature_schema if feature_schema else {},
        feature_class_path=None,
        project=project,
    )
    result._is_external = True
    result._provenance_by_field = normalized_provenance
    result._on_version_mismatch = on_version_mismatch
    result._source = source
    return result

metaxy.FeatureDefinition.check_version_mismatch

check_version_mismatch(
    *,
    expected_version: str,
    actual_version: str,
    expected_version_by_field: dict[str, str],
    actual_version_by_field: dict[str, str],
) -> None

Check if the actual feature version matches expected version.

Called by load_feature_definitions after loading external features from the metadata store, comparing provenance-carrying feature versions.

Parameters:

  • expected_version (str) –

    The feature version before loading (from graph).

  • actual_version (str) –

    The feature version after loading (from graph).

  • expected_version_by_field (dict[str, str]) –

    Field-level versions before loading.

  • actual_version_by_field (dict[str, str]) –

    Field-level versions after loading.

Raises:

  • ValueError –

    If versions mismatch and on_version_mismatch is "error".

Source code in src/metaxy/models/feature_definition.py
def check_version_mismatch(
    self,
    *,
    expected_version: str,
    actual_version: str,
    expected_version_by_field: dict[str, str],
    actual_version_by_field: dict[str, str],
) -> None:
    """Report a difference between the expected and the actual feature version.

    Does nothing for non-external definitions or when both versions are equal.
    Otherwise a message listing every field whose version differs is either
    raised or emitted as a warning, depending on the configured mismatch policy.

    Called by load_feature_definitions after loading external features from
    the metadata store, comparing provenance-carrying feature versions.

    Args:
        expected_version: The feature version before loading (from graph).
        actual_version: The feature version after loading (from graph).
        expected_version_by_field: Field-level versions before loading.
        actual_version_by_field: Field-level versions after loading.

    Raises:
        ValueError: If versions mismatch and on_version_mismatch is "error".
    """
    # Nothing to report for regular features or matching versions.
    if not self.is_external or expected_version == actual_version:
        return

    # Compare field by field; a field absent on one side shows up as "<missing>".
    placeholder = "<missing>"
    field_names = sorted({*expected_version_by_field, *actual_version_by_field})
    comparisons = (
        (
            name,
            expected_version_by_field.get(name, placeholder),
            actual_version_by_field.get(name, placeholder),
        )
        for name in field_names
    )
    differing = [
        f"  - {name}: expected '{wanted}', got '{found}'" for name, wanted, found in comparisons if wanted != found
    ]

    # Every entry is non-empty, so an empty join means no field differed.
    field_details = "\n".join(differing) or "  (no field-level details available)"

    message = (
        f"Version mismatch for external feature '{self.key}': "
        f"expected feature version '{expected_version}', got '{actual_version}'.\n"
        f"Field-level mismatches:\n{field_details}\n"
        f"The external feature definition may be out of sync with the metadata store."
    )

    if self._on_version_mismatch != "error":
        warnings.warn(message, stacklevel=3)
        return
    raise ValueError(message)