Skip to content

Initialization

Initialization

metaxy.init

init(
    config: MetaxyConfig | Path | str | None = None,
    search_parents: bool = True,
    *,
    isolated: bool = False,
) -> MetaxyConfig

Main user-facing initialization function for Metaxy. It loads feature definitions and the Metaxy configuration.

The feature graph is populated with feature definitions discovered in the Metaxy project. External features are loaded from a metaxy.lock file if one is found.

Parameters:

  • config (MetaxyConfig | Path | str | None, default: None ) –

    Metaxy configuration to use for initialization. Will be auto-discovered if not provided.

    Tip

    METAXY_CONFIG environment variable can be used to set the config file path.

  • search_parents (bool, default: True ) –

    Whether to search parent directories for configuration files during config discovery.

  • isolated (bool, default: False ) –

    Whether to only load distribution entry points from Python dependencies (including transitive) of this project. Features from non-dependencies available in the same Python environment will be excluded from feature auto-discovery.

Returns:

  • MetaxyConfig –

    The activated Metaxy configuration.

Source code in src/metaxy/__init__.py
@public
def init(
    config: MetaxyConfig | Path | str | None = None,
    search_parents: bool = True,
    *,
    isolated: bool = False,
) -> MetaxyConfig:
    """Main user-facing entry point for setting up Metaxy: resolves the [configuration][metaxy.MetaxyConfig] and loads feature definitions.

    The feature graph is populated with feature definitions [discovered](/guide/concepts/projects.md#feature-discovery) in the Metaxy project.
    [External features](/guide/concepts/definitions/external-features.md) are loaded from a `metaxy.lock` if it is found.

    Args:
        config: Metaxy configuration to use for initialization. An existing `MetaxyConfig` instance is
            used as-is; any other value is treated as the config file location. Will be auto-discovered
            if not provided.

            !!! tip
                `METAXY_CONFIG` environment variable can be used to set the config file path.

        search_parents: Whether to search parent directories for configuration files during config discovery.
        isolated: Whether to only load distribution entry points from Python dependencies (including transitive) of this project.
            Features from non-dependencies available in the same Python environment will be excluded from feature auto-discovery.

    Returns:
        The activated Metaxy configuration.
    """
    # Imported lazily, at call time rather than at module import.
    from metaxy.utils.lock_file import load_lock_file

    # NOTE(review): `isolated` is not referenced anywhere in this body —
    # confirm where it is meant to take effect.
    if not isinstance(config, MetaxyConfig):
        # A path, a string or None: hand it to the loader as the config file.
        config = MetaxyConfig.load(config_file=config, search_parents=search_parents)
    else:
        # A ready-made configuration object only needs to be registered.
        MetaxyConfig.set(config)

    # The lock file is processed before the project's own entry points are loaded.
    load_lock_file(config)
    load_features(config.entrypoints, filter_project=config.project)
    return config

metaxy.sync_external_features

sync_external_features(
    store: MetadataStore,
    *,
    selection: FeatureSelection | None = None,
    on_version_mismatch: Literal["warn", "error"]
    | None = None,
) -> list[FeatureDefinition]

Experimental

This functionality is experimental.

Sync external feature definitions from a metadata store into the active FeatureGraph.

Replaces external feature placeholders in the active graph with real definitions loaded from the store. Validates that versions match and warns or errors on mismatches.

When selection is provided, the selected features are loaded in addition to any external features already present in the graph.

Parameters:

  • store (MetadataStore) –

    Metadata store to load from. Will be opened automatically if not already open.

  • selection (FeatureSelection | None, default: None ) –

    Optional additional features to load from the store.

  • on_version_mismatch (Literal['warn', 'error'] | None, default: None ) –

    Optional override for the on_version_mismatch setting on external feature definitions.

    Info

    Setting MetaxyConfig.locked to True takes precedence over this argument.

Returns:

  • list[FeatureDefinition] –

    List of loaded FeatureDefinition objects.

Example
import metaxy as mx

# Sync external features before running a pipeline
mx.sync_external_features(store)

# Or with explicit error handling
mx.sync_external_features(store, on_version_mismatch="error")

# Load specific features by selection
mx.sync_external_features(store, selection=mx.FeatureSelection(projects=["upstream"]))
Source code in src/metaxy/utils/external_features.py
@public
@experimental
def sync_external_features(
    store: MetadataStore,
    *,
    selection: FeatureSelection | None = None,
    on_version_mismatch: Literal["warn", "error"] | None = None,
) -> list[FeatureDefinition]:
    """Sync external feature definitions from a metadata store into the active [`FeatureGraph`][metaxy.FeatureGraph].

    External feature placeholders found in the active graph are replaced with
    the real definitions loaded from the store. The versions recorded before
    loading are then checked against the result, and mismatches produce a
    warning or an error.

    When *selection* is provided, the selected features are loaded in addition
    to any external features already present in the graph. Feature selections
    listed in the configuration's `extra_features` are loaded as well.

    Args:
        store: Metadata store to load from. Will be opened automatically if not already open.
        selection: Optional additional features to load from the store.
        on_version_mismatch: Optional override for the `on_version_mismatch` setting on external feature definitions.

            !!! info
                Setting [`MetaxyConfig.locked`][metaxy.MetaxyConfig] to `True` takes precedence over this argument.

    Returns:
        List of loaded FeatureDefinition objects. Empty when there is nothing to load.

    Example:
        ```python
        import metaxy as mx

        # Sync external features before running a pipeline
        mx.sync_external_features(store)

        # Or with explicit error handling
        mx.sync_external_features(store, on_version_mismatch="error")

        # Load specific features by selection
        mx.sync_external_features(store, selection=mx.FeatureSelection(projects=["upstream"]))
        ```
    """
    from metaxy.config import MetaxyConfig
    from metaxy.models.feature_selection import FeatureSelection as FS

    graph = FeatureGraph.get_active()
    config = MetaxyConfig.get(_allow_default_config=True)

    # Snapshot (version, per-field versions, definition) of every external
    # feature *before* loading, so mismatches can be detected afterwards.
    snapshot: dict[FeatureKey, tuple[str, dict[str, str], FeatureDefinition]] = {
        key: (
            graph.get_feature_version(key),
            graph.get_feature_version_by_field(key),
            defn,
        )
        for key, defn in graph.feature_definitions_by_key.items()
        if defn.is_external
    }
    placeholder_keys: list[FeatureKey] = list(snapshot)

    # Keys that are already non-external in the graph are filtered out of the
    # user selection to avoid unnecessary store I/O.
    local_keys = {key for key, defn in graph.feature_definitions_by_key.items() if not defn.is_external}
    requested = None if selection is None else _filter_selection(selection, local_keys)

    # Pieces of the combined selection, in order:
    # external keys, user-provided selection, config extra_features.
    parts: list[FS] = [FS(keys=placeholder_keys)] if placeholder_keys else []
    if requested is not None:
        parts.append(requested)
    parts.extend(config.extra_features)

    if not parts:
        return []

    combined, *others = parts
    for other in others:
        combined = combined | other

    # A locked configuration always escalates mismatches to errors,
    # regardless of what the caller asked for.
    mismatch_policy = "error" if config.locked else on_version_mismatch

    loaded = _load_selection(store, combined, graph)

    _check_version_mismatches(graph, snapshot, mismatch_policy)

    # Anything still flagged as external at this point was not resolved by the load.
    unresolved = sorted(defn.spec.key for defn in graph.feature_definitions_by_key.values() if defn.is_external)
    if unresolved:
        keys_str = ", ".join(str(key) for key in unresolved)
        warnings.warn(
            f"After syncing, {len(unresolved)} external feature(s) could not be resolved "
            f"from the metadata store: {keys_str}. "
            f"These features may not exist in the store.",
            UnresolvedExternalFeatureWarning,
            stacklevel=2,
        )

    return loaded