Skip to content

Initialization

Initialization

metaxy.init

init(
    config: MetaxyConfig | Path | str | None = None,
    search_parents: bool = True,
) -> MetaxyConfig

Main user-facing initialization function for Metaxy. It loads feature definitions and the Metaxy configuration.

The feature graph is populated with feature definitions discovered in the Metaxy project. External features are loaded from a metaxy.lock if it is found.

Parameters:

  • config (MetaxyConfig | Path | str | None, default: None ) –

    Metaxy configuration to use for initialization. Will be auto-discovered if not provided.

    Tip

    The METAXY_CONFIG environment variable can be used to set the config file path.

  • search_parents (bool, default: True ) –

    Whether to search parent directories for configuration files during config discovery.

Returns:

  • MetaxyConfig –

    The activated Metaxy configuration.

Source code in src/metaxy/__init__.py
@public
def init(
    config: MetaxyConfig | Path | str | None = None,
    search_parents: bool = True,
) -> MetaxyConfig:
    """Initialize Metaxy: resolve the [configuration][metaxy.MetaxyConfig] and load feature definitions.

    The feature graph is populated with feature definitions [discovered](/guide/concepts/projects.md#feature-discovery) in the Metaxy project.
    [External features](/guide/concepts/definitions/external-features.md) are loaded from a `metaxy.lock` if it is found.

    Args:
        config: Metaxy configuration to use for initialization. A ready-made
            `MetaxyConfig` instance is installed via `MetaxyConfig.set`; any other
            value (a path, a string, or `None`) is forwarded to `MetaxyConfig.load`
            as `config_file`. Will be auto-discovered if not provided.

            !!! tip
                The `METAXY_CONFIG` environment variable can be used to set the config file path.

        search_parents: Whether to search parent directories for configuration files during config discovery.
            Only forwarded to `MetaxyConfig.load`, i.e. it has no effect when a
            `MetaxyConfig` instance is passed in.

    Returns:
        The activated Metaxy configuration.
    """
    # Imported lazily inside the function, as in the original implementation.
    from metaxy.utils.lock_file import load_lock_file

    if not isinstance(config, MetaxyConfig):
        # Path / str / None: let MetaxyConfig locate and parse the config file.
        active = MetaxyConfig.load(config_file=config, search_parents=search_parents)
    else:
        # Caller supplied a concrete configuration object: install it as-is.
        MetaxyConfig.set(config)
        active = config

    # Order matters: the lock file is processed before the entrypoints are loaded.
    load_lock_file(active)
    load_features(active.entrypoints)
    return active

metaxy.sync_external_features

sync_external_features(
    store: MetadataStore,
    *,
    on_version_mismatch: Literal["warn", "error"]
    | None = None,
) -> list[FeatureDefinition]

Sync external feature definitions from a metadata store if the graph has any.

This function loads feature definitions from the metadata store to replace external feature placeholders in the active graph. It also validates that the versions match and warns or errors on mismatches.

Additionally, this function loads any feature keys specified in the features config field, warning if any of them are not found in the metadata store.

Parameters:

  • store (MetadataStore) –

    Metadata store to load from. Will be opened automatically if not already open.

  • on_version_mismatch (Literal['warn', 'error'] | None, default: None ) –

    Optional override for the on_version_mismatch setting on external feature definitions.

    Info

    Setting MetaxyConfig.locked to True takes precedence over this argument.

Returns:

  • list[FeatureDefinition] –

    List of loaded FeatureDefinition objects.

Example
import metaxy as mx

# Sync external features before running a pipeline
mx.sync_external_features(store)

# Or with explicit error handling
mx.sync_external_features(store, on_version_mismatch="error")
Source code in src/metaxy/utils/external_features.py
@public
def sync_external_features(
    store: MetadataStore,
    *,
    on_version_mismatch: Literal["warn", "error"] | None = None,
) -> list[FeatureDefinition]:
    """Sync external feature definitions from a metadata store if the graph has any.

    For every external feature definition in the active graph, the definition
    with the same feature key is requested from the metadata store. The versions
    recorded before loading are then checked for mismatches, which warn or error
    depending on the effective `on_version_mismatch` setting. External features
    that remain unresolved after the sync are reported with an
    `UnresolvedExternalFeatureWarning`.

    If the active graph has no external features, the store is not touched and
    an empty list is returned.

    Args:
        store: Metadata store to load from. Will be opened automatically if not already open.
        on_version_mismatch: Optional override for the `on_version_mismatch` setting on external feature definitions.

            !!! info
                Setting [`MetaxyConfig.locked`][metaxy.MetaxyConfig] to `True` takes precedence over this argument.

    Returns:
        List of loaded FeatureDefinition objects. Features whose load raised an
        exception are skipped (with an `InvalidStoredFeatureWarning`) and are not
        part of the result.

    Example:
        ```python
        import metaxy as mx

        # Sync external features before running a pipeline
        mx.sync_external_features(store)

        # Or with explicit error handling
        mx.sync_external_features(store, on_version_mismatch="error")
        ```
    """
    from metaxy.config import MetaxyConfig
    from metaxy.metadata_store.system import SystemTableStorage

    # NOTE(review): the previous docstring also stated that feature keys listed in
    # the `features` config field are loaded here. Nothing in this function reads
    # that field -- confirm whether that behavior lives elsewhere or is missing.

    graph = FeatureGraph.get_active()
    config = MetaxyConfig.get(_allow_default_config=True)

    # Nothing to sync: avoid opening the store at all.
    if not graph.has_external_features:
        return []

    # Locked mode always escalates mismatches to errors, overriding the argument.
    if config.locked:
        on_version_mismatch = "error"

    # Snapshot the versions of external features BEFORE loading, so they can be
    # compared against the graph state after the store definitions are loaded.
    external_versions_before: dict[FeatureKey, tuple[str, dict[str, str], FeatureDefinition]] = {}
    external_keys: list[str] = []
    for key, defn in graph.feature_definitions_by_key.items():
        if defn.is_external:
            external_versions_before[key] = (
                graph.get_feature_version(key),
                graph.get_feature_version_by_field(key),
                defn,
            )
            external_keys.append(key.to_string())

    # Use nullcontext if store is already open, otherwise open it
    cm = nullcontext(store) if store._is_open else store
    result: list[FeatureDefinition] = []
    with cm:
        storage = SystemTableStorage(store)
        # One query per key so that a single broken stored definition only
        # skips that feature instead of aborting the whole sync.
        for key_str in external_keys:
            try:
                loaded = storage._load_feature_definitions_raw(
                    filters=[nw.col("feature_key") == key_str],
                )
                result.extend(loaded)
            except Exception as e:
                # Deliberate best-effort: report and continue with the next key.
                # stacklevel=2 attributes the warning to the caller, consistent
                # with the UnresolvedExternalFeatureWarning emitted below.
                warnings.warn(
                    f"Skipping feature '{key_str}': failed to load from store: {e}",
                    InvalidStoredFeatureWarning,
                    stacklevel=2,
                )

    # Check for version mismatches
    _check_version_mismatches(graph, external_versions_before, on_version_mismatch)

    # Warn if there are still unresolved external features after sync
    remaining_external = sorted(d.spec.key for d in graph.feature_definitions_by_key.values() if d.is_external)
    if remaining_external:
        keys_str = ", ".join(str(k) for k in remaining_external)
        warnings.warn(
            f"After syncing, {len(remaining_external)} external feature(s) could not be resolved "
            f"from the metadata store: {keys_str}. "
            f"These features may not exist in the store.",
            UnresolvedExternalFeatureWarning,
            stacklevel=2,
        )

    return result