Initialization
Initialization¶
metaxy.init
¶
init(
config: MetaxyConfig | Path | str | None = None,
search_parents: bool = True,
) -> MetaxyConfig
Main user-facing initialization function for Metaxy. It loads feature definitions and the Metaxy configuration.
The feature graph is populated with feature definitions discovered in the Metaxy project.
External features are loaded from a metaxy.lock if it is found.
Parameters:
-
config (MetaxyConfig | Path | str | None, default: None) – Metaxy configuration to use for initialization. Will be auto-discovered if not provided.
Tip
The METAXY_CONFIG environment variable can be used to set the config file path. -
search_parents (bool, default: True) – Whether to search parent directories for configuration files during config discovery.
Returns:
-
MetaxyConfig – The activated Metaxy configuration.
Source code in src/metaxy/__init__.py
@public
def init(
    config: MetaxyConfig | Path | str | None = None,
    search_parents: bool = True,
) -> MetaxyConfig:
    """Main user-facing initialization function for Metaxy. It loads feature definitions and the Metaxy [configuration][metaxy.MetaxyConfig].

    The feature graph is populated with feature definitions [discovered](/guide/concepts/projects.md#feature-discovery) in the Metaxy project.

    [External features](/guide/concepts/definitions/external-features.md) are loaded from a `metaxy.lock` if it is found.

    Args:
        config: Metaxy configuration to use for initialization. Will be auto-discovered if not provided.

            !!! tip
                `METAXY_CONFIG` environment variable can be used to set the config file path.

        search_parents: Whether to search parent directories for configuration files during config discovery.

    Returns:
        The activated Metaxy configuration.
    """
    from metaxy.utils.lock_file import load_lock_file

    if isinstance(config, MetaxyConfig):
        # A ready-made configuration object is registered as-is.
        MetaxyConfig.set(config)
        active_config = config
    else:
        # A path, a string or None is handed to the loader, which resolves it
        # into a configuration object.
        active_config = MetaxyConfig.load(
            config_file=config,
            search_parents=search_parents,
        )

    # The lock file is processed before the project's entrypoints are loaded.
    load_lock_file(active_config)
    load_features(active_config.entrypoints)
    return active_config
metaxy.sync_external_features
¶
sync_external_features(
store: MetadataStore,
*,
on_version_mismatch: Literal["warn", "error"]
| None = None,
) -> list[FeatureDefinition]
Sync external feature definitions from a metadata store if the graph has any.
This function loads feature definitions from the metadata store to replace external feature placeholders in the active graph. It also validates that the versions match and warns or errors on mismatches.
Additionally, this function loads any feature keys specified in the
features config field, warning if any of them are not found in the metadata store.
Parameters:
-
store (MetadataStore) – Metadata store to load from. Will be opened automatically if not already open.
-
on_version_mismatch (Literal['warn', 'error'] | None, default: None) – Optional override for the on_version_mismatch setting on external feature definitions.
Info
Setting MetaxyConfig.locked to True takes precedence over this argument.
Returns:
-
list[FeatureDefinition] – List of loaded FeatureDefinition objects.
Example
Source code in src/metaxy/utils/external_features.py
@public
def sync_external_features(
    store: MetadataStore,
    *,
    on_version_mismatch: Literal["warn", "error"] | None = None,
) -> list[FeatureDefinition]:
    """Sync external feature definitions from a metadata store if the graph has any.

    This function loads feature definitions from the metadata store to replace
    external feature placeholders in the active graph. It also validates that
    the versions match and warns or errors on mismatches.

    Additionally, this function loads any feature keys specified in the
    `features` config field, warning if any of them are not found in the metadata store.

    Args:
        store: Metadata store to load from. Will be opened automatically if not already open.
        on_version_mismatch: Optional override for the `on_version_mismatch` setting on external feature definitions.

            !!! info
                Setting [`MetaxyConfig.locked`][metaxy.MetaxyConfig] to `True` takes precedence over this argument.

    Returns:
        List of loaded FeatureDefinition objects. The list is empty when the
        active graph has no external features.

    Warns:
        InvalidStoredFeatureWarning: A feature could not be loaded from the store;
            that feature is skipped and the sync continues with the next one.
        UnresolvedExternalFeatureWarning: Some features in the active graph are
            still external after the sync.

    Example:
        ```python
        import metaxy as mx

        # Sync external features before running a pipeline
        mx.sync_external_features(store)

        # Or with explicit error handling
        mx.sync_external_features(store, on_version_mismatch="error")
        ```
    """
    from metaxy.config import MetaxyConfig
    from metaxy.metadata_store.system import SystemTableStorage

    # NOTE(review): the docstring mentions loading keys from the `features`
    # config field, but this body only reads `config.locked` - confirm whether
    # that loading happens elsewhere or the docstring is stale.
    graph = FeatureGraph.get_active()
    config = MetaxyConfig.get(_allow_default_config=True)

    if not graph.has_external_features:
        return []

    # Locked mode forces "error", regardless of what the caller passed.
    if config.locked:
        on_version_mismatch = "error"

    # Record versions of external features BEFORE loading, so they can be
    # compared against whatever the store provides.
    external_versions_before: dict[FeatureKey, tuple[str, dict[str, str], FeatureDefinition]] = {}
    external_keys: list[str] = []
    for key, defn in graph.feature_definitions_by_key.items():
        if defn.is_external:
            external_versions_before[key] = (
                graph.get_feature_version(key),
                graph.get_feature_version_by_field(key),
                defn,
            )
            external_keys.append(key.to_string())

    # Use nullcontext if store is already open, otherwise open it
    cm = nullcontext(store) if store._is_open else store

    result: list[FeatureDefinition] = []
    with cm:
        storage = SystemTableStorage(store)
        for key_str in external_keys:
            # Best effort: one unreadable feature must not abort the whole sync.
            try:
                loaded = storage._load_feature_definitions_raw(
                    filters=[nw.col("feature_key") == key_str],
                )
                result.extend(loaded)
            except Exception as e:
                warnings.warn(
                    f"Skipping feature '{key_str}': failed to load from store: {e}",
                    InvalidStoredFeatureWarning,
                    # Attribute the warning to the caller, like the
                    # unresolved-features warning below.
                    stacklevel=2,
                )

    # Check for version mismatches
    _check_version_mismatches(graph, external_versions_before, on_version_mismatch)

    # Warn if there are still unresolved external features after sync
    remaining_external = sorted(d.spec.key for d in graph.feature_definitions_by_key.values() if d.is_external)
    if remaining_external:
        keys_str = ", ".join(str(k) for k in remaining_external)
        warnings.warn(
            f"After syncing, {len(remaining_external)} external feature(s) could not be resolved "
            f"from the metadata store: {keys_str}. "
            f"These features may not exist in the store.",
            UnresolvedExternalFeatureWarning,
            stacklevel=2,
        )

    return result