🖥️ CLI Argument Parsing
✨ Implicit Argument References
py-gen-ml
generates a smart CLI argument parser using Pydantic base models. It shortens CLI argument names for deeply nested fields in your config when there's exactly one path to a field and the field name is unique.
Example protobuf structure:
// cli_demo.proto
syntax = "proto3";
package cli_demo;
import "py_gen_ml/extensions.proto";
// Global configuration
message CLIDemo {
option (pgml.cli).enable = true;
// Dataset configuration
Data data = 1;
// Model configuration
Model model = 2;
// Training configuration
Training training = 3;
}
// Dataset configuration
message Dataset {
// Path to the dataset
string path = 1;
}
// Data config
message Data {
// Path to the dataset
Dataset dataset = 1;
// Number of workers for loading the dataset
uint32 num_workers = 2;
}
// Model configuration
message Model {
// Number of layers
uint32 num_layers = 1;
}
// Training configuration
message Training {
// Number of epochs
uint32 num_epochs = 1;
}
This generates a CLI args class:
# Autogenerated code. DO NOT EDIT.
import py_gen_ml as pgml
import typing
import pydantic
import typer
from . import cli_demo_base as base
class CLIDemoArgs(pgml.YamlBaseModel):
"""Global configuration"""
path: typing.Annotated[
typing.Optional[str],
typer.Option(help="Path to the dataset. Maps to 'data.dataset.path'"),
pydantic.Field(None),
pgml.ArgRef("data.dataset.path"),
]
"""Path to the dataset"""
num_layers: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of layers. Maps to 'model.num_layers'"),
pydantic.Field(None),
pgml.ArgRef("model.num_layers"),
]
"""Number of layers"""
num_workers: typing.Annotated[
typing.Optional[int],
typer.Option(
help="Number of workers for loading the dataset. Maps to 'data.num_workers'"
),
pydantic.Field(None),
pgml.ArgRef("data.num_workers"),
]
"""Number of workers for loading the dataset"""
num_epochs: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of epochs. Maps to 'training.num_epochs'"),
pydantic.Field(None),
pgml.ArgRef("training.num_epochs"),
]
"""Number of epochs"""
🚪 Generated Entrypoint
It also generates a skeleton entrypoint:
import pgml_out.cli_demo_base as base
import pgml_out.cli_demo_sweep as sweep
import pgml_out.cli_demo_cli_args as cli_args
import typer
import py_gen_ml as pgml
import optuna
import typing
app = typer.Typer(pretty_exceptions_enable=False)
def run_trial(
cli_demo: base.CLIDemo,
trial: typing.Optional[optuna.Trial] = None
) -> typing.Union[float, typing.Sequence[float]]:
"""
Run a trial with the given values for cli_demo. The sampled hyperparameters have
already been added to the trial.
"""
# TODO: Implement this function
return 0.0
@pgml.pgml_cmd(app=app)
def main(
config_paths: typing.List[str] = typer.Option(..., help="Paths to config files"),
sweep_paths: typing.List[str] = typer.Option(
default_factory=list,
help="Paths to sweep files"
),
cli_args: cli_args.CLIDemoArgs = typer.Option(...),
) -> None:
cli_demo = base.CLIDemo.from_yaml_files(config_paths)
cli_demo = cli_demo.apply_cli_args(cli_args)
if len(sweep_paths) == 0:
run_trial(cli_demo)
return
cli_demo_sweep = sweep.CLIDemoSweep.from_yaml_files(sweep_paths)
def objective(trial: optuna.Trial) -> typing.Union[
float,
typing.Sequence[float]
]:
optuna_sampler = pgml.OptunaSampler(trial)
cli_demo_patch = optuna_sampler.sample(cli_demo_sweep)
cli_demo_patched = cli_demo.merge(cli_demo_patch)
objective_value = run_trial(cli_demo_patched, trial)
return objective_value
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
if __name__ == "__main__":
app()
It is a standard Typer app, so you can run it like a normal Python script:
Which should show something like:
Usage: cli_demo_entrypoint.py [OPTIONS]
╭─ Options ────────────────────────────────────────────────────────────────╮
│ * --config-paths TEXT Paths to config files │
│ [default: None] │
│ [required] │
│ --sweep-paths TEXT Paths to sweep files │
│ [default: <class 'list'>] │
│ --num-epochs INTEGER Number of epochs. Maps to │
│ 'num_epochs' │
│ [default: None] │
│ --path TEXT Path to the dataset. Maps to │
│ 'path' │
│ [default: None] │
│ --num-layers INTEGER Number of layers. Maps to │
│ 'num_layers' │
│ [default: None] │
│ --num-workers INTEGER Number of workers for loading │
│ the dataset. Maps to │
│ 'num_workers' │
│ [default: None] │
│ --install-completion Install completion for the │
│ current shell. │
│ --show-completion Show completion for the current │
│ shell, to copy it or customize │
│ the installation. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────╯
Notice how the names of the args are just the names of the fields in the innermost message of the nested structure. The names are unique globally, so these short names suffice for finding the intended field within the full structure.
💡 Workflow
We recommend copying the generated entrypoint and modifying it to fit your needs.
For example, you might write a run_trial
function that interfaces with your model and training code.
⏩ Shortening CLI arguments
As stated before, CLI argument names are shortened for deeply nested fields in your config when there's exactly one path to a field and the field name is unique. If the field name is not unique, we will prepend accessors to the field name until it is unique.
Take for example the following protobuf file:
// cli_demo_deep.proto
syntax = "proto3";
package cli_demo_deep;
import "py_gen_ml/extensions.proto";
// Global configuration
message CliDemoDeep {
option (pgml.cli).enable = true;
// Dataset configuration
Data data = 1;
// Model configuration
Model model = 2;
// Training configuration
Training training = 3;
}
// Dataset configuration
message Dataset {
// Path to the dataset
string path = 1;
}
// Data config
message Data {
// Path to the dataset
Dataset train_dataset = 1;
// Path to the dataset
Dataset test_dataset = 2;
// Number of workers for loading the dataset
uint32 num_workers = 3;
}
// Model configuration
message Model {
// Number of layers
uint32 num_layers = 1;
}
// Training configuration
message Training {
// Number of epochs
uint32 num_epochs = 1;
}
This generates the following CLI arguments:
# Autogenerated code. DO NOT EDIT.
import py_gen_ml as pgml
import typing
import pydantic
import typer
from . import cli_demo_deep_base as base
class CliDemoDeepArgs(pgml.YamlBaseModel):
"""Global configuration"""
train_dataset_path: typing.Annotated[
typing.Optional[str],
typer.Option(help="Path to the dataset. Maps to 'data.train_dataset.path'"),
pydantic.Field(None),
pgml.ArgRef("data.train_dataset.path"),
]
"""Path to the dataset"""
test_dataset_path: typing.Annotated[
typing.Optional[str],
typer.Option(help="Path to the dataset. Maps to 'data.test_dataset.path'"),
pydantic.Field(None),
pgml.ArgRef("data.test_dataset.path"),
]
"""Path to the dataset"""
num_workers: typing.Annotated[
typing.Optional[int],
typer.Option(
help="Number of workers for loading the dataset. Maps to 'data.num_workers'"
),
pydantic.Field(None),
pgml.ArgRef("data.num_workers"),
]
"""Number of workers for loading the dataset"""
num_epochs: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of epochs. Maps to 'training.num_epochs'"),
pydantic.Field(None),
pgml.ArgRef("training.num_epochs"),
]
"""Number of epochs"""
num_layers: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of layers. Maps to 'model.num_layers'"),
pydantic.Field(None),
pgml.ArgRef("model.num_layers"),
]
"""Number of layers"""
Notice how data.train_dataset.path
is shortened to train_dataset_path
and data.test_dataset.path
is shortened to test_dataset_path
.
🎯 Explicit Argument References
For more control, use explicit argument references in your protobuf:
// cli_extension_demo.proto
syntax = "proto3";
package cli_extension_demo;
import "py_gen_ml/extensions.proto";
// Global configuration
message CliExtensionDemo {
option (pgml.cli) = {
enable: true
arg: { name: "train_path", path: "data.train_dataset.path" }
arg: { name: "test_path", path: "data.test_dataset.path" }
};
// Dataset configuration
Data data = 1;
// Model configuration
Model model = 2;
// Training configuration
Training training = 3;
}
// Dataset configuration
message Dataset {
// Path to the dataset
string path = 1;
}
// Data config
message Data {
// Path to the dataset
Dataset train_dataset = 1;
// Path to the dataset
Dataset test_dataset = 2;
// Number of workers for loading the dataset
uint32 num_workers = 3;
}
// Model configuration
message Model {
// Number of layers
uint32 num_layers = 1;
}
// Training configuration
message Training {
// Number of epochs
uint32 num_epochs = 1;
}
The explicit argument references will replace the ones we have seen previously:
# Autogenerated code. DO NOT EDIT.
import py_gen_ml as pgml
import typing
import pydantic
import typer
from . import cli_extension_demo_base as base
class CliExtensionDemoArgs(pgml.YamlBaseModel):
"""Global configuration"""
train_path: typing.Annotated[
typing.Optional[str],
typer.Option(help="Path to the dataset. Maps to 'data.train_dataset.path'"),
pydantic.Field(None),
pgml.ArgRef("data.train_dataset.path"),
]
"""Path to the dataset"""
test_path: typing.Annotated[
typing.Optional[str],
typer.Option(help="Path to the dataset. Maps to 'data.test_dataset.path'"),
pydantic.Field(None),
pgml.ArgRef("data.test_dataset.path"),
]
"""Path to the dataset"""
num_epochs: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of epochs. Maps to 'training.num_epochs'"),
pydantic.Field(None),
pgml.ArgRef("training.num_epochs"),
]
"""Number of epochs"""
num_workers: typing.Annotated[
typing.Optional[int],
typer.Option(
help="Number of workers for loading the dataset. Maps to 'data.num_workers'"
),
pydantic.Field(None),
pgml.ArgRef("data.num_workers"),
]
"""Number of workers for loading the dataset"""
num_layers: typing.Annotated[
typing.Optional[int],
typer.Option(help="Number of layers. Maps to 'model.num_layers'"),
pydantic.Field(None),
pgml.ArgRef("model.num_layers"),
]
"""Number of layers"""
📚 Summary
With py-gen-ml
, you get powerful, flexible CLI argument parsing that adapts to your needs, whether using implicit shortcuts or explicit references.