EvSys

DashboardClient

HTTP client for the EvolvingSystems backend's SDK write routes, with a local mirror and graceful degradation when the backend is unreachable.

Attributes

attribute base_url = (base_url or os.environ.get(EVSYS_API_URL_ENV) or DEFAULT_API_URL).rstrip('/')
attribute api_key = api_key or os.environ.get(EVSYS_API_KEY_ENV)
attribute project_id = project_id or os.environ.get(EVSYS_PROJECT_ID_ENV)
attribute timeout_s = timeout_s
attribute offline = offline if offline is not None else truthy_env(os.environ.get(EVSYS_OFFLINE_ENV))
attribute local = LocalExperimentStore(log_dir=log_dir)

Functions

func __init__(self, *, base_url=None, api_key=None, project_id=None, timeout_s=DEFAULT_TIMEOUT_S, offline=None, log_dir=None) -> None
param self
param base_url: str | None = None
param api_key: str | None = None
param project_id: str | None = None
param timeout_s: float = DEFAULT_TIMEOUT_S
param offline: bool | None = None
param log_dir: str | None = None

Returns

None
func _post(self, path, body=None) -> dict | None

POST to the backend. Returns the JSON dict, or None when the write could not be sent remotely (offline, or backend unreachable). Raises DashboardClientError on a 4xx response.

param self
param path: str
param body: dict | None = None

Returns

dict | None
func create_experiment(self, *, experiment_name, hypothesis=None, hypothesis_reasoning=None, plan=None, tags=None, project_goal_id=None, config=None, declaration_path=None, is_valid=None) -> dict
param self
param experiment_name: str
param hypothesis: str | None = None
param hypothesis_reasoning: str | None = None
param plan: str | None = None
param tags: list[str] | None = None
param project_goal_id: str | None = None
param config: dict | None = None
param declaration_path: str | None = None
param is_valid: bool | None = None

Returns

dict
func update_experiment(self, experiment_id, **patch) -> dict

PATCH experiment metadata. Whitelisted backend fields: status, best_score, best_generation_id, current_iteration, error_message, hypothesis, hypothesis_reasoning, plan, conclusion, tags, problem_statement_id, is_valid.

For example, call update_experiment(exp_id, is_valid=False) to invalidate an experiment after a bug is found in its runs (D16).

param self
param experiment_id: str
param **patch: Any (variadic keyword arguments; empty by default)

Returns

dict
func create_run(self, *, experiment_id, group_id=None, dataset_id=None, seed=None, recipe_kind=None, run_config=None, status=STATUS_PENDING, wandb_run_url=None, tensorboard_path=None, tensorboard_archive_url=None) -> dict
param self
param experiment_id: str
param group_id: str | None = None
param dataset_id: str | None = None
param seed: int | None = None
param recipe_kind: str | None = None
param run_config: dict | None = None
param status: str = STATUS_PENDING
param wandb_run_url: str | None = None
param tensorboard_path: str | None = None
param tensorboard_archive_url: str | None = None

Returns

dict
func update_run(self, run_id, **patch) -> dict
param self
param run_id: str
param **patch: Any (variadic keyword arguments; empty by default)

Returns

dict
func log_step_metric(self, run_id, *, step, split='train', loss=None, accuracy=None, learning_rate=None, grad_norm=None, tokens_per_sec=None, **metrics) -> dict

Log per-step training metrics as long-format run_metrics (D15).

Accepts arbitrary named series via **metrics (e.g. val_loss=…, kl_loss=…) plus a split (train/val/test). The named kwargs (loss, accuracy, …) are folded into the same metrics map. None values are dropped.

param self
param run_id: str
param step: int
param split: str = 'train'
param loss: float | None = None
param accuracy: float | None = None
param learning_rate: float | None = None
param grad_norm: float | None = None
param tokens_per_sec: float | None = None
param **metrics: float (variadic keyword arguments; empty by default)

Returns

dict
func create_eval(self, run_id, *, metrics, benchmark_id=None, checkpoint_id=None, model_ref=None, step=None, breakdowns=None, sdk_version=None) -> dict

Record an eval (D13): a run scored on a benchmark → metrics{name:value}.

param self
param run_id: str
param metrics: dict[str, float]
param benchmark_id: str | None = None
param checkpoint_id: str | None = None
param model_ref: str | None = None
param step: int | None = None
param breakdowns: dict | None = None
param sdk_version: str | None = None

Returns

dict
func add_checkpoint(self, run_id, *, uri, label=None, step=None, base_model=None, is_final=False) -> dict

Record a named checkpoint for a run (D7).

param self
param run_id: str
param uri: str
param label: str | None = None
param step: int | None = None
param base_model: str | None = None
param is_final: bool = False

Returns

dict
func log_predictions(self, run_id, predictions) -> dict

Bulk-insert per-task predictions. Each prediction is a dict of the form: { kind: 'eval' | 'rollout', eval_id?, task_id?, sample_idx?, step?, instruction, model_output, expected?, reward?, advantage?, metadata? } (fields with a trailing ? are presumably optional).

param self
param run_id: str
param predictions: list[dict]

Returns

dict

On this page