DashboardClient
HTTP client for the EvolvingSystems backend's SDK write routes, with a local mirror and graceful degradation when the backend is unreachable.
Attributes
attribute base_url = (base_url or os.environ.get(EVSYS_API_URL_ENV) or DEFAULT_API_URL).rstrip('/')
attribute api_key = api_key or os.environ.get(EVSYS_API_KEY_ENV)
attribute project_id = project_id or os.environ.get(EVSYS_PROJECT_ID_ENV)
attribute timeout_s = timeout_s
attribute offline = offline if offline is not None else truthy_env(os.environ.get(EVSYS_OFFLINE_ENV))
attribute local = LocalExperimentStore(log_dir=log_dir)
Functions
func __init__(self, *, base_url=None, api_key=None, project_id=None, timeout_s=DEFAULT_TIMEOUT_S, offline=None, log_dir=None) -> None
param self
param base_url: str | None = None
param api_key: str | None = None
param project_id: str | None = None
param timeout_s: float = DEFAULT_TIMEOUT_S
param offline: bool | None = None
param log_dir: str | None = None
Returns
None
func _post(self, path, body=None) -> dict | None
POST to the backend. Returns the JSON dict, or None when the write could not be sent remotely (offline, or backend unreachable). Raises DashboardClientError on a 4xx response.
param self
param path: str
param body: dict | None = None
Returns
dict | None
func create_experiment(self, *, experiment_name, hypothesis=None, hypothesis_reasoning=None, plan=None, tags=None, project_goal_id=None, config=None, declaration_path=None, is_valid=None) -> dict
param self
param experiment_name: str
param hypothesis: str | None = None
param hypothesis_reasoning: str | None = None
param plan: str | None = None
param tags: list[str] | None = None
param project_goal_id: str | None = None
param config: dict | None = None
param declaration_path: str | None = None
param is_valid: bool | None = None
Returns
dict
func update_experiment(self, experiment_id, **patch) -> dict
PATCH experiment metadata. Whitelisted backend fields: status, best_score, best_generation_id, current_iteration, error_message, hypothesis, hypothesis_reasoning, plan, conclusion, tags, problem_statement_id, is_valid.
e.g. update_experiment(exp_id, is_valid=False) to invalidate an
experiment after a bug is found in its runs (D16).
param self
param experiment_id: str
param patch: Any = {}
Returns
dict
func create_run(self, *, experiment_id, group_id=None, dataset_id=None, seed=None, recipe_kind=None, run_config=None, status=STATUS_PENDING, wandb_run_url=None, tensorboard_path=None, tensorboard_archive_url=None) -> dict
param self
param experiment_id: str
param group_id: str | None = None
param dataset_id: str | None = None
param seed: int | None = None
param recipe_kind: str | None = None
param run_config: dict | None = None
param status: str = STATUS_PENDING
param wandb_run_url: str | None = None
param tensorboard_path: str | None = None
param tensorboard_archive_url: str | None = None
Returns
dict
func update_run(self, run_id, **patch) -> dict
param self
param run_id: str
param patch: Any = {}
Returns
dict
func log_step_metric(self, run_id, *, step, split='train', loss=None, accuracy=None, learning_rate=None, grad_norm=None, tokens_per_sec=None, **metrics) -> dict
Log per-step training metrics as long-format run_metrics (D15).
Accepts arbitrary named series via **metrics (e.g. val_loss=…,
kl_loss=…) plus a split (train/val/test). The named
kwargs (loss, accuracy, …) are folded into the same metrics map.
None values are dropped.
param self
param run_id: str
param step: int
param split: str = 'train'
param loss: float | None = None
param accuracy: float | None = None
param learning_rate: float | None = None
param grad_norm: float | None = None
param tokens_per_sec: float | None = None
param metrics: float = {}
Returns
dict
func create_eval(self, run_id, *, metrics, benchmark_id=None, checkpoint_id=None, model_ref=None, step=None, breakdowns=None, sdk_version=None) -> dict
Record an eval (D13): a run scored on a benchmark, producing a metrics map of {name: value}.
param self
param run_id: str
param metrics: dict[str, float]
param benchmark_id: str | None = None
param checkpoint_id: str | None = None
param model_ref: str | None = None
param step: int | None = None
param breakdowns: dict | None = None
param sdk_version: str | None = None
Returns
dict
func add_checkpoint(self, run_id, *, uri, label=None, step=None, base_model=None, is_final=False) -> dict
Record a named checkpoint for a run (D7).
param self
param run_id: str
param uri: str
param label: str | None = None
param step: int | None = None
param base_model: str | None = None
param is_final: bool = False
Returns
dict
func log_predictions(self, run_id, predictions) -> dict
Bulk-insert per-task predictions. Each prediction is a dict of the form: { kind: 'eval' | 'rollout', eval_id?, task_id?, sample_idx?, step?, instruction, model_output, expected?, reward?, advantage?, metadata? }, where a trailing `?` marks an optional key.
param self
param run_id: str
param predictions: list[dict]
Returns
dict