LocalRLConfig
Attributes
attribute model_config = ConfigDict(extra='forbid')
attribute learning_rate: float = 5e-05
attribute num_epochs: int = 3
attribute per_device_train_batch_size: int = 4
attribute gradient_accumulation_steps: int = 8
attribute max_completion_length: int = 128
attribute num_generations: int = 4
attribute warmup_steps: int = 30
attribute logging_steps: int = 10
attribute save_steps: int = 50
attribute save_total_limit: int = 5
attribute bf16: bool = False
    Enable only on CUDA GPUs that support bfloat16 (Ampere+). Crashes on CPU/MPS.
attribute fp16: bool = False
    Enable only on CUDA GPUs. Crashes on CPU/MPS.
attribute beta: float = 0.04
attribute seed: int = 42
attribute lora_rank: int = 4
attribute lora_alpha: int = 8
attribute lora_target_modules: list[str] = Field(default_factory=(lambda: ['q_proj', 'v_proj']))
attribute verifier_kind: str = 'format_only'
attribute verifier_params: dict[str, Any] = Field(default_factory=dict)