LocalSFTConfig
Attributes
attribute model_config = ConfigDict(extra='forbid')
attribute learning_rate: float = 0.0002
attribute num_epochs: int = 3
attribute max_steps: int | None = None
    If set, overrides num_epochs — training stops after this many steps.
attribute per_device_train_batch_size: int = 1
attribute gradient_accumulation_steps: int = 16
attribute warmup_steps: int = 20
attribute max_seq_len: int = 512
attribute logging_steps: int = 10
attribute save_steps: int = 100
attribute save_total_limit: int = 5
attribute bf16: bool = False
    Enable only on CUDA GPUs that support bfloat16 (Ampere+). Crashes on CPU/MPS.
attribute fp16: bool = False
    Enable only on CUDA GPUs. Crashes on CPU/MPS.
attribute seed: int = 42
attribute lora_rank: int = 16
attribute lora_alpha: int = 32
attribute lora_dropout: float = 0.05
attribute lora_target_modules: list[str] = Field(default_factory=(lambda: ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj']))