from torch import optim
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from instruct_goose.reward import RewardModel, PairwiseLoss
from instruct_goose.dataset import PairDataset

How to train a reward model?
Step 1: Create a reward model from a pre-trained language model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
reward_model = RewardModel(checkpoint="gpt2")
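Under the hood, a reward model of this kind is typically the pre-trained language-model backbone topped with a scalar value head that maps the last hidden state to a single reward per sequence. The sketch below illustrates that idea under this assumption; it is not instruct_goose's actual RewardModel implementation.

import torch
from torch import nn
from transformers import AutoModel

class TinyRewardModel(nn.Module):
    # Illustrative only: a GPT-2 backbone plus a scalar value head.
    def __init__(self, checkpoint: str = "gpt2"):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(checkpoint)
        self.value_head = nn.Linear(self.backbone.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        hidden = self.backbone(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        last_token = attention_mask.sum(dim=-1) - 1  # index of the last non-padding token
        pooled = hidden[torch.arange(hidden.size(0)), last_token]
        return self.value_head(pooled).squeeze(-1)  # one scalar reward per sequence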
Step 2: Create a Pairwise dataset

dataset = load_dataset("CarperAI/openai_summarize_comparisons", split="train")
dataset, _ = random_split(dataset, lengths=[10, len(dataset) - 10])  # for demo purposes

Using custom data configuration CarperAI--openai_summarize_comparisons-79d2c222a15dc8fb
Found cached dataset parquet (/Users/education/.cache/huggingface/datasets/CarperAI___parquet/CarperAI--openai_summarize_comparisons-79d2c222a15dc8fb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
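Each record pairs a prompt with a human-preferred summary (chosen) and a less-preferred one (rejected). A quick way to peek at one record; the field names follow the published schema of CarperAI/openai_summarize_comparisons, so adjust if yours differs:

sample = dataset[0]  # random_split returns a Subset, which still yields dicts
print(sample["prompt"][:200])
print("CHOSEN:", sample["chosen"][:200])
print("REJECTED:", sample["rejected"][:200])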
pair_dataset = PairDataset(dataset, tokenizer)
dataloader = DataLoader(pair_dataset, batch_size=2)

100%|██████████| 10/10 [00:00<00:00, 822.85it/s]
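The dataloader yields batches of four tensors, the chosen and rejected token IDs plus their attention masks, in exactly the order the training step below unpacks. A quick sanity check (shapes depend on the tokenizer settings):

chosen_ids, chosen_mask, rejected_ids, rejected_mask = next(iter(dataloader))
print(chosen_ids.shape, rejected_ids.shape)  # two sequences per batch with batch_size=2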
Step 3: Write a training loop
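PairwiseLoss implements the ranking objective used for InstructGPT-style reward models: the chosen completion should score higher than the rejected one, usually written as -log σ(r_chosen − r_rejected) averaged over the batch. Below is a minimal sketch of that objective; instruct_goose's PairwiseLoss evidently uses a different sign or reduction convention, given the negative values printed during training further down.

import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_rewards: torch.Tensor, rejected_rewards: torch.Tensor) -> torch.Tensor:
    # maximise the probability that the chosen completion outranks the rejected one
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()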
N_EPOCHS = 1 # for demo purposes
LEARNING_RATE = 1e-3
pairwise_loss = PairwiseLoss()

class LitRewardModel(pl.LightningModule):
    def __init__(self, model, loss_func, lr):
        super().__init__()
        self.model = model
        self.loss_func = loss_func
        self.lr = lr

    def training_step(self, batch, batch_idx: int):
        # each batch holds the tokenized chosen and rejected completions
        chosen_input_ids, chosen_attention_mask, \
            rejected_input_ids, rejected_attention_mask = batch

        # score both completions with the same reward model
        chosen_rewards = self.model(chosen_input_ids, chosen_attention_mask)
        rejected_rewards = self.model(rejected_input_ids, rejected_attention_mask)

        # the pairwise loss pushes the chosen reward above the rejected one
        loss = self.loss_func(chosen_rewards, rejected_rewards)
        print(f"loss={loss}")
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        return optimizer

lit_model = LitRewardModel(reward_model, pairwise_loss, lr=LEARNING_RATE)
trainer = pl.Trainer(max_epochs=N_EPOCHS, log_every_n_steps=1)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
trainer.fit(model=lit_model, train_dataloaders=dataloader)

Missing logger folder: /Users/education/DATA/projects/ai/RLHF/instructGOOSE/nbs/lightning_logs
| Name | Type | Params
-------------------------------------------
0 | model | RewardModel | 124 M
1 | loss_func | PairwiseLoss | 0
-------------------------------------------
124 M Trainable params
0 Non-trainable params
124 M Total params
497.762 Total estimated model params size (MB)
/Users/education/DATA/projects/ai/RLHF/instructGOOSE/env/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 8 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
rank_zero_warn(
loss=-0.2531266510486603
loss=-0.2498958855867386
loss=-0.24884334206581116
loss=-0.2499789297580719
loss=-0.23997953534126282
`Trainer.fit` stopped: `max_epochs=1` reached.
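Once training finishes, the reward model can be saved and reused to score arbitrary text, for example as the reward signal in the RL stage of RLHF. A sketch of one way to do that; the forward signature mirrors the training_step call above and the file name is arbitrary:

import torch

torch.save(reward_model.state_dict(), "reward_model.pt")

text = "A post followed by a candidate summary to score."
inputs = tokenizer(text, return_tensors="pt", truncation=True)
with torch.no_grad():
    reward = reward_model(inputs["input_ids"], inputs["attention_mask"])
print(reward)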