[style] Increase black's line length (CarperAI#250)
* style(*): increase black --line-length to `120`

* chore!(pre-commit): update isort to `5.12.0` to resolve discrepancy

for some odd reason, CI's style check differs from the local `pre-commit run --all-files`

* style(*): satisfy isort
maxreciprocate authored Feb 1, 2023
1 parent a92a971 commit 4390d3f
Showing 33 changed files with 209 additions and 658 deletions.
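Note: the hunks below only bump the formatter hook revisions. The 120-character limit named in the commit title is not visible in these hunks; it typically lives in the repository's formatter configuration (for black, often pyproject.toml). As a hedged sketch, one way the relevant settings could be expressed directly in .pre-commit-config.yaml looks like this (this exact snippet is an assumption for illustration, not part of this commit):

# Hypothetical excerpt, for illustration only (not part of this diff).
repos:
  - repo: https://github.com/psf/black
    rev: 23.1.0
    hooks:
      - id: black
        args: [--line-length=120]
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: [--profile=black, --line-length=120]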
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-rev: v4.1.0
+rev: v4.4.0
hooks:
- id: check-case-conflict
- id: check-json
@@ -19,12 +19,12 @@ repos:
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
-rev: 22.10.0
+rev: 23.1.0
hooks:
- id: black
files: ^(trlx|examples|tests|setup.py)/
- repo: https://github.com/pycqa/isort
-rev: 5.11.5
+rev: 5.12.0
hooks:
- id: isort
name: isort (python)
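The second bullet of the commit message notes that CI's style check disagreed with a local `pre-commit run --all-files`; pinning both hooks to the same revisions, as above, and having CI run the identical command is what keeps the two in sync. Below is a minimal sketch of such a CI job, assuming GitHub Actions (the repository's actual workflow file is not part of this diff, so names and versions here are assumptions):

# Hypothetical workflow, for illustration only.
name: style
on: [push, pull_request]
jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - run: pip install pre-commit
      - run: pre-commit run --all-files  # same command as the local check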
4 changes: 1 addition & 3 deletions examples/architext.py
@@ -38,9 +38,7 @@ def reward_fn(samples, **kwargs):
def main(hparams={}):
config = TRLConfig.update(default_config, hparams)

-trlx.train(
-"architext/gptj-162M", reward_fn=reward_fn, prompts=prompts, config=config
-)
+trlx.train("architext/gptj-162M", reward_fn=reward_fn, prompts=prompts, config=config)


if __name__ == "__main__":
5 changes: 2 additions & 3 deletions examples/experiments/grounded_program_synthesis/lang.py
@@ -21,6 +21,7 @@ def init_random_input(len_range: int = 5, value_gen=5) -> list:

const_integer = [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]


# Functions in the DSL
# Each function defines a transformation in the given DSL Grammar.
def take(input_list: list, n: int) -> list:
@@ -372,9 +373,7 @@ def basic_stats(dataset, tokenizer):
"""
length_list = []
for examples in tqdm(dataset):
-datapoint = tokenizer(
-examples["input"] + " " + examples["output"] + "<|endoftext|>"
-)
+datapoint = tokenizer(examples["input"] + " " + examples["output"] + "<|endoftext|>")
length_list.append(len(datapoint["input_ids"]))
return {
"max": max(length_list),
18 changes: 3 additions & 15 deletions examples/experiments/grounded_program_synthesis/train_trlx.py
@@ -75,20 +75,8 @@ def main(hparams={}):

if __name__ == "__main__":
# TEST REWARD FUNTION
-assert (
-reward_fn(
-["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"]
-)
-) == [1]
-assert (
-reward_fn(
-["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"]
-)
-) == [-1]
-assert (
-reward_fn(
-["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"]
-)
-) == [-0.5]
+assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"])) == [1]
+assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"])) == [-1]
+assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"])) == [-0.5]

main()
23 changes: 4 additions & 19 deletions examples/randomwalks/randomwalks.py
@@ -5,9 +5,7 @@
import torch


-def generate_rand_int_excluding(
-rng: np.random.RandomState, max: int, exclude: int
-) -> int:
+def generate_rand_int_excluding(rng: np.random.RandomState, max: int, exclude: int) -> int:
"""Random integer generator, excluding a specific number
Args:
@@ -35,12 +33,7 @@ def generate_random_walks( # noqa: max-complexity
p_edge: float = 0.1,
seed: int = 1002,
gpt2_tokenizer: bool = False,
-) -> Tuple[
-Callable[[List[str]], Dict[str, List[float]]],
-List[str],
-List[str],
-torch.Tensor,
-]:
+) -> Tuple[Callable[[List[str]], Dict[str, List[float]]], List[str], List[str], torch.Tensor,]:
"""Generate random walks
Args:
@@ -106,7 +99,6 @@ def generate_random_walks( # noqa: max-complexity

# Create n_walks samples
for _ in range(n_walks):

# Create a random starting node (that isn't already at the goal state)
node: int = generate_rand_int_excluding(rng, n_nodes, goal)

@@ -116,7 +108,6 @@ def generate_random_walks( # noqa: max-complexity
# Do a series of steps, until we hit the maximum number of steps or the
# goal state (whichever comes first)
for _step in range(max_length - 1):

# From the starting node, get all the nodes we can move to. Pick one
# of these at random, and add it to the list of visited nodes
node = rng.choice(np.nonzero(adjacency_matrix[node])[0])
@@ -143,9 +134,7 @@ def generate_random_walks( # noqa: max-complexity
for start in set(range(n_nodes)) - {goal}:
try:
# Find the shortest path (up to the max_length)
-shortest_path = nx.shortest_path(directional_graph, start, goal)[
-:max_length
-]
+shortest_path = nx.shortest_path(directional_graph, start, goal)[:max_length]
shortest_lengths.append(len(shortest_path))
except Exception:
# If there is no path, use the maximum length instead
@@ -186,11 +175,7 @@ def metric_fn(
for node in range(len(sample)):
# If an invalid path is taken, set the length to the invalid
# path score
-if (
-sample[node] >= n_nodes
-or node > 0
-and not adjacency_matrix[sample[node - 1], sample[node]]
-):
+if sample[node] >= n_nodes or node > 0 and not adjacency_matrix[sample[node - 1], sample[node]]:
length = invalid_path_length
break

15 changes: 4 additions & 11 deletions examples/summarize_daily_cnn/t5_summarize_daily_cnn.py
@@ -12,8 +12,7 @@
import evaluate
except ImportError:
raise ImportError(
"To run this example, please install the `evaluate` and `nltk` packages"
"by running `pip install evaluate`"
"To run this example, please install the `evaluate` and `nltk` packages" "by running `pip install evaluate`"
)

config_path = pathlib.Path(__file__).parent / "configs/ppo_config_cnn_daily.yml"
@@ -26,9 +25,7 @@
def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):
original_summaries = [prompt_label[prompt.strip()] for prompt in prompts]
scores = [
-meteor.compute(predictions=[output.strip()], references=[original])[
-"meteor"
-]
+meteor.compute(predictions=[output.strip()], references=[original])["meteor"]
for (original, output) in zip(original_summaries, outputs)
]
return scores
@@ -41,9 +38,7 @@ def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):
prompts = ["Summarize: " + prompt for prompt in prompts]

# take 1,000 samples from the validation set as prompts for evaluation
-val_prompts = [
-"Summarize: " + prompt for prompt in dataset["validation"]["article"][0:1000]
-]
+val_prompts = ["Summarize: " + prompt for prompt in dataset["validation"]["article"][0:1000]]
val_summaries = dataset["validation"]["highlights"][0:1000]

# make dictionary of prompts and labels to use for reward function
@@ -63,9 +58,7 @@ def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):

for i in tqdm(range(len(val_prompts))):
key = tokenizer.decode(
-tokenizer(val_prompts[i], truncation=True, max_length=max_length)[
-"input_ids"
-],
+tokenizer(val_prompts[i], truncation=True, max_length=max_length)["input_ids"],
skip_special_tokens=True,
) # get prompt like trlx's prompt
prompt_label[key.strip()] = val_summaries[i]
16 changes: 4 additions & 12 deletions examples/summarize_rlhf/reward_model/gptj_reward_test.py
@@ -16,9 +16,7 @@ def set_seed(seed_val=42):
torch.cuda.manual_seed_all(seed_val)


-def create_comparison_dataset(
-path="CarperAI/openai_summarize_comparisons", split="train"
-):
+def create_comparison_dataset(path="CarperAI/openai_summarize_comparisons", split="train"):
dataset = load_dataset(path, split=split)
if split == "test":
dataset = dataset.select(range(5000))
@@ -95,16 +93,12 @@ def __call__(self, data):
model = GPTRewardModel("CarperAI/openai_summarize_tldr_sft")
model.load_state_dict(torch.load("rm_checkpoint/pytorch_model.bin"))
max_length = 550
-val_pairs = create_comparison_dataset(
-"CarperAI/openai_summarize_comparisons", "test"
-)
+val_pairs = create_comparison_dataset("CarperAI/openai_summarize_comparisons", "test")
dev_dataset = PairwiseDataset(val_pairs, tokenizer, max_length=max_length)

from torch.utils.data import DataLoader

-dev_dataloader = DataLoader(
-dev_dataset, shuffle=False, batch_size=6, collate_fn=DataCollatorReward()
-)
+dev_dataloader = DataLoader(dev_dataset, shuffle=False, batch_size=6, collate_fn=DataCollatorReward())
model.cuda()
model.eval()
model.half()
@@ -116,9 +110,7 @@ def __call__(self, data):
for x in batch:
batch[x] = batch[x].cuda()
outputs = model(**batch)
-correct += sum(
-outputs["chosen_end_scores"] > outputs["rejected_end_scores"]
-)
+correct += sum(outputs["chosen_end_scores"] > outputs["rejected_end_scores"])
chosen_list.append(outputs["chosen_end_scores"].cpu())
reject_list.append(outputs["rejected_end_scores"].cpu())
print("Total accuracy: ", correct / len(dev_dataset))
10 changes: 2 additions & 8 deletions examples/summarize_rlhf/reward_model/reward_model.py
@@ -9,11 +9,7 @@ def __init__(self, model_path):
model = AutoModelForCausalLM.from_pretrained(model_path)
self.config = model.config
# `gpt-neo(x)` models use `hidden_size` attribute names instead of `n_embd``
-self.config.n_embd = (
-self.config.hidden_size
-if hasattr(self.config, "hidden_size")
-else self.config.n_embd
-)
+self.config.n_embd = self.config.hidden_size if hasattr(self.config, "hidden_size") else self.config.n_embd
self.transformer = model.transformer
self.v_head = nn.Linear(self.config.n_embd, 1, bias=False)
self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
@@ -91,9 +87,7 @@ def forward(
rejected_end_scores.append(r_truncated_reward[-1])

# Compute loss
-loss += -torch.log(
-torch.sigmoid(c_truncated_reward - r_truncated_reward)
-).mean()
+loss += -torch.log(torch.sigmoid(c_truncated_reward - r_truncated_reward)).mean()
loss = loss / bs

if not inference:
@@ -8,9 +8,7 @@
from transformers import AutoTokenizer, Trainer, TrainingArguments


-def create_comparison_dataset(
-path="CarperAI/openai_summarize_comparisons", split="train"
-):
+def create_comparison_dataset(path="CarperAI/openai_summarize_comparisons", split="train"):
dataset = load_dataset(path, split=split)
pairs = []
for sample in tqdm(dataset):
26 changes: 7 additions & 19 deletions examples/summarize_rlhf/sft/summarize_dataset.py
@@ -43,9 +43,7 @@ def __len__(self):

def __getitem__(self, idx):
txt = self.post_list[idx]
-encodings_dict = self.tokenizer(
-txt, truncation=True, max_length=self.max_length, padding="max_length"
-)
+encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
input_ids = torch.tensor(encodings_dict["input_ids"])
attn_masks = torch.tensor(encodings_dict["attention_mask"])

@@ -75,19 +73,11 @@ def make_text(post, summarize):
self.post_list.append(sample["info"]["post"])
# NOTE: The chosen summary is always the first one, i.e. `sample["summaries"][0]`
if sample["choice"] == 0:
-self.summaries_0.append(
-make_text(sample["info"], sample["summaries"][0]["text"])
-)
-self.summaries_1.append(
-make_text(sample["info"], sample["summaries"][1]["text"])
-)
+self.summaries_0.append(make_text(sample["info"], sample["summaries"][0]["text"]))
+self.summaries_1.append(make_text(sample["info"], sample["summaries"][1]["text"]))
else:
-self.summaries_0.append(
-make_text(sample["info"], sample["summaries"][1]["text"])
-)
-self.summaries_1.append(
-make_text(sample["info"], sample["summaries"][0]["text"])
-)
+self.summaries_0.append(make_text(sample["info"], sample["summaries"][1]["text"]))
+self.summaries_1.append(make_text(sample["info"], sample["summaries"][0]["text"]))
self.labels.append(0)

def __len__(self):
@@ -113,7 +103,7 @@ def __init__(self, train_path, tokenizer, split, max_length=1024):
if split == "valid":
df = df.sample(n=5000)
self.summarizes = []
-for (i, row) in df.iterrows():
+for i, row in df.iterrows():
self.summarizes.append(f"Summarize: {row['text']}. TL;DR: {row['summary']}")
self.tokenizer = tokenizer
self.max_length = max_length
@@ -125,9 +115,7 @@ def __len__(self):

def __getitem__(self, idx):
txt = self.summarizes[idx]
-encodings_dict = self.tokenizer(
-txt, truncation=True, max_length=self.max_length, padding="max_length"
-)
+encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
input_ids = torch.tensor(encodings_dict["input_ids"])
attn_masks = torch.tensor(encodings_dict["attention_mask"])

24 changes: 6 additions & 18 deletions examples/summarize_rlhf/trlx_gptj_text_summarization.py
@@ -22,7 +22,6 @@


if __name__ == "__main__":

# Load the pre-trained reward model
rw_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
rw_tokenizer.pad_token = rw_tokenizer.eos_token
@@ -38,9 +37,7 @@ def get_scores(samples: List[str]):
batch_size = 2
for i in range(0, len(samples), batch_size):
sub_samples = samples[i : i + batch_size]
-sub_samples = [
-"<|startoftext|>" + chosen + "<|endoftext|>" for chosen in sub_samples
-]
+sub_samples = ["<|startoftext|>" + chosen + "<|endoftext|>" for chosen in sub_samples]
encodings_dict = rw_tokenizer(
sub_samples,
truncation=True,
@@ -69,8 +66,7 @@ def get_prompt_dataset(prompts, max_length):
tokenizer(
prompts[i].split("TL;DR:")[0],
truncation=True,
-max_length=max_length
-- 5, # to make sure "TL;DR" dont get truncated
+max_length=max_length - 5, # to make sure "TL;DR" dont get truncated
)["input_ids"],
skip_special_tokens=True,
).strip()
@@ -84,25 +80,19 @@ def get_prompt_dataset(prompts, max_length):

def reward_fn(samples: List[str], **kwargs):
original_samples = [text.split("TL;DR:")[0] + "TL;DR: " for text in samples]
-original_samples = [
-text + post_summary_dict[text.strip()] for text in original_samples
-]
+original_samples = [text + post_summary_dict[text.strip()] for text in original_samples]
original_scores = get_scores(original_samples)
scores = get_scores(samples)
norms_scores = scores - original_scores
return norms_scores

-config_path = pathlib.Path(__file__).parent.joinpath(
-"configs/ppo_config_summ_gptj.yml"
-)
+config_path = pathlib.Path(__file__).parent.joinpath("configs/ppo_config_summ_gptj.yml")
config = TRLConfig.load_yaml(config_path)

tokenizer = AutoTokenizer.from_pretrained(config.tokenizer.tokenizer_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
-max_length_input = (
-config.train.seq_length - config.method.gen_kwargs["max_new_tokens"]
-)
+max_length_input = config.train.seq_length - config.method.gen_kwargs["max_new_tokens"]

dataset = load_dataset("CarperAI/openai_summarize_tldr")

@@ -127,8 +117,6 @@ def reward_fn(samples: List[str], **kwargs):
config.model.model_path,
reward_fn=reward_fn,
prompts=train_prompts,
-eval_prompts=val_prompts[
-0:1000
-], # sampling 1000 validation prompts for evaluation speed in training
+eval_prompts=val_prompts[0:1000], # sampling 1000 validation prompts for evaluation speed in training
config=config,
)