Skip to content

Commit

Permalink
lf.eval: Fix a few issues
Browse files Browse the repository at this point in the history
- Dryrun now uses the copy's examples to avoid messing up the current evaluation's state.
- Reset examples when starting to run.
- Fix usage aggregation logic when the LLM does not provide usage information.

This prevents the state of the current evaluation from getting messed up if calling `self.examples` has a side effect.

PiperOrigin-RevId: 630202707
  • Loading branch information
daiyip authored and langfun authors committed May 2, 2024
1 parent 90e52e3 commit fe9122d
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions langfun/core/eval/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _render_message(self, message: lf.Message, s: io.StringIO) -> None:
)
s.write(html.escape(pg.format(m.result)))
s.write('</div>')
if 'usage' in m.metadata:
if 'usage' in m.metadata and m.usage is not None:
s.write(
'<div style="background-color: #EEEEEE; color: black; '
'white-space: pre-wrap; padding: 10px; border: 0px solid; '
Expand Down Expand Up @@ -1056,11 +1056,11 @@ def _dryrun(
verbose: bool,
**kwargs,
) -> None:
# Set the example for dryrun.
example = example or self.examples[0]

# We make a copy to avoid pollute the state of current object.
copy: Evaluation = self.clone()

# Set the example for dryrun.
example = example or copy.examples[0]
copy.__dict__['examples'] = [example]

# We set the symbolic parent of the cloned to access contextual information
Expand Down Expand Up @@ -1126,6 +1126,9 @@ def _run(
**kwargs,
) -> None:
# Setup examples.
# Reset examples so it could be read from the input functor.
self.__dict__.pop('examples', None)

if end is None:
end = len(self.examples)
examples = self.examples[start:end]
Expand Down Expand Up @@ -1434,8 +1437,9 @@ def audit(
self._num_completed += 1

def audit_usage(self, message: lf.Message, dryrun: bool = False) -> None:
del dryrun
for m in message.trace():
if 'usage' in m.metadata:
if m.metadata.get('usage', None) is not None:
self._total_prompt_tokens += m.usage.prompt_tokens
self._total_completion_tokens += m.usage.completion_tokens
self._num_usages += 1
Expand Down

0 comments on commit fe9122d

Please sign in to comment.