Skip to content

Commit

Permalink
use p_value to distinguish watermarked text from non-watermarked text in EXP
Browse files Browse the repository at this point in the history
  • Loading branch information
panly2003 committed Jul 2, 2024
1 parent 7bc8d93 commit 2d26d6f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion config/EXP.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
"algorithm_name": "EXP",
"prefix_length": 4,
"hash_key": 15485863,
"threshold": 2.0,
"threshold": 1e-4,
"sequence_length": 200
}
11 changes: 6 additions & 5 deletions watermark/exp/exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# ============================================

import torch
import scipy
from math import log
from ..base import BaseWatermark
from utils.utils import load_config_file
Expand Down Expand Up @@ -151,17 +152,17 @@ def detect_watermark(self, text: str, return_dict: bool = True, *args, **kwargs)
r = random_numbers[encoded_text[i]]
total_score += log(1 / (1 - r))

# Compute the average score across all scored tokens
score = total_score / num_scored if num_scored > 0 else 0
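# Calculate p_value: the survival function of a gamma distribution with shape num_scored
# and unit scale, evaluated at total_score (presumably the null distribution of the summed
# log(1 / (1 - r)) terms for non-watermarked text; confirm against the EXP paper)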
p_value = scipy.stats.gamma.sf(total_score, num_scored, loc=0, scale=1)

# Determine if the text is watermarked: the computed p-value must fall below the configured threshold
is_watermarked = score > self.config.threshold
is_watermarked = p_value < self.config.threshold

# Return results based on the `return_dict` flag
if return_dict:
return {"is_watermarked": is_watermarked, "score": score}
return {"is_watermarked": is_watermarked, "score": p_value}
else:
return (is_watermarked, score)
return (is_watermarked, p_value)

def get_data_for_visualization(self, text: str, *args, **kwargs) -> DataForVisualization:
"""Get data for visualization."""
Expand Down

0 comments on commit 2d26d6f

Please sign in to comment.