Skip to content

Commit

Permalink
Fix an error in the Laplace noise parameter.
Browse files: browse the repository at this point in the history
  • Loading branch information
haoyueping committed Apr 1, 2021
1 parent 69cc502 commit 540a442
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
11 changes: 8 additions & 3 deletions DataSynthesizer/lib/PrivBayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,11 @@ def exponential_mechanism(epsilon, mutual_info_list, parents_pair_list, attr_to_


def laplace_noise_parameter(k, num_attributes, num_tuples, epsilon):
"""The noises injected into conditional distributions. PrivBayes Algorithm 1."""
return 2 * (num_attributes - k) / (num_tuples * epsilon)
"""The noises injected into conditional distributions.
Note that these noises are over counts, instead of the probability distributions in PrivBayes Algorithm 1.
"""
return (num_attributes - k) / epsilon


def get_noisy_distribution_of_attributes(attributes, encoded_dataset, epsilon=0.1):
Expand Down Expand Up @@ -257,9 +260,11 @@ def construct_noisy_conditional_distributions(bayesian_network, encoded_dataset,
for idx, (child, parents) in enumerate(bayesian_network):
conditional_distributions[child] = {}

if idx < k:
if idx <= k - 2:
stats = noisy_dist_of_kplus1_attributes.copy().loc[:, parents + [child, 'count']]
stats = stats.groupby(parents + [child], as_index=False).sum()
elif idx == k - 1:
stats = noisy_dist_of_kplus1_attributes.loc[:, parents + [child, 'count']]
else:
stats = get_noisy_distribution_of_attributes(parents + [child], encoded_dataset, epsilon)
stats = stats.loc[:, parents + [child, 'count']]
Expand Down
8 changes: 7 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,10 @@

### What's New

* Update example notebooks.
* Update example notebooks.

## 0.1.7 - 2021-03-31

### Bugs Fixed

* Fixed an error in the Laplace noise parameter. - [Issue #34](https://github.com/DataResponsibly/DataSynthesizer/issues/34) by @ganevgv
4 changes: 2 additions & 2 deletions tests/test_DataSynthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def test_datasynthesizer():

for col in df_input:
if col == 'age':
assert ks_test(df_input, df_output, col) < 0.1
assert ks_test(df_input, df_output, col) < 0.5
else:
assert kl_test(df_input, df_output, col) < 0.01
assert kl_test(df_input, df_output, col) < 1

df_input_mi = pairwise_attributes_mutual_information(df_input)
df_output_mi = pairwise_attributes_mutual_information(df_output)
Expand Down

0 comments on commit 540a442

Please sign in to comment.