Fix UniPC scheduler for 1D (huggingface#5276)
patrickvonplaten authored Oct 3, 2023
1 parent dfcce3c commit 7271f8b
Showing 9 changed files with 27 additions and 36 deletions.
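The same _threshold_sample fix is applied in every scheduler below: the old code unpacked a fixed 4D shape (batch_size, channels, height, width), so dynamic thresholding failed on 1D samples shaped (batch_size, channels, length). The new code collects the trailing dimensions with *remaining_dims and flattens them via np.prod, which handles any number of trailing dimensions. A minimal standalone sketch of the updated logic (the function name and the hard-coded ratio/max values are illustrative placeholders; the schedulers read dynamic_thresholding_ratio and sample_max_value from their config):

import numpy as np
import torch

def threshold_sample(sample: torch.Tensor, ratio: float = 0.995, max_value: float = 1.0) -> torch.Tensor:
    # Works for (batch, channels, length) as well as (batch, channels, height, width).
    batch_size, channels, *remaining_dims = sample.shape
    sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
    abs_sample = sample.abs()
    s = torch.quantile(abs_sample, ratio, dim=1)
    s = torch.clamp(s, min=1, max=max_value).unsqueeze(1)  # shape (batch_size, 1) so it broadcasts against the flattened samples
    sample = torch.clamp(sample, -s, s) / s                # threshold to [-s, s], then rescale
    return sample.reshape(batch_size, channels, *remaining_dims)

x_1d = torch.randn(2, 4, 128)      # 1D data: previously failed to unpack into four dims
x_2d = torch.randn(2, 4, 32, 32)   # image data: behaviour unchanged
print(threshold_sample(x_1d).shape, threshold_sample(x_2d).shape)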
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_ddim.py
@@ -276,25 +276,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_ddim_parallel.py
@@ -298,25 +298,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_ddpm.py
@@ -330,25 +330,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_ddpm_parallel.py
@@ -344,25 +344,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_deis_multistep.py
@@ -268,25 +268,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
@@ -288,25 +288,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
@@ -298,25 +298,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py
@@ -302,25 +302,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
7 changes: 3 additions & 4 deletions src/diffusers/schedulers/scheduling_unipc_multistep.py
@@ -282,25 +282,24 @@ def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
https://arxiv.org/abs/2205.11487
"""
dtype = sample.dtype
-batch_size, channels, height, width = sample.shape
+batch_size, channels, *remaining_dims = sample.shape

if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half

# Flatten sample for doing quantile calculation along each image
-sample = sample.reshape(batch_size, channels * height * width)
+sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

abs_sample = sample.abs() # "a certain percentile absolute pixel value"

s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
s = torch.clamp(
s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]

s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"

-sample = sample.reshape(batch_size, channels, height, width)
+sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype)

return sample
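
With the shape handling generalized, the UniPC scheduler named in the commit title can run dynamic thresholding on 1D data. A hypothetical smoke test in the spirit of the scheduler tests (the shapes, step count, and random model outputs are illustrative assumptions, not part of this commit):

import torch
from diffusers import UniPCMultistepScheduler

scheduler = UniPCMultistepScheduler(thresholding=True)  # turn on dynamic thresholding
scheduler.set_timesteps(num_inference_steps=10)

sample = torch.randn(2, 4, 256)  # 1D sample: (batch, channels, length)
for t in scheduler.timesteps:
    model_output = torch.randn_like(sample)  # stand-in for a real denoising model
    sample = scheduler.step(model_output, t, sample).prev_sample

print(sample.shape)  # torch.Size([2, 4, 256])

Before this commit, enabling thresholding on such a 3D tensor failed inside _threshold_sample with a shape-unpacking error.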
