Source code for ares.attack.evolutionary

import torch
import numpy as np
from ares.utils.registry import registry

@registry.register_attack('evolutionary')
class Evolutionary(object):
    ''' Evolutionary. A black-box decision-based method.

    The attack only uses the label predicted by the model (the arg-max of its
    output). Starting from a random adversarial image, it repeatedly proposes
    a candidate closer to the original image and keeps the candidate whenever
    it is still adversarial.

    Example:
        >>> from ares.utils.registry import registry
        >>> attacker_cls = registry.get_attack('evolutionary')
        >>> attacker = attacker_cls(model)
        >>> adv_images = attacker(images, labels, target_labels)

    - Supported distance metric: 2.
    - References: https://arxiv.org/abs/1904.04433.
    '''

    def __init__(self, model, device='cuda', ccov=0.001, decay_weight=0.99, max_queries=10000,
                 mu=0.01, sigma=3e-2, maxlen=30, target=False):
        '''The function to initialize evolutionary attack.

        Args:
            model (torch.nn.Module): The target model to be attacked.
            device (str or torch.device): The device to perform the evolutionary attack. Defaults to 'cuda'.
            ccov (float): The update rate of the diagonal covariance. Defaults to 0.001.
            decay_weight (float): The decay weight of the evolution path. Defaults to 0.99.
            max_queries (int): The maximum number of search iterations per image. Defaults to 10000.
            mu (float): The initial step size of the bias towards the original image. Defaults to 0.01.
            sigma (float): The scale of the random perturbation, relative to the current distance. Defaults to 3e-2.
            maxlen (int): The number of recent queries used to adapt mu. Defaults to 30.
            target (bool): Conduct target/untarget attack. Defaults to False.

        Raises:
            AssertionError: If ``target`` is True; the targeted attack is not supported.
        '''
        self.model = model
        self.ccov = ccov
        self.decay_weight = decay_weight
        self.max_queries = max_queries
        self.mu = mu
        self.device = device
        self.sigma = sigma
        self.maxlen = maxlen
        self.targeted = target
        # Valid pixel range; candidates are clamped into it.
        self.min_value = 0
        self.max_value = 1
        if self.targeted:
            raise AssertionError('dont support targeted attack')

    def _is_adversarial(self, x, y, ytarget):
        '''The function to judge if the input image is adversarial.

        Args:
            x (torch.Tensor): The images to classify.
            y (torch.Tensor): The ground-truth labels.
            ytarget (torch.Tensor or None): The target labels, only read for a targeted attack.

        Returns:
            torch.Tensor: Element-wise boolean result. Callers in this class use it
            as a truth value, which requires a batch of exactly one image.
        '''
        # Only the predicted label is needed, so skip autograd bookkeeping.
        with torch.no_grad():
            output = torch.argmax(self.model(x), dim=1)
        if self.targeted:
            return output == ytarget
        return output != y

    def get_init_noise(self, x_target, y, ytarget):
        '''The function to initialize noise.

        Samples uniform random images of the same size as ``x_target`` until one
        of them is adversarial. NOTE: the loop has no upper bound and these model
        queries are not counted against ``max_queries``.

        Args:
            x_target (torch.Tensor): The original image; only its size is used.
            y (torch.Tensor): The ground-truth label.
            ytarget (torch.Tensor or None): The target label.

        Returns:
            torch.Tensor: A random adversarial image on ``self.device``.
        '''
        while True:
            x_init = torch.rand(x_target.size()).to(self.device)
            x_init = torch.clamp(x_init, min=self.min_value, max=self.max_value)
            if self._is_adversarial(x_init, y, ytarget):
                print("Success getting init noise", end=' ')
                return x_init

    def evolutionary(self, x, y, ytarget):
        '''The function to conduct evolutionary attack.

        Args:
            x (torch.Tensor): The original image as a batch of one.
            y (torch.Tensor): The ground-truth label with shape [1, ].
            ytarget (torch.Tensor or None): The target label with shape [1, ], or None.

        Returns:
            torch.Tensor: ``x`` itself if it is already adversarial, otherwise the
            adversarial image closest (in L2 norm) to ``x`` found during the search.
        '''
        x = x.to(self.device)
        y = y.to(self.device)
        if ytarget is not None:
            ytarget = ytarget.to(self.device)

        if self._is_adversarial(x, y, ytarget):
            return x

        pert_shape = tuple(x.shape)
        m = int(np.prod(pert_shape))
        # Perturb about 5% of the coordinates per query, but always at least one,
        # otherwise the perturbation is all zeros and its normalisation yields NaN.
        k = max(1, m // 20)
        evolutionary_path = np.zeros(pert_shape)
        diagonal_covariance = np.ones(pert_shape)
        decay_weight = self.decay_weight
        ccov = self.ccov
        # Adapt the step size locally so one run does not alter the configured
        # value used by later images or calls.
        mu = self.mu

        # find a starting point
        x_adv = self.get_init_noise(x, y, ytarget)
        unnormalized_source_direction = x - x_adv
        source_norm = torch.norm(unnormalized_source_direction)
        best_adv = x_adv
        mindist = source_norm
        stats_adversarial = []

        for _ in range(self.max_queries):
            # Choose k coordinates with probability proportional to their variance.
            selection_prob = diagonal_covariance.reshape(-1) / np.sum(diagonal_covariance)
            selection_indices = np.random.choice(m, k, replace=False, p=selection_prob)
            factor = np.zeros(m)
            factor[selection_indices] = 1.0
            pert = np.random.normal(0.0, 1.0, pert_shape)
            pert *= factor.reshape(pert_shape) * np.sqrt(diagonal_covariance)
            pert_large = torch.Tensor(pert).to(self.device)

            # Step towards the original image, then add the scaled random perturbation.
            biased = (x_adv + mu * unnormalized_source_direction).to(self.device)
            candidate = biased + self.sigma * source_norm * pert_large / torch.norm(pert_large)
            # Rescale so the candidate is as far from x as the biased point is.
            candidate = x - (x - candidate) / torch.norm(x - candidate) * torch.norm(x - biased)
            candidate = torch.clamp(candidate, self.min_value, self.max_value)

            if self._is_adversarial(candidate, y, ytarget):
                x_adv = candidate
                unnormalized_source_direction = x - x_adv
                source_norm = torch.norm(unnormalized_source_direction)
                # Track the best point at acceptance time so that a candidate
                # accepted on the very last query is not lost.
                if mindist > source_norm:
                    mindist = source_norm
                    best_adv = x_adv
                evolutionary_path = decay_weight * evolutionary_path + np.sqrt(1 - decay_weight ** 2) * pert
                diagonal_covariance = (1 - ccov) * diagonal_covariance + ccov * (evolutionary_path ** 2)
                stats_adversarial.append(1)
            else:
                stats_adversarial.append(0)

            if len(stats_adversarial) == self.maxlen:
                # Grow mu when more than 20% of the recent queries succeeded,
                # shrink it otherwise.
                mu *= float(np.exp(np.mean(stats_adversarial) - 0.2))
                stats_adversarial = []

        return best_adv

    def __call__(self, images=None, labels=None, target_labels=None):
        '''This function perform attack on target images with corresponding labels
        and target labels for target attack.

        Args:
            images (torch.Tensor): The images to be attacked. The images should be torch.Tensor with shape [N, C, H, W] and range [0, 1].
            labels (torch.Tensor): The corresponding labels of the images. The labels should be torch.Tensor with shape [N, ]
            target_labels (torch.Tensor): The target labels for target attack. The labels should be torch.Tensor with shape [N, ]

        Returns:
            torch.Tensor: Adversarial images with value range [0,1].
        '''
        adv_images = []
        # The search is run independently on each image as a batch of one.
        for i, image in enumerate(images):
            target_label = None if target_labels is None else target_labels[i].unsqueeze(0)
            adv_x = self.evolutionary(image.unsqueeze(0), labels[i].unsqueeze(0), target_label)
            adv_images.append(adv_x)
        if not adv_images:
            # torch.cat rejects an empty list; an empty batch has nothing to attack.
            return images.to(self.device)
        return torch.cat(adv_images, 0)