Feature: A new contrastive loss (Barlow Twins) (#1259)

Nimrais · Mykola Lukashchouk · Scitator · web-flow · commit ce79bbecf0ce · 2021-07-23T20:16:26.000+03:00
* Add Barlow Twins loss as a new contrastive loss

* update CHANGELOG.md

* Add BarlowTwinsLoss into catalyst/contrib/nn/criterion/__init__.py

* Add example with simple test

* Delete test code

* Barlow Twins cross-correlation matrix - a laconic way of off-diagonal element selection

* handle zero variance

* Example from >>> to code-block

* add explicit BarlowTwinsLoss init check into test_criterion_init

* Add simple test for Barlow Twins loss

* typo std -> var

* i.i.d. distributed and normalized

* delete unbiased from torch.var

* lambda influence testing

* hidden trailing whitespace

* Update catalyst/contrib/nn/criterion/contrastive.py

lmbda -> lambda

Co-authored-by: Sergey Kolesnikov <scitator@gmail.com>

* rename parameter lmbda -> offdiag_lambda

* Add ValueErrors into BarlowTwinsLoss

* laconic example in BarlowTwinsLoss

* rebase

* Fixed typo

Co-authored-by: Mykola Lukashchouk <mykola@Mac-mini-Mykola.local>
Co-authored-by: Sergey Kolesnikov <scitator@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - added `pre-commit` hook to run codestyle checker on commit ([#1257](https://github.com/catalyst-team/catalyst/pull/1257))
 - `on publish` github action for docker and docs added [#1260](https://github.com/catalyst-team/catalyst/pull/1260)
+- Barlow twins loss ([#1259](https://github.com/catalyst-team/catalyst/pull/1259))
 
 ### Changed
 
diff --git a/catalyst/contrib/nn/criterion/__init__.py b/catalyst/contrib/nn/criterion/__init__.py
@@ -5,40 +5,35 @@
 
 from catalyst.contrib.nn.criterion.ce import (
     MaskCrossEntropyLoss,
-    SymmetricCrossEntropyLoss,
     NaiveCrossEntropyLoss,
+    SymmetricCrossEntropyLoss,
 )
 from catalyst.contrib.nn.criterion.circle import CircleLoss
 from catalyst.contrib.nn.criterion.contrastive import (
+    BarlowTwinsLoss,
     ContrastiveDistanceLoss,
     ContrastiveEmbeddingLoss,
     ContrastivePairwiseEmbeddingLoss,
 )
 from catalyst.contrib.nn.criterion.dice import DiceLoss
-from catalyst.contrib.nn.criterion.focal import (
-    FocalLossBinary,
-    FocalLossMultiClass,
-)
-from catalyst.contrib.nn.criterion.gan import (
-    GradientPenaltyLoss,
-    MeanOutputLoss,
-)
+from catalyst.contrib.nn.criterion.focal import FocalLossBinary, FocalLossMultiClass
+from catalyst.contrib.nn.criterion.gan import GradientPenaltyLoss, MeanOutputLoss
 
 if torch.__version__ < "1.9":
     from catalyst.contrib.nn.criterion.huber import HuberLoss
 
 from catalyst.contrib.nn.criterion.iou import IoULoss
-from catalyst.contrib.nn.criterion.trevsky import TrevskyLoss, FocalTrevskyLoss
 from catalyst.contrib.nn.criterion.lovasz import (
     LovaszLossBinary,
     LovaszLossMultiClass,
     LovaszLossMultiLabel,
 )
 from catalyst.contrib.nn.criterion.margin import MarginLoss
+from catalyst.contrib.nn.criterion.trevsky import FocalTrevskyLoss, TrevskyLoss
 from catalyst.contrib.nn.criterion.triplet import (
     TripletLoss,
     TripletLossV2,
-    TripletPairwiseEmbeddingLoss,
     TripletMarginLossWithSampler,
+    TripletPairwiseEmbeddingLoss,
 )
 from catalyst.contrib.nn.criterion.wing import WingLoss
diff --git a/catalyst/contrib/nn/criterion/contrastive.py b/catalyst/contrib/nn/criterion/contrastive.py
@@ -135,8 +135,96 @@ def forward(self, embeddings_pred, embeddings_true) -> torch.Tensor:
         return loss
 
 
+class BarlowTwinsLoss(nn.Module):
+    """The Barlow Twins loss.
+
+    It has been proposed in `Barlow Twins:
+    Self-Supervised Learning via Redundancy Reduction`_.
+
+    Both batches of embeddings are standardized feature-wise over the batch
+    dimension, their cross-correlation matrix is computed, and the loss is
+    the sum of squared deviations of its diagonal from one plus
+    ``offdiag_lambda`` times the sum of its squared off-diagonal elements.
+
+    Example:
+
+    .. code-block:: python
+
+        import torch
+        from torch.nn import functional as F
+        from catalyst.contrib.nn import BarlowTwinsLoss
+
+        embeddings_left = F.normalize(torch.rand(256, 64, requires_grad=True))
+        embeddings_right = F.normalize(torch.rand(256, 64, requires_grad=True))
+        criterion = BarlowTwinsLoss(offdiag_lambda=1)
+        criterion(embeddings_left, embeddings_right)
+
+    .. _`Barlow Twins: Self-Supervised Learning via Redundancy Reduction`:
+        https://arxiv.org/abs/2103.03230
+    """
+
+    def __init__(self, offdiag_lambda=1.0, eps=1e-12):
+        """
+        Args:
+            offdiag_lambda: trade-off parameter, the weight of
+                the off-diagonal term in the loss
+            eps: shift for the variance (var + eps),
+                applied before the square root during normalization
+        """
+        super().__init__()
+        self.offdiag_lambda = offdiag_lambda
+        self.eps = eps
+
+    def _standardize(self, embeddings: torch.Tensor) -> torch.Tensor:
+        """Centers and scales every feature over the batch dimension."""
+        # NOTE(review): ``var`` is called with its defaults while the
+        # cross-correlation below is divided by ``batch_size``; identical
+        # inputs therefore give a diagonal of (batch_size - 1) / batch_size
+        # instead of 1 if the default estimator is the unbiased one - confirm
+        # that this is intended.
+        centered = embeddings - embeddings.mean(dim=0)
+        return centered / (embeddings.var(dim=0) + self.eps).pow(1 / 2)
+
+    def forward(
+        self, embeddings_left: torch.Tensor, embeddings_right: torch.Tensor,
+    ) -> torch.Tensor:
+        """Forward propagation method for the Barlow Twins loss.
+
+        Args:
+            embeddings_left: left objects embeddings [batch_size, features_dim]
+            embeddings_right: right objects embeddings [batch_size, features_dim]
+
+        Raises:
+            ValueError: if embeddings shapes are not in a form (batch_size, features_dim)
+            ValueError: if the batch size is less than 2
+            ValueError: if embeddings_left and embeddings_right shapes are different
+
+        Returns:
+            torch.Tensor: loss
+        """
+        shape_left, shape_right = embeddings_left.shape, embeddings_right.shape
+        if len(shape_left) != 2:
+            raise ValueError(
+                f"Left shape should be (batch_size, feature_dim), but got - {shape_left}!"
+            )
+        if len(shape_right) != 2:
+            raise ValueError(
+                f"Right shape should be (batch_size, feature_dim), but got - {shape_right}!"
+            )
+        # the message promises ">= 2", so an empty batch is rejected as well
+        # instead of silently producing a NaN loss
+        if shape_left[0] < 2:
+            raise ValueError(f"Batch size should be >= 2, but got - {shape_left[0]}!")
+        if shape_left != shape_right:
+            raise ValueError(f"Shapes should be equall, but got - {shape_left} and {shape_right}!")
+
+        # normalization
+        z_left = self._standardize(embeddings_left)
+        z_right = self._standardize(embeddings_right)
+
+        # cross-correlation matrix [features_dim, features_dim]
+        batch_size = z_left.shape[0]
+        cross_correlation = torch.matmul(z_left.T, z_right) / batch_size
+
+        # selection of diagonal elements and off diagonal elements;
+        # the diagonal is a view, so it is only read, never modified in-place
+        on_diag = torch.diagonal(cross_correlation)
+        off_diag = cross_correlation.clone().fill_diagonal_(0)
+
+        # the loss described in the original Barlow Twins paper
+        # encouraging off_diag to be zero and on_diag to be one
+        on_diag_loss = (on_diag - 1).pow(2).sum()
+        off_diag_loss = off_diag.pow(2).sum()
+        loss = on_diag_loss + self.offdiag_lambda * off_diag_loss
+        return loss
+
+
 __all__ = [
     "ContrastiveEmbeddingLoss",
     "ContrastiveDistanceLoss",
     "ContrastivePairwiseEmbeddingLoss",
+    "BarlowTwinsLoss",
 ]
diff --git a/tests/catalyst/contrib/nn/test_criterion.py b/tests/catalyst/contrib/nn/test_criterion.py
@@ -1,7 +1,11 @@
 # flake8: noqa
+import numpy as np
+import pytest
+import torch
 
 from catalyst.contrib.nn import criterion as module
 from catalyst.contrib.nn.criterion import CircleLoss, TripletMarginLossWithSampler
+from catalyst.contrib.nn.criterion.contrastive import BarlowTwinsLoss
 from catalyst.data import AllTripletsSampler
 
 
@@ -13,6 +17,8 @@ def test_criterion_init():
                 instance = module_class(margin=0.25, gamma=256)
             elif module_class == TripletMarginLossWithSampler:
                 instance = module_class(margin=1.0, sampler_inbatch=AllTripletsSampler())
+            elif module_class == BarlowTwinsLoss:
+                instance = module_class(offdiag_lambda=1, eps=1e-12)
             else:
                 # @TODO: very dirty trick
                 try:
@@ -21,3 +27,84 @@ def test_criterion_init():
                     print(module_class)
                     instance = 1
             assert instance is not None
+
+
+@pytest.mark.parametrize(
+    "embeddings_left,embeddings_right,offdiag_lambda,eps,true_value",
+    [
+        # identical 2x2 identity embeddings, only the off-diagonal weight varies
+        (torch.eye(2), torch.eye(2), 1, 1e-12, 1),
+        (torch.eye(2), torch.eye(2), 0, 1e-12, 0.5),
+        (torch.eye(2), torch.eye(2), 2, 1e-12, 1.5),
+        # identical single-feature embeddings for a batch of ten samples
+        (
+            torch.tensor(
+                [
+                    -0.31887834,
+                    1.3980029,
+                    0.30775256,
+                    0.29397671,
+                    -1.47968253,
+                    -0.72796992,
+                    -0.30937596,
+                    1.16363952,
+                    -2.15524895,
+                    -0.0440765,
+                ]
+            ).unsqueeze(1),
+            torch.tensor(
+                [
+                    -0.31887834,
+                    1.3980029,
+                    0.30775256,
+                    0.29397671,
+                    -1.47968253,
+                    -0.72796992,
+                    -0.30937596,
+                    1.16363952,
+                    -2.15524895,
+                    -0.0440765,
+                ]
+            ).unsqueeze(1),
+            1,
+            1e-12,
+            0.01,
+        ),
+    ],
+)
+def test_barlow_twins_loss(
+    embeddings_left: torch.Tensor,
+    embeddings_right: torch.Tensor,
+    offdiag_lambda: float,
+    eps: float,
+    true_value: float,
+):
+    """
+    Checks the Barlow Twins loss value against a precomputed reference.
+
+    Args:
+        embeddings_left: left objects embeddings [batch_size, features_dim]
+        embeddings_right: right objects embeddings [batch_size, features_dim]
+        offdiag_lambda: trade-off parameter
+        eps: zero variance handler (var + eps)
+        true_value: expected loss value
+    """
+    criterion = BarlowTwinsLoss(offdiag_lambda=offdiag_lambda, eps=eps)
+    loss = criterion(embeddings_left, embeddings_right)
+    assert np.isclose(loss.item(), true_value)