implement scalar_prod

sebasv · sebasv · commit 938846ebd0c9 · 2018-10-24T07:42:04.000+02:00
diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs
@@ -6,7 +6,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use std::ops::{Add, Div};
+use std::ops::{Add, Div, Mul};
 use libnum::{self, One, Zero, Float};
 use itertools::free::enumerate;
 
@@ -46,6 +46,32 @@ impl<A, S, D> ArrayBase<S, D>
         sum
     }
 
+    /// Return the product of all elements in the array.
+    ///
+    /// Multiplication starts from `A::one()`, which is also the result for an empty array.
+    ///
+    /// ```
+    /// use ndarray::arr2;
+    ///
+    /// let a = arr2(&[[1., 2.], [3., 4.]]);
+    /// assert_eq!(a.scalar_prod(), 24.);
+    /// ```
+    pub fn scalar_prod(&self) -> A
+        where A: Clone + Mul<Output=A> + libnum::One,
+    {
+        // Contiguous storage: multiply the whole buffer in one unrolled pass.
+        if let Some(slc) = self.as_slice_memory_order() {
+            return numeric_util::unrolled_prod(slc);
+        }
+        // Otherwise go row by row, using the unrolled kernel where a row is contiguous.
+        self.inner_rows().into_iter().fold(A::one(), |prod, row| {
+            prod * match row.as_slice() {
+                Some(slc) => numeric_util::unrolled_prod(slc),
+                None => row.iter().fold(A::one(), |acc, elt| acc * elt.clone()),
+            }
+        })
+    }
+
     /// Return sum along `axis`.
     ///
     /// ```
diff --git a/src/numeric_util.rs b/src/numeric_util.rs
@@ -10,6 +10,7 @@ use libnum;
 use std::cmp;
 use std::ops::{
     Add,
+    Mul,
 };
 
 use LinalgScalar;
@@ -51,6 +52,43 @@ pub fn unrolled_sum<A>(mut xs: &[A]) -> A
     sum
 }
 
+/// Compute the product of the values in `xs`, seeded with `A::one()`.
+pub fn unrolled_prod<A>(mut xs: &[A]) -> A
+    where A: Clone + Mul<Output=A> + libnum::One,
+{
+    // eightfold unrolled so that floating point can be vectorized
+    // (even with strict floating point accuracy semantics)
+    let mut prod = A::one();
+    let (mut p0, mut p1, mut p2, mut p3,
+         mut p4, mut p5, mut p6, mut p7) =
+        (A::one(), A::one(), A::one(), A::one(),
+         A::one(), A::one(), A::one(), A::one());
+    while xs.len() >= 8 {
+        p0 = p0 * xs[0].clone();
+        p1 = p1 * xs[1].clone();
+        p2 = p2 * xs[2].clone();
+        p3 = p3 * xs[3].clone();
+        p4 = p4 * xs[4].clone();
+        p5 = p5 * xs[5].clone();
+        p6 = p6 * xs[6].clone();
+        p7 = p7 * xs[7].clone();
+
+        xs = &xs[8..];
+    }
+    // merge the eight lanes; `prod` is owned, so it is moved instead of cloned
+    prod = prod * (p0 * p4);
+    prod = prod * (p1 * p5);
+    prod = prod * (p2 * p6);
+    prod = prod * (p3 * p7);
+
+    // fewer than 8 elements remain; `take(7)` makes it clear to the optimizer
+    // that this loop is short and can not be autovectorized.
+    for x in xs.iter().take(7) {
+        prod = prod * x.clone();
+    }
+    prod
+}
+
 /// Compute the dot product.
 ///
 /// `xs` and `ys` must be the same length
diff --git a/tests/oper.rs b/tests/oper.rs
@@ -271,6 +271,26 @@ fn fold_and_sum() {
     }
 }
 
+#[test]
+fn scalar_prod() {
+    // contiguous case: `scalar_prod` must agree with an explicit fold
+    let a = Array::linspace(0.5, 2., 128).into_shape((8, 16)).unwrap();
+    assert_approx_eq(a.fold(1., |acc, &x| acc * x), a.scalar_prod(), 1e-5);
+
+    // test different strides
+    let max = 8 as Ixs;
+    for i in 1..max {
+        for j in 1..max {
+            let a1 = a.slice(s![..;i, ..;j]);
+            // reference value: multiply the elements one by one in iteration order
+            let prod = a1.iter().fold(1., |acc, &elt| acc * elt);
+            // the array's own fold and `scalar_prod` must both match the reference
+            assert_approx_eq(a1.fold(1., |acc, &x| acc * x), prod, 1e-5);
+            assert_approx_eq(prod, a1.scalar_prod(), 1e-5);
+        }
+    }
+}
+
 fn range_mat(m: Ix, n: Ix) -> Array2<f32> {
     Array::linspace(0., (m * n) as f32 - 1., m * n).into_shape((m, n)).unwrap()
 }