diff --git a/examples/03_variogram/03_multi_vario.py b/examples/03_variogram/03_multi_vario.py new file mode 100755 index 000000000..110ca4340 --- /dev/null +++ b/examples/03_variogram/03_multi_vario.py @@ -0,0 +1,43 @@ +""" +Multi-field variogram estimation +-------------------------------- + +In this example, we demonstrate how to estimate a variogram from multiple +fields at the same point-set, that should have the same statistical properties. +""" +import numpy as np +import gstools as gs +import matplotlib.pyplot as plt + + +x = np.random.RandomState(19970221).rand(1000) * 100.0 +y = np.random.RandomState(20011012).rand(1000) * 100.0 +model = gs.Exponential(dim=2, var=2, len_scale=8) +srf = gs.SRF(model, mean=0) + +############################################################################### +# Generate two synthetic fields with an exponential model. + +field1 = srf((x, y), seed=19970221) +field2 = srf((x, y), seed=20011012) +fields = [field1, field2] + +############################################################################### +# Now we estimate the variograms for both fields individual and then again +# simultaneously with only one call. + +bins = np.arange(40) +bin_center, gamma1 = gs.vario_estimate_unstructured((x, y), field1, bins) +bin_center, gamma2 = gs.vario_estimate_unstructured((x, y), field2, bins) +bin_center, gamma = gs.vario_estimate_unstructured((x, y), fields, bins) + +############################################################################### +# Now we demonstrate, that the mean variogram from both fields coincides +# with the joined estimated one. + +plt.plot(bin_center, gamma1, label="field 1") +plt.plot(bin_center, gamma2, label="field 2") +plt.plot(bin_center, gamma, label="field joint") +plt.plot(bin_center, 0.5 * (gamma1 + gamma2), ":", label="field 1+2 mean") +plt.legend() +plt.show() diff --git a/gstools/variogram/estimator.pyx b/gstools/variogram/estimator.pyx index 41414f6c4..b2df86c3b 100644 --- a/gstools/variogram/estimator.pyx +++ b/gstools/variogram/estimator.pyx @@ -10,7 +10,7 @@ import numpy as np cimport cython from cython.parallel import prange, parallel from libcpp.vector cimport vector -from libc.math cimport fabs, sqrt +from libc.math cimport fabs, sqrt, isnan cimport numpy as np @@ -112,31 +112,31 @@ ctypedef double (*_dist_func)( def unstructured( - const double[:] f, + const double[:, :] f, const double[:] bin_edges, const double[:] x, const double[:] y=None, const double[:] z=None, str estimator_type='m' ): - if x.shape[0] != f.shape[0]: + if x.shape[0] != f.shape[1]: raise ValueError('len(x) = {0} != len(f) = {1} '. - format(x.shape[0], f.shape[0])) + format(x.shape[0], f.shape[1])) if bin_edges.shape[0] < 2: raise ValueError('len(bin_edges) too small') cdef _dist_func distance # 3d if z is not None: - if z.shape[0] != f.shape[0]: + if z.shape[0] != f.shape[1]: raise ValueError('len(z) = {0} != len(f) = {1} '. - format(z.shape[0], f.shape[0])) + format(z.shape[0], f.shape[1])) distance = _distance_3d # 2d elif y is not None: - if y.shape[0] != f.shape[0]: + if y.shape[0] != f.shape[1]: raise ValueError('len(y) = {0} != len(f) = {1} '. - format(y.shape[0], f.shape[0])) + format(y.shape[0], f.shape[1])) distance = _distance_2d # 1d else: @@ -150,18 +150,22 @@ def unstructured( cdef int i_max = bin_edges.shape[0] - 1 cdef int j_max = x.shape[0] - 1 cdef int k_max = x.shape[0] + cdef int f_max = f.shape[0] cdef vector[double] variogram = vector[double](len(bin_edges)-1, 0.0) cdef vector[long] counts = vector[long](len(bin_edges)-1, 0) - cdef int i, j, k + cdef int i, j, k, m cdef DTYPE_t dist for i in prange(i_max, nogil=True): for j in range(j_max): for k in range(j+1, k_max): dist = distance(x, y, z, k, j) if dist >= bin_edges[i] and dist < bin_edges[i+1]: - counts[i] += 1 - variogram[i] += estimator_func(f[k] - f[j]) + for m in range(f_max): + # skip no data values + if not (isnan(f[m,k]) or isnan(f[m,j])): + counts[i] += 1 + variogram[i] += estimator_func(f[m,k] - f[m,j]) normalization_func(variogram, counts) return np.asarray(variogram) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index e13ddf32d..d2f2b71e2 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -40,6 +40,7 @@ def vario_estimate_unstructured( sampling_size=None, sampling_seed=None, estimator="matheron", + no_data=np.nan, ): r""" Estimates the variogram on a unstructured grid. @@ -73,8 +74,11 @@ def vario_estimate_unstructured( pos : :class:`list` the position tuple, containing main direction and transversal directions - field : :class:`numpy.ndarray` - the spatially distributed data + field : :class:`numpy.ndarray` or :class:`list` of :class:`numpy.ndarray` + The spatially distributed data. + You can pass a list of fields, that will be used simultaneously. + This could be helpful, when there are multiple realizations at the + same points, with the same statistical properties. bin_edges : :class:`numpy.ndarray` the bins on which the variogram will be calculated sampling_size : :class:`int` or :any:`None`, optional @@ -92,23 +96,37 @@ def vario_estimate_unstructured( * "cressie": an estimator more robust to outliers Default: "matheron" + no_data : :class:`float`, optional + Value to identify missing data in the given field. + Default: `np.nan` Returns ------- :class:`tuple` of :class:`numpy.ndarray` the estimated variogram and the bin centers """ - # TODO check_mesh - field = np.array(field, ndmin=1, dtype=np.double) + # allow multiple fields at same positions (ndmin=2: first axis -> field ID) + field = np.array(field, ndmin=2, dtype=np.double) bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) x, y, z, dim = pos2xyz(pos, calc_dim=True, dtype=np.double) bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 - if sampling_size is not None and sampling_size < len(field): + # check_mesh + if len(field.shape) > 2 or field.shape[1] != len(x): + try: + field = field.reshape((-1, len(x))) + except ValueError: + raise ValueError("'field' has wrong shape") + + # set no_data values + if not np.isnan(no_data): + field[field == float(no_data)] = np.nan + + if sampling_size is not None and sampling_size < len(x): sampled_idx = np.random.RandomState(sampling_seed).choice( - np.arange(len(field)), sampling_size, replace=False + np.arange(len(x)), sampling_size, replace=False ) - field = field[sampled_idx] + field = field[:, sampled_idx] x = x[sampled_idx] if dim > 1: y = y[sampled_idx] diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index c5a59a85a..fd93cc26a 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -5,6 +5,7 @@ import unittest import numpy as np +import gstools as gs from gstools import vario_estimate_unstructured @@ -217,6 +218,34 @@ def test_assertions(self): ValueError, vario_estimate_unstructured, [x], field_e, bins ) + def test_multi_field(self): + x = np.random.RandomState(19970221).rand(100) * 100.0 + model = gs.Exponential(dim=1, var=2, len_scale=10) + srf = gs.SRF(model) + field1 = srf(x, seed=19970221) + field2 = srf(x, seed=20011012) + bins = np.arange(20) * 2 + bin_center, gamma1 = gs.vario_estimate_unstructured(x, field1, bins) + bin_center, gamma2 = gs.vario_estimate_unstructured(x, field2, bins) + bin_center, gamma = gs.vario_estimate_unstructured( + x, [field1, field2], bins + ) + gamma_mean = 0.5 * (gamma1 + gamma2) + for i in range(len(gamma)): + self.assertAlmostEqual(gamma[i], gamma_mean[i], places=2) + + def test_no_data(self): + x1 = np.random.RandomState(19970221).rand(100) * 100.0 + field1 = np.random.RandomState(20011012).rand(100) * 100.0 + field1[:10] = np.nan + x2 = x1[10:] + field2 = field1[10:] + bins = np.arange(20) * 2 + bin_center, gamma1 = gs.vario_estimate_unstructured(x1, field1, bins) + bin_center, gamma2 = gs.vario_estimate_unstructured(x2, field2, bins) + for i in range(len(gamma1)): + self.assertAlmostEqual(gamma1[i], gamma2[i], places=2) + if __name__ == "__main__": unittest.main()