From f6725366583dcd4e4e879248d3de1a247a2f66e2 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 21:41:53 +0200 Subject: [PATCH 01/10] vario unstruct: allow multiple fields to estimate variogram --- gstools/variogram/estimator.pyx | 26 +++++++++++++++----------- gstools/variogram/variogram.py | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/gstools/variogram/estimator.pyx b/gstools/variogram/estimator.pyx index 41414f6c4..2f0d67b10 100644 --- a/gstools/variogram/estimator.pyx +++ b/gstools/variogram/estimator.pyx @@ -10,7 +10,7 @@ import numpy as np cimport cython from cython.parallel import prange, parallel from libcpp.vector cimport vector -from libc.math cimport fabs, sqrt +from libc.math cimport fabs, sqrt, isnan cimport numpy as np @@ -112,31 +112,31 @@ ctypedef double (*_dist_func)( def unstructured( - const double[:] f, + const double[:, :] f, const double[:] bin_edges, const double[:] x, const double[:] y=None, const double[:] z=None, str estimator_type='m' ): - if x.shape[0] != f.shape[0]: + if x.shape[0] != f.shape[1]: raise ValueError('len(x) = {0} != len(f) = {1} '. - format(x.shape[0], f.shape[0])) + format(x.shape[0], f.shape[1])) if bin_edges.shape[0] < 2: raise ValueError('len(bin_edges) too small') cdef _dist_func distance # 3d if z is not None: - if z.shape[0] != f.shape[0]: + if z.shape[0] != f.shape[1]: raise ValueError('len(z) = {0} != len(f) = {1} '. - format(z.shape[0], f.shape[0])) + format(z.shape[0], f.shape[1])) distance = _distance_3d # 2d elif y is not None: - if y.shape[0] != f.shape[0]: + if y.shape[0] != f.shape[1]: raise ValueError('len(y) = {0} != len(f) = {1} '. - format(y.shape[0], f.shape[0])) + format(y.shape[0], f.shape[1])) distance = _distance_2d # 1d else: @@ -150,18 +150,22 @@ def unstructured( cdef int i_max = bin_edges.shape[0] - 1 cdef int j_max = x.shape[0] - 1 cdef int k_max = x.shape[0] + cdef int f_max = f.shape[0] cdef vector[double] variogram = vector[double](len(bin_edges)-1, 0.0) cdef vector[long] counts = vector[long](len(bin_edges)-1, 0) - cdef int i, j, k + cdef int i, j, k, m cdef DTYPE_t dist for i in prange(i_max, nogil=True): for j in range(j_max): for k in range(j+1, k_max): dist = distance(x, y, z, k, j) if dist >= bin_edges[i] and dist < bin_edges[i+1]: - counts[i] += 1 - variogram[i] += estimator_func(f[k] - f[j]) + for m in range(f_max): + if isnan(f[m,k]) or isnan(f[m,j]): + continue # skip no data values + counts[i] += 1 + variogram[i] += estimator_func(f[m,k] - f[m,j]) normalization_func(variogram, counts) return np.asarray(variogram) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index e13ddf32d..859d77b1a 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -40,6 +40,7 @@ def vario_estimate_unstructured( sampling_size=None, sampling_seed=None, estimator="matheron", + no_data=np.nan, ): r""" Estimates the variogram on a unstructured grid. @@ -73,8 +74,11 @@ def vario_estimate_unstructured( pos : :class:`list` the position tuple, containing main direction and transversal directions - field : :class:`numpy.ndarray` - the spatially distributed data + field : :class:`numpy.ndarray` or :class:`list` of :class:`numpy.ndarray` + The spatially distributed data. + You can pass a list of fields, that will be used simultaneously. + This could be helpful, when there are multiple realizations at the + same points, with the same statistical properties. bin_edges : :class:`numpy.ndarray` the bins on which the variogram will be calculated sampling_size : :class:`int` or :any:`None`, optional @@ -92,6 +96,9 @@ def vario_estimate_unstructured( * "cressie": an estimator more robust to outliers Default: "matheron" + no_data : :class:`float`, optional + Value to identify missing data in the given field. + Default: `np.nan` Returns ------- @@ -99,11 +106,16 @@ def vario_estimate_unstructured( the estimated variogram and the bin centers """ # TODO check_mesh - field = np.array(field, ndmin=1, dtype=np.double) + # allow multiple fields at same positions (ndmin=2: first axis -> field ID) + field = np.array(field, ndmin=2, dtype=np.double) bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) x, y, z, dim = pos2xyz(pos, calc_dim=True, dtype=np.double) bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + # set no_data values + if not np.isnan(no_data): + field[field == float(no_data)] = np.nan + if sampling_size is not None and sampling_size < len(field): sampled_idx = np.random.RandomState(sampling_seed).choice( np.arange(len(field)), sampling_size, replace=False From 1d61fd2644198b35e1f438ce884d9ab88adcde5a Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 21:42:25 +0200 Subject: [PATCH 02/10] test: test multi field variogram estimation --- tests/test_variogram_unstructured.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index c5a59a85a..3ffedeeed 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -5,6 +5,7 @@ import unittest import numpy as np +import gstools as gs from gstools import vario_estimate_unstructured @@ -217,6 +218,27 @@ def test_assertions(self): ValueError, vario_estimate_unstructured, [x], field_e, bins ) + def test_multi_field(self): + x = np.random.RandomState(19970221).rand(100) * 100.0 + y = np.random.RandomState(20011012).rand(100) * 100.0 + model = gs.Exponential(dim=1, var=2, len_scale=10) + srf = gs.SRF(model) + field1 = srf((x, y), seed=19970221) + field2 = srf((x, y), seed=20011012) + bins = np.arange(20) * 2 + bin_center, gamma1 = gs.vario_estimate_unstructured( + (x, y), field1, bins + ) + bin_center, gamma2 = gs.vario_estimate_unstructured( + (x, y), field2, bins + ) + bin_center, gamma = gs.vario_estimate_unstructured( + (x, y), [field1, field2], bins + ) + gamma_mean = 0.5 * (gamma1 + gamma2) + for i in range(20): + self.assertAlmostEqual(gamma[i], gamma_mean[i], places=2) + if __name__ == "__main__": unittest.main() From 0272f70aaf20757d27ec91025d1d5118f3e81499 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 21:42:52 +0200 Subject: [PATCH 03/10] examples: add example for multi-field variogram estimation --- examples/03_variogram/03_multi_vario.py | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 examples/03_variogram/03_multi_vario.py diff --git a/examples/03_variogram/03_multi_vario.py b/examples/03_variogram/03_multi_vario.py new file mode 100755 index 000000000..fecfc746c --- /dev/null +++ b/examples/03_variogram/03_multi_vario.py @@ -0,0 +1,42 @@ +""" +Multi-field variogram estimation +-------------------------------- + +In this example, we demonstrate how to estimate a variogram from multiple +fields at the same point-set, that should have the same statistical properties. +""" +import numpy as np +import gstools as gs +import matplotlib.pyplot as plt + + +x = np.random.RandomState(19970221).rand(1000) * 100.0 +y = np.random.RandomState(20011012).rand(1000) * 100.0 +model = gs.Exponential(dim=2, var=2, len_scale=8) +srf = gs.SRF(model, mean=0) + +############################################################################### +# Generate two synthetic fields with an exponential model. + +field1 = srf((x, y), seed=19970221) +field2 = srf((x, y), seed=20011012) + +############################################################################### +# Now we estimate the variograms for both fields individual and then again +# simultaneously with only one call. + +bins = np.arange(40) +bin_center, gamma1 = gs.vario_estimate_unstructured((x, y), field1, bins) +bin_center, gamma2 = gs.vario_estimate_unstructured((x, y), field2, bins) +bin_center, gamma = gs.vario_estimate_unstructured((x, y), [field1, field2], bins) + +############################################################################### +# Now we demonstrate, that the mean variogram from both fields coincides +# with the joined estimated one. + +plt.plot(bin_center, gamma1, label="field 1") +plt.plot(bin_center, gamma2, label="field 2") +plt.plot(bin_center, gamma, label="field joint") +plt.plot(bin_center, 0.5*(gamma1+gamma2), ":", label="field 1+2 mean") +plt.legend() +plt.show() From e9763f5c2ab29c8518170d85c55a9f02c4865481 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 21:58:12 +0200 Subject: [PATCH 04/10] vario: fix sampling of multi fields --- gstools/variogram/variogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index 859d77b1a..304f766d5 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -120,7 +120,7 @@ def vario_estimate_unstructured( sampled_idx = np.random.RandomState(sampling_seed).choice( np.arange(len(field)), sampling_size, replace=False ) - field = field[sampled_idx] + field = field[:, sampled_idx] x = x[sampled_idx] if dim > 1: y = y[sampled_idx] From fb1b4160b6955528f278d165085e039c989b38c9 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 22:59:45 +0200 Subject: [PATCH 05/10] vario: fix sampling --- gstools/variogram/variogram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index 304f766d5..b71c14522 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -116,9 +116,9 @@ def vario_estimate_unstructured( if not np.isnan(no_data): field[field == float(no_data)] = np.nan - if sampling_size is not None and sampling_size < len(field): + if sampling_size is not None and sampling_size < len(x): sampled_idx = np.random.RandomState(sampling_seed).choice( - np.arange(len(field)), sampling_size, replace=False + np.arange(len(x)), sampling_size, replace=False ) field = field[:, sampled_idx] x = x[sampled_idx] From 67b9b0bf969bef7e7b18221f5ae4fe3b4ffb9066 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 23:03:49 +0200 Subject: [PATCH 06/10] vario: fix multi field est test --- tests/test_variogram_unstructured.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index 3ffedeeed..5e0ddbf19 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -236,7 +236,7 @@ def test_multi_field(self): (x, y), [field1, field2], bins ) gamma_mean = 0.5 * (gamma1 + gamma2) - for i in range(20): + for i in range(len(gamma)): self.assertAlmostEqual(gamma[i], gamma_mean[i], places=2) From fe00f6a316ef5a59aa8d02d50bada3681741ae5e Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Mon, 21 Sep 2020 23:18:25 +0200 Subject: [PATCH 07/10] vario unstr: check shape of field value --- gstools/variogram/variogram.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index b71c14522..d2f2b71e2 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -105,13 +105,19 @@ def vario_estimate_unstructured( :class:`tuple` of :class:`numpy.ndarray` the estimated variogram and the bin centers """ - # TODO check_mesh # allow multiple fields at same positions (ndmin=2: first axis -> field ID) field = np.array(field, ndmin=2, dtype=np.double) bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) x, y, z, dim = pos2xyz(pos, calc_dim=True, dtype=np.double) bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + # check_mesh + if len(field.shape) > 2 or field.shape[1] != len(x): + try: + field = field.reshape((-1, len(x))) + except ValueError: + raise ValueError("'field' has wrong shape") + # set no_data values if not np.isnan(no_data): field[field == float(no_data)] = np.nan From 14ac78084fcbc6743907db8d05ea0c54ec27afd3 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Tue, 22 Sep 2020 05:28:32 +0200 Subject: [PATCH 08/10] vario: skipping no_data values fixed --- gstools/variogram/estimator.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gstools/variogram/estimator.pyx b/gstools/variogram/estimator.pyx index 2f0d67b10..b2df86c3b 100644 --- a/gstools/variogram/estimator.pyx +++ b/gstools/variogram/estimator.pyx @@ -162,10 +162,10 @@ def unstructured( dist = distance(x, y, z, k, j) if dist >= bin_edges[i] and dist < bin_edges[i+1]: for m in range(f_max): - if isnan(f[m,k]) or isnan(f[m,j]): - continue # skip no data values - counts[i] += 1 - variogram[i] += estimator_func(f[m,k] - f[m,j]) + # skip no data values + if not (isnan(f[m,k]) or isnan(f[m,j])): + counts[i] += 1 + variogram[i] += estimator_func(f[m,k] - f[m,j]) normalization_func(variogram, counts) return np.asarray(variogram) From 7e068353e32e7db87aeee9cc90702b72b1976154 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Tue, 22 Sep 2020 05:28:49 +0200 Subject: [PATCH 09/10] test: test no_data values in vario est --- tests/test_variogram_unstructured.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index 5e0ddbf19..fd93cc26a 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -220,25 +220,32 @@ def test_assertions(self): def test_multi_field(self): x = np.random.RandomState(19970221).rand(100) * 100.0 - y = np.random.RandomState(20011012).rand(100) * 100.0 model = gs.Exponential(dim=1, var=2, len_scale=10) srf = gs.SRF(model) - field1 = srf((x, y), seed=19970221) - field2 = srf((x, y), seed=20011012) + field1 = srf(x, seed=19970221) + field2 = srf(x, seed=20011012) bins = np.arange(20) * 2 - bin_center, gamma1 = gs.vario_estimate_unstructured( - (x, y), field1, bins - ) - bin_center, gamma2 = gs.vario_estimate_unstructured( - (x, y), field2, bins - ) + bin_center, gamma1 = gs.vario_estimate_unstructured(x, field1, bins) + bin_center, gamma2 = gs.vario_estimate_unstructured(x, field2, bins) bin_center, gamma = gs.vario_estimate_unstructured( - (x, y), [field1, field2], bins + x, [field1, field2], bins ) gamma_mean = 0.5 * (gamma1 + gamma2) for i in range(len(gamma)): self.assertAlmostEqual(gamma[i], gamma_mean[i], places=2) + def test_no_data(self): + x1 = np.random.RandomState(19970221).rand(100) * 100.0 + field1 = np.random.RandomState(20011012).rand(100) * 100.0 + field1[:10] = np.nan + x2 = x1[10:] + field2 = field1[10:] + bins = np.arange(20) * 2 + bin_center, gamma1 = gs.vario_estimate_unstructured(x1, field1, bins) + bin_center, gamma2 = gs.vario_estimate_unstructured(x2, field2, bins) + for i in range(len(gamma1)): + self.assertAlmostEqual(gamma1[i], gamma2[i], places=2) + if __name__ == "__main__": unittest.main() From 021704000ead58aea68a0cc4813321dc72729865 Mon Sep 17 00:00:00 2001 From: MuellerSeb Date: Tue, 22 Sep 2020 06:02:00 +0200 Subject: [PATCH 10/10] examples: polish multi vario example --- examples/03_variogram/03_multi_vario.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/03_variogram/03_multi_vario.py b/examples/03_variogram/03_multi_vario.py index fecfc746c..110ca4340 100755 --- a/examples/03_variogram/03_multi_vario.py +++ b/examples/03_variogram/03_multi_vario.py @@ -20,6 +20,7 @@ field1 = srf((x, y), seed=19970221) field2 = srf((x, y), seed=20011012) +fields = [field1, field2] ############################################################################### # Now we estimate the variograms for both fields individual and then again @@ -28,7 +29,7 @@ bins = np.arange(40) bin_center, gamma1 = gs.vario_estimate_unstructured((x, y), field1, bins) bin_center, gamma2 = gs.vario_estimate_unstructured((x, y), field2, bins) -bin_center, gamma = gs.vario_estimate_unstructured((x, y), [field1, field2], bins) +bin_center, gamma = gs.vario_estimate_unstructured((x, y), fields, bins) ############################################################################### # Now we demonstrate, that the mean variogram from both fields coincides @@ -37,6 +38,6 @@ plt.plot(bin_center, gamma1, label="field 1") plt.plot(bin_center, gamma2, label="field 2") plt.plot(bin_center, gamma, label="field joint") -plt.plot(bin_center, 0.5*(gamma1+gamma2), ":", label="field 1+2 mean") +plt.plot(bin_center, 0.5 * (gamma1 + gamma2), ":", label="field 1+2 mean") plt.legend() plt.show()