Skip to content
3 changes: 2 additions & 1 deletion package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,12 @@ Chronological list of authors
- Matthew Davies
- Jia-Xin Zhu
- Tanish Yelgoe
2025
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops, thanks for fixing :-)

2025
- Joshua Raphael Uy
- Namir Oues
- Lexi Xu
- BHM-Bob G
- Debasish Mohanty
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add your name at the end of the list.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

- Yu-Yuan (Stuart) Yang

External code
Expand Down
95 changes: 57 additions & 38 deletions package/MDAnalysis/visualization/streamlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,53 @@ def split_grid(grid, num_cores):
]


# some private utility functions for trajectory iteration
def _produce_list_indices_point_in_polygon_this_frame(
    vertex_coord_list, relevant_particle_coordinate_array_xy
):
    """Return, per grid square, the indices of particles inside it.

    The returned list has one entry per square in ``vertex_coord_list``,
    in the same order; each entry is the tuple produced by ``np.where``
    for the particles whose xy coordinates fall inside that square, so
    the list has the same length as the number of squares in the grid.
    """
    indices_per_square = []
    for square_vertices in vertex_coord_list:
        polygon = matplotlib.path.Path(square_vertices)
        containment_mask = polygon.contains_points(
            relevant_particle_coordinate_array_xy
        )
        indices_per_square.append(np.where(containment_mask))
    return indices_per_square


def _produce_list_centroids_this_frame(
list_indices_in_polygon, relevant_particle_coordinate_array_xy
):
"""
Produce a list of centroids for the particles in the current frame.
The list is ordered according to the order of the squares in the grid.
Each entry in the list is a numpy array of the centroid coordinates for the particles in that square.
The list is the same length as the number of squares in the grid.
If there are no particles in a square, the entry is None.
"""
list_centroids_this_frame = []
for indices in list_indices_in_polygon:
if (
not indices[0].size > 0
): # if there are no particles of interest in this particular square
list_centroids_this_frame.append(None)
else:
current_coordinate_array_in_square = (
relevant_particle_coordinate_array_xy[indices]
)
current_square_indices_centroid = np.average(
current_coordinate_array_in_square, axis=0
)
list_centroids_this_frame.append(current_square_indices_centroid)
return list_centroids_this_frame # a list of numpy xy centroid arrays for this frame


def per_core_work(
topology_file_path,
trajectory_file_path,
Expand All @@ -197,39 +244,6 @@ def per_core_work(
)
list_previous_frame_centroids = []
list_previous_frame_indices = []
# define some utility functions for trajectory iteration:

def produce_list_indices_point_in_polygon_this_frame(vertex_coord_list):
list_indices_point_in_polygon = []
for square_vertices in vertex_coord_list:
path_object = matplotlib.path.Path(square_vertices)
index_list_in_polygon = np.where(
path_object.contains_points(
relevant_particle_coordinate_array_xy
)
)
list_indices_point_in_polygon.append(index_list_in_polygon)
return list_indices_point_in_polygon

def produce_list_centroids_this_frame(list_indices_in_polygon):
list_centroids_this_frame = []
for indices in list_indices_in_polygon:
if (
not indices[0].size > 0
): # if there are no particles of interest in this particular square
list_centroids_this_frame.append(None)
else:
current_coordinate_array_in_square = (
relevant_particle_coordinate_array_xy[indices]
)
current_square_indices_centroid = np.average(
current_coordinate_array_in_square, axis=0
)
list_centroids_this_frame.append(
current_square_indices_centroid
)
return list_centroids_this_frame # a list of numpy xy centroid arrays for this frame

for ts in universe_object.trajectory:
if ts.frame < start_frame: # don't start until first specified frame
continue
Expand All @@ -239,22 +253,27 @@ def produce_list_centroids_this_frame(list_indices_in_polygon):
# only 2D / xy coords for now
# I will need a list of indices for relevant particles falling within each square in THIS frame:
list_indices_in_squares_this_frame = (
produce_list_indices_point_in_polygon_this_frame(
list_square_vertex_arrays_this_core
_produce_list_indices_point_in_polygon_this_frame(
list_square_vertex_arrays_this_core,
relevant_particle_coordinate_array_xy,
)
)
# likewise, I will need a list of centroids of particles in each square (same order as above list):
list_centroids_in_squares_this_frame = (
produce_list_centroids_this_frame(
list_indices_in_squares_this_frame
_produce_list_centroids_this_frame(
list_indices_in_squares_this_frame,
relevant_particle_coordinate_array_xy,
)
)
if (
list_previous_frame_indices
): # if the previous frame had indices in at least one square I will need to use
# those indices to generate the updates to the corresponding centroids in this frame:
list_centroids_this_frame_using_indices_from_last_frame = (
produce_list_centroids_this_frame(list_previous_frame_indices)
_produce_list_centroids_this_frame(
list_previous_frame_indices,
relevant_particle_coordinate_array_xy,
)
)
# I need to write a velocity of zero if there are any 'empty' squares in either frame:
xy_deltas_to_write = []
Expand Down
158 changes: 158 additions & 0 deletions testsuite/MDAnalysisTests/visualization/test_streamlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,140 @@ def membrane_xtc(tmpdir_factory, univ):
return str(tmp_xtc)


def test_produce_list_indices_point_in_polygon_this_frame():
    """Particles are assigned to the grid squares that contain them."""
    # Define two squares:
    # square1 covers the area [0,1]x[0,1]
    # square2 covers the area [2,3]x[2,3]
    square1 = [(0, 0), (1, 0), (1, 1), (0, 1)]
    square2 = [(2, 2), (3, 2), (3, 3), (2, 3)]

    # Create a list of vertex coordinates (for two squares)
    vertex_list = [square1, square2]

    # Define points:
    # Point [0.5, 0.5] lies inside square1.
    # Point [1.5, 1.5] lies outside both squares.
    # Point [2.5, 2.5] lies inside square2.
    # Point [3.5, 3.5] lies outside both squares.
    points = np.array([[0.5, 0.5], [1.5, 1.5], [2.5, 2.5], [3.5, 3.5]])

    # Call the function under test.
    result = streamlines._produce_list_indices_point_in_polygon_this_frame(
        vertex_list, points
    )

    # np.where returns a tuple; thus for each square we expect:
    # For square1: point index 0 is inside -> (array([0]),)
    # For square2: point index 2 is inside -> (array([2]),)
    expected = [(np.array([0]),), (np.array([2]),)]

    # Check that each result matches the expected indices.
    for res_tuple, exp_tuple in zip(result, expected):
        np.testing.assert_array_equal(res_tuple[0], exp_tuple[0])


def test_produce_list_centroids_empty():
    """A square with no particle indices must yield a None centroid."""
    empty_indices = [(np.array([]),)]
    # dummy coordinates; never indexed because the index array is empty
    coords = np.array([[0, 0], [1, 1]])
    centroids = streamlines._produce_list_centroids_this_frame(
        empty_indices, coords
    )
    assert centroids == [None]


def test_produce_list_centroids_single_square():
    """Centroid of a single square is the mean of its selected points."""
    # Create an array of particle coordinates
    pts = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
    # Choose indices that pick points [1] and [3] -> centroid (2, 2)
    list_indices = [(np.array([1, 3]),)]
    result = streamlines._produce_list_centroids_this_frame(list_indices, pts)
    expected = np.array([2.0, 2.0])
    # assert_allclose is preferred over assert_array_almost_equal in new
    # code (per the numpy.testing documentation)
    np.testing.assert_allclose(result[0], expected)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should prefer assert_allclose in new code per the docstring of this NumPy function I think.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Used assert_allclose in recent changes.



def test_produce_list_centroids_multiple_squares():
    """Centroids are produced per square, in the order of the index lists."""
    coords = np.array([[0, 0], [2, 2], [4, 4], [6, 6]])
    # square 1 averages coords[0] and coords[2] -> [2, 2]
    # square 2 averages coords[1] and coords[3] -> [4, 4]
    index_lists = [(np.array([0, 2]),), (np.array([1, 3]),)]
    centroids = streamlines._produce_list_centroids_this_frame(
        index_lists, coords
    )
    np.testing.assert_array_equal(centroids[0], np.array([2, 2]))
    np.testing.assert_array_equal(centroids[1], np.array([4, 4]))


def test_adjacent_squares():
    """Points are assigned to the correct one of two adjacent squares."""
    # Two adjacent squares that share the boundary x = 1:
    # square1 covers [0,1]x[0,1] and square2 covers [1,2]x[0,1].
    # A point at [0.5, 0.5] should be in square1 and one at [1.5, 0.5]
    # should be in square2.
    square1 = [(0, 0), (1, 0), (1, 1), (0, 1)]
    square2 = [(1, 0), (2, 0), (2, 1), (1, 1)]
    vertex_list = [square1, square2]
    points = np.array(
        [
            [0.5, 0.5],
            [1.5, 0.5],
        ]
    )
    result = streamlines._produce_list_indices_point_in_polygon_this_frame(
        vertex_list, points
    )
    expected = [(np.array([0]),), (np.array([1]),)]
    for res_tuple, exp_tuple in zip(result, expected):
        np.testing.assert_array_equal(res_tuple[0], exp_tuple[0])


def test_point_on_boundary():
    """A point exactly on a square's edge is reported as inside."""
    # For a square covering [0,1]x[0,1], the point [1, 0.5] lies exactly
    # on the right edge. As exercised here,
    # matplotlib.path.Path.contains_points reports this boundary point as
    # contained, so its index (0) is expected in the result.
    # NOTE(review): boundary handling of contains_points can depend on the
    # ``radius`` argument — confirm if matplotlib behavior changes.
    square = [(0, 0), (1, 0), (1, 1), (0, 1)]
    vertex_list = [square]
    points = np.array([[1, 0.5]])  # exactly on the boundary
    result = streamlines._produce_list_indices_point_in_polygon_this_frame(
        vertex_list, points
    )
    expected = [(np.array([0], dtype=int),)]
    np.testing.assert_array_equal(result[0][0], expected[0][0])
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm misunderstanding something, but above it says:

matplotlib.path.Path.contains_points does not include boundary points.

But expected has the index-0 point detected as interior, no?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad — I just checked: the boundary point is included by the Matplotlib path, and the function returns its index as a 1-D NumPy array containing 0. I have updated the comment accordingly.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're just checking the 0-index here, which wouldn't check if result had more than one entry -- could you convert both result and expected to arrays?

Copy link
Copy Markdown
Contributor Author

@TRY-ER TRY-ER Apr 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this specific test, I was checking a single point on the shape's boundary! As there will be one expected element in the array, I tested only the first element in the initial test. As you commented, I have added to check all the items in the list rather than the first one only.



def test_per_core_work_2D(membrane_xtc, univ):
    """Regression-test the per-core deltas of a single-core 2D run."""
    xmin = univ.atoms.positions[..., 0].min()
    xmax = univ.atoms.positions[..., 0].max()
    ymin = univ.atoms.positions[..., 1].min()
    ymax = univ.atoms.positions[..., 1].max()
    tuple_of_limits = (xmin, xmax, ymin, ymax)
    grid = streamlines.produce_grid(
        tuple_of_limits=tuple_of_limits, grid_spacing=20
    )
    (
        list_square_vertex_arrays_per_core,
        list_parent_index_values,
        _,
        _,
    ) = streamlines.split_grid(grid=grid, num_cores=1)
    values = streamlines.per_core_work(
        topology_file_path=Martini_membrane_gro,
        trajectory_file_path=membrane_xtc,
        list_square_vertex_arrays_this_core=list_square_vertex_arrays_per_core[
            0
        ],
        MDA_selection="name PO4",
        start_frame=1,
        end_frame=2,
        reconstruction_index_list=list_parent_index_values[0],
        maximum_delta_magnitude=2.0,
    )
    for entry in values:
        # check both components in one call; assert_allclose reports every
        # failing element instead of stopping at the first assertion
        np.testing.assert_allclose(entry[1][:2], [0.8, 0.5], atol=1e-1)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably minor, but I suspect using assert_allclose here with a slice of the first two indices will tell us if only one of these is failing while as written the first failure could mask the success/failure of the second assertion

Copy link
Copy Markdown
Contributor Author

@TRY-ER TRY-ER Apr 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — I switched it to numpy.testing.assert_allclose()!



def test_streamplot_2D(membrane_xtc, univ):
# regression test the data structures
# generated by the 2D streamplot code
Expand Down Expand Up @@ -122,6 +256,30 @@ def test_streamplot_2D_zero_return(membrane_xtc, univ, tmpdir):
assert std == approx(0.0)


def test_streamplot_2D_max_core(membrane_xtc, univ, tmpdir):
    # exercise the multi-core code path (num_cores="maximum") and check
    # that zeroed arrays are still returned for an empty selection
    # NOTE(review): running with all cores may be heavy for CI; consider a
    # slow-test marker or a small fixed core count if this locks up runners
    u1, v1, avg, std = streamlines.generate_streamlines(
        topology_file_path=Martini_membrane_gro,
        trajectory_file_path=membrane_xtc,
        grid_spacing=20,
        MDA_selection="name POX",
        start_frame=1,
        end_frame=2,
        xmin=univ.atoms.positions[..., 0].min(),
        xmax=univ.atoms.positions[..., 0].max(),
        ymin=univ.atoms.positions[..., 1].min(),
        ymax=univ.atoms.positions[..., 1].max(),
        maximum_delta_magnitude=2.0,
        num_cores="maximum",
    )
    assert_allclose(u1, np.zeros((5, 5)))
    assert_allclose(v1, np.zeros((5, 5)))
    assert avg == approx(0.0)
    assert std == approx(0.0)


def test_streamplot_3D(membrane_xtc, univ, tmpdir):
# because mayavi is too heavy of a dependency
# for a roundtrip plotting test, simply
Expand Down
Loading