Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/conda_env/environment_dask_2.30.0.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: test
name: ips_test_dask_2.30
channels:
- conda-forge
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/conda_env/environment_dask_2.5.2.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: test
name: ips_test_dask_2.5
channels:
- conda-forge
dependencies:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/conda_env/environment_linux.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: test
name: ips_test
channels:
- conda-forge
dependencies:
- pytest-cov
- pytest-timeout
- psutil
- mpi4py
- dask=2021.10.0
- dask=2021.11.1
- dakota
4 changes: 2 additions & 2 deletions .github/workflows/conda_env/environment_macos.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: test
name: ips_test
channels:
- conda-forge
dependencies:
- pytest-cov
- pytest-timeout
- psutil
- dask=2021.10.0
- dask=2021.11.1
2 changes: 1 addition & 1 deletion .github/workflows/conda_env/environment_minimal.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: test
name: ips_test_minimal
channels:
- conda-forge
dependencies:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/conda_env/environment_static_analysis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: static_analysis
name: ips_static_analysis
channels:
- conda-forge
dependencies:
Expand All @@ -7,4 +7,4 @@ dependencies:
- pylint=2.11.1
- bandit=1.7.0
- codespell=2.1.0
- dask=2021.10.0
- dask=2021.11.1
2 changes: 1 addition & 1 deletion ipsframework/dakota_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class Driver(Component):

def __init__(self, services, config):
Component.__init__(self, services, config)
super().__init__(services, config)
self.done = False
self.events_received = []
self.socket_address = ''
Expand Down
25 changes: 24 additions & 1 deletion ipsframework/ipsExceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,34 @@ def __init__(self, caller_id, tid, nproc, ppn, max_procs, max_ppn):
self.args = (caller_id, tid, nproc, ppn, max_procs, max_ppn)

def __str__(self):
s = "component %s requested %d processes with %d processes per node, while the number of processes requested"\
s = "component %s requested %d processes with %d processes per node, while the number of processes requested "\
"is less than the max (%d), the processes per node value is too low." % (self.caller_id, self.nproc, self.ppn, self.max_procs)
return s


class ResourceRequestUnequalPartitioningException(Exception):
    """Exception raised by the resource manager when it is possible to
    launch the requested number of processes, but the requested number
    of processes and processes per node will result in unequal
    partitioning of nodes.

    :param caller_id: identifier of the requesting component; rendered
        with ``%s`` in the message.
    :param tid: identifier of the task the request was made for; stored
        as ``task_id``.
    :param nproc: number of processes requested.
    :param ppn: processes per node requested.
    :param max_procs: value reported as "the max" in the message.
    :param max_ppn: maximum processes per node; stored but not used in
        the message.
    """

    def __init__(self, caller_id, tid, nproc, ppn, max_procs, max_ppn):
        # Hand the values to Exception so ``args`` is populated in one
        # step instead of being overwritten after an empty init.
        super().__init__(caller_id, tid, nproc, ppn, max_procs, max_ppn)
        self.caller_id = caller_id
        self.task_id = tid
        self.nproc = nproc
        self.ppn = ppn
        self.max_procs = max_procs
        self.max_ppn = max_ppn

    def __str__(self):
        """Return the human-readable description of the rejected request."""
        # The two adjacent literals are joined before ``%`` is applied,
        # so all four placeholders are filled from the tuple.
        template = (
            "component %s requested %d processes with %d processes per node, while the number of processes requested is less than the max (%d), "
            "it will result in unequal partitioning of processes across nodes"
        )
        return template % (self.caller_id, self.nproc, self.ppn, self.max_procs)


class InvalidResourceSettingsException(Exception):
"""
Exception raised by the resource helper to indicate inconsistent resource settings.
Expand Down
9 changes: 4 additions & 5 deletions ipsframework/ipsLogging.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@ class myLogRecordStreamHandler(socketserver.StreamRequestHandler):

def __init__(self, request, client_address, server, handler):
self.handler = handler
socketserver.StreamRequestHandler.__init__(self,
request,
client_address,
server)
super().__init__(request,
client_address,
server)

def handle(self):
"""
Expand Down Expand Up @@ -68,7 +67,7 @@ class LogRecordSocketReceiver(socketserver.ThreadingUnixStreamServer):

def __init__(self, log_pipe,
handler=myLogRecordStreamHandler):
socketserver.UnixStreamServer.__init__(self, log_pipe, handler)
super().__init__(log_pipe, handler)

def get_file_no(self):
return self.socket.fileno()
Expand Down
8 changes: 4 additions & 4 deletions ipsframework/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ServiceRequestMessage(Message):
identifier = 'REQUEST'

def __init__(self, sender_id, receiver_id, target_comp_id, target_method, *args, **keywords):
Message.__init__(self, sender_id, receiver_id)
super().__init__(sender_id, receiver_id)
self.target_comp_id = target_comp_id
self.target_method = target_method
self.args = args
Expand All @@ -70,7 +70,7 @@ class ServiceResponseMessage(Message):
identifier = 'RESPONSE'

def __init__(self, sender_id, receiver_id, request_msg_id, status, *args):
Message.__init__(self, sender_id, receiver_id)
super().__init__(sender_id, receiver_id)
self.request_msg_id = request_msg_id
self.status = status
self.args = args
Expand All @@ -92,7 +92,7 @@ class MethodInvokeMessage(Message):
identifier = 'INVOKE'

def __init__(self, sender_id, receiver_id, call_id, target_method, *args, **keywords):
Message.__init__(self, sender_id, receiver_id)
super().__init__(sender_id, receiver_id)
self.call_id = call_id
self.target_method = target_method
self.args = args
Expand All @@ -115,7 +115,7 @@ class MethodResultMessage(Message):
identifier = 'RESULT'

def __init__(self, sender_id, receiver_id, call_id, status, *args):
Message.__init__(self, sender_id, receiver_id)
super().__init__(sender_id, receiver_id)
self.call_id = call_id
self.args = args
self.status = status
Expand Down
2 changes: 1 addition & 1 deletion ipsframework/portalBridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __init__(self, services, config):
Declaration of private variables and initialization of
:py:class:`component.Component` object.
"""
Component.__init__(self, services, config)
super().__init__(services, config)
self.host = ''
self.curTime = time.localtime()
self.startTime = self.curTime
Expand Down
17 changes: 13 additions & 4 deletions ipsframework/resourceManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import os
import time
from math import ceil
from .ipsExceptions import InsufficientResourcesException, \
BadResourceRequestException, \
ResourceRequestMismatchException
from .ipsExceptions import (InsufficientResourcesException,
BadResourceRequestException,
ResourceRequestMismatchException,
ResourceRequestUnequalPartitioningException)
from .ips_es_spec import eventManager
from .resourceHelper import getResourceList
from .node_structure import Node
Expand Down Expand Up @@ -328,6 +329,11 @@ def get_allocation(self, comp_id, nproc, task_id,
c = ceil(float(nproc) / ppn)
raise InsufficientResourcesException(comp_id, task_id,
c, c - len(self.avail_nodes))
if nodes == "unequal":
raise ResourceRequestUnequalPartitioningException(comp_id, task_id,
nproc, ppn,
self.total_cores,
self.max_ppn)
else:
try:
self.processes += nproc
Expand Down Expand Up @@ -441,7 +447,10 @@ def check_whole_node_cap(self, nproc, ppn):
whole_cap += ppn
nodes.append(n)
if whole_cap >= nproc:
return True, nodes
if nproc > ppn and nproc % ppn != 0:
return False, "unequal"
else:
return True, nodes
except Exception:
self.fwk.exception("problem in RM.check_whole_node_cap")
raise
Expand Down
2 changes: 1 addition & 1 deletion ipsframework/runspaceInitComponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, services, config):
Declaration of private variables and initialization of
:py:class:`component.Component` object.
"""
Component.__init__(self, services, config)
super().__init__(services, config)
# get the simRootDir
self.simRootDir = services.get_config_param('SIM_ROOT')
self.cwd = self.config['OS_CWD']
Expand Down
4 changes: 2 additions & 2 deletions ipsframework/taskManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,14 +553,14 @@ def init_task_pool(self, init_task_msg):
except BadResourceRequestException as e:
self.fwk.error("There has been a fatal error, %s requested %d too many processors in task %d",
caller_id, e.deficit, e.task_id)
for (task_id, cmd) in list(ret_dict.values()):
for task_id, _, _ in ret_dict.values():
self.resource_mgr.release_allocation(task_id, -1)
del self.curr_task_table[task_id]
raise
except ResourceRequestMismatchException as e:
self.fwk.error("There has been a fatal error, %s requested too few processors per node to launch task %d (request: procs = %d, ppn = %d)",
caller_id, e.task_id, e.nproc, e.ppn)
for (task_id, cmd) in list(ret_dict.values()):
for task_id, _, _ in ret_dict.values():
self.resource_mgr.release_allocation(task_id, -1)
del self.curr_task_table[task_id]
raise
Expand Down
16 changes: 14 additions & 2 deletions tests/new/test_resourceManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from ipsframework.resourceManager import ResourceManager
from ipsframework.ipsExceptions import (InsufficientResourcesException,
BadResourceRequestException,
ResourceRequestMismatchException)
ResourceRequestMismatchException,
ResourceRequestUnequalPartitioningException)


def test_allocations(tmpdir):
Expand Down Expand Up @@ -67,6 +68,17 @@ def test_allocations(tmpdir):

assert "component comp0 requested 3 nodes, which is more than possible by 1 nodes, for task 0." == str(excinfo.value)

with pytest.raises(ResourceRequestUnequalPartitioningException) as excinfo:
rm.get_allocation(comp_id='comp0',
nproc=3,
task_id=0,
whole_nodes=True,
whole_socks=False,
task_ppn=2)

assert "component comp0 requested 3 processes with 2 processes per node, while the number of processes requested is less than the max (8), "\
"it will result in unequal partitioning of processes across nodes" == str(excinfo.value)

with pytest.raises(BadResourceRequestException) as excinfo:
rm.get_allocation(comp_id='comp0',
nproc=12,
Expand All @@ -84,7 +96,7 @@ def test_allocations(tmpdir):
whole_socks=False,
task_ppn=2)

assert ("component comp0 requested 6 processes with 2 processes per node, while the number of processes requestedis less than the max (8), "
assert ("component comp0 requested 6 processes with 2 processes per node, while the number of processes requested is less than the max (8), "
"the processes per node value is too low." == str(excinfo.value))

rm.get_allocation(comp_id='comp0',
Expand Down
Loading