diff --git a/docs/source/operators/config-add-env.md b/docs/source/operators/config-add-env.md index 54319b231..614404806 100644 --- a/docs/source/operators/config-add-env.md +++ b/docs/source/operators/config-add-env.md @@ -68,9 +68,9 @@ Besides those environment variables associated with configurable options, the fo Kubernetes only. Used during Kubernetes deployment, this indicates the name of the namespace in which the Enterprise Gateway service is deployed. The namespace is created prior to deployment, and is set into the EG_NAMESPACE env via - deployment.yaml script. This value is then used within Enterprise Gateway to coordinate kernel - configurations. Should this value not be set during deployment, Enterprise Gateway - will default its value to namespace 'default'. + deployment.yaml script. This value is then used within Enterprise Gateway to coordinate + kernel configurations. Should this value not be set during deployment, Enterprise + Gateway will default its value to namespace 'default'. EG_PROHIBITED_GIDS=0 Containers only. A comma-separated list of group ids (GID) whose values are not @@ -131,5 +131,16 @@ Besides those environment variables associated with configurable options, the fo Any other value will error. EG_YARN_CERT_BUNDLE= - The path to a .pem or any other custom truststore used as a CA bundle in yarn-api-client. + The path to a .pem or any other custom truststore used as a CA bundle in + yarn-api-client. + + EG_ZMQ_IO_THREADS=1 + The size of the ZMQ thread pool used to handle I/O operations. Applies only to shared + contexts which are enabled by default but can be specified via + `RemoteMappingKernelManager.shared_context = True`. + + EG_ZMQ_MAX_SOCKETS=1023 + Specifies the maximum number of sockets to allow on the ZMQ context. Applies only to + shared contexts which are enabled by default but can be specified via + `RemoteMappingKernelManager.shared_context = True`. ``` diff --git a/docs/source/operators/config-env-debug.md b/docs/source/operators/config-env-debug.md index 4ab98dc13..57362b779 100644 --- a/docs/source/operators/config-env-debug.md +++ b/docs/source/operators/config-env-debug.md @@ -28,8 +28,9 @@ The following environment variables may be useful for troubleshooting: The interval (in seconds) to wait before checking poll results again. EG_RESTART_STATUS_POLL_INTERVAL=1.0 - The interval (in seconds) to wait before polling for the restart status again when duplicate restart request - for the same kernel is received or when a shutdown request is received while kernel is still restarting. + The interval (in seconds) to wait before polling for the restart status again when + duplicate restart request for the same kernel is received or when a shutdown request + is received while kernel is still restarting. EG_REMOVE_CONTAINER=True Used by launch_docker.py, indicates whether the kernel's docker container should be diff --git a/enterprise_gateway/services/kernels/remotemanager.py b/enterprise_gateway/services/kernels/remotemanager.py index 947118958..28e89e714 100644 --- a/enterprise_gateway/services/kernels/remotemanager.py +++ b/enterprise_gateway/services/kernels/remotemanager.py @@ -18,6 +18,7 @@ from tornado import web from traitlets import directional_link from traitlets import log as traitlets_log +from zmq import IO_THREADS, MAX_SOCKETS, Context from enterprise_gateway.mixins import EnterpriseGatewayConfigMixin @@ -159,6 +160,28 @@ class RemoteMappingKernelManager(AsyncMappingKernelManager): Extends the AsyncMappingKernelManager with support for managing remote kernels via the process-proxy. """ + def _context_default(self) -> Context: + """ + We override the _context_default method in + """ + zmq_context = super()._context_default() + if self.shared_context: # this should be True by default + + # pyzmq currently does not expose defaults for these values, so we replicate them here + # libzmq/zmq.h: ZMQ_MAX_SOCKETS_DLFT = 1023; zmq.Context.MAX_SOCKETS + # libzmq/zmq.h: ZMQ_IO_THREADS_DFLT = 1; zmq.Context.IO_THREADS + zmq_max_sock_desired = int(os.getenv("EG_ZMQ_MAX_SOCKETS", zmq_context.MAX_SOCKETS)) + if zmq_max_sock_desired != zmq_context.MAX_SOCKETS: + zmq_context.set(MAX_SOCKETS, zmq_max_sock_desired) + self.log.info(f"Set ZMQ_MAX_SOCKETS to {zmq_context.MAX_SOCKETS}") + + zmq_io_threads_desired = int(os.getenv("EG_ZMQ_IO_THREADS", zmq_context.IO_THREADS)) + if zmq_io_threads_desired != zmq_context.IO_THREADS: + zmq_context.set(IO_THREADS, zmq_io_threads_desired) + self.log.info(f"Set ZMQ_IO_THREADS to {zmq_context.IO_THREADS}") + + return zmq_context + pending_requests: TrackPendingRequests = ( TrackPendingRequests() ) # Used to enforce max-kernel limits @@ -256,9 +279,10 @@ def _enforce_kernel_limits(self, username: str) -> None: """ if self.parent.max_kernels is not None or self.parent.max_kernels_per_user >= 0: - pending_all, pending_user = RemoteMappingKernelManager.pending_requests.get_counts( - username - ) + ( + pending_all, + pending_user, + ) = RemoteMappingKernelManager.pending_requests.get_counts(username) # Enforce overall limit... if self.parent.max_kernels is not None: