
    Vpf)                        d dl mZ d dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dl	mZ d dl	mZ d dlmZ  ej        e          Z G d	 d
          Z e            Z	 	 	 	 	 	 	 dddZd ZdS )    )annotationsN)Sequence)Any)clusters)config)
xla_bridge)xla_extensionc                      e Zd ZU dZded<   dZded<   dZded<   dZded	<   dZded
<   dZ	ded<   	 	 	 	 	 	 	 dddZ
d Zd ZdS )Stater   int
process_id   num_processesNz
Any | Noneserviceclientpreemption_sync_manager
str | Nonecoordinator_address,  
int | Nonelocal_device_idsint | Sequence[int] | Nonecluster_detection_methodinitialization_timeoutcoordinator_bind_addressc                H   |pt           j                            dd           }t          |t                    r|g}t
          j                            ||||||          \  }}}}|t          d          |t          d          |t          d          || _	        d|
                    dd          d         z   }|pt           j                            d|          }|t          d	          |rdd
                    d |D                       }	t                              d|	           t          j        d|	           t          j        d|	           || _        d t           j                                        D             }
t%          |
          dk    r8d                    |
          dz   }d| d}t                              |           |dk    rK| j        t+          d          t                              d|           t-          j        ||          | _        || _        | j        t+          d          t-          j        |||          | _        t                              d|           | j                                         |                                  d S )NJAX_COORDINATOR_ADDRESSz&coordinator_address should be defined.z$Number of processes must be defined.z6The process id of the current process must be defined.z[::]::r   JAX_COORDINATOR_BIND_ADDRESSz+coordinator_bind_address should be defined.,c              3  4   K   | ]}t          |          V  d S N)str).0xs     T/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/_src/distributed.py	<genexpr>z#State.initialize.<locals>.<genexpr>S   s(       B BAQ B B B B B B    z4JAX distributed initialized with visible devices: %sjax_cuda_visible_devicesjax_rocm_visible_devicesc                >    g | ]}d |                                 v |S )_proxy)lower)r$   keys     r&   
<listcomp>z$State.initialize.<locals>.<listcomp>[   s*    OOO3x399;;7N7N37N7N7Nr(   r    z. zHJAX detected proxy variable(s) in the environment as distributed setup: zpOn some systems, this may cause a hang of distributed.initialize and you may need to unset these ENV variable(s)z2distributed.initialize should only be called once.z&Starting JAX distributed service on %s)init_timeoutz+Connecting to JAX distributed service on %s)osenvironget
isinstancer   r   
ClusterEnv$auto_detect_unset_distributed_params
ValueErrorr   rsplitjoinloggerinfor   updater   keyslenwarningr   RuntimeErrorr	   get_distributed_runtime_servicer   r   get_distributed_runtime_clientconnect"initialize_preemption_sync_manager)selfr   r   r   r   r   r   r    default_coordinator_bind_addressvisible_devices
proxy_varsvarsr@   s                r&   
initializezState.initialize'   s    / K:>>*CTJJ "C(( ,*+ 	@@$"	
 	
 G-5E "?@@@=>>>OPPP2D (/1D1K1KCQR1S1STU1V'V$ 8 !Q "
/M/O!Q !Q   'DEEE A B B1A B B BBBokkH/ZZZm.@@@m.@@@ DO PO"*//"3"3OOOJ
:XXj!!D(d	6SW 	6 	6 	6 
 nnWQ		!OPPPkk
24L   #B
"M3 3dl 'D{MNNN>Z6LN N NDK
KK=?RSSSK++-----r(   c                    | j         r | j                                          d | _         | j        r | j                                         d | _        | j        r	d | _        d S d S r"   )r   shutdownr   r   rF   s    r&   rM   zState.shutdown{   so    { 
kdk| 
ldl# *%)d"""* *r(   c                    | j         t          d          t          j                    | _         | j                             | j                   d S )Nz8Preemption sync manager should only be initialized once.)r   rA   r	   create_preemption_sync_managerrK   r   rN   s    r&   rE   z(State.initialize_preemption_sync_manager   sX    #/
DF F F 	466 	  ++DK88888r(   NNNNNr   Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname__r   __annotations__r   r   r   r   r   rK   rM   rE    r(   r&   r   r      s         *-'&(,,,,,$((((( 48-1*.@D8</28<R. R. R. R. R.h* * *9 9 9 9 9r(   r   r   r   r   r   r   r   r   r   r   r   r   r   c           	         t          j                    rt          d          t                              | ||||||           t          j        t                     dS )a  Initializes the JAX distributed system.

  Calling :func:`~jax.distributed.initialize` prepares JAX for execution on
  multi-host GPU and Cloud TPU. :func:`~jax.distributed.initialize` must be
  called before performing any JAX computations.

  The JAX distributed system serves a number of roles:

    * It allows JAX processes to discover each other and share topology information,
    * It performs health checking, ensuring that all processes shut down if any process dies, and
    * It is used for distributed checkpointing.

  If you are using TPU, Slurm, or Open MPI, all arguments are optional: if omitted, they
  will be chosen automatically.

  The ``cluster_detection_method`` may be used to choose a specific method for detecting those
  distributed arguments. You may pass any of the automatic ``spec_detect_methods`` to this
  argument though it is not necessary in the TPU, Slurm, or Open MPI cases.  For other MPI
  installations, if you have a functional ``mpi4py`` installed, you may pass
  ``cluster_detection_method="mpi4py"`` to bootstrap the required arguments.

  Otherwise, you must provide the ``coordinator_address``,
  ``num_processes``, and ``process_id`` arguments to :func:`~jax.distributed.initialize`.

  Please note: on some systems, particularly HPC clusters that only access external networks
  through proxy variables such as HTTP_PROXY, HTTPS_PROXY, etc., the call to
  :func:`~jax.distributed.initialize` may timeout.  You may need to unset these variables
  prior to application launch.

  Args:
    coordinator_address: the IP address of process `0` and a port on which that
      process should launch a coordinator service. The choice of
      port does not matter, so long as the port is available on the coordinator
      and all processes agree on the port.
      May be ``None`` only on supported environments, in which case it will be chosen automatically.
      Note that special addresses like ``localhost`` or ``127.0.0.1`` usually mean that the program
      will bind to a local interface and are not suitable when running in a multi-host environment.
    num_processes: Number of processes. May be ``None`` only on supported environments, in
      which case it will be chosen automatically.
    process_id: The ID number of the current process. The ``process_id`` values across
      the cluster must be a dense range ``0``, ``1``, ..., ``num_processes - 1``.
      May be ``None`` only on supported environments; if ``None`` it will be chosen automatically.
    local_device_ids: Restricts the visible devices of the current process to ``local_device_ids``.
      If ``None``, defaults to all local devices being visible to the process except when processes
      are launched via Slurm and Open MPI on GPUs. In that case, it will default to a single device per process.
    cluster_detection_method: An optional string to attempt to autodetect the configuration of the distributed
      run.  Note that "mpi4py" method requires you to have a working ``mpi4py`` install in your environment,
      and launch the applicatoin with an MPI-compatible job launcher such as ``mpiexec`` or ``mpirun``.
      Legacy auto-detect options (OMPI, Slurm) remain enabled.
    initialization_timeout: Time period (in seconds) for which connection will
      be retried. If the initialization takes more than the timeout specified,
      the initialization will error. Defaults to 300 secs i.e. 5 mins.
    coordinator_bind_address: the address and port to which the coordinator service
      on process `0` should bind. If this is not specified, the default is to bind to
      all available addresses on the same port as ``coordinator_address``. On systems
      that have multiple network interfaces per node it may be insufficient to only
      have the coordinator service listen on one address/interface.

  Raises:
    RuntimeError: If :func:`~jax.distributed.initialize` is called more than once
      or if called after the backend is already initialized.

  Examples:

  Suppose there are two GPU processes, and process 0 is the designated coordinator
  with address ``10.0.0.1:1234``. To initialize the GPU cluster, run the
  following commands before anything else.

  On process 0:

  >>> jax.distributed.initialize(coordinator_address='10.0.0.1:1234', num_processes=2, process_id=0)  # doctest: +SKIP

  On process 1:

  >>> jax.distributed.initialize(coordinator_address='10.0.0.1:1234', num_processes=2, process_id=1)  # doctest: +SKIP
  zUjax.distributed.initialize() must be called before any JAX computations are executed.N)r   backends_are_initializedrA   global_staterK   atexitregisterrM   )r   r   r   r   r   r   r   s          r&   rK   rK      ss    f (** >
 = > > >-}j*,D02JL L L 	/(r(   c                 8    t                                            dS )z\Shuts down the distributed system.

  Does nothing if the distributed system is not running.N)rZ   rM   rW   r(   r&   rM   rM      s     r(   rQ   rR   )
__future__r   r[   collections.abcr   loggingr2   typingr   jax._srcr   r   r   jax._src.libr	   	getLoggerrS   r;   r   rZ   rK   rM   rW   r(   r&   <module>re      s?   # " " " " "  $ $ $ $ $ $  				                         & & & & & &		8	$	$l9 l9 l9 l9 l9 l9 l9 l9\ uww 26+/(,>B6:-06:Y Y Y Y Yx    r(   