
    Vpfq                       U d Z ddlmZ ddlZddlmZmZmZmZ ddl	Z	ddl
Z
ddlZddlmZ ddlmZ ddlZ e
j        e          ZdZdZd	Zd
ZddiddiddidddddiddddZded<   dZdZdZdGdZdHd!Z eeeeee iZ!d"ed#<   d$d%dId*Z"dJd+Z#dKd/Z$dLd4Z%dMd6Z&dNd8Z'dOd9Z(dPd;Z)dQd<Z*	 dRd$d$d=dSd@Z+	 dRd$dAd$dBdTdFZ,dS )Uz!Utils for building a device mesh.    )annotationsN)Callable	GeneratorMutableMappingSequence)Any)
xla_bridgezTPU v2zTPU v3zTPU v4zTPU v5 lite)   r
   )r      r
   )   r   )   r   )r   r
   r   ))@   r   )r   r   )r      ))   r   )r   r   ))r
   r
   r   )r
   r
   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   z=dict[tuple[int, ...], dict[tuple[int, ...], tuple[int, ...]]]_TRANSPOSE_TRICKS)r   r   r
            r      )r   r   r   r
   )r   r   r
   r   r   r   r   	   
                  r   r   
mesh_shapeSequence[int]devicesSequence[Any]return
np.ndarrayc                "   t          |          dk    rdt                              d           t          j        |          }|t          j        t                             }|                    |           }|S | d         dk    rft          j        |                              |           }t                              d           t          j        t                    }|d|f         }|S t          j        |                              |           S )Nr   z@Reordering mesh to physical ring order on single-tray TPU v2/v3.z>Reordering mesh to physical ring order on each TPU v2/v3 tray..)lenloggerinfonpasarrayarray_TRAY_RING_ORDERreshape)r   r   unused_kwargsdevice_meshperms        [/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/experimental/mesh_utils.py_tpu_v2_v3_create_device_meshr1   G   s    
 	\\Q
KKJ   *W%%Kbh'7889K%%j11K"~*W%%--j99K
KKH   8$%%Dc4i(K
 :g&&z222    np.ndarray | Nonec                   t          d |D                       \  }}}|dz   |dz   |dz   }}}t          |d           }	||cxk    rdk    rfn nc|dk    r]t          |          dk    rJt          j        |	          }
|
t          j        t                             }
|
                    |           }
|
S ||cxk    rdk    rn n|dk    r}t          |          dk    rjt          |           t          |          k    rJt          j        |	          }
|
t          j        t                             }
|
                    |           }
|
S dS )	zCreates rotated pincer device assignment for selected topologies.

  Args:
    mesh_shape: Logical mesh shape used by the model.
    devices: TPU devices.
    **unused_kwargs: ...

  Returns:
    None or reordered devices reshaped as `mesh_shape`.
  c              3  8   K   | ]}t          |d d          V  dS )coordsr   r   r   N)getattr.0ds     r0   	<genexpr>z*_vlc_create_device_mesh.<locals>.<genexpr>p   s.      MMGAx;;MMMMMMr2   r   c           	     X    t          t          t          | dd                              S )Nr6   r7   )tuplereversedr8   )r;   s    r0   <lambda>z)_vlc_create_device_mesh.<locals>.<lambda>w   s"    E(71h	#B#BCCDD r2   )keyr
   r   r   N)	maxsortedr%   r(   r)   r*   _TRAY_2x2_RING_ORDERr,   _TRAY_4x4_RING_ORDER)r   r   r-   max_xmax_ymax_zbound_xbound_ybound_zsequential_devicesr.   s              r0   _vlc_create_device_meshrM   c   sq    MMWMMMMM%#aiEAIG7' 
D
DF F F 1A#g,,!2C2C*/00Kbh';<<=K%%j11K1A#g,,"2D2D
:#g,,&&J122k)= > >?k''
33k	r2   z+dict[str, Callable[..., np.ndarray | None]]device_kind_handler_dictFallow_split_physical_axesphysical_meshrP   booltuple[np.ndarray, np.ndarray]c          
        t          | j                  }d |D             }t          t          t          |                              D ]\  }}t	          ddd          D ]y}t          j        t          |          |          }|D ]H}	t          |	 \  }
t          j	        |
          |k    r"||<   fdt          |          D             } nI||         r n4z|dk    r-|st          d| d| d	| d
          t          | |          c S g }t          j        t          | j                  t          |          gt          j                  }t          |          D ]@\  }}|D ]8}t          |          }| j        |         |||f<   |                    |           9A|                     |                              |          |fS )a  Assigns logical parallelism axes to physical axes of an N-D torus network.

  Given logical parallelism axes with sizes in `mesh_shape` and devices in an
  N-dimensional torus network represented by `physical_mesh`, maps each logical
  axis to one or more physical axes. Prefer to map more-performance-sensitive
  logical axes to larger numbers of physical axes to maximize the bandwidth
  available to them. Also prefer to assign logical axes to multiple physical
  axes of the same size (e.g., a 2D square) rather than multiple physical axes
  of different sizes when possible.

  If allow_split_physical_axes = False (default), this routine will error out
  instead of splitting a physical axis over more than one logical axis (which
  would reduce total usable bandwidth).

  Let's use a concrete example to explain the concepts and considerations.

  As an example, suppose the logical mesh is [data, model], for data and model
  parallelism respectively. Also suppose that data parallelism is less
  performance sensitive than model parallelism. Consider a 3D TPU pod slice of
  shape 4x4x16, represented by a physical mesh of shape (4, 4, 16).

  A TPU pod slice has equal bandwidth along all axes with wraparound links, but
  a 2D plane of size 4x4 may have faster XLA collective implementations than a
  non-square plane or a 1D subgroup. If the mesh_shape is [16, 16], we may want
  the more performance sensitive `model` axis to be mapped to the 4x4 XY plane.

  Args:
    physical_mesh: a np.ndarray of devices in the shape of the N-D torus
      physical topology.
    mesh_shape: shape of the logical mesh (size of the various logical
      parallelism axes), with axes ordered by increasing network intensity.
    allow_split_physical_axes: If True, we would split physical axes if
      necessary to fit the desired mesh shape.

  Returns:
    An np.ndarray of devices in the shape of the logical mesh (mesh_shape), with
      each logical parallelism axis mapped to one or more physical mesh axes.
    The axis assignment matrix, which is a 2-d array mapping from
      (physical_axis, logical_axis) to the size assigned, with the invariant
      np.prod(assignment, axis=1) = physical_mesh_shape, and
      np.prod(assignment, axis=0) = mesh_shape.
  c                    g | ]}d S ) rV   )r:   _s     r0   
<listcomp>z4_create_device_mesh_for_nd_torus.<locals>.<listcomp>   s    &>&>&>ar&>&>&>r2   r   r   r$   c                &    g | ]\  }}|v rd n|S )r   rV   )r:   iv	c_indicess      r0   rX   z4_create_device_mesh_for_nd_torus.<locals>.<listcomp>   s9     & & &!Q 	>>aaq& & &r2   r   z1Failed to find assignment for logical_axis_index z	 of size z  with remaining assignable mesh a  . The size of each axis in your logical mesh must be equal to the product of some subset of the physical mesh axis sizes. E.g. logical mesh (4, 16) is compatible with physical mesh 4x4x4 since 4=4 and 16=4x4. If you want to split physical axes, set  allow_split_physical_axes to True.dtype)listshaper?   	enumeraterange	itertoolscombinationszipr(   prodNotImplementedError/_create_device_mesh_for_nd_torus_splitting_axesonesr%   int64intappend	transposer,   )rQ   r   rP   assignable_physical_mesh
assignmentlogical_axis_indexlogical_axis_sizenum_axesindices_and_axeselemc_axesrm   assignment_arrayrZ   xyphysical_mesh_axisr\   s                    @r0    _create_device_mesh_for_nd_torusrz      s   b "-"566&>&>:&>&>&>*
 08
9Z  !!0 0 2 2++ !QOO . ."/
,
-
-x  #  $J	6 76??///+4*'
(& & & &#$<==& & &
" % 0 
&	'  
Q		( 	 $4$4 4/@4 4,D4 4 4	 	 	 AZ    
 )W
=Z1   
## + +da + +q660=0C
1)1,- )****+ i((00<<
 r2   c                .   t          j        | j                  t          j        |          k    rt          d| j         d| d          | j        }t	          |          }t          j        t          |          t          |          gt           j                  }t          t          t          |                              D ];\  }}d}t          |||          D ]}|t          ||||          r|}||dd|f<   <t          | ||          }	|	|fS )a  Assigns logical parallelism axes to physical axes of an N-D torus network.

  This implementation allows creating meshes that requires splitting physical
  axes, and thus one could produce logical mesh of any shape, as long as the
  number of devices matches, e.g.,

  - Creating 2x2x4 from 4x4;

  - Creating 2x2x16 from 8x8;

  Args:
    physical_mesh: a np.ndarray of devices in the shape of the N-D torus
      physical topology.
    mesh_shape: shape of the logical mesh (size of the various logical
      parallelism axes), with axes ordered by increasing network intensity.

  Returns:
    An np.ndarray of devices in the shape of the logical mesh (mesh_shape), with
      each logical parallelism axis mapped to one or more physical mesh axes.
    The axis assignment matrix, which is a 2-d array mapping from
      (physical_axis, logical_axis) to the size assigned, with the invariant
      np.prod(assignment, axis=1) = physical_mesh_shape, and
      np.prod(assignment, axis=0) = mesh_shape.
  z'The number of devices in physical mesh z6 does not match the number of devices in logical mesh .r]   N)physical_mesh_shapero   )r(   rf   r`   
ValueErrorr>   ri   r%   rj   r?   r_   ra   ,_enumerate_feasible_logical_axis_assignments%_prefer_first_logical_axis_assignment_generate_logical_mesh)
rQ   r   r}   logical_mesh_shapero   logical_axisrq   best_logical_axis_assignmentlogical_axis_assignmentlogical_meshs
             r0   rh   rh     s|   8 W] !!RWZ%8%888
	*	* 	*&	* 	* 	*   &+Z(( w
%7!8!89  * *2
9'(())* * ? ?%l%
 $( #OZ):$ $ ? ? '
.2%*"5#	   / (?$">Jqqq, (' , 
z	!!r2   rw   rk   	list[int]c                    | dk    sJ g }t          dt          j        |           dz             D ]8}| |z  dk    r#|                    |           | |z  } | |z  dk    #| dk    r|c S 9| gS )z<Returns a sorted list of prime factors for the given number.r   r
   r   )rb   mathisqrtrl   )rw   factorsps      r0   _get_prime_factorsr   b  s    	
Q'DJqMMA%&&  a
a%1**nnQAga a%1** 	Avvnnn  3Jr2   r}   ro   rq   !Generator[np.ndarray, None, None]c           	   #  X  K   t          j        t                    }t          |          D ]}||xx         dz  cc<   t	          j        |           t	          j        |d          z  }t          j        t                    }t          |          D ]7\  }}t          |          D ]"}||vr||         	                    |           #8g }	g }
|
                                D ]U\  }}|		                    |           |
	                    t          t          j        ||         |                               Vt          j        |
 D ]d}t	          j        t!          |           gt          j                  }t          |          D ] \  }}|D ]}||xx         |	|         z  cc<   !|V  edS )a  Yields feasible assignments for a single logical axis.

  For a physical mesh of shape [x_1, ..., x_n], and the product of all previous
  assignments on each physical axes [y_1, ..., y_n], this function yields all
  possible assignments for the axis as 1-d arrays [z_1, ..., z_n], so that:

  - prod(z_1, ..., z_n) = logical_axis_size

  - x_i % (z_i * y_i) = 0

  Args:
    physical_mesh_shape: Physical mesh shape.
    assignment: Existing assignment matrix.
    logical_axis_size: Size of the logical axis to assign.

  Yields:
    All valid assignments for the logical axis. Each assignment is represented
    as an integer array of length len(physical_mesh_shape).
  r   r$   axisr]   N)collectionsdefaultdictrk   r   r(   r*   rf   r_   ra   rl   itemssetrc   rd   productri   r%   rj   )r}   ro   rq   logical_axis_factorsfactoravailable_physical_mesh_shapephysical_axes_by_factorphysical_axisphysical_axis_sizer   assignments_by_factormultiplicityaxis_assignmentresultfactor_indexper_factor_assignments                   r0   r   r   p  s-     0 4?3J33O3O"#455 & &f   A%    "$(+>"?"?27rD D D # d##  ,5#, , < <'m' %%788 < <	+	+	+f%,,];;;;<
 '288::  flNN6  "'/ 	
 	
    #*,AB  oWc-../rx@@@F/8/I/I 7 7++0 7 7-}!667
LLLL r2   rx   c                 
 t          j        fdt          |           D                       }t          j        fdt          |          D                       }||k    r||k    S t          fdt          |           D                       }t          fdt          |          D                       }||k    r||k    S t          j        |d          
t          j        
fdt          |           D                       }t          j        
fdt          |          D                       }	||	k    r||	k    S t	          |           t	          |          k    S )	af  Returns True if the first axis assignment is preferred over the second.

  For now, this is implemented with some very simple heuristics. However,
  it is possible to introduce e.g., a value function here based on a more
  precise model of the underlying hardware.

  TODO(rosun): Use a proxy of network capacity to select the partitions.

  Args:
    x: Logical axis assignment as [len(physical_mesh_shape)] array.
    y: Logical axis assignment as [len(physical_mesh_shape)] array.
    physical_mesh_shape: Physical mesh shape.
    assignment: Assignment matrix.

  Returns:
    True if x is preferred over y.
  c                2    g | ]\  }}||         k    |S rV   rV   r:   rZ   sr}   s      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  -    AAATQQ*=a*@%@%@q%@%@%@r2   c                2    g | ]\  }}||         k    |S rV   rV   r   s      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  r   r2   c                >    g | ]\  }}||         k    |d k    d S r   rV   r   s      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  2    KKKTQQ*=a*@%@%@QUUqUUUr2   c                >    g | ]\  }}||         k    |d k    d S r   rV   r   s      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  r   r2   r$   r   c                2    g | ]\  }}|         d k    |S r   rV   r:   rZ   r   assigned_physical_mesh_shapes      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  -    IIITQ%A!%Dq%H%Hq%H%H%Hr2   c                2    g | ]\  }}|         d k    |S r   rV   r   s      r0   rX   z9_prefer_first_logical_axis_assignment.<locals>.<listcomp>  r   r2   )r(   rf   ra   r%   r>   )rw   rx   r}   ro   x_whole_axis_sizey_whole_axis_sizex_num_whole_axesy_num_whole_axesx_non_overlapping_axis_sizey_non_overlapping_axis_sizer   s     `       @r0   r   r     s   : gAAAAYq\\AAA  gAAAAYq\\AAA  +++000
 KKKKYq\\KKK  KKKKYq\\KKK  )))... "$"!=!=!= "IIIIYq\\III! ! !#IIIIYq\\III! ! !$???&)DDD 
qE!HH	r2   r   c                   t          j        t          j        t          j        t	          | j                  t           j                  d          |j                                      dg          }t          j        t          j        t          j        t	          |          t           j                  d          |j                                      dg          }t          j        | |                    dg                    }t          t          t          ||t          t	          |                                         \  }}}t          j        ||          }t          j        ||          }|S )a  Compute the logical mesh from assignment map.

  Args:
    physical_mesh: Physical device mesh.
    logical_mesh_shape: Logical mesh shape.
    assignment: 2-d assignment matrix shape [physical_dims, logical_dims].

  Returns:
    Logical mesh reshaped from physical mesh.
  r]   r$   r   r   )r(   broadcast_toexpand_dimsaranger%   r`   rj   r,   re   rC   rb   rm   )rQ   r   ro   physical_indiceslogical_indicesr   rW   transpose_axess           r0   r   r     sT    _n
)C+,,BH
=
=
=B   	 
 GRDMM  On
)C*++28
<
<
<1   	 
 GRDMM   M:+=+=rd+C+CDD, 
o/s?7K7K1L1L
M
M !Q
 lN;;, L*<==,	r2   c                z    t          | d          s
J d            | j        \  }}}|dz   |dz   |dz   | j        dz   fS )z*Gets the bound from the given last device.r6   zOnly TPU supportedr   )hasattrr6   core_on_chip)last_devicerw   rx   zs       r0   _bounds_from_last_devicer   3  sT    
 
h	'	'==)===='!Q	
QAq1uk6:	::r2   jax_devicesc           
        | d         j         }d | D             }t          d t          |          D                       }t          |          dk    s
J |            |t          t
          fv rt          d | D                       dz   }t          j        |dd         |fz   t          	          }t          ||           D ]4\  }}|d         dk    s
J |            |||d         |d         |j
        f<   5ntt          j        |t          	          }t          ||           D ]H\  }}|j
        dk    rt          d
|j
         d| d| d          |||d         |d         |d         f<   I|S )aV  Rearrange TPU devices in a slice into a physical mesh.

  Args:
    jax_devices: A list of JAX devices in a TPU slice in process-tiled z, y, x,
      core order, e.g. from jax.devices().

  Returns:
    A np.ndarray of JAX devices with shape [global_x, global_y, global_z]. On
      v2 and v3, global_z is instead cores_per_chip (i.e., 2).
  r   c                    g | ]	}|j         
S rV   )r6   r9   s     r0   rX   z*_get_physical_tpu_mesh.<locals>.<listcomp>I  s    11118111r2   c              3      K   | ]	}|d z   V  
dS )r   NrV   r9   s     r0   r<   z)_get_physical_tpu_mesh.<locals>.<genexpr>J  s&      11q1u111111r2   r   c              3  $   K   | ]}|j         V  d S N)r   r9   s     r0   r<   z)_get_physical_tpu_mesh.<locals>.<genexpr>M  s$      ==A======r2   r   Nr
   r]   zZCreating meshes for TPU >v3 requires one device per chip ("megacore" mode). Got device id z for a device of kind z: r|   )device_kindr>   rB   r%   _TPU_V2_TPU_V3r(   emptyobjectre   r   AssertionError)r   r   device_coordsdimscores_per_chipoutr6   r;   s           r0   _get_physical_tpu_meshr   =  s    A*+11[111-	11c-00111	1	1$	TaWg&&&=======AN
(48~//v
>
>
>C44 4 4	AY!^^^Q^^^23c&)VAY
.//4 (4v
&
&
&C44 / /		
1		,12, ,#, ,'(, , ,
 
 	

 ./c&)VAYq	
)**	*r2   c                b   t          |          }| j        }|t          vrt          d|           d}|D ]}|dk    r||fz  }|t          |         vrBt          d| d| dt	          t          |                                                               | j        t          |         |          S )NzQcreate_device_mesh cannot create contiguous submeshes for physical mesh topology rV   r   zEcreate_device_mesh cannot create contiguous submeshes for mesh_shape z and physical mesh topology z. Available mesh_shapes: )r>   r`   r   r~   r_   keysrm   )rQ   r   topologymesh_shape_no_trivial_dimsdim_sizes        r0   _transpose_trickr   `  s	    Z  * (&&&
	-"*	- 	-  
 13 0 0h1}} XK/ '8'BBB
	M 	M 	M>F	M 	M"&'8'B'G'G'I'I"J"J	M 	M   
!	 "#=>
 r2   )contiguous_submeshesrP   Sequence[Any] | Noner   c               
   |t          j                    }t          j        |           t	          |          k    r"t          dt	          |           d|            |d         }t                              |j        d          }| || ||          }||S |j	        dk    r8t          |          }|rt          ||           }t          || |          \  }}	|S t          j        |                              |           }|S )ah  Creates a performant device mesh for jax.sharding.Mesh.

  Args:
    mesh_shape: shape of logical mesh, ordered by increasing network-intensity
      e.g. [replica, data, mdl] where mdl has the most network communication
      requirements.
    devices: optionally, the devices to construct a mesh for. Defaults to
      jax.devices().
    contiguous_submeshes: if True, this function will attempt to create a mesh
      where each process's local devices form a contiguous submesh. A ValueError
      will be raised if this function can't produce a suitable mesh. This
      setting was sometimes necessary before the introduction of jax.Array to
      ensure non-ragged local arrays; if using jax.Arrays, it's better to keep
      this set to False.
    allow_split_physical_axes: If True, we will split physical axes if necessary
      to produce the desired device mesh.

  Raises:
    ValueError: if the number of devices doesn't equal the product of
      `mesh_shape`.

  Returns:
    A np.ndarray of JAX devices with mesh_shape as its shape that can be fed
    into jax.sharding.Mesh with good collective performance.
  NzNumber of devices z& must equal the product of mesh_shape r$   )r   tpurO   )xbr   r(   rf   r%   r~   rN   getr   platformr   r   rz   r)   r,   )
r   r   r   rP   r   handlerr   rQ   r.   rW   s
             r0   create_device_meshr   |  s9   @ _jllGWZCLL((
	&S\\ 	& 	&#	& 	&   +$(()@$GG'WG2F  F mU""*733M B&}jAAm5";  NK
 *W%%--j99Kr2   T)process_is_granuleshould_sort_granules_by_keyrP   dcn_mesh_shaper   r   c               8    |t          j                    }|rdnd}t          |d         |          sJ t          j        t
                    |D ]+}t          ||                                       |           ,|r-fdt          	                                          D             n
                                }t          j        |          t          |          k    r"t          dt          |           d|            fd|D             t          j        t          |                                        |          }	 t          j        fd	t$          g
          |	          }
t          j        |
                                          }|S )a  Creates a device mesh for hybrid (e.g., ICI and DCN) parallelism.

  Args:
    mesh_shape: shape of the logical mesh for the faster/inner network, ordered
      by increasing network intensity, e.g. [replica, data, mdl] where mdl has
      the most network communication requirements.
    dcn_mesh_shape: shape of the logical mesh for the slower/outer network, in
      the same order as mesh_shape.
    devices: optionally, the devices to construct a mesh for. Defaults to
      jax.devices().
    process_is_granule: if True, this function will treat processes as the units
      of the slower/outer network. Otherwise it will look for slice_index
      attributes on devices and use slices as the units. Enabling this is meant
      as a fallback for platforms that don't set slice_index.
    should_sort_granules_by_key: Whether device granules should be sorted by the
      granule key, either slice or process index, depending on
      process_is_granule.
    allow_split_physical_axes: If True, we will split physical axes if necessary
      to produce the desired device mesh.

  Raises:
    ValueError: if the number of slices to which the `devices` belong doesn't
      equal the product of `dcn_mesh_shape`, or if the number of devices
      belonging to any single slice does not equal the product of `mesh_shape`.

  Returns:
    A np.ndarray of JAX devices with mesh_shape * dcn_mesh_shape as its shape
    that can be fed into jax.sharding.Mesh for hybrid parallelism.
  Nprocess_indexslice_indexr   c                     g | ]
}|         S rV   rV   )r:   rA   granule_dicts     r0   rX   z-create_hybrid_device_mesh.<locals>.<listcomp>  s    @@@S|C@@@r2   zNumber of slices z* must equal the product of dcn_mesh_shape c                4    g | ]}t          |           S )rO   )r   )r:   granulerP   r   s     r0   rX   z-create_hybrid_device_mesh.<locals>.<listcomp>  sC         

$=    r2   c                    |          S r   rV   )rZ   per_granule_meshess    r0   r@   z+create_hybrid_device_mesh.<locals>.<lambda>  s    "4Q"7 r2   )otypes)r   r   r   r   r   r_   r8   rl   rC   r   valuesr(   rf   r%   r~   r   r,   	vectorizer   blocktolist)r   r   r   r   r   rP   attrdevgranulesgranule_meshblocksr.   r   r   s   `    `      @@r0   create_hybrid_device_meshr     s   L _jllG.	AM$	T	"	""""(.., 1 1cd##$++C0000 
%!@@@@F<+<+<+>+>$?$?@@@@   
 W^H--
	+CMM 	+ 	+(	+ 	+          3x==))11.AA,I2<7777III & ))+	r2   )r   r   r   r    r!   r"   )r   r   r   r    r!   r3   )rQ   r"   r   r   rP   rR   r!   rS   )rQ   r"   r   r   r!   rS   )rw   rk   r!   r   )r}   r   ro   r"   rq   rk   r!   r   )
rw   r"   rx   r"   r}   r   ro   r"   r!   rR   )rQ   r"   r   r   ro   r"   r!   r"   )r!   r   )r   r    r!   r"   )rQ   r"   r   r   r!   r"   r   )
r   r   r   r   r   rR   rP   rR   r!   r"   )r   r   r   r   r   r   r   rR   r   rR   rP   rR   r!   r"   )-__doc__
__future__r   r   collections.abcr   r   r   r   rc   loggingr   typingr   jax._srcr	   r   numpyr(   	getLogger__name__r&   r   r   _TPU_V4_TPU_V5_LITEr   __annotations__r+   rD   rE   r1   rM   rN   rz   rh   r   r   r   r   r   r   r   r   r   rV   r2   r0   <module>r     s   ( ' ' " " " " " "     I I I I I I I I I I I I             % % % % % %    		8	$	$


 		 		 	  
 	  #      4 , # M 3 3 3 38$ $ $ $\ **)      ',	{ { { { { {|J" J" J" J"Z   @ @ @ @FG G G GT3 3 3 3l; ; ; ;   F   < %)= "'&+= = = = = =F %)E
  %(,&+E E E E E E E Er2   