
    VpfW              
         d Z ddlmZ ddlmZ ddlZddlmZ ddlZddl	Z	ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# d Z$ G d d          Z% ej&                    Z'dZ(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d  Z0 ej1        d!          Z2d"e2_3        ej4        5                    e2           d# Z6d$ Z7e28                    e6           e29                    e7           d% Z:ej;         G d& d!                      Z<d,d)Z= ej>        e2e=            ej?        e(e-e/e0d"*            ej@         eej?        e(e-e/e0d"+                     dS )-zrThe implementation of custom partitioning APIs.

It was moved out of ``jax.experimental`` to avoid import cycles.
    )annotations)partialN)Any)	tree_util)api_util)core)custom_api_util)dispatch)linear_util)mesh)sharding_impls)
xla_bridge)mlir)partial_eval)
xla_client)ir)hlo)UnexpectedTracerErrorc                     t          j        |           j        |i |}|                                 |j        rt          d          |j        S )Nz4keyword arguments could not be resolved to positions)inspect	signaturebindapply_defaultskwargs	TypeErrorargs)funr   r   bas       \/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/_src/custom_partitioning.py_resolve_kwargsr    -   sV    "w"D3F33"Y 
J
K
KK7N    c                       e Zd Zd Zd Zd ZdS )_ShardingCallbackInfoc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S N)	propagate_user_sharding	partitionto_mesh_pspec_shardingin_treeout_treeinfer_sharding_from_operandsmodule_contextr   static_args)
selfr&   r'   r(   r)   r*   r+   r,   r   r-   s
             r   __init__z_ShardingCallbackInfo.__init__8   sN     $;D DN"8DDLDM(DD%(DDI"Dr!   c           
         t          ||                     |t          |                                                              S r%   )_to_jax_sharded_shaper(   len
dimensions)r.   sshardings      r   unflatten_arg_shapez)_ShardingCallbackInfo.unflatten_arg_shapeE   s9     	4&&xQ\\^^1D1DEE  r!   c                l      j                              fdt          ||          D                       S )Nc                B    g | ]\  }}                     ||          S  r6   ).0r4   r5   r.   s      r   
<listcomp>z>_ShardingCallbackInfo.unflatten_arg_shapes.<locals>.<listcomp>L   s=     	
 	
 	
8 $$Q11	
 	
 	
r!   )r)   	unflattenzip)r.   
arg_shapesarg_shardingss   `  r   unflatten_arg_shapesz*_ShardingCallbackInfo.unflatten_arg_shapesJ   sM    <!!	
 	
 	
 	
":}==	
 	
 	
  r!   N)__name__
__module____qualname__r/   r6   rA   r9   r!   r   r#   r#   6   sA        # # #  
    r!   r#   CustomSPMDPartitioningc                t    t          j        |                                 |                                           S r%   )r   ShapedArrayr3   numpy_dtype)r4   s    r   _to_jax_shaperI   X   s%    		!,,..!--//	:	::r!   c                x    t          j        |                                 |                                 |          S )N)r5   )jaxShapeDtypeStructr3   rH   )r4   r5   s     r   r1   r1   \   s3    		llnnammoo
 
 
 r!   c                z    |                                  r t          j                            | |          S |d         S )Nr   )is_tuplexcHloShardingtuple_sharding)shaperesult_shardingss     r   _pack_result_shardingrT   b   s8    
^^ >((0@AAAAr!   c                \    d t          |                     |          |          D             S )Nc           	     r    g | ]4\  }}t          |t          |                                                    5S r9   )_to_hlo_shardingr2   r3   )r;   r5   rR   s      r   r<   z%_flatten_sharding.<locals>.<listcomp>j   sJ     
 
 

(E xU%5%5%7%7!8!899
 
 
r!   )r>   flatten_up_to)tree	shardingsshapess      r   _flatten_shardingr\   i   s@    
 
 


Y
'
' 
 
 
 r!   c                   t           |         j        | S |                                r)|                                }|                                 }n|f}| f}j                            fdt          ||          D                       } j        g j        j	        |R  }t          j        ||          }t          ||          S )Nc                B    g | ]\  }}                     ||          S r9   r:   r;   r4   r5   infos      r   r<   z@_custom_partitioning_propagate_user_sharding.<locals>.<listcomp>~   s=       a 
"
"1h
/
/  r!   )_sharding_callbacksr&   rN   tuple_shapestuple_elementsr*   r=   r>   r-   r   r\   rT   )	user_shardingrR   backend_stringuser_shapesuser_shardings
user_shaperesult_shardingrS   r`   s	           @r   ,_custom_partitioning_propagate_user_shardingrj   r   s   	^	,$	!)
^^ &$$&&K"1133NN(K#%N}&&    n==   * 1D0 $.  / '
m_k3 3	u&6	7	77r!   c                    t          | t          j        j                  st	          d          |                     |          S )Nz/Custom Partitioning rules must return Sharding.)
isinstancerK   r5   Sharding
ValueError_to_xla_hlo_sharding)r5   num_dimensionss     r   rW   rW      s=    	Hcl3	4	4 H
F
G
GG		&	&~	6	66r!   c                   t           |         |                                r)|                                }|                                }n|f}|f} j        g j        j                            | |          j        	                    fdt          ||          D                       R  \  }}}}j        }	t          j        ||          }t          j        ||           }d t          ||           D             }
d t          ||          D             } t          j        |t!          |j                                                            |
 }|j        |k    r4t)          dt+          |j                  dt+          |                    t-          j        |          }t1          j        |j                                                  5  t5          j        |d|	j        |	j        |                    t?          |j                                       }d d d            n# 1 swxY w Y   tC          ||          }t5          j"        |          ||fS )	Nc                B    g | ]\  }}                     ||          S r9   r:   r_   s      r   r<   z2_custom_partitioning_partition.<locals>.<listcomp>   s=       !X &&q(33  r!   c                X    g | ]'\  }}t          |                    |                    (S r9   rI   tiler;   r5   r4   s      r   r<   z2_custom_partitioning_partition.<locals>.<listcomp>   s@       
(A HMM!$$%%  r!   c                X    g | ]'\  }}t          |                    |                    (S r9   rt   rv   s      r   r<   z2_custom_partitioning_partition.<locals>.<listcomp>   s@       
(A HMM!$$%%  r!   )axis_envzMismatch in result shapes. z vs tmp_xla_computation)name	platformsbackend_or_nameaxis_context)#ra   rN   rb   rc   r'   r-   r   rA   r*   r=   r>   r,   r\   r)   rK   
make_jaxprlistrR   items	out_avalsrn   reprr   SPMDAxisContextr   extend_axis_env_ndr   build_mlir_module_helperr{   r|   extend_manual	frozenset
axis_namesrT   module_to_bytecode)r?   r@   result_shaperi   re   result_shapesrS   r   lower_fnr,   
tiled_argstiled_resultsclosed_jaxprr}   moduler`   s                  @r   _custom_partitioning_partitionr      s   	^	,$ * --//M&5577!OM')3A4> 
4
4
i
4 
M::
4 m   !$]4D!E!E   	
4 
4 
40$/= &.&
m_m5 5#DL-LL- ]J77  * -}==  - M4
8H8H8J8J3K3KLLL, },,
*&''''m)<)<)<	>    /55,tz//1122  *" *&6!//	$/0J0JKK  F               *,8HII/		 	(	(-	HHs   A	H11H58H5c                f   t           |         }|                                r|                                }n|f} |j        g |j        |j        |                    | |          |j                            d |D                       R  }t          |j        ||          }t          ||          S )Nc                ,    g | ]}t          |          S r9   )rI   r;   r4   s     r   r<   zE_custom_partitioning_infer_sharding_from_operands.<locals>.<listcomp>   s     GGGA}Q//GGGr!   )ra   rN   rb   r+   r-   r   rA   r*   r=   r\   rT   )r?   r@   r   re   r`   r   ri   rS   s           r   1_custom_partitioning_infer_sharding_from_operandsr      s     
^	,$ $ --//MM!OM5D5 
i 
M:: mGGGGGHH	  / '
m_m5 5	|-=	>	>>r!   custom_partitioningTc                    ~~~~~~~| j         S r%   )r   )	callr)   r*   r&   r'   r+   decode_shardingsr-   avalss	            r   "_custom_partitioning_abstract_evalr      s    
 x0)"$4k	r!   c                >    ~~~~~~~ t          j        |           | S r%   )r   jaxpr_as_fun)	r   r)   r*   r&   r'   r+   r   r-   r   s	            r   _custom_partitioning_implr      s.     x0)"$4k	 	4	 	 $	''r!   c                z    t          d t          j        |           D                       rt          d          d S )Nc              3  J   K   | ]}t          |t          j                  V  d S r%   )rl   r   Tracer)r;   leafs     r   	<genexpr>z%_check_for_tracers.<locals>.<genexpr>   s.      LL4D$+	&	&LLLLLLr!   zFound a JAX Tracer object passed as an argument to acustom_partitioning function in a position indicated as static bystatic_argnums. )anyr   tree_leavesr   )xs    r   _check_for_tracersr      sM    LL93H3K3KLLLLL 
	   r!   c                  D    e Zd ZU dZd
dZej        Zded<   	 ddZ	d	 Z
dS )r   aP  Inserts a CustomCallOp into the XLA graph with custom SPMD lowering rules.

  .. code-block:: python

    @custom_partitioning
    def f(*args):
      return ...

    def propagate_user_sharding(mesh, user_shape):
      '''Update the sharding of the op from a user's shape.sharding.'''
      user_sharding = jax.tree.map(lambda x: x.sharding, user_shape)

    def partition(mesh, arg_shapes, result_shape):
      def lower_fn(*args):
        ... builds computation on per-device shapes ...
      result_shardings = jax.tree.map(lambda x: x.sharding, result_shape)
      arg_shardings = jax.tree.map(lambda x: x.sharding, arg_shapes)
      # result_sharding and arg_shardings may optionally be modified and the
      # partitioner will insert collectives to reshape.
      return mesh, lower_fn, result_sharding, arg_shardings

    def infer_sharding_from_operands(mesh, arg_shapes, shape):
      '''Compute the result sharding from the sharding of the operands.'''
      arg_shardings = jax.tree.map(lambda x: x.sharding, arg_shapes)


    f.def_partition(partition, propagate_user_sharding, infer_sharding_from_operands)

  The args to ``def_partition`` are as follows:

  * ``propagate_user_sharding``: Callable which takes the sharding of a user (in the dag)
    and returns a suggestion for a new `NamedSharding`. The default
    implementation is just to return the suggested sharding.
  * ``partition``: Callable which takes the SPMD suggested partition shapes and
    partition specs and returns the mesh, a per-shard lowering function, and the final
    input and output sharding specs (the SPMD partitioner will repartition the
    inputs to match). The mesh is returned to allow configuring axis_names for
    collectives when no mesh is provided.
  * ``infer_sharding_from_operands``: Callable which computes an output ``NamedSharding``
    from the ``NamedSharding`` chosen for each argument.
  * ``decode_shardings``: When set to True, convert input ``GSPMDSharding``s to
    ``NamedSharding`` if possible. This may not be possible if the user does not
    provide a contextual mesh.

  Positional arguments can be specified as static using static_argnums. JAX uses
  :code:`inspect.signature(fun)` to resolve these positional arguments.

  Examples:

    As an example, assume we want to enhance the existing ``jax.numpy.fft.fft``. This function computes
    the discrete Fourier transform of an N-dimensional input along the last dimension, and is batched
    along the first N-1 dimensions.
    By default, however, it will ignore the sharding of the input and gather the input on all devices.
    However, since ``jax.numpy.fft.fft`` is batched along the first N-1 dimensions,
    this is unnecessary. We will create a new ``my_fft`` op that, instead, does not alter the sharding
    along the first `N-1` dimensions, and only gathers the input along the last dimension if needed.

    .. code-block:: python

      import jax
      from jax.sharding import NamedSharding
      from jax.experimental.custom_partitioning import custom_partitioning
      from jax.experimental.pjit import pjit
      from jax.sharding import PartitionSpec as P
      from jax.sharding import Mesh
      from jax.numpy.fft import fft
      import regex as re
      import numpy as np

      # Pattern to detect all-gather or dynamic-slice in the generated HLO
      _PATTERN = '(dynamic-slice|all-gather)'

      # For an N-D input, keeps sharding along the first N-1 dimensions
      # but replicate along the last dimension
      def supported_sharding(sharding, shape):
          rank = len(shape.shape)
          max_shared_dims = min(len(sharding.spec), rank-1)
          names = tuple(sharding.spec[:max_shared_dims]) + tuple(None for _ in range(rank - max_shared_dims))
          return NamedSharding(sharding.mesh, P(*names))

      def partition(mesh, arg_shapes, result_shape):
          result_shardings = jax.tree.map(lambda x: x.sharding, result_shape)
          arg_shardings = jax.tree.map(lambda x: x.sharding, arg_shapes)
          return mesh, fft,               supported_sharding(arg_shardings[0], arg_shapes[0]),               (supported_sharding(arg_shardings[0], arg_shapes[0]),)

      def infer_sharding_from_operands(mesh, arg_shapes, result_shape):
          arg_shardings = jax.tree.map(lambda x: x.sharding, arg_shapes)
          return supported_sharding(arg_shardings[0], arg_shapes[0])

      @custom_partitioning
      def my_fft(x):
          return fft(x)

      my_fft.def_partition(
          infer_sharding_from_operands=infer_sharding_from_operands,
          partition=partition)

    Now create a 2D array sharded along the first axis, pass it through ``my_fft``
    and notice how it is still sharded as expected, and identical to the output
    of ``fft``. However, inspecting the HLO
    (using ``lower(x).compile().runtime_executable().hlo_modules()``) reveals that
    ``my_fft`` does not create any all-gather or dynamic-slice, while ``fft`` does.

    .. code-block::

      with Mesh(np.array(jax.devices()), ('x',)):
        x = np.asarray(np.random.randn(32*1024, 1024), dtype=np.complex64)
        y = pjit(lambda x: x, in_shardings=None, out_shardings=P('x'))(x)
        pjit_my_fft = pjit(my_fft, in_shardings=P('x'), out_shardings=P('x'))
        pjit_fft    = pjit(fft,    in_shardings=P('x'), out_shardings=P('x'))
        print(pjit_my_fft(y))
        print(pjit_fft(y))
        # dynamic-slice or all-gather are not present in the HLO for my_fft, because x is a 2D array
        assert(re.search(_PATTERN, pjit_my_fft.lower(x).compile().runtime_executable().hlo_modules()[0].to_string()) is None)
        # dynamic-slice or all-gather are present in the HLO for fft
        assert(re.search(_PATTERN, pjit_fft.lower(x).compile().runtime_executable().hlo_modules()[0].to_string())    is not None)

    .. code-block::

      # my_fft
      [[-38.840824   +0.j        -40.649452  +11.845365j
      ...
        -1.6937828  +0.8402481j  15.999859   -4.0156755j]]

      # jax.numpy.fft.fft
      [[-38.840824   +0.j        -40.649452  +11.845365j
        ...
        -1.6937828  +0.8402481j  15.999859   -4.0156755j]]

    Because of the logic in ``supported_sharding``, ``my_fft`` also works on 1-dimensional arrays.
    However, in this case, the HLO of ``my_fft`` does show a dynamic-slice, since the last dimension
    is the dimension along which FFTs are calculated and needs to be replicated on all devices before
    the computation can be done.

    .. code-block::

      with Mesh(np.array(jax.devices()), ('x',)):
        x = np.asarray(np.random.randn(32*1024*1024), dtype=np.complex64)
        y = pjit(lambda x: x, in_shardings=None, out_shardings=P('x'))(x)
        pjit_my_fft = pjit(my_fft, in_shardings=P('x'), out_shardings=P('x'))
        pjit_fft    = pjit(fft,    in_shardings=P('x'), out_shardings=P('x'))
        print(pjit_my_fft(y))
        print(pjit_fft(y))
        # dynamic-slice or all-gather are present in the HLO for my_fft, because x is a 1D array
        assert(re.search(_PATTERN, pjit_my_fft.lower(x).compile().runtime_executable().hlo_modules()[0].to_string()) is None)
        # dynamic-slice or all-gather are present in the HLO for fft
        assert(re.search(_PATTERN, pjit_fft.lower(x).compile().runtime_executable().hlo_modules()[0].to_string())    is not None)

    .. code-block::

      # my_fft
      [    7.217285   +0.j     -3012.4937  +4287.635j   -405.83594 +3042.984j
      ...  1422.4502  +7271.4297j  -405.84033 -3042.983j
      -3012.4963  -4287.6343j]

      # jax.numpy.fft.fft
      [    7.217285   +0.j     -3012.4937  +4287.635j   -405.83594 +3042.984j
      ...  1422.4502  +7271.4297j  -405.84033 -3042.983j
      -3012.4963  -4287.6343j]

  r9   c                L    || _         d | _        || _        d | _        d | _        d S r%   )r   r'   static_argnumsr&   r+   )r.   r   r   s      r   r/   zcustom_partitioning.__init__  s.    DHDN(D#'D (,D%%%r!   r   __getattr__NTc                >    || _         || _        || _        || _        |S r%   )r'   r&   r+   r   )r.   r'   r+   r&   r   s        r   def_partitionz!custom_partitioning.def_partition  s'    DN#:D (DD%,Dr!   c                   t          | j        |          | j        rt          | j                  t	          fdt                    D                       fdt          t                              D             }t          j	        t          j        | j                  |d          \  }}fd| j        D             }t          |           ng }t          j        | j                  }}t          j        |          \  }}t          j        ||          \  }	}
d |D             }t!          j        | j        ||
dd          }t!          j        |	||          \  }}}\   t          |          rJ t'          j        t!          j        |          d          }t-          j        g ||R || j        | j        | j        | j        | |
            |d	}t          j         |
            |          S )
Nc              3  .   K   | ]\  }}|v r|n|V  d S r%   r9   )r;   ir   r   s      r   r   z/custom_partitioning.__call__.<locals>.<genexpr>  s5      OOtq!^++11OOOOOOr!   c                    g | ]}|v|	S r9   r9   )r;   r   r   s     r   r<   z0custom_partitioning.__call__.<locals>.<listcomp>  s#    LLL1A^4K4KQ4K4K4Kr!   F)require_static_args_hashablec                     g | ]
}|         S r9   r9   )r;   r   r   s     r   r<   z0custom_partitioning.__call__.<locals>.<listcomp>  s    :::T!W:::r!   c                Z    g | ](}t          j        t          j        |                    )S r9   )r   raise_to_shapedget_aval)r;   r   s     r   r<   z0custom_partitioning.__call__.<locals>.<listcomp>  s-    JJJ1$T]1%5%566JJJr!   r   r9   )r   r'   r&   r+   r   r)   r*   r-   )r    r   r   settuple	enumerateranger2   r   argnums_partiallu	wrap_initr   r   tree_flattenflatten_fun_nokwargspe
debug_infotrace_to_jaxpr_dynamicr   ClosedJaxprconvert_constvars_jaxprcustom_partitioning_pr   r'   r&   r+   r   tree_unflatten)r.   r   r   dyn_argnumsf_dyn_argsr-   	args_flatr)   flat_funr*   in_avalsdebugjaxpr_constsclosed_callout_flatr   s    `                @r   __call__zcustom_partitioning.__call__  s   48T622D 24.//nOOOOyOOOOOdLLLLc$ii 0 0LLLk-
,tx
 
 

',	  lb( ;:::d&9:::k%%%%k\$(++T(b"/99Iw!6r7CCHhJJ	JJJHM$(GXu/1 1E4XxOOE1fb6{{"2#=e#D#DbIIK$) 		  . $ <%)%F.  H #HHJJ999r!   )r9   )NT)rB   rC   rD   __doc__r/   r	   forward_attrr   __annotations__r   r   r9   r!   r   r   r      sw         b bH- - - - %1+1111 DH   %: %: %: %: %:r!   ctxmlir.LoweringRuleContextc                  t           j        j        j        | j        j        }
t          |
t          j                  r^t          |
j
                  t          |
j        j                  k    r/ t          j        t          j        |          d          | g|	R  S t          |
t          j                  r|
j        t'          d          n)t          |
t          j                  r|
j        j        nd rt+                    dk    r/ t          j        t          j        |          d          | g|	R  S dfd}t-          ||||||| j        |	  	        }t/          t1          |                    }|t2          t5          |d          <   | j                            |           d	 |j        D             }t;          j        |t?          |	          t@          j!        "                    tF                    t@          j$        "                    d
          t          j%        d          t@          j&        "                    g           t@          j!        "                    |          d d 	  	        }|j'        S )NT)multiple_resultsz9Please file a bug at https://github.com/google/jax/issues   hlo_shardingxc.HloSharding | Nonec                @   | | S j         ssJ t          j        |           S t          j        |           d                                         }t          j        j        g |d|t          |          z
  z  R  }t
          j        	                    |          S )Nr   r%   )
emptyr   _op_sharding_to_pos_shardingparse_flatten_op_shardingget_partition_specrK   r5   PartitionSpecr2   NamedSharding)r   ndimpspecr   devicesr   s      r   r(   zB_custom_partitioning_lowering_rule.<locals>.to_mesh_pspec_sharding  s    z P) P   8wOOO4d 1133 
L&PPD3u::<M1NPPPE<%%dE222r!   utf8c                6    g | ]}t          j        |          S r9   )r   aval_to_ir_typer   s     r   r<   z6_custom_partitioning_lowering_rule.<locals>.<listcomp>  s#    BBBa$&q))BBBr!   F   )call_target_namehas_side_effectapi_versioncalled_computationsbackend_configoperand_layoutsresult_layouts)r   r   )(mesh_libthread_resourcesenvphysical_meshr,   r}   rl   r   r   r   manual_axesr   r   r   	lower_funr   r   ShardingContextdevice_assignmentAssertionError_flat_devices_tupler2   r#   stridra   bytesadd_keepaliver   r   CustomCallOpr   r   
StringAttrget_CUSTOM_PARTITIONING_CALL_NAMEBoolAttri32_attr	ArrayAttrresults)r   r   r)   r*   r&   r'   r+   r   r-   valuesr}   r(   sharding_callback_infokeyresult_typesoutr   r   s          `        @@r   "_custom_partitioning_lowering_ruler    s    
	"	&	4$#0,~=>> X	,
"##s<+<+G'H'HHHI4>$+D11DIII#WPVWWWWn<== ,G
EG G G  , >?? 3GGG	 FCLLA%%84>$$8 8 88;F>DF F F F	3 	3 	3 	3 	3 	3 	3 	3 11H'("C$6kK K 	B%&&''#,BeC(() ""#9:::BB4>BBB,
6ll}(()GHHkooe,,-"",**2..]&&s++		 		 		# 
r!   )+can_side_effecting_have_replicated_sharding)rz   prop_user_shardingr'   r+   r  )r   r   )Ar   
__future__r   	functoolsr   r   typingr   weakrefrK   r   jax._srcr   r   r	   r
   r   r   r   r   r   r   xbjax._src.interpretersr   r   r   jax._src.libr   rO   jax._src.lib.mlirr   jax._src.lib.mlir.dialectsr   
jax.errorsr   r    r#   WeakValueDictionaryra   r  rI   r1   rT   r\   rj   rW   r   r   	Primitiver   r   %prim_requires_devices_during_loweringaddr   r   def_abstract_evaldef_implr   register_custom_decorator_typer   r  register_lowering register_custom_call_partitionerregister_plugin_callbacksr9   r!   r   <module>r&     s   
 # " " " " "               



                   $ $ $ $ $ $       & & & & & & % % % % % % # # # # # # % % % % % % & & & & & & 4 4 4 4 4 4 ) ) ) ) ) )             * * * * * * , , , , , ,         : 2g133 !9 ; ; ;      8 8 827 7 73I 3I 3Il? ? ?& ''<== )-  &  . 2 23H I I I  ( ( (  ' '(J K K K   8 9 9 9   /[: [: [: [: [: [: [: 0/[:|9 9 9 9v  ,9; ; ; $ #"0"504     G
++G0%V48  	 	 	 	 	r!   