
    VpfE              
      <   d Z ddlmZ ddlmZmZmZ ddlmZm	Z	 ddl
Z
ddlmZ ddlZddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl mZ! ddl"m#Z# ddl$m%Z& ddl$m'Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z2 ddl1Z3e*e4cZ4Z5e+e6cZ6Z7e!j8        Z8e!j9        Z9e!j:        Z:e!j;        Z;e!j<        Z<e!j=        Z=e!j>        Z>e!j?        Z?e!j@        Z@ ejA        d          ZBdeB_C        d ZDd ZEd ZFd{dZGd|d#ZHd$ ZId% ZJd& ZKd' ZLd}d4ZMeBN                    eL           d~d5ZOeBP                    eO           dd6ZQeQejR        eB<   dd?ZSddDZTddFZUddGZVeVejW        eB<   ddMZXddOZYeYejZ        eB<   e/ddV            Z[ddYZ\ ej]        dZ ej^        d[d\          d]^          Z_ddaZ`dddZa ejb        eBea           ddiZcecejd        eB<   dj Zeeeejf        eB<   ddke@e@i d\d\dddl	ddyZg	 ddzlhmiZj dS # ek$ r dZjY dS w xY w)z-Module for calling pallas functions from JAX.    )annotations)CallableIterableSequence)partialreduceN)Any)api_util)lax)ad_util)checkify)config)core)effects)linear_util)	tree_util)ad)batching)mlir)partial_eval)uninitialized_value)	discharge)utils)safe_mapsafe_zip
split_listtuple_insertunzip2weakref_lru_cachepallas_callTc                H   | |J |S |J t          d | D                       } t          j        || |          }t          t          j        t          |                    t          j        |t          j                                     }t          j        ||          S )Nc              3  V   K   | ]$}t          j        |t           j                   V  %dS dtypeNjnpasarrayint32.0ss     [/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/_src/pallas/pallas_call.py	<genexpr>z'_maybe_dynamic_slice.<locals>.<genexpr>J   3      GGCK333GGGGGG    )slice_sizesr$   )	tupler   dynamic_slicenparangelenarraybool_squeeze)	start_idxblock_shapevalueis_indexingoutputsqueeze_dimss         r-   _maybe_dynamic_slicer@   E   s    L		 	 	 GGYGGGGG)UI;GGG&ry[!1!12228KBD(4L 4L 4L M N N,	V\	*	**r0   c                
   | |J |S |J t          d | D                       } t          d t          |          D                       }t          j        |||          }|j        |k    sJ t          j        |||           S )Nc              3  V   K   | ]$}t          j        |t           j                   V  %dS r#   r&   r*   s     r-   r.   z._maybe_dynamic_update_slice.<locals>.<genexpr>V   r/   r0   c              3  $   K   | ]\  }}||V  d S N r+   ibs      r-   r.   z._maybe_dynamic_update_slice.<locals>.<genexpr>W   s>       # #tq! !# # # # # # #r0   )r2   	enumerater   broadcast_in_dimshapedynamic_update_slice)r:   r;   r<   updater=   broadcast_dimss         r-   _maybe_dynamic_update_slicerO   P   s    M		 	 	 GGYGGGGG) # #y'='= # # # # #.^DD&		$	$	$	$		!%	;	;;r0   c                &   t          d t          | j        |          D                       }|| j        k    rYt          d t          || j                  D                       }t          d| j                  }t          j        | ||          } | S )ay  Pads values so the shape evenly divides into block dimensions.

  For example, if values has a shape of (33, 2, 5) with a block_shape of
  (32, 2, 4), this function will pad the value of shape to (64, 2, 8).

  Args:
    value: Array to be padded.
    block_shape: Block shapes to use for padding. If None, no padding will
      be performed.

  Returns:
    A padded array.
  c              3  8   K   | ]\  }}|d z
  |z  d z   |z  V  dS    NrE   )r+   vrH   s      r-   r.   z1_pad_values_to_block_dimension.<locals>.<genexpr>l   sI        !%AA!|a1     r0   c              3  *   K   | ]\  }}d ||z
  fV  dS )r   NrE   )r+   arH   s      r-   r.   z1_pad_values_to_block_dimension.<locals>.<genexpr>p   s.      JJ41aq!A#hJJJJJJr0   rE   rK   r%   )constant_values)r2   ziprK   r   r%   r'   pad)r<   r;   padded_shape	pad_width	pad_values        r-   _pad_values_to_block_dimensionr^   ]   s       ),U[+)F)F    , U[  JJ3|U[+I+IJJJJJI#"EK@@@IGE9i@@@E	,r0   returntuple[jax.Array, ...]c                L    d | D             } t          d | D                       S )Nc              3  >   K   | ]}t          j        |          V  d S rD   jax_coreraise_to_shapedr+   xs     r-   r.   z+_initialize_scratch_vals.<locals>.<genexpr>v   s-      FF18+A..FFFFFFr0   c              3  J   K   | ]}t          |j        |j                  V  d S rD   )r   rK   r%   r+   rV   s     r-   r.   z+_initialize_scratch_vals.<locals>.<genexpr>w   s1      LL"17AG44LLLLLLr0   )r2   )scratch_avalss    r-   _initialize_scratch_valsrk   u   s2    FFFFF-	LLmLLL	L	LLr0   block_mappings_outputIterable[BlockMapping]Sequence[jax.Array]c                   d |D             }g }t          |           D ]b\  }}||v r"|                    |||                             +|                    t          |j        j        |j        j                             c|S )Nc                    i | ]\  }}||	S rE   rE   )r+   krT   s      r-   
<dictcomp>z+_initialize_output_vals.<locals>.<dictcomp>|   s    222TQAq222r0   )rI   appendr   array_shape_dtyperK   r%   )rl   
input_argsinput_output_aliasesoi_mapoutput_valsrG   bms          r-   _initialize_output_valsrz   y   s     321222&+.// J JeaF{{F1I.////,R-A-G-/-A-GI I J J J J	r0   c                    t          | d          r4t          | j        d          r| j                            |           j        S | S )a  Converts logical dtypes into JAX dtypes for interpret mode.

  This function is used to convert device-specific dtypes that have no
  corresponding equivalent in JAX/XLA into a type that can be executed
  by the XLA interpreter (e.g. TPU semaphores -> int32).
  _rulespallas_interpret_element_aval)hasattrr|   r}   r%   r$   s    r-    _logical_to_interpret_mode_dtyper      sK     eX Cel;<<C<55e<<BB	,r0   c                ,   t          | t          j                  r*t          | j                  }|                     |          S t          | t          j                  r5t          | j	                  }t          j        | j
        || j                  S | S )z*Logical to interpret mode aval conversion.)
inner_aval)	weak_type)
isinstancepallas_coreAbstractMemoryRef$_logical_aval_to_interpret_mode_avalr   rM   rd   ShapedArrayr   r%   rK   r   )avalr   inner_dtypes      r-   r   r      s    k344 .5doFFJ;;*;---h*++ S24:>>K
K4>RRRR	+r0   c                2   g }d}t          t          t          | |                              D ]M\  }}t          j        ||dz   |          }||k    }|                    t          j        |d|                     Nt          t          |                    S )NTrS   r   )reversedlistrY   r'   wherers   r2   )gridindicesnext_indicescarrydim_sizeindexrG   s          r-   _get_next_indicesr      s    ,
%!$s4'9'9":":;; 0 0oh	%E**AME	%A..////	x%%	&	&&r0   c                     J rD   rE   )argskwargss     r-   _pallas_call_implr      s    ,r0   jaxprjax_core.Jaxprnamestrdebugboolrv   tuple[tuple[int, int], ...]grid_mappingGridMappingcompiler_paramsr	   c           	     f   !"#$ ~~t          |j        g          \  }}t          |          t          fdj        D                       t          d           J                                 5  t          j        | d          \  d d d            n# 1 swxY w Y   |rt                     t          j        ||          }|j                 #|t          #          d          }	| j        j                 }
d |
D             }t!          |          $g }t#          t%          j        |	|          j                  D ]\  }}t+          |j        t.          j                  rn|j        j        }|`t5          d |D                       rG|rt7          d          t9          d|j                  }t=          j        ||d |D                       }|                     |           d j        D              d	 t#           j                  D             tC          tD          |          }|#                    $           t          |	          t          |          z   !tI          j%        d
          ft                    z  }rtM          tH          j'                  "nd""fd} !#$f	d}t=          j(        ||tI          j%        d
          |g|R           ^}}}|t          |	          t          |	          t          |          z            }g }t#          |j                  D ]\  }}t+          |j        t.          j                  ry|j        j        }|kt5          d |D                       rR|rt7          d          t#          | \  }}d t#          |j)        |          D             }t=          j*        |||          }|j)        |j+        j)        k    r(t=          j*        |d|j,        z  |j+        j)                  }|                     |           |S )Nc              3  V   K   | ]#}|t           j        ur|nt                    V  $d S rD   )r   dynamic_grid_dimnext)r+   rV   dynamic_grid_args_iters     r-   r.   z._pallas_call_impl_interpret.<locals>.<genexpr>   sT          K000aa&''     r0   rE   c                    g | ]	}|j         
S rE   r   r+   rT   s     r-   
<listcomp>z/_pallas_call_impl_interpret.<locals>.<listcomp>   s    222a16222r0   c              3  "   K   | ]
}|d k    V  dS r   r   NrE   r+   ps     r-   r.   z._pallas_call_impl_interpret.<locals>.<genexpr>   &      $B$BQQ&[$B$B$B$B$B$Br0   z$Padding with aliasing not supported.rW   c                    g | ]	}g |d R 
S r   rE   r   s     r-   r   z/_pallas_call_impl_interpret.<locals>.<listcomp>   s"    "<"<"<q7Q777"<"<"<r0   c                J    g | ] }t          d  |j        D                       !S )c              3  2   K   | ]}|t           j        u V  d S rD   )r   mapped)r+   rH   s     r-   r.   z9_pallas_call_impl_interpret.<locals>.<listcomp>.<genexpr>   s*      <<A##<<<<<<r0   )r2   r;   r+   ry   s     r-   r   z/_pallas_call_impl_interpret.<locals>.<listcomp>   sA       
 <<R^<<<<<  r0   c           	     t    g | ]5\  }}|d n+t          d t          ||j                  D                       6S )Nc              3  (   K   | ]\  }}|rd n|V  dS rR   rE   rF   s      r-   r.   z9_pallas_call_impl_interpret.<locals>.<listcomp>.<genexpr>   s.      DD41aaQDDDDDDr0   )r2   rY   r;   )r+   iidry   s      r-   r   z/_pallas_call_impl_interpret.<locals>.<listcomp>   sZ        #r kddDD3sBN+C+CDDDDD  r0   r   rS   c                    | ^}}|k     S rD   rE   )r   rG   _num_iterationss      r-   condz)_pallas_call_impl_interpret.<locals>.cond   s    EA~r0   c           	       	 | ^}}t          fdt          t                              D                       }t          |g          \  }}t	          j        |          5  fdj        D             }d d d            n# 1 swxY w Y   t          t          ||          }t	          j        |          5  t          j
                  t                    t          |          z   t                    z   k    sGJ t          j
                  t                    t          |          t                    f            t          j        g||R  }d d d            n# 1 swxY w Y   t          |j        g          \  }}	}
t          t          |||	          }|dz   t                    g||
R S )Nc              3  `   K   | ](\  }\  }}|j         vt          j        ||          V  )d S rD   )vmapped_dimsr   GridAxis)r+   dimidxrH   r   s       r-   r.   z<_pallas_call_impl_interpret.<locals>.body.<locals>.<genexpr>   sR        C#ql/// 	S!$$//// r0   c                4    g | ]}|d n |j         gR  S rD   )compute_start_indices_interpret)r+   ry   loop_idxscalarss     r-   r   z=_pallas_call_impl_interpret.<locals>.body.<locals>.<listcomp>   sH     1 1 1 *$$"D""DX"XPW"X"X"X1 1 1r0   rS   )r2   rI   rY   r   r   grid_envblock_mappingsmapr@   r6   invarsrd   
eval_jaxprnum_index_operandsrO   r   )r   rG   carry_blockslocal_grid_envcarry_consts_insscratchstart_indicesblocksr   	out_inoutout_scratch	out_carryr   block_shapesdischarged_constsdischarged_jaxprr   r   is_indexing_dimnum_inout_blocksr   scratch_valuess               @r-   bodyz)_pallas_call_impl_interpret.<locals>.body   s   !&Ax,    &s8T':':;;    N
 !+<:J9K L Lg		n	-	- 1 11 1 1 1 1 /1 1 1m1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 %}l!?4 4F		n	-	- 
6 
6!())S\\CKK-G#
K K .    %
&
&
g,,
f++
n

	
   "#35F 6 6$*6-46 6 6f
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 
6 !+02BC!E !EAy+/$iB BIE$T844 &&$& & &s%   &BBB>B$E..E25E2c              3  "   K   | ]
}|d k    V  dS r   rE   r   s     r-   r.   z._pallas_call_impl_interpret.<locals>.<genexpr>  r   r0   c                    g | ]
\  }}||z
  S rE   rE   )r+   r,   r   s      r-   r   z/_pallas_call_impl_interpret.<locals>.<listcomp>"  s     BBB41aQBBBr0   r   )-r   num_dynamic_grid_boundsiterr2   r   r   	trace_envstate_dischargedischarge_stateprintrz   rl   slice_index_opsr6   r   slice_scratch_opsrk   rY   	itertoolschainr   r   indexing_moder   	UnblockedpaddinganyNotImplementedErrorr   r%   r   rZ   rs   r   r^   extendr'   r)   r   multiply
while_looprK   slicert   ndim)%r   r   r   rv   r   r   r   dynamic_grid_argsout
block_argsscratch_invarsrj   r   rg   ry   r   r]   grid_start_indicesr   r   r   out_out	out_nopadopad_lowpad_highlimit_indicesr   r   r   r   r   r   r   r   r   r   s%       `                      @@@@@@@@@@r-   _pallas_call_impl_interpretr      s    t '
\12 T   122	         
 
$
 
$d	+	+	3	3	3 U U*9*I%QS*T*T''U U U U U U U U U U U U U U U
 	
 B $&:	< 	<#-.'CLLMM"* < >?.22>222-+M::.
%9?:s33\5PQQ  ea""K$9:: > (g		$B$B'$B$B$B!B!B	 	L#$JKK
K'b@@@	GAy"<"<G"<"<"<==	LLOOOO +  /  ,*EFF  , ,e\
B
B%,,~__s3xx/	!T2	 CL$//NN N
    & & & & & & & & & & & & &B >
D39Q<<!3<e<< .1a% #j//#j//CHH"<<=')7L>??  ea""K$9:: 1 (g		$B$B'$B$B$B!B!B	 	L#$JKK
KMBB3qw+A+ABBBIa-00w"&,,,
)Ataf}b&:&@
A
AaQ	s   <B!!B%(B%c                >    t          d | j        D                       S )Nc              3  h   K   | ]-}t          j        |j        j        |j        j                  V  .d S rD   )rd   r   rt   rK   r%   r   s     r-   r.   z-_pallas_call_abstract_eval.<locals>.<genexpr>,  sV       = = #B$8$>$&$8$>@ @ = = = = = =r0   )r2   rl   )r   avalsr   s      r-   _pallas_call_abstract_evalr  +  s3    	 = =%;= = = 
= 
= =r0   c               :   |j         rt          d          |j        rt          |rt          d          d |D             }	d |D             }|	dg|j        z  z   }
t	          j        |d          }t          j        ||
g           \  }}|j        |j	        c}\   t          |j        t          |           |j        t          |          g          \  }}}}g ||||R }g }|j        D ]j}t          |t          j                  r9|                    |                    |j        |j                                     }|                    |           k|                    ||          }|rt)          |           t          |j        t          |           g          \  }}g ||||R }|                    ||j        d	z  |j        d	z  
          }t/          j        g | |R || d|||d|d}t          |t          |          d	z  g          \  }}||fS )Nz.interpret with dynamic grid bounds unsupportedz JVP with aliasing not supported.c                D    g | ]}t          |t          j                   S rE   )r   r   Zeror+   ts     r-   r   z)_pallas_call_jvp_rule.<locals>.<listcomp>:  s'    HHH!*Q555HHHr0   c                H    g | ]}t          |          t          j        u| S rE   )typer   r  r  s     r-   r   z)_pallas_call_jvp_rule.<locals>.<listcomp>;  s*    AAAAT!WWGL%@%@a%@%@%@r0   TrE   )input_index)r   r      r   
num_inputsnum_outputs_jvp)r   r   r   	interpretr   rv   r   )r   r   r   r  rd   ClosedJaxprr   	jvp_jaxprr   constsr   r   r6   r   r   JaxprInputEffectreplacer   r  rs   r   r   r  pallas_call_pbind)primalstangentsr   r   rv   r   r   r  r   nonzero_tangentsnonzero_tangents_with_outputsclosed_jaxpr
jvp_jaxpr_r   r  primal_refsprimal_out_refstangent_refstangent_out_refsr   effseffin_bmsout_bmsjvp_bmsjvp_grid_mappingout_flatout_primalsout_tangentss                                r-   _pallas_call_jvp_ruler-  1  s    ) P
N
O
OO$ 
 B
@
A
AAHHxHHHAAAAA("2dVl>V5V"V%eR00,,|-JBOO-*a"J$5-)R BLW|'?XOB B>+.> N[M<M/M<LMM&	$  c#w/00 KKll9#3CO#DEE   c 	KKvt<<)
 	)|:S\\NKK/&'2f2v22'22'!))(1,*Q. *  
  


 
 ===#%
 
 
( )CMMQ4F3GHH+|	l	""r0   	axis_sizeintr   jax_core.ShapedArrayr   int | batching.NotMappedblock_mappingBlockMappingc                4   fd}t           j        gj        j        }|                                 5  t          j        t          j        |          |          \  }}}	\   d d d            n# 1 swxY w Y   j	        }
t          j        u r
|
}j        }nSt          |
t           j                  }t          j        t          j        j        |          j        j                  }t'          j        ||	          }                    |||          S )Nc                    t          j        j        j        j        j        g|R  }t
          j        ur|                    |            t          |          S rD   )	rd   r   index_map_jaxprr   r  r   
not_mappedinsertr2   )new_idxr   r   r2  r   s      r-   _block_map_functionz1_batch_block_mapping.<locals>._block_map_functionq  sc    !-"?"E"/"?"F)#') ) )G (%%%nnS'""">>r0   )r;   rt   r6  )r   index_map_grid_avalr6  in_avalsr   petrace_to_jaxpr_dynamiclu	wrap_initr;   r   r7  rt   r   r   jaxShapeDtypeStructrK   r%   rd   r  r  )r   r.  r   r   r2  r:  	idx_avalsblock_mapping_jaxprr   r  rK   new_block_shapenew_array_shape_dtyper   s      ``         r-   _batch_block_mappingrG  l  s}   
      .X1N1WX) 6 6)+)B
())9*6 *6&FB6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 
#%HO);"5#{/ABBO0]4:	  	  	'-	/ / 
2F
;
;%			?1F/4 
 
6 
6 6s   .A00A47A4r   dims"Sequence[int | batching.NotMapped]Btuple[tuple[jax.Array, ...], tuple[int | batching.NotMapped, ...]]c               J   t          |           }t          |          }|D ]e\  }}||         }d||<   |t          j        u r t          j        ||         |d          ||<   @|dk    rt	          j        | |         |d          ||<   ft          |          t          |          fS )a$  Broadcast input/output operands.

  When we have input/output aliasing, since the output will be mapped, we need
  to make sure to broadcast the input across that dimension if it is not
  mapped. If the input is mapped, but on a different axis, we tranpose the input
  to match the output.
  r   )r   r   r7  	broadcastr'   moveaxisr2   )	r   rH  rv   r.  args_dims_r  r   r   s	            r-   _broadcast_input_output_aliasesrP    s     t**%
t**%, C Cnk1

CE+
h!!!#-eK.@)QOOeK	<[(93BBeK	uuU||	##r0   r  c          
     N    st          d          d t                     D             \  t                     \   fdj        D             }	d f	d
}
t          j                            d|
|	d          }|dt          |          z  fS )a  Batch the pallas_call by calling it in loop over the batch size.

  This function provides a fallback implementation of batching a pallas_call
  for the cases in which adding a batch dimension to the pallas grid is not
  supported. This is currently the case when the batched dimension corresponds
  to a dynamic axis or a scalar prefetch argument.

  This implementation builds a HLO loop that dynamic_slices the inputs according
  to the current iteration index and dynamic_updates an (initially empty) output
  allocation.
  'vmapping pallas_call with no arguments.c                J    h | ] \  }}|t           j        u|j        |         !S rE   r   r7  rK   )r+   argr   s      r-   	<setcomp>z,_batch_with_explicit_loop.<locals>.<setcomp>  s:       
#s	H'	'	' 
in	'	'	'r0   rv   r.  c           	         g | ];}t          j        t          |j        j        d           |j        j                  <S )r   r$   )r'   emptyr   rt   rK   r%   )r+   ry   r.  s     r-   r   z-_batch_with_explicit_loop.<locals>.<listcomp>  sY         
iR17IFF*02 2 2  r0   batch_index	jax.Arraystatelist[jax.Array]r_   c                  	 g }t                    D ]s\  }}|t          j        u r|                    |           )|                    t	          j        t          j                            || d|          |                     tt          j
        |
	d}t          |          D ]1\  }}t          j                            ||         || d          ||<   2|S )NrS   )operandstart_index
slice_sizeaxisrb  r   r   r   rv   r   r  r   r   )rY   r   r7  rs   r'   r9   rA  r   dynamic_slice_in_dimr  r  rI   dynamic_update_index_in_dim)rZ  r\  
batch_argsrU  r   	batch_outrG   batch_out_arrayr   r   r   rH  r   rv   r  r   r   s           r-   r   z'_batch_with_explicit_loop.<locals>.body  s.   JdOO 
 
S 
#	#	##K,, + 	 -     
	
 
	
 
	
 
	
 "	!1'	 	 	I (	22  ?44
(

	 5  eAhh Lr0   r   F)unrollr   )rZ  r[  r\  r]  r_   r]  )r   rY   rP  rl   rA  r   	fori_loopr6   )r   rH  r   r   r   rv   r   r  r   initial_stater   resultr.  s   `````````   @r-   _batch_with_explicit_looprn    s    . 
 I
G
H
HH $oo  ,9 /

/	  *$    2  -& & & & & & & & & & & & & &P 7Q	4uMM&	F#	##r0   c                  dd}	d t          | |          D             \  }
|
dk    rFt          |	| |          } t          j        | |||||||d	}d
 |D             dt	          |          z  fS t          | |j        g          \  }} t          ||j        g          \  }}t          d t          ||          D                       rt          |	||          }n8t          d |D                       rt          || z   ||z   |||||||	  	        S 	 ~|j        rt          | |j        g          \  }} t          ||j        g          \  }}t          d t          ||          D                       r=t          |	||          }t          j        gt	          |          z  }g || R } g ||R }nt          || z   ||z   |||||||	  	        S |st          d          |j        }d |j        D             }t#          | |||
          \  } }t%          |          dg|j        z  z   }|j        }|j        }||t	          |          |z
           }t          t+          t,          ||
          |||d          |          }|j                            |j                  \  }}|rJ t4          j        f|z   }t9          j        |i f          \  }}|                    |
g|j        R tA          |          ||dtA          d |j!        D                       z             }t          j        g || R |d| |||||d	}|dt	          |          z  fS )Nrg   r[  bdimr1  r_   c                N    |t           j        u r| S t          j        | |          S )Nrc  )r   r7  r'   r9   )rg   rp  s     r-   _maybe_squeeze_out_bdimz;_pallas_call_batching_rule.<locals>._maybe_squeeze_out_bdim  s,     x"""h;qt$$$$r0   c                J    h | ] \  }}|t           j        u|j        |         !S rE   rT  )r+   rg   ds      r-   rV  z-_pallas_call_batching_rule.<locals>.<setcomp>  s9     1 1 1tq!H/// 
///r0   rS   rd  c                8    g | ]}t          j        |d           S r   )r'   expand_dimsrf   s     r-   r   z._pallas_call_batching_rule.<locals>.<listcomp>)  s$    ///aCOAq!!///r0   r   c              3  Z   K   | ]&\  }}|t           j        u p|j        |         d k    V  'dS rR   rT  r+   rU  rp  s      r-   r.   z-_pallas_call_batching_rule.<locals>.<genexpr>3  sR       	 	
#t h!!9SYt_%9	 	 	 	 	 	r0   c              3  2   K   | ]}|t           j        uV  d S rD   )r   r7  )r+   rp  s     r-   r.   z-_pallas_call_batching_rule.<locals>.<genexpr>:  s*      
I
It4x**
I
I
I
I
I
Ir0   )	r   rH  r   r   r   rv   r   r  r   c              3  Z   K   | ]&\  }}|t           j        u p|j        |         d k    V  'dS rR   rT  rx  s      r-   r.   z-_pallas_call_batching_rule.<locals>.<genexpr>R  sR        C 	##;sy!';     r0   rR  c                    g | ]	}|j         
S rE   r   r   s     r-   r   z._pallas_call_batching_rule.<locals>.<listcomp>l  s    
(
(
(a16
(
(
(r0   rW  r   c              3      K   | ]	}|d z   V  
dS rR   rE   ri   s     r-   r.   z-_pallas_call_batching_rule.<locals>.<genexpr>  s&      II!AIIIIIIr0   )r   r   index_map_avalsindex_map_treer   batched_)rg   r[  rp  r1  r_   r[  )"rY   r   r  r  r6   r   r   allr   r   rn  r   r   r7  r   r   r   rP  r   r  num_scratch_operandsr   rG  r~  	unflattenr}  r   r;  r   tree_flattenr  r   r2   r   )r   rH  r   r   r   rv   r   r  r   rr  r.  r   r   dynamic_grid_dimsscalar_argsscalar_bdimsbdimsr   r  all_dimsr   r  avals_to_batchbatched_block_mappingsindex_map_tree_argsindex_map_tree_kwargsbatched_index_map_argsbatched_index_map_avalsbatched_index_map_treebatched_grid_mappings                                 r-   _pallas_call_batching_ruler    s   % % % %1 1s4 1 1 1*)!^^&d33D

	!1'	 	 	C 0/3///C@@ '
\12 T '
\12 T 	 	 	,.?@@	 	 	   	 !!24E  
I
I7H
I
I
III 	 %%%!1'
 
 
 
 	$ "4,*I)JKKK$TL,K+LMML%
   [,77      4k<PPk)*S-=-==l"{"T""d$|$e$$dd 'T!e##3)
 
 
 
 
 I
G
H
HH..
(
(5<
(
(
(% /
D';y  *$ $ZZ1# 888(#6%:
 +SZZ:N-NOP."L)<<!""#	  0</J/T/T"0$ 0$,,""""';=@SS4=4Jr"5$ 5$11%--*)**122-+%II|/HIIIIII . K K 	 
	
	
	 
	 d'/%
	 
	 
	# 
dSXXo	r0   
body_jaxprjax_core.ClosedJaxprerrorcheckify.ErrorKtuple[jax_core.ClosedJaxpr, tree_util.PyTreeDef, set[checkify.ErrorEffect]]c                   t          j        |          \  }}t          t          j        |          }g || j        }t          j        |j        d          5  t          j	        | ||g|R  \  }}}	d d d            n# 1 swxY w Y   |||	fS )NrE   )
r   r  r   r   get_shaped_avalr<  r   tracing_grid_envr   jaxpr_to_checkify_jaxpr)
r  enabled_errorsr  r   err_valserr_treeflat_err_and_in_valschecked_jaxprout_treeerror_effectss
             r-   !checkify_pallas_kernel_body_jaxprr    s     !-e44(H)844(:8:j&9:#L$5r:: E E-5-MNH.E/C.E .E .E*M8]E E E E E E E E E E E E E E E 
-	//s   A<<B B jax_core.Valuec          
       01234 t          ||j        |j        g          \  }}	}t          |	          4t          |          2|j        3t          j        |          }
t          |
|| |          \  }}}|                     |          } t          j
                            |           \  }}t          t          j        |          }d |j        D             }t          |          1t!          d |D                       }g ||}t          j        |          }t#          j        |j        d          5  t          j        |||g|R  \  0}}d d d            n# 1 swxY w Y   01234fd}d }t          ||          }t          ||          }d |D             }t          |423g          \  }}}}g ||||||}t+          j        |          \  }}t/          j        t3          j        |          |          \  } }!t          j        |||!dd          }"t#          j        |j        d          5  t          j        | ||"          \  }#}}\   d d d            n# 1 swxY w Y   t#          j        d d           gt          |          z  }$t=          t+          j        |$          d	                   \  }%}t          tA          t"          j!        |j"        |j#        |j        |j$        d
          |$|%|          }&t          |j%        2g          \  }'}(|&                    g |&|'|&|(R |j'        t          |&          z   |j        t          |&          z             })t!          1fd|D                       }t!          4fdtQ          1          D                       |z   }*g |	||}+tS          j*        g ||+R |#||)|*d|},t          |,1g          \  }-}.d |-D             }-t          j
        +                    ||-          \  }/}|/|.fS )Nc                    g | ]	}|j         
S rE   r   r   s     r-   r   z-pallas_call_checkify_rule.<locals>.<listcomp>  s    ...A...r0   c              3  >   K   | ]}t          j        |          V  d S rD   rc   rf   s     r-   r.   z,pallas_call_checkify_rule.<locals>.<genexpr>  s-      NNQX5a88NNNNNNr0   rE   c                 n   t          | g          \  }}}}}}d |D             }g |||||}t          j        j                  t          |          k    sJ t	          j        j        j        g|R  }	t          |	g          \  }
}t          |||
          D ]\  }}}||d<   ||d<   g S )Nc                    g | ]
}|d          S r   rE   )r+   err_refs     r-   r   zHpallas_call_checkify_rule.<locals>.checked_kernel_fn.<locals>.<listcomp>  s    CCC'CCCr0   r   )r   r6   r   r   rd   r   r  rY   )r   r   in_error_refsinputsout_error_refsoutputsr   input_error_vals
jaxpr_argsresult_flatoutput_errorsr   in_refout_refr  r  num_err_valsnum_kernel_inputsnum_kernel_outputsnum_scalarss                  r-   checked_kernel_fnz4pallas_call_checkify_rule.<locals>.checked_kernel_fn  s   		l	L*<	>
? 
?WmV^Wg DC]CCC L#KgKKK7KJ}")**c*oo====%]1@4>@ @ @K!+~>>M1"%~}#6 #6  fTlgdmmIr0   c                &   t          | t          j                  r+| j        }t          j        d| j        z   || j                  S t          | t          j                  rt          j	        | d| j        z             S t          j
        | gg          S )N)rS   rS   )r%   r   )r   rd   r   r%   rK   r   rA  Arrayr'   reshaper7   )rU  r%   s     r-   _ensure_2d_error_shapez9pallas_call_checkify_rule.<locals>._ensure_2d_error_shape  s    #x+,,  ie!&39"4E,/M; ; ; ;	C	#	#  [fsy0111Ywr0   c                V    g | ]&}t          j        |t           j        j                  'S rE   )r   r   MemorySpaceERRORr+   err_vals     r-   r   z-pallas_call_checkify_rule.<locals>.<listcomp>  sF     O O O29 #4{&,. . O O Or0   Fcheckify_pallasr   r  )r}  r~  r   mapped_dimswhatr  c              3  2   K   | ]\  }}|z   |z   fV  d S rD   rE   )r+   rG   r   r  s      r-   r.   z,pallas_call_checkify_rule.<locals>.<genexpr>3  sM       K K+1Aqq~q~&K K K K K Kr0   c              3  &   K   | ]}|z   |fV  d S rD   rE   )r+   rG   r  s     r-   r.   z,pallas_call_checkify_rule.<locals>.<genexpr>5  s=       *7 *7q}a*7 *7 *7 *7 *7 *7r0   )r   r  r   rv   c                    g | ]
}|d          S r  rE   r  s     r-   r   z-pallas_call_checkify_rule.<locals>.<listcomp>A  s    000gGDM000r0   ),r   r   r   r6   r  r=  close_jaxprr  _add_placeholder_effectsrA  treeflattenr   r   r  r   r2   r   r  r   r  r   r  r
   flatten_fun_nokwargsr?  r@  
debug_infor>  	BlockSpecr   tree_flatten_with_pathr   $_convert_block_spec_to_block_mappingr}  r~  r   r   r  r  ranger  r  r  )5r  r  r   r  rv   r   r   r   dynamic_grid_boundsr   r  _jaxprr   r  r  err_in_treeshaped_err_avalsinput_avalsshaped_input_avalscheckify_in_avalsclosed_kernel_jaxprerror_out_treer  r  error_memref_avalshaped_scalar_avals
input_avaloutput_avalscratch_avalretrace_in_avalsjaxpr_flat_avalsjaxpr_in_treewrapped_kernel_with_errout_tree_thunkr   final_jaxprerror_block_specserror_pathserror_block_mappingsinput_block_mappingsoutput_block_mappingsgrid_mapping_with_errorinput_output_aliases_with_errornew_vals_inrm  errorsresults	new_errorr  r  r  r  r  s5                                                   @@@@@r-   pallas_call_checkify_ruler    s     (2
\1,.( ($w G+$ii#/ &&,>NE<9 9&!]

(
(
7
7%(**511(K18<< /....+X,NN+NNNNN,( ,*,u--#L$5r:: N N'/'G^[(N;L(N (N (N$M>1N N N N N N N N N N N N N N N        :      /1ABB'22(O O=MO O O?I;(9;MN@P @P<z;G* G-> G G(G+6G9EG$-$:;K$L$L!M,4,Il$%%}-6 -6)>
-}ne=NP P%#L$5r:: : :5!15: :KAr: : : : : : : : : : : : : : : #,T4889C@P<Q<QQ):;LMMaPQQ.+q<(8'6"$1	 	 	 	;(8: : 1;!$5#719 19--(00E+ E.B E+E.CE E(3/C+D+DD*S1E-F-FF	 1    K K K K5IK K K K K$) *7 *7 *7 *7"'"5"5*7 *7 *7 %7 %79M%N! -',H,t,+ 2 [  
(8	 
  & v~66/&'00000&##NF;;,)Q	G	s$   3EEE4II!$I!funr   kernel_avals&tuple[pallas_core.AbstractMemRef, ...]kernel_in_treetree_util.PyTreeDefc                   |r"t          t          t          |                    }t          j        t          j        |           |          \  }}t          j        | ||dd          }|	                                5  t          j
        |||          \  }}	}
\   |
rt          j        ||j        d           }t          |
          }g }t          |
          D ]\  }}t!          j        t!          j        d d           t'          j        |          ft+          j        |j        |j                  |j        |j        |j        dd          }|                    |           |                    g ||j        R |          }d d d            n# 1 swxY w Y    |            }|t'          j        d           k    rtA          d	|           |||
fS )
NFr    c                ,    t          j        | d           S rD   )r   r   r   s    r-   <lambda>z(_trace_kernel_to_jaxpr.<locals>.<lambda>\  s    )FtT)R)R r0   )r   make_abstract_refrE   r  )path
array_avalr}  r~  r   r  r  )r   num_constant_operandszIThe kernel function in a pallas_call should return None. Found a PyTree: )!r2   r   r   r
   r  r?  r@  r=  r  r   r>  state_utilshoist_consts_to_refsr   r6   rI   r   r  r  r   SequenceKeyrd   r   rK   r%   r}  r~  r   rs   r  r   tree_structure
ValueError)r  r   r  r  r  wrapped_kernel_funr  r   r   r   r  r  const_block_mappingsc_idxcconst_block_mappingkernel_out_trees                    r-   _trace_kernel_to_jaxprr
  F  sa     ,A)+ + , ,L'/'Dl3() ()$n
-^^UM
R
R%  45G5A5J JE1fb  .
/RRT T Te "&kk'' 9 9(%)N!$--'..0+AGQW==(8'6"	
 	
 	
 	##$78888!))N/N,2MNN 5 *  l5              < #N$$/	06666
	-*	- 	-. . . 
uf	$$s   ;DFFFf
str | Nonec                J    | t          | d          r| j        r| j        nd}|S )N__name__func)r~   r  )r  r   s     r-   _extract_function_namer  w  s-    	\ J//JAJJ1::FD	+r0   jax_pallas_use_mosaic_gpuJAX_PALLAS_USE_MOSAIC_GPUFz\If True, lower Pallas kernels to the experimental Mosaic GPU dialect, instead of Trition IR.)defaulthelpplatform	Exceptionc                (    t          d|  d          S )Nz&Cannot lower pallas_call on platform: z. To use Pallas on GPU, install jaxlib GPU 0.4.24 or newer. To use Pallas on TPU, install jaxlib TPU and libtpu. See https://jax.readthedocs.io/en/latest/installation.html.r  )r  s    r-   _unsupported_lowering_errorr    s.    	Ax A A A
 
 r0   ctxmlir.LoweringRuleContextc          
         |r/t          t          fi |} t          j        |d          | g|R  S dd}dd}dd	}t          j        | d
t          ||||          d t          j        g|R d|i|S )NT)multiple_resultsr  r  in_nodes'mlir.ir.Value | Sequence[mlir.ir.Value]c                     t          d          )Nz0Only interpret mode is supported on CPU backend.r  r  r  paramss      r-   cpu_loweringz+_pallas_call_lowering.<locals>.cpu_lowering  s     G
H
HHr0   c                X    t           t          d          t          j        | g|R i |S )Ntpu)mosaic_tpu_backendr  pallas_call_tpu_lowering_ruler!  s      r-   tpu_loweringz+_pallas_call_lowering.<locals>.tpu_lowering  sJ     !'...;     r0   c                    	 t           j        rddlm} nddlm} n# t
          $ r t          d          w xY w |j        | g|R i |S )Nr   pallas_call_registrationgpu)_PALLAS_USE_MOSAIC_GPUr<   jax._src.pallas.mosaic_gpur+  jax._src.pallas.tritonImportErrorr  pallas_call_lowering)r  r  r"  r+  s       r-   gpu_loweringz+_pallas_call_lowering.<locals>.gpu_lowering  s    /		% DGGGGGGGCCCCCC / / /'.../8#8     s    6r    )cpur%  cudarocmr  )r  r  r  r  )r   r   r   	lower_funlower_per_platformdictr   
no_effects)r  r  r  r"  implr#  r(  r2  s           r-   _pallas_call_loweringr;    s      G.99&99D64>$666sFXFFFFI I I I
       
	 m!%,*6+7+7"9 "9 "9 "&!(!3	
+ #+	
+ 	
+ 	
+ ,5	
+ $*	
+ 	
+ 	+r0   primjax_core.Primitiver"  dict[Any, Any]c                
    ~ ~dS )Nr    rE   )r<  r"  s     r-   _pallas_custom_str_eqn_compactr@    s     F	r0   c                    |                                  5  t          j        |d| i|cd d d            S # 1 swxY w Y   d S )Nr   )r   r  abstract_eval)r   r<  r"  s      r-   _pallas_call_typecheck_rulerC    s      &	 ,06                  s   599rE   )		grid_specr   in_specs	out_specsrv   r   r  r   r   Callable[..., None]	out_shaperD  GridSpec | Noner   	TupleGridrE  BlockSpecTreerF  dict[int, int]dict[str, Any] | NoneCallable[..., Any]c       	        (   	
 t           	          	
i 
t          |||          nJ|rt          d|           |t          urt          d|           |t          urt          d|           ~~~t	          j                  \  t          |t                    rt          |          }t          j
        |          \  }t          |          \  d D             t          j        
 	fd            }|S )am  Invokes a Pallas kernel on some inputs.

  See `Pallas Quickstart <https://jax.readthedocs.io/en/latest/pallas/quickstart.html>`_.

  Args:
    f: the kernel function, that receives a Ref for each input and output.
      The shape of the Refs are given by the ``block_shape`` in the
      corresponding ``in_specs`` and ``out_specs``.
    out_shape: a PyTree of :class:`jax.ShapeDtypeStruct` describing the shape
      and dtypes of the outputs.
    grid_spec: An alternative way to specify ``grid``, ``in_specs``, and
      ``out_specs``. If given, those other parameters must not be also given.
    grid: the iteration space, as a tuple of integers. The kernel is executed
      as many times as ``prod(grid)``.
      See details at :ref:`pallas_grid`.
    in_specs: a PyTree of :class:`jax.experimental.pallas.BlockSpec` with
      a structure matching that of the positional arguments.
      The default value for ``in_specs`` specifies the whole array for all
      inputs, e.g., as ``pl.BlockSpec(x.shape, lambda *indices: (0,) * x.ndim)``.
      See details at :ref:`pallas_blockspec`.
    out_specs: a PyTree of :class:`jax.experimental.pallas.BlockSpec` with
      a structure matching that of the outputs.
      The default value for ``out_specs`` specifies the whole array,
      e.g., as ``pl.BlockSpec(x.shape, lambda *indices: (0,) * x.ndim)``.
      See details at :ref:`pallas_blockspec`.
    input_output_aliases: a dictionary mapping the index of some inputs to
      the index of the output that aliases them. These indices are in the
      flattened inputs and outputs.
    debug: if True, Pallas prints various intermediate forms of the kernel
      as it is being processed.
    interpret: runs the ``pallas_call`` as a ``jax.jit`` of a scan over the
      grid whose body is the kernel lowered as a JAX function. This does not
      require a TPU or a GPU, and is the only way to run Pallas kernels on CPU.
      This is useful for debugging.
    name: TO BE DOCUMENTED.
    compiler_params: TO BE DOCUMENTED.

  Returns:
    A function that can be called on a number of positional array arguments to
    invoke the Pallas kernel.

  Nz=If `grid_spec` is specified, then `grid` must be `()`. It is zLIf `grid_spec` is specified, then `in_specs` must be `no_block_spec`. It is zMIf `grid_spec` is specified, then `out_specs` must be `no_block_spec`. It is c                L    g | ]!}t          j        |j        |j                  "S rE   )rA  rB  rK   r%   rf   s     r-   r   zpallas_call.<locals>.<listcomp>)  s9     / / / )!'17;; / / /r0   c                    t          j        |           \  }}t          |          \  }}t          d |D                       }t          d D                       }t	          j        ||||          \  }}t          j        |          \  }	}
t          |t          |	          |
          \  }}}                                D ]\  }}|t          t          |                    vr)t          d| d| d| dt          |           d	          |t          t          |                    vr)t          d| d| d	| dt          |           d	          ||         }||         }|j        |j        k    s|j        |j        k    rRt          d| d| d
t          j        ||                    d| dt          j        |                    d| d          t          ||j        g          \  }}t#          j        g |||R ||t                                                    d}t          j        |          }|S )Nc              3  b   K   | ]*}t          j        t          j        |                    V  +d S rD   )rd   re   get_avalri   s     r-   r.   z/pallas_call.<locals>.wrapped.<locals>.<genexpr>/  sL       . . #283DQ3G3GHH . . . . . .r0   c              3  T   K   | ]#}t          j        |j        |j                  V  $d S rD   )rd   r   rK   r%   r   s     r-   r.   z/pallas_call.<locals>.wrapped.<locals>.<genexpr>1  sG       5 5  $/AA 5 5 5 5 5 5r0   )r  z+input_output_aliases contains the mapping ':z' with input index z outside the range [0, )z' with output index z' referring to inputz with abstract value z and to outputz! with a different abstract value .)r   r   r   r  r   rv   r   )r   r  r   r2   r   get_grid_mappingr  r
  itemsr  r6   r  rK   r%   keystrr   r   r  r  tree_unflatten) r   flat_args_with_pathsin_treein_paths	flat_argsflat_in_avalsflat_out_avalsr  r   flat_kernel_avalsr  r   r  i_idxo_idxin_avalout_aval
index_args	rest_argsr*  r   r   r   r  r  flat_out_shapesrD  rv   r  r   	out_pathsr  s                         r-   wrappedzpallas_call.<locals>.wrapped+  s\   $-$DT$J$J!' !566Hi . .#,. . . . .M 5 5$35 5 5 5 5N "-!=w)"- "-L, )2(>|(L(L%~"8	<011># # #L% -2244 7 7u	eC..//	/	/)% ) )% ) ) ) )}%%) ) )* * 	* 
eC//00	0	0*% * *% * *!* *~&&* * *+ + 	+ e$g&h	(.	(	(GMX^,K,K6% 6 6% 6 6!*!1(5/!B!B6 6%6 6 &,Yu-=>>6 6 +3	6 6 67 7 	7 -L 'y<3R2STTJ	! )	))),2)5>) )$!"#7#=#=#?#?@@') ) )H 
"8X
6
6CJr0   )r  GridSpecr  no_block_specr   unzip_dynamic_grid_boundsr   r   r2   r   r  r   rA  jit)r  rH  rD  r   rE  rF  rv   r   r  r   r   flat_out_shapes_with_pathsrk  r  ri  rj  r  s   ` `   `````  @@@@r-   r    r      s   p 
 4	(	($Ox33II $# # #$ $ $ }$$2'/2 23 3 3 %%3'03 34 4 4 Hi#.#H#S#S )  	4   !i  I)2)I))T)T&h%&@AA)_/ /-/ / //7/ / / / / / / / / / / / / / 7/` 
.r0   r*  )r_   r`   )rl   rm   r_   rn   )r   r   r   r   r   r   rv   r   r   r   r   r	   )r   r   )rv   r   r   r	   )r   r   r.  r/  r   r0  r   r1  r2  r3  r_   r3  )
r   rn   rH  rI  rv   r   r.  r/  r_   rJ  )r   rn   rH  rI  r   r   r   r   r   r   rv   r   r   r   r  r   r   r	   )r   r   r   r   r   r   rv   r   r   r   r  r   r   r	   )r  r  r  r  r   r   r_   r  )r  r  r   r  r   r   r  r   rv   r   r   r   )
r  r   r   r   r  r  r  r  r  r   )r  r   r   r  r_   r   )r  r   r_   r  )r  r  r  r   )r<  r=  r"  r>  r_   r   )r  rG  rH  r	   rD  rI  r   rJ  rE  rK  rF  rK  rv   rL  r   r   r  r   r   r  r   rM  r_   rN  )l__doc__
__future__r   collections.abcr   r   r   	functoolsr   r   r   typingr	   rA  r
   r   jax._srcr   r   r   r   rd   r   r   r?  r   jax._src.interpretersr   r   r   r   r=  jax._src.pallasr   jax._src.pallas.primitivesr   jax._src.stater   r   r   r  jax._src.utilr   r   r   r   r   r   	jax.numpynumpyr'   r4   r   
unsafe_maprY   
unsafe_zipGridrJ  rl  r3  r   r  rK  NoBlockSpecrm  	Primitiver  r  r@   rO   r^   rk   rz   r   r   r   r   r   def_implr  def_abstract_evalr-  primitive_jvpsrG  rP  rn  r  primitive_batchersr  r  error_checksr
  r  	bool_flagbool_envr-  r  r;  register_loweringr@  custom_str_eqn_compact_rulesrC  custom_typechecksr    jax._src.pallas.mosaicr+  r&  r0  rE   r0   r-   <module>r     s   4 3 " " " " " " 8 8 8 8 8 8 8 8 8 8 % % % % % % % %           



                               % % % % % %       & & & & & &       $ $ $ $ $ $ * * * * * * & & & & & & 4 4 4 4 4 4 / / / / / / : : : : : : 7 7 7 7 7 7 / / / / / /                         CZCZ!	'%!	)%) #"=11!% 	+ 	+ 	+< < <  0M M M M   
 
 
  ' ' '     B   ( ) ) )= = = =    : ; ; ;8# 8# 8# 8#r $9 -  6 6 6 6D$ $ $ $:Y$ Y$ Y$ Y$xX X X Xv .H M *0 0 0 0O O O O` (A m $.% .% .% .%`    *)FO7??	+	      -+ -+ -+ -+`  }&; < < <    # 	 %m 4  
 -H = ) "&+,+--1G G G G G G\SSSSSSSS   s   H HH