
    VpfPV                    V   d Z ddlmZ ddlZddlZddlmZmZ ddlZddl	Z	ddl
Z
ddlZddlZddlmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  	 ddl!m"Z" e"j#        Z#n# e$$ r i Z#Y nw xY w ej%        ddd          Z& ej%        ddd          Z' ej(        d          Z)e)*                     e	j+        ej,        e)                     de)_-         ej.        d           G d d                      Z/ ej.        d           G d d                      Z0e)j1        d             Z2d^d!Z3d_d,Z4 ej5        e)e4d-.           d`d3Z6dad8Z7dbdDZ8dcdLZ9dddQZ:dd-ddddddRdddSdTdedWZ;ddddddddddddRdddXdfdZZ<dgd[Z=dhd]Z>dS )izJAX bindings for Mosaic.    )annotationsN)CallableSequence)Any)core)config)sharding_impls)mlir)tpu)
xla_client)xla)ir)mhlo)PassManager)flagsmosaic_use_python_pipelineFzRun the initial Mosaic MLIR passes from Python, when as_tpu_kernel is called (for Pallas, this happens at JAX lowering time), instead of later within XLA.)namedefaulthelpjax_mosaic_allow_hlozAllow hlo dialects in Mosaictpu_custom_callT)frozenc                  6    e Zd ZU ded<   ded<   ded<   d	dZdS )
CostEstimateintflopstranscendentalsbytes_accessedreturnbytesc                ^    d| j          d| j         d| j         d                    d          S )Nz
{"flops": z, "transcendentals": z, "bytes_accessed": }ascii)r   r   r   encodeselfs    X/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/_src/tpu_custom_call.pyto_jsonzCostEstimate.to_jsonL   sI    	6dj 	6 	6t7K 	6 	6"1	6 	6 	6fWoo    Nr   r    )__name__
__module____qualname____annotations__r(    r)   r'   r   r   F   sK         ***     r)   r   c                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   d ZddZdS )CustomCallBackendConfigz;Represents an unserialized backend config for custom calls.r    lowered_module_asmboolhas_communication
int | Nonecollective_id
str | Nonedevice_typeCostEstimate | Nonecost_estimateneeds_hlo_passesneeds_layout_passesvmem_limit_bytes$dict[str, bool | int | float] | Noner   list[bool] | Noneallow_input_fusionserialization_formatinternal_scratch_in_bytesc                    dS )Nz"CustomCallBackendConfig(<omitted>)r/   r%   s    r'   __repr__z CustomCallBackendConfig.__repr__e   s    //r)   r   c                   t          j                    } |j        d            |j        t          j        | j                              |j        d           | j        rW |j        d            |j        t          | j                                                  	                    d                     | j
        E |j        d            |j        t          | j
                  	                    d                     | j        7 |j        d            |j        | j                                                   | j        rW |j        d            |j        t          | j                                                  	                    d                     | j        W |j        d	            |j        t          | j                                                  	                    d                     | j        rW |j        d
            |j        t          | j                                                  	                    d                     | j        y |j        d           t#          | j                  D ]D\  }} |j        |rdnd           |dz   t%          | j                  k    r |j        d           E |j        d           | j        E |j        d            |j        t          | j                  	                    d                      |j        d           | j        P |j        d            |j        d| j                                        z   dz   	                    d                     | j        U |j        d            |j        t          | j                  	                    d                      |j        d           | j         |j        d           t#          | j                                                  D ]\  }\  }} |j        d            |j        |	                    d                      |j        d           t3          |t4                    r% |j        d            |j        |rdnd           nt3          |t6                    rA |j        d            |j        t          |          	                    d                     nut3          |t8                    rA |j        d            |j        t          |          	                    d                     nt;          dt          |          z              |j        d           |dz   t%          | j                  k    r |j        d            |j        d            |j        d             |j        d            |j                    S )!z(Serializes the backend config into JSON.s!   {"custom_call_config": {"body": "   "s   , "has_communication": r#   Ns   , "collective_id": s   , "cost_estimate": s   , "needs_hlo_passes": s   , "serialization_format": s   , "needs_layout_passes": s   , "allow_input_fusion": [s   trues   false      ,   ]s   , "internal_scratch_in_bytes":    }s   , "device_type": z"DEVICE_TYPE_"sD   , "scoped_memory_configs": [{"memory_space":1, "offset": 0, "size": s   }]s   , "flag_configs": [s   {"flag_type": "s   ", value: {s   "boolean_value": s   "integer_value": s   "double_value": zinvalid flag value: s   }}s)   , "implicit_sharding": {"type": "MANUAL"})ioBytesIOwritebase64	b64encoder2   r4   strlowerr$   r6   r:   r(   r;   rA   r<   r@   	enumeratelenrB   r8   upperr=   r   items
isinstancer3   r   float
ValueErrorgetvalue)r&   r   ivalueflags        r'   r(   zCustomCallBackendConfig.to_jsonh   s    Z\\FFL5666FL!$"9::;;;FL Hfl-...fl3t-..4466==gFFGGG%fl)***fl3t)**11'::;;;%fl)***fl4%--//000 Gfl,---fl3t,--3355<<WEEFFF ,fl0111fl3t0117799@@IIJJJ Jfl/000fl3t/006688??HHIII*fl/000 788  (!U3WW8444q5C/0000
&,t


fl4%1fl5666fl3t566==gFFGGGFL#fl'(((flT-33555;
C
CG
L
L   (fl   fl3t,--44W==>>>fl5zfl)***'
(8(8(:(:;;  
!]dE'(((T[[))***^$$$eT"" 
	@
&,+
,
,
,
&,%5wwX
6
6
6
6s## 	@
&,+
,
,
,
&,s5zz((11
2
2
2
2u%% 	@
&,*
+
+
+
&,s5zz((11
2
2
2
21CJJ>??
?Uq5C
OO##
&,t


fl4 FL=>>>FL6?r)   Nr*   )r+   r,   r-   __doc__r.   rD   r(   r/   r)   r'   r1   r1   S   s         CC$$$$----''''""""''''0 0 0J J J J J Jr)   r1   c                    | S Nr/   )	out_avals___s      r'   _tpu_custom_call_abstract_evalrd      s    	r)   r   Sequence[Sequence[int]]c                    d | D             S )Nc           	     Z    g | ](}t          t          |j        d z
  dd                    )S )rG   )tuplerangendim).0as     r'   
<listcomp>z%_avals_to_layouts.<locals>.<listcomp>   s2    	:	:	:q%afqj"b))
*
*	:	:	:r)   r/   )avalss    r'   _avals_to_layoutsrp      s    	:	:E	:	:	::r)   ctxmlir.LoweringRuleContextr   kernel_namer7   ra   r   input_output_aliasestuple[tuple[int, int], ...]...c                   d |D             } j         j        }t          |t          j                  r2|j        t          |j        j                  k    rt          d          nKt          |t          j
                  r|j        dk    rt          d          n|j        rt          d          t          d  j        D                       rd }n fd j        D             }d }	|-t          t           j                            |                    }	t'          j        d|| |j                    dt          |          t-           j                  t-           j                  ||		
  
        }
|
j        S )
Nc                6    g | ]}t          j        |          S r/   )r
   aval_to_ir_type)rl   avals     r'   rn   z-_tpu_custom_call_lowering.<locals>.<listcomp>   s#    CCC$&t,,CCCr)   zXMosaic kernels cannot be automatically partitioned. Please wrap the call in a shard_map.rG   z4Replica lowering for Mosaic kernels not implemented.c              3  H   K   | ]}t          j        |j                  V  d S r`   )r   is_constant_shapeshape)rl   aval_outs     r'   	<genexpr>z,_tpu_custom_call_lowering.<locals>.<genexpr>   s/      NNH		/	/NNNNNNr)   c                h    g | ].}t          j        t          j        |j                            /S r/   )r
   shape_tensoreval_dynamic_shaper}   )rl   r~   rq   s     r'   rn   z-_tpu_custom_call_lowering.<locals>.<listcomp>   sC     ' ' ' 	$1#x~FFGG' ' 'r)   )rs   r   )	result_typesoperandsbackend_configapi_versionoperand_output_aliasesoperand_layoutsresult_layoutsresult_shapesextra_attributes)module_contextaxis_contextrW   r	   SPMDAxisContextmanual_axes	frozensetmesh
axis_namesNotImplementedErrorShardingContextnum_devicesr4   all	avals_outdictr   
StringAttrgetr
   custom_callr(   rp   avals_inresults)rq   r   rs   ra   rt   in_nodesr   r   r   r   calls   `          r'   _tpu_custom_call_loweringr      s    DCCCC,#0,n<== 9\->-I#J#JJJ"   K
 , >?? 	1$$"   %
  
>   	NNNNNNN 'MM' ' ' '' ' 'M  (9(9+(F(FGGG		#V^%%!"677'55&s}55!'

) 

) 

)$ 
r)   r   )platformmodule	ir.Modulehardware_generationr   c                
	   	 | j                                          n'# t          j        $ r}t	          d          |d}~ww xY w| j        5 }| j         j        5 }|                    t          j	                   |
                                 t          j        |           t          j        |           t          j                     t!          | d           t"          j        rYg d}t'          j        dd                    |           d          }|                    | j                    t!          | d           d	| d
g}t'          j        dd                    |           d          }|                    | j                    t!          | d           ddg}t'          j        dd                    |           d          }|                    | j                    t!          | d           	 t.          d         j        }n# t0          $ r d}Y nw xY w|x}rt3          |                    d                    }|dhk    r?t'          j        d          }|                    | j                    t!          | d           n<|r:|                    d           t	          dd                    |                     dg}t'          j        dd                    |           d          }|                    | j                    t!          | d           dg}t'          j        dd                    |           d          }|                    | j                    t!          | d           d}d}	|dk     rdnd}
d| d|	 d | d!|
 d"|
 d#||d$z   z   d
g}t'          j        dd                    |           d          }|                    | j                    t!          | d%           ddg}t'          j        dd                    |           d          }|                    | j                    t!          | d&           | cddd           cddd           S # 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )'aG  Runs MLIR passes lowering the given module to an MLIR module.

  Uses Python versions of canonicalize-mosaic,infer-memref-layout and
    apply-vector-layout.

  Args:
    module: The MLIR module to lower.
    hardware_generation: The TPU hardware generation to target.

  Returns:
    An MLIR module implementing the kernel.
  z+The compiled module fails MLIR verificationNoriginal)zhlo-legalize-to-arithmeticz!func.func(hlo-legalize-to-linalg)zfunc.func(linalg-vectorization)zbuiltin.module(,)zpost-hlo-conversionz6func.func(tpu-infer-memref-layout{hardware-generation=z})zpost-infer-memref-layoutcanonicalizecsez!post-infer-memref-layout-simplifyxla_mosaic_on_device_checksFboundsz1builtin.module(func.func(debug-assert-insertion))zpost-assert-insertionz)Unrecognized on-device check categories: z, z$func.func(tpu-canonicalize-mosaic{})zpost-canonicalize-mosaiczBfunc.func(tpu-infer-vector-layout{sublane-count=8 lane-count=128})zpost-infer-vector-layout            z1func.func(tpu-apply-vector-layout{ sublane-count=z lane-count=z hardware-generation=z mxu-contracting-size=z mxu-noncontracting-size=z max-sublanes-in-scratch=rG   zpost-apply-vector-layoutz!post-apply-vector-layout-simplify)	operationverifyr   	MLIRErrorrY   contextlocationappend_dialect_registryr
   upstream_dialectsload_all_available_dialectsr   register_dialectr   register_mhlo_dialectregister_mhlo_passes	dump_mlir_MOSAIC_ALLOW_HLOr\   r   parsejoinrunFLAGSKeyErrorsetsplitdiscard)r   r   erq   rb   pipelineon_device_checkscheckssl_cntl_cntmxu_sizes              r'   _lower_tpu_kernelr      s    K
	 K K K
B
C
CJK ~ ^f.7 ^1 6777##%%%s###fj!!! 	/  h
 "#JSXXh5G5G#J#J#JKKhll6#$$$-... 	[BUZZZH  !H388H3E3E!H!H!HIIHLL!"""f0111 	H  !H388H3E3E!H!H!HIIHLL!"""f9:::<=C    "!v 
6<<$$%%f	H:		$?
 
 	V%&&&&12222 
x   K		&8I8IKK
 
 	

 	/H  !H388H3E3E!H!H!HIIHLL!"""f0111 	MH  !H388H3E3E!H!H!HIIHLL!"""f0111FE)A--ss3H	 	 	.3	 	 3	 	 "*	 	 EM	 	 %+fqj$9		 	 	H  !H388H3E3E!H!H!HIIHLL!"""f0111 	H  !H388H3E3E!H!H!HIIHLL!"""f9:::}^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^sn    A ;A 
Q8FQ 1HQ HQ HH4Q Q8 Q$	$Q8'Q$	(Q88Q<?Q<backendrQ   r8   /tuple[ir.Module, tuple[bool, bool, bool, bool]]c                  t          j        | j                  \  }}t          j        }| }| j        5 }| j        j        5 }t          j        	                    | j        
                    dd                    } |rt          j        rt          j        |          d         }	|	j        }
|
                    d          st!          d|
 d          t#          |
t%          d                             }t'          | |          } d}d}|j        }d|_        	 t+          j	        d          }|                    | j                   ||_        n# ||_        w xY wt/          j                    }| j                            |d	           |                                }|||||ffcd d d            cd d d            S # 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )
NTbinaryenable_debug_infor   zTPU vzUnrecognized TPU device kind: zc. tpu_custom_call cannot be lowered on a machine without TPUs when mosaic_use_python_pipeline=True.Fz,builtin.module(mosaic-serde{serialize=true}))desired_version)r   private_has_communicationr   r   r\   r   r   r   Moduler   get_asm_MOSAIC_USE_PYTHON_PIPELINEjaxdevicesdevice_kind
startswithrY   r   rT   r   allow_unregistered_dialectsr   r   rL   rM   write_bytecoderZ   )r   r   r8   r4   has_custom_barrierr;   r<   rq   rb   some_tpur   r    prev_allow_unregistered_dialectsr   bytecode_bufferasms                   r'   _lower_mosaic_module_to_asmr   m  s    +.*G+ +'' ','~ f.7 1Y__   EE F  ":@ "W%%a(h(k##G,, 54[ 4 4 45 5 	5  CLL 9:: )<==f!'*'F$&*C#I"#QRRhll6#$$$(Hc%%(Hc%HHHHjllO
##OQ#GGG

"
"
$
$C	 5                                        sO   G CF;.EF;	EAF;"G;F?	?GF?	GGGr=   r5   r:   r9   r   r>   r@   r?   rB   r6   rA   c       	        n    t          | ||          \  }
\  }}}}t          |
||||||||	||||          S )N)r   r8   r=   r:   r   r@   rB   r6   r8   rA   r   r4   r;   r<   )r   _lowered_to_custom_call_config)r   r   r8   r=   r:   r   r@   rB   r6   rA   r2   r4   r   r;   r<   s                  r'   _lower_to_custom_call_configr     sx    $ "    
('!+ 9!/+)'-
 
 
 r)   r2   r    r   r3   r4   r;   r<   c                   |r|t          d          n|t          d          |5t          |t                    s t          dt          |           d          t	          | |	||||
||||||          }|S )Nz=collective_id has to be specified when using a custom barrierzKcollective_id has to be unspecified or None when not using a custom barrierz1vmem_limit_bytes must be an int: provided with a .)rY   rW   r   typer1   )r2   r=   r:   r   r@   rB   r6   rA   r   r4   r;   r<   r8   r   s                 r'   r   r     s       	
I     
	   !*5Es*K*K!
	&!""	& 	& 	&   # & 
-r)   r   ir.Valueout_typeSequence[ir.Value]c               Z    t          |||||||
|||
  
        }t          | g|R ||||	dS )N)	r   r=   r:   r   r@   rB   r6   r8   rA   r   rs   ra   rt   )r   r   )rq   r   r   r   rs   r:   r=   r   r@   rt   rB   r6   rA   r8   r   r   s                   r'   lower_module_to_custom_callr     sr    " ('!+ 9!/  & 
#	

 
 /
 
 
 r)   r/   rG   )r:   r   r8   rs   r=   r   r@   rt   rB   r6   rA   str | xla_client.ClientCallable[..., Any]c               Z    t          | |||||||
||
  
        }t          ||||	          S )z;Turns an MLIR Mosaic kernel into a JAX-compatible function.)	r   r8   r=   r:   r   r@   rB   r6   rA   rs   rt   )r   _as_jax_callable)r   r   r:   r   r8   rs   r=   r   r@   rt   rB   r6   rA   r   s                 r'   as_tpu_kernelr     s[    " ('!+ 9!/  & 
/	
 
 
 r)   )r6   r:   r;   r<   r8   r4   r   rs   r=   r   r@   rt   rA   rB   lowered_modulec                   | j                             dd          }t          ||
|||||||||||          }t          |||	|          S )NTr   r   r   )r   r   r   r   )r   r   r6   r:   r;   r<   r8   r4   r   rs   r=   r   r@   rt   rA   rB   r2   r   s                     r'   lowered_as_tpu_kernelr   ?  s    & &/77T 8   *'!+ 9!/+)'-  & 
/	
 
 
 r)   c                    dt          |t          j        j                  s|f}dt	          d |D                        fd}t          j        |          S )NFTc              3  T   K   | ]#}t          j        |j        |j                  V  $d S r`   )r   ShapedArrayr}   dtype)rl   tys     r'   r   z#_as_jax_callable.<locals>.<genexpr>w  s3      LLRD$RXrx88LLLLLLr)   c                 D    t          j        | d}r|d         n|S )Nr   r   )tpu_custom_call_pbind)argsresultr   rt   rs   ra   unpacks     r'   apply_kernelz&_as_jax_callable.<locals>.apply_kernelz  s>    #	1  F *6!99F*r)   )rW   collectionsabcIterableri   r   jit)r   r   rs   rt   r  ra   r  s   ` `` @@r'   r   r   l  s     &	Hko6	7	7 {HFLL8LLLLL)+ + + + + + + + + 
		r)   r   c                   	 t           d         j        }n# t          $ r Y dS w xY w|dk    rt          j                            dd          }|rt          j                            |t          j	                     d| d          }t          |d          5 }|                    t          |                      ddd           dS # 1 swxY w Y   dS dS dS )z,A helper function to dump mosaic mlir modulexla_mosaic_dump_toNspongeTEST_UNDECLARED_OUTPUTS_DIRz-mosaic-dump-z-py.txtw)r   r\   r   osenvironr   pathr   timetime_nsopenrN   rQ   )r   r   should_dumpoutdirr  fs         r'   r   r     s'   ,-3KK	   
FFHZ^^94@@F W\\&T\^^"O"O$"O"O"OPPdc?? a	F                 	  s    
###CC	C	)r   re   )rq   rr   r   r1   rs   r7   ra   r   rt   ru   r   rv   )r   r   r   r   r   r   )r   r   r   rQ   r8   r7   r   r   )r   r   r   rQ   r8   r7   r=   r5   r:   r9   r   r>   r@   r?   rB   r5   r6   r5   rA   r5   r   r1   )r2   r    r=   r5   r:   r9   r   r>   r@   r?   rB   r5   r6   r5   rA   r5   r   r3   r4   r3   r;   r3   r<   r3   r8   r7   ) rq   rr   r   r   r   r   r   r   r   rQ   rs   rQ   r:   r9   r=   r5   r   r>   r@   r?   rt   ru   rB   r5   r6   r5   rA   r5   r8   r7   r   r   )r   r   r   r   r:   r9   r   r   r8   r7   rs   r7   r=   r5   r   r>   r@   r?   rt   ru   rB   r5   r6   r5   rA   r5   r   r   )"r   r   r   r   r6   r5   r:   r9   r;   r3   r<   r3   r8   r7   r4   r3   r   r3   rs   r7   r=   r5   r   r>   r@   r?   rt   ru   rA   r5   rB   r5   r   r   )
r   r1   r   r   rs   r7   rt   ru   r   r   )r   r   r   rQ   )?r^   
__future__r   rO   collections.abcr  r   r   dataclasses	functoolsrL   r  r  typingr   r   r   jax._srcr   r	   jax._src.interpretersr
   jax._src.libr   r   jax.interpretersr   jaxlib.mlirr   jaxlib.mlir.dialectsr   jaxlib.mlir.passmanagerr   abslr   r   ImportError
bool_stater   r   	Primitiver  def_implpartialapply_primitivemultiple_results	dataclassr   r1   def_abstract_evalrd   rp   r   register_loweringr   r   r   r   r   r   r   r   r   r/   r)   r'   <module>r0     sZ     # " " " " "      . . . . . . . .         				 				        



             # # # # # # & & & & & &       # # # # # #                   % % % % % % / / / / / /
+%%   
%%% 0f/	%		    &F%		'    #DN#455    Ic)+<==? ? ?%)  " d###	 	 	 	 	 	 	 $#	 d###^ ^ ^ ^ ^ ^ ^ $#^B $  %$; ; ; ;2 2 2 2j  (*C %' ' ' 's s s sl+ + + +\% % % %P- - - -`$ $ $ $V *.',""#'26,08:,0 $'(" " " " " "R !%)-" %"#$"#'26,08:'+,0#* * * * * *Z   6     s   4B BB