
    VpfCh                       d dl mZ d dlmZ d dlZd dlZd dlZd dlZd dlm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZ ej        d ej        dd          d          Z ej         d ej!        dd          d          Z"dZ# ej$        e%           ej&        Z& ej'        e(          Z)dUdZ*d Z+dVd#Z,dWd(Z-dWd)Z.	 	 	 	 	 	 	 	 	 	 dXdYd:Z/ej0        dZd?            Z1	 d[d\dFZ2d]dIZ3i e3_4        d^dKZ5de5_6        d^dLZ7i e7_8        d_dMZ9d`dNZ:dadPZ;dbdTZ<dS )c    )annotations)SequenceN)Any)compilation_cache)config)distributed)lib)
monitoring)profiler)traceback_util)mlir)
xla_client)xla_extension_version)irjax_disable_most_optimizationsJAX_DISABLE_MOST_OPTIMIZATIONSFzTry not to do much optimization work. This can be useful if the cost of optimization is greater than that of running a less-optimized program.%jax_compiler_detailed_logging_min_ops%JAX_COMPILER_DETAILED_LOGGING_MIN_OPS
   zHow big should a module be in MLIR operations before JAX enables detailed compiler logging? The intent of this flag is to suppress detailed logging for small/uninteresting computations.)helpbackend	xc.Clientreturnintc                    ~ dS )Nr    )r   s    Q/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/_src/compiler.pyget_latest_profile_versionr   G   s
    	    c                    |dz  }|dk     r|S | j         D ]*}|D ]%}|D ] }t          ||          }|dk     r|c c c S !&+|S )N   r   )regions_walk_operations)opkregionblockchild_ops        r   r$   r$   L   s    q&!UUH
  f    (Xq))q55((((((( 
 
(r    module	ir.Moduleboolc                L    t           j        }t          | j        |          dk     S )zBReturns 'true' if detailed logging should be enabled for 'module'.r   )"_COMPILER_DETAILED_LOGGING_MIN_OPSvaluer$   	operation)r*   bounds     r   use_detailed_loggingr2   Y   s"    
,
2%	&*E	2	2Q	66r    module_namestr	cache_keyNonec                    t           j        j        rt          j        nt          j        }t                              |d| |           d S )Nz5Persistent compilation cache hit for '%s' with key %r)r   log_compilesr/   loggingWARNINGDEBUGloggerlog)r3   r5   hit_log_prioritys      r   log_persistent_cache_hitr?   _   sK    )/)<)B )goo"= **V)% % % % %r    c                    t           j        j        rt          j                    rt
          j        nt
          j        }t          	                    |d| |           d S )Nz6PERSISTENT COMPILATION CACHE MISS for '%s' with key %r)
r   explain_cache_missesr/   r   is_persistent_cache_enabledr9   r:   r;   r<   r=   )r3   r5   miss_log_prioritys      r   log_persistent_cache_missrD   f   s`    !6<+-IKK+w %] 
 	** X)% % % % %r    Tnum_replicasnum_partitionsuse_spmd_partitioninguse_shardy_partitioneruse_auto_spmd_partitioning!auto_spmd_partitioning_mesh_shapelist[int] | Noneauto_spmd_partitioning_mesh_idsenv_options_overridesdict[str, str] | Nonefdo_profilebytes | Nonedetailed_loggingxc.Client | Nonexc.CompileOptionsc                   t          j                    }| |_        ||_        |j        }||_        ||_        |	|	|_        |r|pg |_        |pg |_	        |Jt                              d| ||           t          j        |          }|j        dk    r|dk    r|dddf         }| |j        d         k    r%d}t!          |                    ||                     ||j        d         k    r%d}t!          |                    ||                    |j        t&          k    r& t          j        d t*          g          |          }t           j                            |          }|                                | k    sJ |                                |k    sJ ||_        |&t7          |                                          |_        |j        j        }t>          j         t>          j         |_!        tD          j#        rd|_$        d	|_%        d
|_&        tN          dk    r||_(        tR          j*        j#        }|dk    r#||_+        t                              d|           nutX          |_+        |t[          j.        d           nRt_          |          }|dk    r#||_+        t                              d|           nt          0                    d           |
|_1        |S )a  Returns the compile options to use, as derived from flag values.

  Args:
    num_replicas: Number of replicas for which to compile.
    num_partitions: Number of partitions for which to compile.
    device_assignment: Optional ndarray of jax devices indicating the assignment
      of logical replicas to physical devices (default inherited from
      xla_client.CompileOptions). Must be consistent with `num_replicas` and
      `num_partitions`.
    use_spmd_partitioning: boolean indicating whether to enable SPMD or MPMD
      partitioning in XLA.
    use_shardy_partitioner: boolean indicating whether to use the Shardy
      partitioner in XLA. Shardy is a new open sourced propagation framework for
      MLIR. Currently Shardy is experimental in JAX. See
      www.github.com/openxla/shardy.
    use_auto_spmd_partitioning: boolean indicating whether to automatically
      generate XLA shardings for SPMD partitioner.
    auto_spmd_partitioning_mesh_shape: device mesh shape used to create
      auto_spmd_partitioning search space.
    auto_spmd_partitioning_mesh_ids: device ids used to create
      auto_spmd_partitioning search space.
    env_options_overrides: dict of additional options parsed by the compiler
    fdo_profile: Optional profile for feedback-directed optimization passed to
      XLA.
    detailed_logging: Is this an "interesting" computation about which XLA would
      be wise to log compilation information?
    backend: the client, if available.
  NzKget_compile_options: num_replicas=%s num_partitions=%s device_assignment=%sr"   r   z8device_assignment does not match num_replicas: {} vs {}.z:device_assignment does not match num_partitions: {} vs {}.c                    | j         S Nid)ds    r   <lambda>z%get_compile_options.<locals>.<lambda>   s     r    )otypesTFi  zSget_compile_options XLA-AutoFDO profile: using JAX XLA profile version %d from flagzGget_compile_options: no backend supplied; disabling XLA-AutoFDO profilezMget_compile_options XLA-AutoFDO profile: using XLA-AutoFDO profile version %dzaget_compile_options XLA-AutoFDO profile: XLA-AutoFDO profile version is 0; this should not happen)2xcCompileOptionsrE   rF   executable_build_optionsrG   rI   rO   rJ   rL   r<   debugnparrayndimshape
ValueErrorformatdtypeobject	vectorizer   DeviceAssignmentcreatereplica_countcomputation_countdevice_assignmentlistitemsenv_option_overridesdebug_optionsr	   	cuda_pathxla_gpu_cuda_data_dir_DISABLE_MOST_OPTIMIZATIONSr/   xla_backend_optimization_level!xla_llvm_disable_expensive_passesxla_test_all_input_layoutsr   xla_use_shardyr   jax_xla_profile_versionprofile_version_NO_PROFILE_DONT_RETRIEVEr9   infor   errorxla_detailed_logging)rE   rF   rm   rG   rH   rI   rJ   rL   rM   rO   rQ   r   compile_optionsbuild_optionsmsgrq   ry   fdo_profile_versions                     r   get_compile_optionsr   p   s2   T %''/!-/#1/ !:-(=-%-G-* +M Z6W6][]M34S4YWYM1"
LLUn&79 9 9 !233 	!##.A*=*=+AAAtG4(.q111Fcszz"3\BBCCC*0333Hcszz"3^DDEEE&((D",~~seDDD
 +223DEE**,,<<<<..00NBBBB(9O%&+/0E0K0K0M0M+N+NO(!:H-]*--M' & 534M06:M3/4M, c!!#9M  #:@q  &=O#
LL >(* * * * '@O#l 3 4 4 4 4 7w??		!	!*=' <(	* 	* 	* 	* 	 P 	Q 	Q 	Q (8-$	r    optionshost_callbacksSequence[Any]xc.LoadedExecutablec                    t          | dd          rt          j        |          }n|}|r|                     |||          S |                     ||          S )Nneeds_str_irT)r   r   )r   )getattrr   module_to_bytecodecompile)r   r*   r   r   built_cs        r   backend_compiler      ss     Wnd++ %f--GGG  :??7G*8  : : :
 
'	:	::r    computationdevices
np.ndarrayr   pgle_profilerprofiler.PGLEProfiler | Nonec           
     h   |j         j        d         }t          j        |          j        }t          j        |d          x}rt          j        d|           t          j
        |           }	|	st          | |||          S t          j        d           	 t          j        ||||           }
nN# t          j        j        $ r7}t$                              d|           t          | |||          cY d }~S d }~ww xY wt)          d |                                D                       dk    }t-          |                                d 	          j        }t0          j        j        rt0          j        j        d
k    r|j        j        }d|j        _        t          j        ||||           }||j        _        t;          | |          r|}
||                                 nW|Ut)          |          d
k    rB|}
|r>t>          j         j!        -tE          |||| t>          j         j!        |          |j        _        tG          j$                    }tK          ||
||           \  }}tG          j$                    |z
  }|W|J tM          ||
           t          j        d           t          j'        d||z
             t          j'        d|           |S t0          j(        j        r[|rYt>          j         j!        Ht)          |          d
k    r5tS          ||
           tU          | |||t>          j         j!        ||
|          S t0          j+        j        rH|rFt>          j         j!        5tS          ||
           tY          | |||t>          j         j!        ||
|          S tS          ||
           t[          | |||||
          S )Nsym_namer   zDumped the module to %s.z1/jax/compilation_cache/compile_requests_use_cachezKcompile_or_get_cached: unable to generate cache key, skipping the cache: %sc                    h | ]	}|j         
S r   )process_index).0devices     r   	<setcomp>z(compile_or_get_cached.<locals>.<setcomp>-  s    
@
@
@F6
@
@
@r    r"   c                    | j         S rV   rW   )r   s    r   rZ   z'compile_or_get_cached.<locals>.<lambda>/  s    	 r    )keyr   s   pgle profiledz!/jax/compilation_cache/cache_hitsz-/jax/compilation_cache/compile_time_saved_secz//jax/compilation_cache/cache_retrieval_time_sec).r0   
attributesr   
StringAttrr/   r   dump_module_to_filer9   r|   r   is_cache_usedr   r
   record_eventget_cache_keyr\   _xlaXlaRuntimeErrorr<   r}   lenflattenminr   r   enable_pglepgle_profiling_runsr^   rO   _is_executable_in_cachedisabler   global_stateclient_share_fdo_profilestime	monotonic_cache_readr?   record_event_duration_secsshare_binary_between_hostsrD   _compile_and_share_module#share_autotune_config_between_hosts"_compile_and_write_autotune_config_compile_and_write_cache)r   r   r   r   r   r   r   r3   	dumped_touse_compilation_cacher5   exis_multi_processmin_device_process_idrO   pgle_profiled_module_keycache_retrieval_startretrieved_executableretrieved_compile_timecache_retrieval_times                       r   compile_or_get_cachedr     sK    "-j9(h''-+*;	BBBY 8L+Y777+9'BB	 +7K)+ + + MNNN+!/Wow8 8II		  + + +
LL *+-/ / /7K)+ + + + + + + ++ 

@
@goo.?.?
@
@
@AAAE  
'//

!9!9:::H $  


$
*Q
.
.!:FK;KO,80>Wow 8  8;FO,8w(@AA 
*i		"		 S%5%5%9%9*i	 
k6=I?R
w

"
)
@
 @
0< .**1<9ow28 28..)),AA%!---[)444?@@@)7!557 7 7 )9;OQ Q Q  '-,
, 
"
)
5 n


"
"k9555$ '	 	 	 06
 
"
)
5k9555- '	 	 	 k9555#  s   B- -C8,C3-C83C8global_client*lib.xla_extension.DistributedRuntimeClientc                f   | j         j        d         }t          j        |          j        }|j        j        }|t          |          dk    r|S d|j        _        t          j	        | |||          dz   }	|	t          j        v rt          j        |	         S t          j        j        }
t          j        j        |k    r4t"                              d|||           |                    |	|           n3t"                              d|||           |                    |	|
          }|t          j        |	<   |S )Nr   r   r    	_fdo_syncz3Sharing FDO profile: %s. For module %s. Process %d.zHWaiting for FDO profile: %s. For module %s. Should be set by process %d.)r0   r   r   r   r/   r^   rO   r   r   r   r   modules_profilesr   %share_binary_between_hosts_timeout_msr   r   
process_idr<   r_   key_value_set_bytesblocking_key_value_get_bytes)r   r   r   r   r   min_process_idr   r3   rO   profile_keyshare_timeouts              r   r   r     sO    "-j9(h''-+8D+C,,119</*6%
w  	  '888/<<>D-(N::
LL=	   %%k;????
LLR	    <<] K 7B&{3	r    first_process_idc                L   t           j        j        }|j        j        }	t
          j        t          j                    t
          _        t          j
                            t
          j        |          }
t          j
                            |
          r7t                              d||
           |
|	_        t!          | |||||          S t"          j        j        |k    r|
|	_        t                              d||           t!          | |||||          }t                              d||
           t+          |
d          5 }|                                }d d d            n# 1 swxY w Y   t/          j        |          }|                    ||           t                              d|t5          |          |           nt                              d|||           |                    ||          }t                              d|t5          |                     t/          j        |          }t+          |
d          5 }|                    |           d d d            n# 1 swxY w Y   t                              d	||
           |
|	_        t!          | |||||          }|S )
Nz;Compiling module: %s. Use existing autotune config file: %sz8Process %d compiling and dumping autotune for module: %sz+Writing autotune config for module %s to %srbzAAutotune config for module %s with size %d shared by cache_key %szSCompiling module %s, waiting for config to be shared by cache_key %sfrom process %dz1Received autotune config for module %s of size %dwbz2Compiling module %s, using autotune config from %s)r   r   r/   r^   rq   r   autotune_configs_dirtempfilemkdtempospathjoinexistsr<   r_   "xla_gpu_load_autotune_results_fromr   r   r   r    xla_gpu_dump_autotune_results_toopenreadr   compress_executabler   r   r   decompress_executablewrite)r   r   r   r   r   r3   r5   r   r   rq   autotune_tmp_file
executablefautotune_configs                 r   r   r     sF    >D-!:H-'<D>F>N>P>P&;gll(=y  W^^%&& 
LLE  
 8IM4#   (,<<<5FM2
LLK!;0 0 0) J LL5  
 
	&	& !!o! ! ! ! ! ! ! ! ! ! ! ! ! ! ! (;OLLO%%iAAA
LLKO	    LL	   $@@= O LL;O  
 (=oNNO		&	& !ggo               LL<  
 8IM4) J 
s$   ?E  E$'E$ I""I&)I&c                H   t           j        j        }|t          j        v rt          j        |         S t
          j        j        |k    rpt          	                    d||           t          | |||||          }	|                     |	          }
t          j        |
          }
|                    ||
           n\t          	                    d||           |                    ||          }
t          j        |
          }
|                     |
|          }	|	t          j        |<   |	S )Nz+Process %d compiling and sharing module: %sz&Waiting for module: %s from process %d)r   r   r/   r   modules_cacher   r   r   r<   r_   r   serialize_executabler   r   r   r   r   deserialize_executable)r   r   r   r   r   r3   r5   r   r   r   serialized_executables              r   r   r   9  sI    >D-+999$29==(,<<<
LL>!;0 0 0) J $88DD-A  %%i1FGGGG
LL9;!# # #)FF=  .C  // J 8B))4	r    c                    t          j                    }t          | |||          }t          j                    |z
  }t          |||| ||           |S rV   )r   r   r   _cache_write)	r   r   r   r   r3   r5   
start_timer   compile_times	            r   r   r   j  sd     ~*{O^ * !!J.,{GZ   
r    c           	         	 t          j        | |          S # t          $ rL}t          j        j        r t          j        d| dt          |          j	         d|            Y d}~dS d}~ww xY w)z<Checks if executable is presented in cache on a given key
  6Error reading persistent compilation cache entry for '': : NF)
r   is_executable_in_cache	Exceptionr   raise_persistent_cache_errorsr/   warningswarntype__name__)r   r5   r   s      r   r   r   |  s    3GYGGG	   +1 M	4	4 	4r((+	4 	4/1	4 	45 5 5 55555s    
A-AA((A--tuple[xc.LoadedExecutable | None, int | None]c           	         	 t          j        |||          S # t          $ rL}t          j        j        r t          j        d|  dt          |          j	         d|            Y d}~dS d}~ww xY w)ziLooks up the `computation` and it's compilation time in the persistent
  compilation cache repository.
  r   r   r   N)NN)
r   get_executable_and_timer   r   r   r/   r   r   r   r   )r3   r5   r   r   r   s        r   r   r     s    	4?G- - -	   +1 M	6	6 	6 HH-	6 	613	6 	67 7 7 :::::s    
A.AA))A.compile_time_secsfloatr   c           	        t           j        j        rt          j                    rt
          j        nt
          j        }t          j	        j
        dk    rt                              |d           dS |rt                              |d|           dS t           j        j        }||k     r t                              |d|||           dS t                              d|||           	 t          j        | |||t!          |                     dS # t"          $ rL}t           j        j        r t'          j        d| dt+          |          j         d	|            Y d}~dS d}~ww xY w)
zqWrites the `serialized_computation` and its compilation time to the
  persistent compilation cache repository.
  r   z8Not writing persistent cache entry since process_id != 0NztNot writing persistent cache entry for '%s' because it uses host callbacks (e.g. from jax.debug.print or breakpoint)z]Not writing persistent cache entry for '%s' because it took < %.2f seconds to compile (%.2fs)z2'%s' took at least %.2f seconds to compile (%.2fs)z6Error writing persistent compilation cache entry for 'r   r   )r   rA   r/   r   rB   r9   r:   r;   r   r   r   r<   r=   &persistent_cache_min_compile_time_secsr_   put_executable_and_timer   r   r   r   r   r   r   )	r5   r   r3   r   r   r   log_prioritymin_compile_timer   s	            r   r   r     s    06%'CEE%'// }  (A--
JJ|IK K K
F 
JJ	>?JL L L FBH)))
JJ	%&13C	  
 F
LL<%'8: : :7-;
GS9J5K5KM M M M M	 7 7 7+1 M	6	6 	6 HH-	6 	613	6 	67 7 7 7 7 7 7 7 77s   $%D 
E!AEE!)r   r   r   r   )r*   r+   r   r,   )r3   r4   r5   r4   r   r6   )
NTFFNNNNTN)rE   r   rF   r   rG   r,   rH   r,   rI   r,   rJ   rK   rL   rK   rM   rN   rO   rP   rQ   r,   r   rR   r   rS   )
r   r   r*   r+   r   rS   r   r   r   r   rV   )r   r   r   r+   r   r   r   rS   r   r   r   r   r   r   )r   r+   r   r   r   rS   r   r   r   r   r   rP   )r   r   r   r+   r   rS   r   r   r   r   r3   r4   r5   r4   r   r   r   r   )r   r   r   r+   r   rS   r   r   r3   r4   r5   r4   r   r   )r   r,   )
r3   r4   r5   r4   r   rS   r   r   r   r   )r5   r4   r   r   r3   r4   r   r   r   r   r   r   r   r6   )=
__future__r   collections.abcr   r9   r   r   r   typingr   r   jax._srcr   r   r   r	   r
   r   r   jax._src.interpretersr   jax._src.libr   r\   r   jax._src.lib.mlirr   numpyr`   	bool_flagbool_envrt   int_flagint_envr.   r{   register_exclusion__file__r]   	getLoggerr   r<   r   r$   r2   r?   rD   r   annotate_functionr   r   r   r   r   r   r   r   r   r   r   r   r   r    r   <module>r     s  " # " " " " " $ $ $ $ $ $  				          & & & & & & % % % % % %                               # # # # # # & & & & & & ) ) ) ) ) ) . . . . . .                 /f.$FO4e<<MN N  &5V_+FN:B??	A	& & & "   ! !( + + +"		8	$	$   

 
 
7 7 7 7% % % %% % % % "&#(',:>8<37 $! $    D 
; ; ; ;@ 37I I I I IZ- - - -` (*  $
d d d dL ;? " 7- - - -^ +-  '   $      &/7 /7 /7 /7 /7 /7r    