
    Vpf(                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd	d
lT 	 d dlmZ  e	j        dej                                        d           n# e$ r Y nw xY wej                            d          Zde_        ej         d             Z! ej"        ej#        ed          d             Z$d Z%d Z& G d d          Z' G d d          Z(dS )    N)mlir)
xla_client)ir)arith)gpu)memref)scf   )*)
mosaic_gpumosaic_gpu_record_eventCUDA)platformrecord_eventTc                     ~ |S N )eventargss     d/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/experimental/mosaic/gpu/profiler.py_record_event_abstract_evalr   6   s
    	+    cudac                   t          j        |t           j                  j                            dd          }t          j        dd | j        D             ||d t          t          |                    D                       }|j
        S )N   little)	byteorderr   c                 6    g | ]}t          j        |          S r   )r   aval_to_ir_type).0avals     r   
<listcomp>z/_record_event_lowering_rule.<locals>.<listcomp>B   s#    III4D(..IIIr   c                     i | ]}||S r   r   )r    is     r   
<dictcomp>z/_record_event_lowering_rule.<locals>.<dictcomp>E   s    ===qa===r   )result_typesoperandsbackend_configoperand_output_aliases)ctypescastc_void_pvalueto_bytesr   custom_call	avals_outrangelenresults)ctxr   r   	ptr_bytesops        r   _record_event_lowering_ruler7   ;   s    k%117@@8 A  ) II3=III==E#d)),<,<===  " 
r   c                     t           j                            |           \  }}t           j                            |t	          j        |d|i          S )Nr   )jaxtreeflatten	unflattenrecord_event_pbind)r   r   	flat_argstreedefs       r   _record_eventrA   I   sK    x''--)W			~"I;U;;
 
 r   c                 h    t           j                                        t           j                                        	 t          j         fd            }t          j         ||i |           t          j         ||i |          }t           j                                      }t           j                                       t           j                                       nC# t           j                                       t           j                                       w xY w||fS )Nc                      t           j                            | |f          \  }}t          |          }t           j                            ||          \  } }t           | i |          S r   )r9   r:   r;   rA   r<   )r   kwargsr?   r@   	end_eventfstart_events       r   runzmeasure.<locals>.runU   sj    8++T6N;;i	;77iX'';;ldF11d-f--y999r   )mosaic_gpu_lib_mosaic_gpu_ext_gpu_event_creater9   jitblock_until_ready_gpu_event_elapsed_gpu_event_destroy)rF   r   rD   rH   r3   elapsedrE   rG   s   `     @@r   measurerQ   O   s5   .@@BB+,>>@@)AW: : : : : : W: ##t.v..///#CC$8$8$899G,??Y G "55kBBB"55i@@@@ "55kBBB"55i@@@@	'	s   A-C- -A D-c                   J   e Zd ZdZdZdefdZdeedf         deedf         defd	Zdeedf         deedf         de	j
        fd
Zdeedf         deedf         de	j
        fdZdeedf         fdZdeedf         fdZdedefdZdeedf         deedf         fdZdS )ProfilerSpecr   l        entries_per_warpgroupc                 "    || _         i | _        d S r   )rT   interned_names)selfrT   s     r   __init__zProfilerSpec.__init__k   s    !6DDr   grid.blockreturnc                     t          j        |          t          z  rt          d          t          j        |          t          j        |          z  t          z  S )Nz.Block size is not a multiple of warpgroup size)mathprodWARPGROUP_SIZE
ValueErrorrW   rY   rZ   s      r   _num_warpgroupszProfilerSpec._num_warpgroupso   sM     y.( IGHHH9T??TYu---??r   c                     t           j                            |                     ||          | j        z  ft           j                            d                    S )N    )r   
MemRefTypegetrb   rT   IntegerTypeget_signlessra   s      r   mlir_buffer_typezProfilerSpec.mlir_buffer_typev   sO     =			dE	*	*T-G	GI
##B''  r   c                 z    t          j        |                     ||          | j        z  ft          j                  S r   )r9   ShapeDtypeStructrb   rT   jnpuint32ra   s      r   jax_buffer_typezProfilerSpec.jax_buffer_type~   s<     			dE	*	*T-G	GI
  r   c                 \    |                      d|          }t          || j        z            S )Nr   )rb   intrT   )rW   rZ   num_warpgroupss      r   smem_i32_elementszProfilerSpec.smem_i32_elements   s-    ))"e44N~ ::;;;r   c                 6    d}|                      |          |z  S )N   )rr   )rW   rZ   bytes_per_entrys      r   
smem_byteszProfilerSpec.smem_bytes   s     O!!%((?::r   namec                     | j                             |d           x}|S t          | j                   x}| j         |<   || j        z  rt	          d          |S )NzAllocated too many names)rV   rf   r2   EXITRuntimeError)rW   rw   name_ids      r   intern_namezProfilerSpec.intern_name   se    &**4666Cn*-d.A*B*BBGd!$' 53444Nr   c           
      n   t          j        |          }t          j        |          }|                     d|          }|                    ||| j                  }|dd df                             t           j                  }|d         dz  |d         z   }||	                                z  }|d         }	t          j
        |	| j        dz
  k              rt          d          |dd	d f         }
d
 | j                                        D             }g }t          j        ||          D ]\  }}|	||f         d	z
  }d }|dz  dk    s
J |            |||f         }g }t          d|d          D ]}|
|||f         }|
|||dz   f         }||}||z  }||dz  z  }|dk     r nz|}d}|t           j        z  r|t           j        z  }d}||         }|                    ||rdndt'          ||z             dz  d|z   d|z   d           |                    |           t+          j        d|d|          S )Nr   .   ).r   rd   ).r
   ).r~   z*Insufficient space to capture a full trace   c                     i | ]\  }}||	S r   r   )r    kvs      r   r%   z%ProfilerSpec.dump.<locals>.<dictcomp>   s    ===A1===r   r   r
      TFBEg     @@)rw   phtspidtidns)displayTimeUnittraceEvents)npasarrayr]   r^   rb   reshaperT   astypeint64minanyrz   rV   itemsndindexr1   rS   ry   appendfloatextendjsondump)rW   bufferrF   rY   rZ   
num_blockswarpgroups_per_blockentriesstart_timesentries_usedtracesuninternevents	block_idxwg_idxvalid_entrieslocal_clock_offset
start_timeblock_eventsr$   tagtimer{   beginrw   s                            r   r   zProfilerSpec.dump   s   ZF4J//E::nn($*D G #rr'"))"(33Kv&",F0CCK;??$$$K6?L	vlT7!;;<< GEFFFS!""WF==!4!:!:!<!<===HFZ
4HII $ $	6"9f#459mQ!###]###y&01jlQq)) $ $!Y)*iQ./%#
""A!88
%\&& 	l//'% '##C
T)**S0y=v:
 
 	 	 	 	 	l###9fEEqIIIr   N)__name__
__module____qualname__ENTERry   rp   rX   tuplerb   r   Typeri   rn   rr   rv   strr|   r   r   r   r   rS   rS   g   s       
%	$C    @S/@*/S/@
@ @ @ @S/*/S/	w   S/*/S/	w   <U38_ < < < <;eCHo ; ; ; ;c c    .J%S/ .J%S/ .J .J .J .J .J .Jr   rS   c                       e Zd Zdedej        dej        fdZej        de	fd            Z
deedf         d	eedf         fd
ZdS )OnDeviceProfilerspecsmem_buffergmem_bufferc                 Z   || _         t          j                            d          }t          j                                        }|j        | _        t          d          }t          |t          t          j        |t          j        |t          | j        |                              | j                            | _        || _        t#          j        t          j                            d|          g g           | _        t#          j        t          d|          | j        g            d S )Nrd   Fsyncr   r   )r   r   rg   rh   	IndexTyperf   rT   entries_per_wgwarpgroup_idxmemref_slicedsr   
index_castmulicr   r   r   allocare   offsetstore)rW   r   r   r   i32indexr   s          r   rX   zOnDeviceProfiler.__init__   s    DI
.
%
%b
)
)CLE4D&&&F#
uz&!D,?*E*EFF  		
 	
 D #D- 1 1"c : :BCCDK
L1cDK,,,,,r   rw   c              #   ,   K   t           j                            d          t           j                                         j                            |           fd} |t          j                   d V   |t          j	                   d S )Nrd   c                     t          j        t          j        j        g                     }t          j        t          | z            j        |g           t          j        t                      j        t          j	        |t          d|j
                            g           t          j        t          j        t          j	        |t          d|j
                                      j        g            d S )Nr
   r~   )r   r   r   loadr   r   r   r   clockadditype)modifiercurr   r   r{   rW   s     r   r   z&OnDeviceProfiler.record.<locals>.store   s    UFKR$@$@AAc l1X'--t/?#GGGl
''4#eja38nn&E&E%F   l

3
3!SX ? ?
@
@
+
    r   )
r   rg   rh   r   rf   r   r|   rS   r   ry   )rW   rw   r   r   r   r{   s   `  @@@r   recordzOnDeviceProfiler.record   s      
.
%
%b
)
)CLEi##D))G         
E,
	EEE	E,
r   rY   .rZ   c                    t           j                                        }t           j                            d          }t          j                     t          d|          }t
          j        D ]N}t          j
        t          j        |t          j        |                    t          j        |                    }Ot          d          }t          j        |          t"          z  }t          j
        t          j        |t          ||                    t          j        ||                    }	t          j        |	t          | j        |                    }
t)          j        | j        |
g| j        gdgt           j                            d| j         d                    }t          j        t5                      t          d	|                    }t7          j        t          j        t          j        j        |t          d|                              }t          j         |j!                  5  t)          j"        t          d|          |t          d|          g           t)          j"        t          d|          |t          d|          g           t)          j"        t          j
        t)          j#        | j$        g           t          d
|                    |t          d|          g           t7          j%        t          d|          t          | j        d
z
  |          t          d|                    }t          j         |j&                  5  t)          j#        | j'        |j(        g          }t)          j"        ||t          j
        |j(        t          d
|                    g           t7          j)        g            d d d            n# 1 swxY w Y   t7          j)        g            d d d            d S # 1 swxY w Y   d S )Nrd   r   Fr   r
   zmemref<zxi32, strided<[1], offset: ?>>)result_type   r   r~   )*r   r   rf   rg   rh   r   barrierr   	Dimensionr   r   r   grid_dimblock_idr   r]   r^   r_   r   r   r   subviewr   r   parseremui
thread_idxr	   IfOpcmpiCmpIPredicateeqInsertionPoint
then_blockr   r   r   ForOpbodyr   induction_variableyield_)rW   rY   rZ   r   r   r   dimr   wg_per_blockglobal_wg_idxstart_offsetwg_gmem_bufferthread_in_wgif_firstfor_opxs                   r   finalizezOnDeviceProfiler.finalize   s   LE
.
%
%b
)
)CKMMM!UI}  *
*YS 1 1
2
2CL4E4E ii &&&F9U##~5LJ
9ae4455'' M :mQt/BE-J-JKKL^<.4+>*?!GMMId)III
 
  N ;z||QsC[[99Lx
5&)<1cCC H 
	8.	/	/   l1Q99nqE{{m<<<l1Q99nqE{{m<<<l
*V[b111Q99
=
=
Q;;-   y
Au++
D!#U
+
+
Au++ f
 V[))  K(6+D*EFFZ11Q;;??@	
 	
 	

 	
2               
jnnn3                 s8   2D&PA2O
PO	PO	PP	PN)r   r   r   rS   r   ValuerX   
contextlibcontextmanagerr   r   r   rp   r   r   r   r   r   r      s        -< -bh -RX - - - -*     055c? 55c? 5 5 5 5 5 5r   r   ))r   r*   	functoolsr   r]   r9   jax._src.interpretersr   jax._src.libr   	jax.numpynumpyrl   jaxlib.mlirr   jaxlib.mlir.dialectsr   r   r   r	   r   utilsr   rI   register_custom_call_targetrJ   _record_event_capsuleImportErrorcore	Primitiver=   multiple_resultsdef_abstract_evalr   partialregister_loweringr7   rA   rQ   rS   r   r   r   r   <module>r     s`               



 & & & & & & # # # # # #             & & & & & & $ $ $ $ $ $ ' ' ' ' ' ' $ $ $ $ $ $        	777777(*($::<<    
    $ ##N33"& !  "! 4)>FKKK  LK    0]J ]J ]J ]J ]J ]J ]J ]J@e e e e e e e e e es   0B B
B