
    Vpf
                         d Z ddlZddlZddlmZ ddlmZ de	fdZ
 ej        ej        g d          dd	d
d
ddej        de	de	dededej        fd            ZdS )zPallas softmax kernel.    N)pallas	block_rowc                   | j         d         }t          j        |          |k     }t          j        | t          j        d|          f|t          d                     }t          j        |d          }t          j        ||z
  	                    t          j
                            }t          j        |d          }t          j        |t          j        d|          f||z  	                    |j                  |           d S )Nr   inf)maskother)axis)r   )shapejnparangeplloaddslicefloatmaxexpastypefloat32sumstoredtype)		input_ref	probs_refr   row_lenr   rowrow_max	numeratordenominators	            g/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/experimental/pallas/ops/gpu/softmax.py_vmappable_softmax_kernelr!      s     OB'	I			($
")Ay))+$uU||m	 	 	# GCa   'gsW},,S[99::)	***+(")Ay))+;&&y77         )r
   	num_warps	interpretdebug)static_argnamesr      Fxr
   r#   r$   r%   returnc          	      >   |dk    r|nt          | j                  |z   }|t          | j                  dz
  k    rt          d          | j        d         }t          j        |          }t          j        |f| j                  }t          j	        t          |          }t          j        |t          t          |d                    d	|||
          }	t          t          | j                  dz
            D ]}
t          j        |	          }	 |	|           S )a  Computes the softmax of the input array along the specified axis.

  Args:
    x: input array
    axis: the axis along which to perform the computation
    num_warps: the number of warps to use for executing the Triton kernel
    interpret: whether to interpret the kernel using pallas
    debug: whether to use pallas in debug mode

  Returns:
    The result of the softmax operation over the specified axis of x.
  r      z3reductions along non-trailing dimension unsupportedr   )r   r   )r   )r#   
num_stages)triton )compiler_paramsgrid	out_shaper%   r$   )lenr   NotImplementedErrorr   next_power_of_2jaxShapeDtypeStructr   	functoolspartialr!   pallas_calldictrangevmap)r(   r
   r#   r$   r%   r   r   r1   kernelf_s              r    softmaxr@   3   s   $ AGt 3$	S\\A
=? ? ? GBK' )))"'17CCC)6)LLL&n$q"I"I"IJJJ  ! QW!""  aAA	
1+r"   )__doc__r7   r5   	jax.numpynumpyr   jax.experimentalr   r   intr!   r8   jitArrayboolr@   r.   r"   r    <module>rI      s         



       ) ) ) ) ) )    8 37 -6 -6 -6 7 7 7 "$a5' ' '
y''03''$(' 	Y' ' '7 7' ' 'r"   