
    VpfSP                       d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
Z
ddl
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZmZmZ eZeZ e	dg d          Z eed d            eZeZeZeZ eegef         Z!e"Z#ee#e egef         Z$eegef         Z% G d de          Z&ee#ge'f         Z(d9dZ)e)d             Z*e)d:d            Z+e)d:d            Z,e)d;d            Z-e)d<d             Z.e)d=d!            Z/e)d>d#            Z0e)d>d$            Z1e)d;d%            Z2d?d&Z3d' Z4d@d)Z5dAd+Z6dBd/Z7dCd2Z8d3 Z9d4 Z: G d5 d6          Z;d7 Z<d8 Z=dS )DaF	  Examples of how to write optimizers with JAX.

You likely do not mean to import this module! The optimizers in this library
are intended as examples only. If you are looking for a fully featured optimizer
library, two good options are JAXopt_ and Optax_.

This module contains some convenient optimizer definitions, specifically
initialization and update functions, which can be used with ndarrays or
arbitrarily-nested tuple/list/dicts of ndarrays.

An optimizer is modeled as an ``(init_fun, update_fun, get_params)`` triple of
functions, where the component functions have these signatures:

::

  init_fun(params)

  Args:
    params: pytree representing the initial parameters.

  Returns:
    A pytree representing the initial optimizer state, which includes the
    initial parameters and may also include auxiliary values like initial
    momentum. The optimizer state pytree structure generally differs from that
    of `params`.

::

  update_fun(step, grads, opt_state)

  Args:
    step: integer representing the step index.
    grads: a pytree with the same structure as `get_params(opt_state)`
      representing the gradients to be used in updating the optimizer state.
    opt_state: a pytree representing the optimizer state to be updated.

  Returns:
    A pytree with the same structure as the `opt_state` argument representing
    the updated optimizer state.

::

  get_params(opt_state)

  Args:
    opt_state: pytree representing an optimizer state.

  Returns:
    A pytree representing the parameters extracted from `opt_state`, such that
    the invariant `params == get_params(init_fun(params))` holds true.


Notice that an optimizer implementation has a lot of flexibility in the form of
opt_state: it just has to be a pytree of JaxTypes (so that it can be passed to
the JAX transforms defined in api.py) and it has to be consumable by update_fun
and get_params.

Example Usage:

.. code-block:: python

  opt_init, opt_update, get_params = optimizers.sgd(learning_rate)
  opt_state = opt_init(params)

  def step(step, opt_state):
    value, grads = jax.value_and_grad(loss_fn)(get_params(opt_state))
    opt_state = opt_update(step, grads, opt_state)
    return value, opt_state

  for i in range(num_steps):
    value, opt_state = step(i, opt_state)


.. _JAXopt: https://github.com/google/jaxopt
.. _Optax: https://github.com/deepmind/optax
    )annotations)Callable)Any
NamedTuple)
namedtupleN)partial)safe_zipsafe_mapunzip2)	tree_util)tree_maptree_flattentree_unflattenregister_pytree_nodeOptimizerStatepacked_statetree_defsubtree_defsc                .    | j         f| j        | j        ffS Nr   )xss    `/var/www/html/nettyfy-visnx/env/lib/python3.11/site-packages/jax/example_libraries/optimizers.py<lambda>r   z   s    "R["/$BC     c                H    t          |d         | d         | d                   S )Nr      )r   )datar   s     r   r   r   {   s    ^BqE47DG<< r   c                  .    e Zd ZU ded<   ded<   ded<   dS )	OptimizerInitFninit_fnUpdateFn	update_fnParamsFn	params_fnN)__name__
__module____qualname____annotations__ r   r   r    r       s3         ///r   r    	opt_makeruCallable[..., tuple[Callable[[Params], State], Callable[[Step, Updates, Params], Params], Callable[[State], Params]]]returnCallable[..., Optimizer]c                F     t          j                    fd            }|S )a  Decorator to make an optimizer defined for arrays generalize to containers.

  With this decorator, you can write init, update, and get_params functions that
  each operate only on single arrays, and convert them to corresponding
  functions that operate on pytrees of parameters. See the optimizers defined in
  optimizers.py for examples.

  Args:
    opt_maker: a function that returns an ``(init_fun, update_fun, get_params)``
      triple of functions that might only work with ndarrays, as per

      .. code-block:: haskell

          init_fun :: ndarray -> OptStatePytree ndarray
          update_fun :: OptStatePytree ndarray -> OptStatePytree ndarray
          get_params :: OptStatePytree ndarray -> ndarray

  Returns:
    An ``(init_fun, update_fun, get_params)`` triple of functions that work on
    arbitrary pytrees, as per

    .. code-block:: haskell

          init_fun :: ParameterPytree ndarray -> OptimizerState
          update_fun :: OptimizerState -> OptimizerState
          get_params :: OptimizerState -> ParameterPytree ndarray

    The OptimizerState pytree type used by the returned functions is isomorphic
    to ``ParameterPytree (OptStatePytree ndarray)``, but may store the state
    instead as e.g. a partially-flattened data structure for performance.
  c                      | i |\  t          j                  fd            }t          j                  fd            }t          j                  fd            }t          |||          S )Nc                    t          |           \  }}fd|D             }t          t          t           |                    \  }}t          |||          S )Nc                &    g | ]} |          S r+   r+   ).0x0inits     r   
<listcomp>zHoptimizer.<locals>.tree_opt_maker.<locals>.tree_init.<locals>.<listcomp>   s!    333RR333r   )r   r   mapr   )x0_treex0_flattreeinitial_statesstates_flatsubtreesr6   s         r   	tree_initz4optimizer.<locals>.tree_opt_maker.<locals>.tree_init   s[    "7++mgt33337333n$S~%F%FGGk8Kx888r   c                   |\  }}}t          |          \  }}||k    r%d}t          |                    ||                    t          t          ||          }	t          t          |           ||	          }
t          t          t           |
                    \  }}t          ||          D ]0\  }}||k    r%d}t          |                    ||                    1t          |||          S )Nzoptimizer update function was passed a gradient tree that did not match the parameter tree structure with which it was initialized: parameter tree {} and grad tree {}.zvoptimizer update function produced an output structure that did not match its input structure: input {} and output {}.)	r   	TypeErrorformatr8   r   r   r   zipr   )i	grad_tree	opt_stater=   r;   r>   	grad_flattree2msgstates
new_statesnew_states_flat	subtrees2subtreesubtree2updates                  r   tree_updatez6optimizer.<locals>.tree_opt_maker.<locals>.tree_update   s    $-!k4%i00i	$B 

4//000>8[99fwvq))9f==j#)#lJ*G*G#H#H oy"8Y77 9 9
'8wN##**Wh7788
8  OT8<<<r   c                |    | \  }}}t          t          ||          }t          |          }t          ||          S r   )r8   r   )rF   r=   r;   r>   rJ   params
get_paramss         r   tree_get_paramsz:optimizer.<locals>.tree_opt_maker.<locals>.tree_get_params   s?    $-!k4>8[99f:v&&fD&)))r   )	functoolswrapsr    )	argskwargsr?   rQ   rU   rT   r6   rP   r,   s	        @@@r   tree_opt_makerz!optimizer.<locals>.tree_opt_maker   s    (y$9&99D&*_T9 9 9 9 9 _V= = = = =$ _Z  * * * * ! * Y_===r   )rV   rW   )r,   rZ   s   ` r   	optimizerr[      s=    H ?9$> $> $> $> $>J 
r   c                Z     t                      d } fd}d }t          |||          S )a  Construct optimizer triple for stochastic gradient descent.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                    | S r   r+   )r5   s    r   r6   zsgd.<locals>.init   s    Ir   c                &    | |           |z  z
  S r   r+   )rD   gx	step_sizes      r   rP   zsgd.<locals>.update   s    yy||ar   c                    | S r   r+   )r`   s    r   rT   zsgd.<locals>.get_params   s    Hr   )make_scheduler    )ra   r6   rP   rT   s   `   r   sgdrd      s[     I&&)             	4	,	,,r   ra   Schedulemassfloatc                F     t                      d } fd}d }|||fS )aF  Construct optimizer triple for SGD with momentum.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    mass: positive scalar representing the momentum coefficient.

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                2    t          j        |           }| |fS r   jnp
zeros_liker5   v0s     r   r6   zmomentum.<locals>.init       			Br6Mr   c                H    |\  }}|z  |z   }| |           |z  z
  }||fS r   r+   rD   r_   stater`   velocityrf   ra   s        r   rP   zmomentum.<locals>.update  s;    KAxh"H	IIaLL8##Ah;r   c                    | \  }}|S r   r+   rr   r`   _s      r   rT   zmomentum.<locals>.get_params      DAqHr   rc   ra   rf   r6   rP   rT   s   ``   r   momentumrz      ^     I&&)       
   
vz	!!r   c                F     t                      d } fd}d }|||fS )aO  Construct optimizer triple for SGD with Nesterov momentum.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    mass: positive scalar representing the momentum coefficient.

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                2    t          j        |           }| |fS r   rj   rm   s     r   r6   znesterov.<locals>.init  ro   r   c                T    |\  }}|z  |z   }| |           |z  |z   z  z
  }||fS r   r+   rq   s        r   rP   znesterov.<locals>.update  sD    KAxh"H	IIaLLD8Oa/00Ah;r   c                    | \  }}|S r   r+   ru   s      r   rT   znesterov.<locals>.get_params"  rw   r   rx   ry   s   ``   r   nesterovr     r{   r   ?c                F     t                      d } fd}d }|||fS )a  Construct optimizer triple for Adagrad.

  Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
  http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    momentum: optional, a positive scalar value for momentum

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                \    t          j        |           }t          j        |           }| ||fS r   rj   )r5   g_sqms      r   r6   zadagrad.<locals>.init9  s,    >"DrAtQ;r   c                    |\  }}}|t          j        |          z  }t          j        |dk    dt          j        |          z  d          }dz
  ||z  z  |z  z   }| |           |z  z
  }|||fS )Nr         ?g        )rk   squarewheresqrt)	rD   r_   rr   r`   r   r   g_sq_inv_sqrtrz   ra   s	          r   rP   zadagrad.<locals>.update>  s    JAtQCJqMMDIdQhSXd^^(;SAAM	h1},-1<A	IIaLL1AdA:r   c                    | \  }}}|S r   r+   ru   s      r   rT   zadagrad.<locals>.get_paramsF      GAq!Hr   rx   )ra   rz   r6   rP   rT   s   ``   r   adagradr   (  s^     I&&)  
        
vz	!!r   :0yE>c                J     t                      d } fd}d }|||fS )a7  Construct optimizer triple for RMSProp.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
      gamma: Decay parameter.
      eps: Epsilon parameter.

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                2    t          j        |           }| |fS r   rj   )r5   avg_sq_grads     r   r6   zrmsprop.<locals>.init[  s    .$$K{?r   c                    |\  }}|z  t          j        |          dz
  z  z   }| |           |z  t          j        |z             z  z
  }||fS Nr   rk   r   r   )rD   r_   rr   r`   r   epsgammara   s        r   rP   zrmsprop.<locals>.update^  sa    NA{%
1e(DDK	IIaLL1sxc(9::::Ak>r   c                    | \  }}|S r   r+   ru   s      r   rT   zrmsprop.<locals>.get_paramsc  rw   r   rx   )ra   r   r   r6   rP   rT   s   ```   r   rmspropr   M  sd     I&&)        
   
vz	!!r   c                N     t                      d } fd}d }|||fS )a  Construct optimizer triple for RMSProp with momentum.

  This optimizer is separate from the rmsprop optimizer because it needs to
  keep track of additional parameters.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    gamma: Decay parameter.
    eps: Epsilon parameter.
    momentum: Momentum parameter.

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                \    t          j        |           }t          j        |           }| ||fS r   rj   )r5   r   moms      r   r6   zrmsprop_momentum.<locals>.init{  s-    .$$K
.

C{Cr   c                    |\  }}}|z  t          j        |          dz
  z  z   }|z   	|           |z  t          j        |z             z  z   }||z
  }|||fS r   r   )
rD   r_   rr   r`   r   r   r   r   rz   ra   s
         r   rP   z rmsprop_momentum.<locals>.update  st    A{C%
1e(DDK
S.99Q<<!+ch{S7H.I.II
IC	CAk3r   c                    | \  }}}|S r   r+   ru   s      r   rT   z$rmsprop_momentum.<locals>.get_params  r   r   rx   )ra   r   r   rz   r6   rP   rT   s   ````   r   rmsprop_momentumr   i  sj    " I&&)               
vz	!!r   +?c                N     t                      d } fd}d }|||fS )a|  Construct optimizer triple for Adam.

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    b1: optional, a positive scalar value for beta_1, the exponential decay rate
      for the first moment estimates (default 0.9).
    b2: optional, a positive scalar value for beta_2, the exponential decay rate
      for the second moment estimates (default 0.999).
    eps: optional, a positive scalar value for epsilon, a small constant for
      numerical stability (default 1e-8).

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                \    t          j        |           }t          j        |           }| ||fS r   rj   )r5   m0rn   s      r   r6   zadam.<locals>.init  ,    			B			Br2:r   c                `   |\  }}}dz
  |z  |z  z   }d	z
  t          j        |          z  	|z  z   }|dt          j        |j                  | dz   z  z
  z  }|dt          j        	|j                  | dz   z  z
  z  }| |           |z  t          j        |          
z   z  z
  }|||fS Nr   )rk   r   asarraydtyper   )rD   r_   rr   r`   r   vmhatvhatb1b2r   ra   s           r   rP   zadam.<locals>.update  s    GAq!	
R1rAvA	
R3:a== 26)ACKAG,,Q778DCKAG,,Q778D	IIaLL438D>>C#788Aa7Nr   c                    | \  }}}|S r   r+   ru   s      r   rT   zadam.<locals>.get_params  r   r   rx   ra   r   r   r   r6   rP   rT   s   ````   r   adamr     sj    " I&&)            
vz	!!r   c                N     t                      d } fd}d }|||fS )a  Construct optimizer triple for AdaMax (a variant of Adam based on infinity norm).

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    b1: optional, a positive scalar value for beta_1, the exponential decay rate
      for the first moment estimates (default 0.9).
    b2: optional, a positive scalar value for beta_2, the exponential decay rate
      for the second moment estimates (default 0.999).
    eps: optional, a positive scalar value for epsilon, a small constant for
      numerical stability (default 1e-8).

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                \    t          j        |           }t          j        |           }| ||fS r   rj   )r5   r   u0s      r   r6   zadamax.<locals>.init  r   r   c                    |\  }}}dz
  |z  |z  z   }t          j        |z  t          j        |                    }| 	|           dt          j        |j                  | dz   z  z
  z  |z  |z   z  z
  }|||fS r   )rk   maximumabsr   r   )
rD   r_   rr   r`   r   ur   r   r   ra   s
         r   rP   zadamax.<locals>.update  s    GAq!	
R1rAvABFCGAJJ''A	
iilla#+b!'":":q1u"EEF!KG 
Aa7Nr   c                    | \  }}}|S r   r+   ru   s      r   rT   zadamax.<locals>.get_params  r   r   rx   r   s   ````   r   adamaxr     sj    " I&&)            
vz	!!r   c                ^     t                      d fdd } fd}d }|||fS )a  Construct optimizer triple for SM3.

  Memory-Efficient Adaptive Optimization for Large-Scale Learning.
  https://arxiv.org/abs/1901.11150

  Args:
    step_size: positive scalar, or a callable representing a step size schedule
      that maps the iteration index to a positive scalar.
    momentum: optional, a positive scalar value for momentum

  Returns:
    An (init_fun, update_fun, get_params) triple.
  c                8    t          |           }||||dz   <   |S r   )list)seqrD   r`   lsts       r   splicezsm3.<locals>.splice  s#    
s))CC!A#JJr   c                l     d g| z  |t          d           g          }|t          |                   S r   )slicetuple)ndimr`   axisidxr   s       r   broadcast_intozsm3.<locals>.broadcast_into  s4    
&$$uT{{m
4
4CU3ZZ=r   c                      j         }t          j                     fd j         D             } t          j                   ||fS )Nc                F    g | ]}t          j        |j                   S ))r   )rk   zerosr   )r4   szr5   s     r   r7   z%sm3.<locals>.init.<locals>.<listcomp>  s*    	;	;	;B#)Bbh
'
'
'	;	;	;r   )shaperk   
atleast_1drl   )r5   x_shapevss   `  r   r6   zsm3.<locals>.init  sM    hG			B	;	;	;	;"(	;	;	;Bs~b!!2w..r   c                   |\  }}}	fdt          |          D             }t          j        t          j        |          t          j                  z   t          j        dk    dt          j                  z  d          }d
z
  |z  z  
|z  z   } |           |z  z
  fdt          j	                  D             }|||fS )Nc                :    g | ]\  }} j         ||          S r+   )r   )r4   rD   r   r   r_   s      r   r7   z'sm3.<locals>.update.<locals>.<listcomp>  s-    	A	A	A41a..A
&
&	A	A	Ar   r   r   c           	     t    g | ]4}                      t          j                  |g                     5S r+   )maxranger   )r4   jaccumr   r`   s     r   r7   z'sm3.<locals>.update.<locals>.<listcomp>  s;    	I	I	Ia%))FF5==!R00
1
1	I	I	Ir   )
	enumeraterV   reducerk   minimumr   r   r   r   r   )rD   r_   rr   r   r   r   accum_inv_sqrtr   r`   r   rz   r   ra   s    `     @@r   rP   zsm3.<locals>.update  s    Aq"g	A	A	A	A	A9R==	A	A	ABS["--
1=EYuqy"sx*>BBN	h1~-.A=A	IIaLL1A	I	I	I	I	I	I5==	I	I	IBaWr   c                :    | \  }}}}|                     |          S r   )reshape)rr   r`   rv   r   s       r   rT   zsm3.<locals>.get_params  s"    Aq!W99Wr   rx   )ra   rz   r6   rP   rT   r   r   s   ``   @@r   sm3r     s     I&&)  
    / / /          
vz	!!r   c                      fd}|S )Nc                    S r   r+   )rD   ra   s    r   schedulezconstant.<locals>.schedule  s	    r   r+   )ra   r   s   ` r   constantr     s!        	/r   c                      fd}|S )Nc                    | z  z  z  S r   r+   rD   
decay_ratedecay_stepsra   s    r   r   z#exponential_decay.<locals>.schedule  s    za+o666r   r+   )ra   r   r   r   s   ``` r   exponential_decayr   
  s-    7 7 7 7 7 7 7	/r   Fc                .     |r fd}n fd}|S )Nc                D    dt          j        | z            z  z   z  S r   )rk   floorr   s    r   r   z$inverse_time_decay.<locals>.schedule  s&    !j39Q_+E+EEEFFr   c                     d| z  z  z   z  S r   r+   r   s    r   r   z$inverse_time_decay.<locals>.schedule  s    !j1n{::;;r   r+   )ra   r   r   	staircaser   s   ```  r   inverse_time_decayr     se     <G G G G G G G G< < < < < < <	/r   r   c                      fd}|S )Nc                Z    t          j        |           } d| z  z
  z  }|z
  z  z   S r   )rk   r   )step_num	step_multr   final_step_sizepowerra   s     r   r   z"polynomial_decay.<locals>.schedule  s=    {8[11HX++5I	O34FFr   r+   )ra   r   r   r   r   s   ```` r   polynomial_decayr     s=    G G G G G G G G
 
/r   
boundariesr   valuesc                    t          j                    t          j                   j        j        cxk    rdk    sn t          d           j        d         j        d         dz
  k    st          d           fd}|S )Nr   z'boundaries and values must be sequencesr   z8boundaries length must be one shorter than values lengthc                @    t          j        | k                       S r   )rk   sum)rD   r   r   s    r   r   z$piecewise_constant.<locals>.schedule(  s    #'!j.))**r   )rk   arrayr   
ValueErrorr   )r   r   r   s   `` r   piecewise_constantr      s    y$$*9V&	FK	,	,	,	,1	,	,	,	,
>
?
??		!	Q! 3	3	3
O
P
PP+ + + + + +	/r   scalar_or_schedulefloat | Schedulec                    t          |           r| S t          j        |           dk    rt          |           S t	          t          |                     )Nr   )callablerk   r   r   rA   type)r   s    r   rc   rc   ,  sU     !! .
x"##q((&'''
D+,,
-
--r   c                |    t          |           \  }}t          j        t          d |D                                 S )zCCompute the l2 norm of a pytree of arrays. Useful for weight decay.c              3  @   K   | ]}t          j        ||          V  d S r   )rk   vdot)r4   r`   s     r   	<genexpr>zl2_norm.<locals>.<genexpr>:  s,      55chq!nn555555r   )r   rk   r   r   )r;   leavesrv   s      r   l2_normr  7  s:    4  )&!	#55f55555	6	66r   c                P    t          |           fd}t          ||           S )zGClip gradients stored as a pytree of arrays to maximum norm `max_norm`.c                D    t          j        k     | | z  z            S r   )rk   r   )r_   max_normnorms    r   r   zclip_grads.<locals>.<lambda>?  s"    	$/1a8d?6KLL r   )r  r   )rE   r	  	normalizer
  s    ` @r   
clip_gradsr  <  s2    			$LLLLL)	)Y	'	''r   c                      e Zd ZdZd Zd ZdS )	JoinPointz7Marks the boundary between two joined (nested) pytrees.c                    || _         d S r   rN   )selfrN   s     r   __init__zJoinPoint.__init__G  s    DLLLr   c              #     K   | j         V  d S r   r  )r  s    r   __iter__zJoinPoint.__iter__K  s      
,r   N)r'   r(   r)   __doc__r  r  r+   r   r   r  r  E  s8        ??      r   r  c                |    | \  }}}t          t          ||          }d |D             }t          j        ||          S )a  Converts an OptimizerState to a marked pytree.

  Converts an OptimizerState to a marked pytree with the leaves of the outer
  pytree represented as JoinPoints to avoid losing information. This function is
  intended to be useful when serializing optimizer states.

  Args:
    opt_state: An OptimizerState
  Returns:
    A pytree with JoinPoint leaves that contain a second level of pytrees.
  c                ,    g | ]}t          |          S r+   )r  )r4   rN   s     r   r7   z*unpack_optimizer_state.<locals>.<listcomp>\  s     :::gy!!:::r   )r8   r   r   )rF   r=   r   r   r>   	sentinelss         r   unpack_optimizer_stater  N  sG     )2%+x{;;(:::::)		!(I	6	66r   c                    t          |           \  }}t          d |D                       sJ d |D             }t          t          t           |                    \  }}t	          |||          S )a  Converts a marked pytree to an OptimizerState.

  The inverse of unpack_optimizer_state. Converts a marked pytree with the
  leaves of the outer pytree represented as JoinPoints back into an
  OptimizerState. This function is intended to be useful when deserializing
  optimizer states.

  Args:
    marked_pytree: A pytree containing JoinPoint leaves that hold more pytrees.
  Returns:
    An equivalent OptimizerState to the input argument.
  c              3  @   K   | ]}t          |t                    V  d S r   )
isinstancer  r4   ss     r   r  z'pack_optimizer_state.<locals>.<genexpr>m  s,      99!Z9%%999999r   c                    g | ]	}|j         
S r+   r  r  s     r   r7   z(pack_optimizer_state.<locals>.<listcomp>n  s    +++Aai+++r   )r   allr   r8   r   )marked_pytreer  r   r>   r=   r   s         r   pack_optimizer_stater"  _  sz     %]33)X	99y999	9	9999+++++($Sx%@%@AA+|	X|	<	<<r   )r,   r-   r.   r/   )ra   re   rf   rg   )r   )r   r   )r   r   r   )r   r   r   )r.   re   )F)r   )r   r   r   r   )r   r   r.   re   )>r  
__future__r   collections.abcr   typingr   r   collectionsr   rV   r   	jax.numpynumpyrk   jax._src.utilr	   r
   r   jaxr   jax.tree_utilr   r   r   r   r8   rC   r   ArrayParamsStateUpdatesr!   intStepr#   r%   r    rg   re   r[   rd   rz   r   r   r   r   r   r   r   r   r   r   r   r   rc   r  r  r  r  r"  r+   r   r   <module>r2     s  K KZ # " " " " " $ $ $ $ $ $ " " " " " " " " " " " " " "                 4 4 4 4 4 4 4 4 4 4      1 1 1 1 1 1 1 1 1 1 1 1  ,HHHJ J  CC<<> > > 		
	6(N*	+
T7N3^CD^$f,-    
   
 TFEM"J J J J^ - - -& " " " "4 " " " "4 !" !" !" !"H " " " "6 " " " "B  "  "  "  "F " " " "D -" -" -" -"d   
  
      
 
 
 
. . . .7 7 7
( ( (       7 7 7"= = = = =r   