from typing import Tuple, Union, Optional

import torch
import torch.nn.functional as F
from torch import nn

import comfy.model_management
from comfy.ldm.modules.attention import optimized_attention
Zd!depEepEedefddZd"depSepSedededefddZdd Zdeeeedf f deeeedf f fddZG dd dejZG dd dejZG dd  d ejZdS )#    )TupleUnionOptionalN)nn)optimized_attentionc                )       s   e Zd Z																							d%d	ed
ee dedee dedededee dee dee dedededededededededef( fddZddi fde	j
d ee	j
 d!ee	j
 d"e	j
fd#d$Z  ZS )&	AttentionN   @           FTh㈵>      ?	query_dimcross_attention_dimheadskv_headsdim_headdropoutbiasqk_normadded_kv_proj_dimadded_proj_biasout_biasscale_qkonly_cross_attentionepsrescale_output_factorresidual_connectionout_dimout_context_dimelementwise_affine	is_causalc              	      s  t    |d ur|n|| | _|d u r| jn|| | _|| _|| _|d u| _|d ur-|n|| _|| _|| _	|| _
d| _|d urB|n|| _|d urK|n|| _|| _|| _|| _|| _| jra|d nd| _|d url|| n|| _|| _|	| _|| _| jd u r| jrtdd | _d | _d | _d | _d | _|j|| j|||d| _| js|j| j| j|||d| _|j| j| j|||d| _ nd | _d | _ |
| _!| jd ur|j|	| j|
||d| _"|j|	| j|
||d| _#| jd ur|j|	| j|
||d| _$n	d | _$d | _"d | _#| js"t%&g | _'| j'(|j| j| j|||d | j'(t%)| nd | _'| jd ur=| js=|j| j| j|||d| _*nd | _*d | _+d | _,|| _-d S )NFg      r   z`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`.)r   dtypedevice).super__init__	inner_diminner_kv_dimr   use_biasis_cross_attentionr   r   r   r   fused_projectionsr   r   context_pre_onlypre_onlyr    r   scaler   sliceable_head_dimr   r   
ValueError
group_normspatial_normnorm_qnorm_k
norm_crossLinearto_qto_kto_vr   
add_k_proj
add_v_proj
add_q_projr   
ModuleListto_outappendDropout
to_add_outnorm_added_qnorm_added_k	processor)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rB   r   r   r*   r+   r   r    r!   r"   
operations	__class__ 5/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/ace/attention.pyr$      st   



 
zAttention.__init__hidden_statesencoder_hidden_statesattention_maskreturnc                 K   s   | j | |f|||d|S )N)rJ   rK   transformer_options)rB   )rC   rI   rJ   rK   rM   cross_attention_kwargsrG   rG   rH   forward   s   zAttention.forward)Nr   Nr	   r
   FNNTTTFr   r   FNNNNFTFNNN)__name__
__module____qualname__intr   floatboolstrr$   torchTensorrO   __classcell__rG   rG   rE   rH   r      s    	
mr   c                   @   s   e Zd ZdZdd Zdejdeejeej f deejejf fddZ											dd

class CustomLiteLAProcessor2_0:
    """Attention processor used typically in processing the SD3-like self-attention projections. add rms norm for query and key and apply RoPE"""

    def __init__(self):
        self.kernel_func = nn.ReLU(inplace=False)
        self.eps = 1e-15
        self.pad_val = 1.0

    def apply_rotary_emb(
        self,
        x: torch.Tensor,
        freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]],
    ) -> torch.Tensor:
        """
        Apply rotary embeddings to input tensors using the given frequency tensor. This function applies rotary embeddings
        to the given query or key 'x' tensors using the provided frequency tensor 'freqs_cis'. The input tensors are
        reshaped as complex numbers, and the frequency tensor is reshaped for broadcasting compatibility. The resulting
        tensors contain rotary embeddings and are returned as real tensors.

        Args:
            x (`torch.Tensor`):
                Query or key tensor to apply rotary embeddings. [B, H, S, D] xk (torch.Tensor): Key tensor to apply
            freqs_cis (`Tuple[torch.Tensor]`): Precomputed frequency tensor for complex exponentials. ([S, D], [S, D],)

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings.
        """
        cos, sin = freqs_cis  # [S, D]
        cos = cos[None, None]
        sin = sin[None, None]
        cos, sin = cos.to(x.device), sin.to(x.device)

        x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1)  # [B, H, S, D//2]
        x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3)

        out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype)

        return out
|j}|dkr |j \}}}}||||| dd}|d ur?|j}|dkr?|j \}}}}||||| dd}|j d }|j}||}||}||}t|dodt|dodt|d}|d ur|r|	|}|
|}||}|jstj||gdd}tj||gdd}tj||gdd}n|}|}|}|j d	 }||j }|d	d
||j|d	}|d	d
||j|d	d	d
}|d	d
||j|d	}|dddd}|jd ur||}|jd ur||}|d ur| ||}|js| ||}n|d ur|r| ||}|dddd}|d urM|d d d d d d f |j}||dddd }|jsM|| }||dddd }|jrw|d urw|rw|d d d d d d f |j}|| }||dddd }| |}| |}| | | }}}tj|dd| jd}t||}t||}|jtjtjfv r| }|d d d d d d	f |d d d d d	d f | j  }|||j| d	ddd}||}|d ur||}|d ur|js|r|d d d |
f |d d |
d f }}|jd |}|jd |}|d ur5|j s5|js5t|dr5|!|}|dkrF|d	d
||||}|d ur\|dkr\|d	d
||||}t" tjkru|#dd}|d uru|#dd}||fS )N      re   r   r:   r8   r9   rf   rd   rh   )r   r   r   r}   constant)modevaluer?   i  i  )$rl   ndimview	transposer!   r5   r6   r7   hasattrr:   r8   r9   r(   rW   catr   rk   permuter1   r2   rw   rj   r^   rT   Fpadr_   matmulfloat16bfloat16r   r<   r*   r?   get_autocast_gpu_dtypeclip)rC   ry   rI   rJ   rK   rz   r{   r|   argskwargshidden_states_len
input_ndim
batch_sizechannelheightwidthcontext_input_ndimr!   querykeyr   has_encoder_hidden_state_proj encoder_hidden_states_query_projencoder_hidden_states_key_proj encoder_hidden_states_value_projr%   head_dimvkrG   rG   rH   __call__   s   









"





  

:


&


z!CustomLiteLAProcessor2_0.__call__)NNNNN)rP   rQ   rR   __doc__r$   rW   rX   r   r   rw   r   FloatTensorr   r   rG   rG   rG   rH   rZ      s@    
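
# Why the matmul order above matters (illustrative, not part of the original module): with
# ReLU feature maps, attention factorizes as (V @ K) @ Q, which costs O(N * d^2) instead of
# the O(N^2 * d) of an explicit N x N score matrix. By associativity both orders give the
# same result, which this sketch verifies on the same [h, D, N] layout `__call__` uses.
def _linear_attention_identity_demo():
    h, d, n = 1, 4, 6
    q = torch.relu(torch.randn(h, d, n))   # [h, D, N]
    k = torch.relu(torch.randn(h, n, d))   # [h, N, D]
    v = torch.randn(h, d, n)               # [h, D, N]
    explicit = v @ (k @ q)                 # O(N^2) order: the scores k@q are [h, N, N]
    factored = (v @ k) @ q                 # O(N) order used by the processor
    assert torch.allclose(explicit, factored, atol=1e-5)
    return factored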
"rZ   c                   @   s   e Zd ZdZdejdeejeej f deejejf fddZdddddi fde	d	ej
d
ej
deej
 deej
 deejeej f deejeej f dejfddZdS )CustomerAttnProcessor2_0zs
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    r`   ra   rL   c           	      C   rb   rc   ri   rp   rG   rG   rH   rw   L  rx   z)CustomerAttnProcessor2_0.apply_rotary_embNry   rI   rJ   rK   rz   r{   r|   c	              	   O   s  |}|j }|dkr|j\}}}}||||| dd}|d u r$|jn|j\}}}t|do8t|do8t|d}|jd urK||dddd}||}|d u rW|}n|jr_||}|	|}|
|}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}|jd ur||}|jd ur||}|d ur| ||}|js| ||}n|d ur|r| ||}|jr|d ur|r|d d d d d f |d d d d d f  }t|dkdtj }|d d d d d d d f d|jdd|j}n|js1|d ur1||||}|||jd|jd }t||||jd |d	|d
|j}|jd |}|jd |}|dkrb|dd||||}|jrj|| }||j }|S )Nr~   r}   re   r:   r8   r9   rd   r
   T)r   maskskip_reshaperM   r   r   )r   rl   r   r   r   r/   r5   r3   norm_encoder_hidden_statesr6   r7   r   r1   r2   rw   r(   rW   whereinfexpandrj   r!   prepare_attention_maskr   r<   rk   r   r   )rC   ry   rI   rJ   rK   rz   r{   r|   rM   r   r   residualr   r   r   r   r   sequence_length_r   r   r   r   r%   r   combined_maskrG   rG   rH   r   j  sh   










,4

z!CustomerAttnProcessor2_0.__call__)rP   rQ   rR   r   rW   rX   r   r   rw   r   r   r   r   rG   rG   rG   rH   r   G  s@    
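
# Mask construction sketch (illustrative, not part of the original module): for
# cross-attention, the two 0/1 padding masks are combined into a [B, Nq, Nk] grid and then
# turned into additive logits, matching the `combined_mask` branch in `__call__` above.
def _cross_attention_mask_demo():
    attention_mask = torch.tensor([[1, 1, 0]])       # [B, Nq]: two real query tokens
    encoder_attention_mask = torch.tensor([[1, 0]])  # [B, Nk]: one real key token
    combined = attention_mask[:, :, None] * encoder_attention_mask[:, None, :]
    additive = torch.where(combined == 1, 0.0, -torch.inf)
    return additive  # [[[0, -inf], [0, -inf], [-inf, -inf]]]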
"r   r}   r`   rL   c                    s,   t  ttfrt S  fddt|D S )zNRepeat `val` for `repeat_time` times and return the list or val if list/tuple.c                    s   g | ]} qS rG   rG   .0r   r`   rG   rH   
<listcomp>  s    zval2list.<locals>.<listcomp>)
isinstancelisttuplerange)r`   repeat_timerG   r   rH   val2list  s   r   rd   min_len
idx_repeatc                    sD   t tdkr fddt|t D   < tS )z=Return tuple with min_len by repeating element at idx_repeat.r   c                    s   g | ]}  qS rG   rG   r   r   r`   rG   rH   r         zval2tuple.<locals>.<listcomp>)r   lenr   r   )r`   r   r   rG   r   rH   	val2tuple  s   (r   c                 C   s   | d|  | S )Nr}   rG   )r`   shiftr,   rG   rG   rH   t2i_modulate  s   r   kernel_size.c                 C   s@   t | trtdd | D S | d dksJ d|  d| d S )Nc                 S   s   g | ]}t |qS rG   )get_same_padding)r   ksrG   rG   rH   r     r   z$get_same_padding.<locals>.<listcomp>re   r   zkernel size z should be odd number)r   r   )r   rG   rG   rH   r     s   
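
# Quick behavior notes for the helpers above (illustrative, not part of the original
# module): `val2tuple` pads by repeating one element, which is how GLUMBConv below turns a
# scalar `use_bias` into a per-conv triple.
def _helpers_demo():
    assert val2list(7, repeat_time=3) == [7, 7, 7]
    assert val2tuple(False, 3) == (False, False, False)
    assert val2tuple((1, 2), 3, idx_repeat=-1) == (1, 2, 2)
    assert get_same_padding(3) == 1 and get_same_padding((3, 5)) == (1, 2)
    return True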

class ConvLayer(nn.Module):
    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        kernel_size=3,
        stride=1,
        dilation=1,
        groups=1,
        padding: Union[int, None] = None,
        use_bias=False,
        norm=None,
        act=None,
        dtype=None,
        device=None,
        operations=None,
    ):
        super().__init__()
        if padding is None:
            padding = get_same_padding(kernel_size)
            padding *= dilation

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.kernel_size = kernel_size
        self.stride = stride
        self.dilation = dilation
        self.groups = groups
        self.padding = padding
        self.use_bias = use_bias

        self.conv = operations.Conv1d(
            in_dim,
            out_dim,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=use_bias,
            dtype=dtype,
            device=device,
        )
        if norm is not None:
            self.norm = operations.RMSNorm(out_dim, elementwise_affine=False, dtype=dtype, device=device)
        else:
            self.norm = None
        if act is not None:
            self.act = nn.SiLU(inplace=True)
        else:
            self.act = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv(x)
        if self.norm:
            x = self.norm(x)
        if self.act:
            x = self.act(x)
        return x
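
# Shape note (illustrative, not part of the original module): ConvLayer works on
# channels-first [B, C, N] sequences, which is why GLUMBConv.forward below transposes in
# and out around its convs. Plain `torch.nn` stands in for `operations` here.
def _conv_layer_demo():
    conv = ConvLayer(8, 16, kernel_size=3, act="silu", operations=nn)
    x = torch.randn(2, 8, 32)   # [B, C, N]
    return conv(x).shape        # torch.Size([2, 16, 32]) -- same-padding keeps N fixed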

class GLUMBConv(nn.Module):
    def __init__(
        self,
        in_features: int,
        hidden_features: int,
        out_feature=None,
        kernel_size=3,
        stride=1,
        padding: Union[int, None] = None,
        use_bias=False,
        norm=(None, None, None),
        act=("silu", "silu", None),
        dilation=1,
        dtype=None,
        device=None,
        operations=None,
    ):
        out_feature = out_feature or in_features
        super().__init__()
        use_bias = val2tuple(use_bias, 3)
        norm = val2tuple(norm, 3)
        act = val2tuple(act, 3)

        self.glu_act = nn.SiLU(inplace=False)
        self.inverted_conv = ConvLayer(
            in_features,
            hidden_features * 2,
            1,
            use_bias=use_bias[0],
            norm=norm[0],
            act=act[0],
            dtype=dtype,
            device=device,
            operations=operations,
        )
        self.depth_conv = ConvLayer(
            hidden_features * 2,
            hidden_features * 2,
            kernel_size,
            stride=stride,
            groups=hidden_features * 2,
            padding=padding,
            use_bias=use_bias[1],
            norm=norm[1],
            act=act[1],
            dilation=dilation,
            dtype=dtype,
            device=device,
            operations=operations,
        )
        self.point_conv = ConvLayer(
            hidden_features,
            out_feature,
            1,
            use_bias=use_bias[2],
            norm=norm[2],
            act=act[2],
            dtype=dtype,
            device=device,
            operations=operations,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.transpose(1, 2)
        x = self.inverted_conv(x)
        x = self.depth_conv(x)

        x, gate = torch.chunk(x, 2, dim=1)
        gate = self.glu_act(gate)
        x = x * gate

        x = self.point_conv(x)
        x = x.transpose(1, 2)

        return x

class LinearTransformerBlock(nn.Module):
    """
    A Sana block with global shared adaptive layer norm (adaLN-single) conditioning.
    """

    def __init__(
        self,
        dim,
        num_attention_heads,
        attention_head_dim,
        use_adaln_single=True,
        cross_attention_dim=None,
        added_kv_proj_dim=None,
        context_pre_only=False,
        mlp_ratio=4.0,
        add_cross_attention=False,
        add_cross_attention_dim=None,
        qk_norm=None,
        dtype=None,
        device=None,
        operations=None,
    ):
        super().__init__()
        self.norm1 = operations.RMSNorm(dim, elementwise_affine=False, eps=1e-6)
        self.attn = Attention(
            query_dim=dim,
            cross_attention_dim=cross_attention_dim,
            added_kv_proj_dim=added_kv_proj_dim,
            context_pre_only=context_pre_only,
            dim_head=attention_head_dim,
            heads=num_attention_heads,
            out_dim=dim,
            bias=True,
            qk_norm=qk_norm,
            processor=CustomLiteLAProcessor2_0(),
            dtype=dtype,
            device=device,
            operations=operations,
        )

        self.add_cross_attention = add_cross_attention

        if add_cross_attention and add_cross_attention_dim is not None:
            self.cross_attn = Attention(
                query_dim=dim,
                cross_attention_dim=add_cross_attention_dim,
                added_kv_proj_dim=add_cross_attention_dim,
                context_pre_only=context_pre_only,
                dim_head=attention_head_dim,
                heads=num_attention_heads,
                out_dim=dim,
                bias=True,
                qk_norm=qk_norm,
                processor=CustomerAttnProcessor2_0(),
                dtype=dtype,
                device=device,
                operations=operations,
            )

        self.norm2 = operations.RMSNorm(dim, elementwise_affine=False, eps=1e-6)
        self.ff = GLUMBConv(
            in_features=dim,
            hidden_features=int(dim * mlp_ratio),
            use_bias=(True, True, False),
            norm=(None, None, None),
            act=("silu", "silu", None),
            dtype=dtype,
            device=device,
            operations=operations,
        )
        self.use_adaln_single = use_adaln_single
        if use_adaln_single:
            self.scale_shift_table = nn.Parameter(torch.empty(6, dim, dtype=dtype, device=device))

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        rotary_freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]] = None,
        rotary_freqs_cis_cross: Union[torch.Tensor, Tuple[torch.Tensor]] = None,
        temb: torch.FloatTensor = None,
        transformer_options={},
    ) -> torch.Tensor:
        N = hidden_states.shape[0]

        if self.use_adaln_single:
            shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
                comfy.model_management.cast_to(self.scale_shift_table[None], dtype=hidden_states.dtype, device=hidden_states.device)
                + temb.reshape(N, 6, -1)
            ).chunk(6, dim=1)

        norm_hidden_states = self.norm1(hidden_states)
        if self.use_adaln_single:
            norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa

        if not self.add_cross_attention:
            # Joint self-attention over sample + context tokens.
            attn_output, encoder_hidden_states = self.attn(
                hidden_states=norm_hidden_states,
                attention_mask=attention_mask,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask,
                rotary_freqs_cis=rotary_freqs_cis,
                rotary_freqs_cis_cross=rotary_freqs_cis_cross,
                transformer_options=transformer_options,
            )
        else:
            attn_output, _ = self.attn(
                hidden_states=norm_hidden_states,
                attention_mask=attention_mask,
                encoder_hidden_states=None,
                encoder_attention_mask=None,
                rotary_freqs_cis=rotary_freqs_cis,
                rotary_freqs_cis_cross=None,
                transformer_options=transformer_options,
            )

        if self.use_adaln_single:
            attn_output = gate_msa * attn_output
        hidden_states = attn_output + hidden_states

        if self.add_cross_attention:
            attn_output = self.cross_attn(
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask,
                rotary_freqs_cis=rotary_freqs_cis,
                rotary_freqs_cis_cross=rotary_freqs_cis_cross,
                transformer_options=transformer_options,
            )
            hidden_states = attn_output + hidden_states

        norm_hidden_states = self.norm2(hidden_states)
        if self.use_adaln_single:
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp

        ff_output = self.ff(norm_hidden_states)
        if self.use_adaln_single:
            ff_output = gate_mlp * ff_output
        hidden_states = hidden_states + ff_output

        return hidden_states