o
    ig'                     @   s   d dl Z d dlmZ d dlmZmZmZ d dlmZ ddee j	ee j	 f de j	fddZ
d	d
 Z	dde j	dee j	 dee j	ee j	 f dedee j	e j	f f
ddZG dd dejZG dd dejZdS )    N)TupleUnionOptional)optimized_attentionF	freqs_cisxc                    s  |j  dd  kr k sJ  J t| try|rB| d j|jd |jd fks5J d| d j d|j  fddt|jD }n)| d j|jd |jd fks_J d| d j d|j  fd	dt|jD }| d j| | d j| fS |r| j|jd |jd fksJ d| j d|j  fd
dt|jD }n%| j|jd |jd fksJ d| j d|j  fddt|jD }| j| S )a  
    Reshape frequency tensor for broadcasting it with another tensor.

    This function reshapes the frequency tensor to have the same shape as the target tensor 'x'
    for the purpose of broadcasting the frequency tensor during element-wise operations.

    Args:
        freqs_cis (Union[torch.Tensor, Tuple[torch.Tensor]]): Frequency tensor to be reshaped.
        x (torch.Tensor): Target tensor for broadcasting compatibility.
        head_first (bool): head dimension first (except batch dim) or not.

    Returns:
        torch.Tensor: Reshaped frequency tensor.

    Raises:
        AssertionError: If the frequency tensor doesn't match the expected shape.
        AssertionError: If the target tensor 'x' doesn't have the expected number of dimensions.
    r      zfreqs_cis shape z does not match x shape c                    0   g | ]\}}| d  ks| d kr|ndqS    r    .0idndimr   9/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/hydit/attn_layers.py
<listcomp>!      0 z)reshape_for_broadcast.<locals>.<listcomp>c                    ,   g | ]\}}|d ks| d  kr|nd qS r   r   r   r   r   r   r   $      , c                    r   r   r   r   r   r   r   r   *   r   c                    r   r   r   r   r   r   r   r   -   r   )r   
isinstancetupleshape	enumerateview)r   r   
head_firstr   r   r   r   reshape_for_broadcast   s   
::22
r!   c                 C   sF   | j g | jd d ddR  d\}}tj| |gdddS )Nr
   r   dim   )reshaper   unbindtorchstackflatten)r   x_realx_imagr   r   r   rotate_half1   s   ,r,   xqxkr    returnc           
      C   s  d}t |tr+t|| |\}}| | t| |  }|dur'|| t||  }||fS t|  jg | jdd ddR  }t|||	| j
}t|| d| }|durt| jg |jdd ddR  }	t|	| d|}||fS )a  
    Apply rotary embeddings to input tensors using the given frequency tensor.

    This function applies rotary embeddings to the given query 'xq' and key 'xk' tensors using the provided
    frequency tensor 'freqs_cis'. The input tensors are reshaped as complex numbers, and the frequency tensor
    is reshaped for broadcasting compatibility. The resulting tensors contain rotary embeddings and are
    returned as real tensors.

    Args:
        xq (torch.Tensor): Query tensor to apply rotary embeddings. [B, S, H, D]
        xk (torch.Tensor): Key tensor to apply rotary embeddings.   [B, S, H, D]
        freqs_cis (Union[torch.Tensor, Tuple[torch.Tensor]]): Precomputed frequency tensor for complex exponentials.
        head_first (bool): head dimension first (except batch dim) or not.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings.

    Nr
   r   r$   )r   r   r!   r,   r'   view_as_complexfloatr%   r   todeviceview_as_realr)   type_as)
r-   r.   r   r    xk_outcossinxq_outxq_xk_r   r   r   apply_rotary_emb6   s   
	,,r<   c                       s<   e Zd ZdZ								d
 fdd	Zddd	Z  ZS )CrossAttentionz
    Use QK Normalization.
    TF        Nc                    s0  |	|
d}t    || _|| _|| _|| _| j| dks!J d| j| | _| jd dkr3| jdks7J d| jd | _|j||fd|i|| _	|j|d	| fd|i|| _
|rf|j| jd
d|
|	dnt | _|rx|j| jd
d|
|	dnt | _t|| _|j||fd|i|| _t|| _d S )N)r3   dtyper   z(self.qdim must be divisible by num_heads      /Only support head_dim <= 128 and divisible by 8      biasr   Tư>elementwise_affineepsr?   r3   )super__init__attn_precisionqdimkdim	num_headshead_dimscaleLinearq_projkv_proj	LayerNormnnIdentityq_normk_normDropout	attn_dropout_proj	proj_drop)selfrL   rM   rN   qkv_biasqk_normrZ   r\   rK   r3   r?   
operationsfactory_kwargs	__class__r   r   rJ   d   s"   

 $$zCrossAttention.__init__c                 C   s  |j \}}}|j \}}}| |||| j| j}	| |||d| j| j}
|
jdd\}}| |	}	| |}|durYt	|	d|\}}|j |	j ksWJ d|j  d|	j  |}	|	
dd }	|
dd }|
dd }t|	||| jd| jd	}| |}| |}|f}|S )
a5  
        Parameters
        ----------
        x: torch.Tensor
            (batch, seqlen1, hidden_dim) (where hidden_dim = num heads * head dim)
        y: torch.Tensor
            (batch, seqlen2, hidden_dim2)
        freqs_cis_img: torch.Tensor
            (batch, hidden_dim // 2), RoPE for image
        r   r"   Nqq: , q: r	   Tskip_reshaperK   )r   rR   r   rN   rO   rS   r&   rW   rX   r<   	transpose
contiguousr   rK   r[   r\   )r]   r   yfreqs_cis_imgbs1c_s2qkvkvqqcontextout	out_tupler   r   r   forward   s&   

$

zCrossAttention.forwardTFr>   r>   NNNNN__name__
__module____qualname____doc__rJ   rz   __classcell__r   r   rb   r   r=   `   s    "r=   c                       s,   e Zd ZdZd
 fdd	Zddd	Z  ZS )	AttentionzB
    We rename some layer names to align with flash attention
    TFr>   Nc                    s   t    || _|| _|| _| j| dksJ d| j| | _| jd dkr+| jdks/J d| jd | _|
j||d |||	d| _|rO|
j	| jd	d
||	dnt
 | _|ra|
j	| jd	d
||	dnt
 | _t
|| _|
j||||	d| _t
|| _d S )Nr   z$dim should be divisible by num_headsr@   rA   rB   rC   r$   )rD   r?   r3   TrE   rF   )r?   r3   )rI   rJ   rK   r#   rN   rO   rP   rQ   WqkvrT   rU   rV   rW   rX   rY   rZ   r[   r\   )r]   r#   rN   r^   r_   rZ   r\   rK   r?   r3   r`   rb   r   r   rJ      s   
 $$zAttention.__init__c              	   C   s   |j \}}}| |||d| j| jddddd}|d\}}}	| |}| |}|d ur`t	|||dd\}
}|
j |j krG|j |j ks[J d|
j  d	|j  d
|j  d|j  |
|}}t
|||	| jd| jd}| |}| |}|f}|S )Nr$   r   r   r      T)r    rd   re   z, kk: z, k: rg   )r   r   r%   rN   rO   permuter&   rW   rX   r<   r   rK   r[   r\   )r]   r   rl   BNCqkvrr   rt   ru   rv   kkry   r   r   r   rz      s    *

"


zAttention.forwardr{   r|   r}   r   r   rb   r   r      s    r   )F)r'   torch.nnrU   typingr   r   r   comfy.ldm.modules.attentionr   Tensorr!   r,   boolr<   Moduler=   r   r   r   r   r   <module>   s(    &*	
*N