o
    ip                     @   s   d dl Z d dlmZ d dlZd dlmZ G dd de jjZ	G dd de jjZ
G dd	 d	e jjZG d
d de jjZG dd de jjZG dd de jjZG dd de jjZG dd de jjZG dd de jjZG dd de jjZdS )    N)BertAttention)optimized_attention_for_devicec                       $   e Zd Z fddZdd Z  ZS )Dino2AttentionOutputc                    s"   t    |j||||d| _d S )Ndtypedevice)super__init__Lineardense)self	input_dim
output_dimlayer_norm_epsr   r   
operations	__class__ 8/mnt/c/Users/fbmor/ComfyUI/comfy/image_encoders/dino2.pyr
      s   
zDino2AttentionOutput.__init__c                 C   s
   |  |S N)r   r   xr   r   r   forward   s   
zDino2AttentionOutput.forward__name__
__module____qualname__r
   r   __classcell__r   r   r   r   r          r   c                       r   )Dino2AttentionBlockc                    s4   t    t|||||| _t||||||| _d S r   )r	   r
   r   	attentionr   output)r   	embed_dimheadsr   r   r   r   r   r   r   r
      s   
zDino2AttentionBlock.__init__c                 C   s   |  | |||S r   )r"   r!   )r   r   maskoptimized_attentionr   r   r   r      s   zDino2AttentionBlock.forwardr   r   r   r   r   r       s    r    c                       r   )
LayerScalec                    s(   t    tjtj|||d| _d S )Nr   r   )r	   r
   torchnn	Parameteremptylambda1)r   dimr   r   r   r   r   r   r
      s   
zLayerScale.__init__c                 C   s   |t j| j|j|j S r   )comfymodel_managementcast_to_devicer-   r   r   r   r   r   r   r      s   zLayerScale.forwardr   r   r   r   r   r'      r   r'   c                       s8   e Zd Zdef fddZdejdejfddZ  ZS )	Dinov2MLPhidden_sizec                    sJ   t    d}t|| }|j||d||d| _|j||d||d| _d S )N   Tbiasr   r   )r	   r
   intr   fc1fc2)r   r3   r   r   r   	mlp_ratiohidden_featuresr   r   r   r
   #   s
   
zDinov2MLP.__init__hidden_statereturnc                 C   s&   |  |}tjj|}| |}|S r   )r8   r)   r*   
functionalgelur9   )r   r<   r   r   r   r   +   s   

zDinov2MLP.forward)	r   r   r   r7   r
   r)   Tensorr   r   r   r   r   r   r2   "   s    r2   c                       r   )	SwiGLUFFNc                    sn   t    | }}t|d }t|d d d d d }|j|d| d||d| _|j||d||d| _d S )Nr4               Tr5   )r	   r
   r7   r   
weights_inweights_out)r   r.   r   r   r   in_featuresout_featuresr;   r   r   r   r
   2   s   
zSwiGLUFFN.__init__c                 C   s8   |  |}|jddd\}}tjj|| }| |S )NrB   r.   )rF   chunkr)   r*   r>   silurG   )r   r   x1x2r   r   r   r   ;   s   

zSwiGLUFFN.forwardr   r   r   r   r   rA   1   s    	rA   c                       r   )
Dino2Blockc                    s   t    t||||||| _t||||| _t||||| _|r*t||||| _nt	||||| _|j
||||d| _|j
||||d| _d S )Nepsr   r   )r	   r
   r    r!   r'   layer_scale1layer_scale2rA   mlpr2   	LayerNormnorm1norm2)r   r.   	num_headsr   r   r   r   use_swiglu_ffnr   r   r   r
   C   s   
zDino2Block.__init__c                 C   s<   ||  | | |d | }|| | | | }|S r   )rS   r!   rW   rT   rU   rX   )r   r   r&   r   r   r   r   O   s   zDino2Block.forwardr   r   r   r   r   rP   B   s    rP   c                       &   e Zd Z fddZdddZ  ZS )Dino2Encoderc	           	   	      s:   t    tj fddt|D | _d S )Nc                    s"   g | ]}t  d qS )rZ   )rP   ).0_r   r.   r   r   rY   r   rZ   r   r   
<listcomp>X   s    z)Dino2Encoder.__init__.<locals>.<listcomp>)r	   r
   r)   r*   
ModuleListrangelayer)	r   r.   rY   r   
num_layersr   r   r   rZ   r   r`   r   r
   V   s   
zDino2Encoder.__init__Nc                 C   sh   t |jddd}|d ur|dk rt| j| }d }t| jD ]\}}|||}||kr/| }q||fS )NFT)small_inputr   )r   r   lenrd   	enumerateclone)r   r   intermediate_outputr&   intermediateird   r   r   r   r   [   s   
zDino2Encoder.forwardr   r   r   r   r   r   r\   U   s    r\   c                       s&   e Zd Zd	 fdd	Zdd Z  ZS )
Dino2PatchEmbeddingsrC        Nc              	      s(   t    |j||||d||d| _d S )NT)in_channelsout_channelskernel_sizestrider6   r   r   )r	   r
   Conv2d
projection)r   r.   num_channels
patch_size
image_sizer   r   r   r   r   r   r
   k   s   
zDino2PatchEmbeddings.__init__c                 C   s   |  |dddS )NrB      )ru   flatten	transpose)r   pixel_valuesr   r   r   r   w   s   zDino2PatchEmbeddings.forward)rC   rn   ro   NNNr   r   r   r   r   rm   j   s    rm   c                       r   )Dino2Embeddingsc              	      s   t    d}d}t||||||d| _tjtjd|| d d |||d| _tjtjdd|||d| _	tjtjd|||d| _
d S )Nrn   ro   )rw   rx   r   r   r   ry   rB   r   )r	   r
   rm   patch_embeddingsr)   r*   r+   r,   position_embeddings	cls_token
mask_token)r   r.   r   r   r   rw   rx   r   r   r   r
   |   s   
* zDino2Embeddings.__init__c                 C   sZ   |  |}tj| jj|j|jd|jd dd|fdd}|t	j
| j|j|j }|S )Nr(   r   rJ   ry   rK   )r~   r)   catr   tor   r   expandshaper/   r0   r1   r   )r   r|   r   r   r   r   r      s   
2zDino2Embeddings.forwardr   r   r   r   r   r}   {   s    
r}   c                       r[   )Dinov2Modelc           
   
      st   t    |d }|d }|d }|d }|d }	t||||| _t||||||||	d| _|j||||d| _d S )Nnum_hidden_layersr3   num_attention_headsr   rZ   r]   rQ   )r	   r
   r}   
embeddingsr\   encoderrV   	layernorm)
r   config_dictr   r   r   re   r.   r$   r   rZ   r   r   r   r
      s   
zDinov2Model.__init__Nc                 C   sH   |  |}| j||d\}}| |}|d d dd d f }|||d fS )N)rj   r   )r   r   r   )r   r|   attention_maskrj   r   rl   pooled_outputr   r   r   r      s
   

zDinov2Model.forward)NNr   r   r   r   r   r      s    r   )r)   comfy.text_encoders.bertr   comfy.model_managementr/   comfy.ldm.modules.attentionr   r*   Moduler   r    r'   r2   rA   rP   r\   rm   r}   r   r   r   r   r   <module>   s    	
