o
    i%+                     @   sB  d dl mZ d dlZd dlmZ d dlmZ d dlZd dl	Zd dlm
Z
 ddlmZ G dd	 d	e
jZG d
d de
jZeG dd dZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd de
jZG d d! d!e
jZdS )"    )	dataclassN)nn)
functional)sd1_clip   )SPieceTokenizerc                       s*   e Zd Zdi f fdd	Zdd Z  ZS )JinaClip2TokenizerNc                    s>   | dd }t j|dddtdddddddddd	|d
 d S )Nspiece_modelF   jina_clip_2Ti    r      )add_bosadd_eos)pad_with_endembedding_sizeembedding_keytokenizer_classhas_start_tokenhas_end_tokenpad_to_max_length
max_length
min_length	pad_token	end_tokentokenizer_argstokenizer_data)getsuper__init__r   )selfembedding_directoryr   	tokenizer	__class__ =/mnt/c/Users/fbmor/ComfyUI/comfy/text_encoders/jina_clip_2.pyr      s   2zJinaClip2Tokenizer.__init__c                 C   s   d| j  iS )Nr	   )r!   serialize_modelr   r$   r$   r%   
state_dict      zJinaClip2Tokenizer.state_dict)__name__
__module____qualname__r   r(   __classcell__r$   r$   r"   r%   r      s    r   c                       s"   e Zd Zdi f fdd	Z  ZS )JinaClip2TokenizerWrapperNc                    s   t  j||tdd d S )Nr   )r    r   r!   name)r   r   r   )r   r    r   r"   r$   r%   r      s   z"JinaClip2TokenizerWrapper.__init__r*   r+   r,   r   r-   r$   r$   r"   r%   r.      s    r.   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< d	Z	eed
< dZ
eed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dS )XLMRobertaConfigi 
vocab_sizer   type_vocab_sizer
   hidden_size   num_hidden_layers   num_attention_headsg     @rotary_emb_basei   intermediate_sizegelu
hidden_actg?hidden_dropout_probattention_probs_dropout_probgh㈵>layer_norm_epsr   bos_token_idr   eos_token_idpad_token_idN)r*   r+   r,   r2   int__annotations__r3   r4   r6   r8   r9   floatr:   r<   strr=   r>   r?   r@   rA   rB   r$   r$   r$   r%   r1      s   
 r1   c                       (   e Zd Zd fdd	ZdddZ  ZS )XLMRobertaEmbeddingsNc                    sD   t    |j}|j|j||j||d| _|j|j|||d| _d S )N)padding_idxdevicedtyperJ   rK   )	r   r   r4   	Embeddingr2   rB   word_embeddingsr3   token_type_embeddingsr   configrJ   rK   ops	embed_dimr"   r$   r%   r   0   s   
zXLMRobertaEmbeddings.__init__c                 C   sR   |d ur|d u r|  |}|d ur'tj|jd |jtjd}| |}|| }|S )Nr   rL   )rN   torchzerosshaperJ   int32rO   )r   	input_ids
embeddingstoken_type_idsrO   r$   r$   r%   forward6   s   

zXLMRobertaEmbeddings.forwardNNNNNr*   r+   r,   r   r[   r-   r$   r$   r"   r%   rH   /   s    rH   c                       s0   e Zd Zd fdd	Zd	ddZdd Z  ZS )
RotaryEmbeddingNc              	      sR   t    d|tjd|d|tjd|   }| jd|dd d| _d | _d | _d S )N      ?r   r   rL   inv_freqF)
persistent)	r   r   rT   arangefloat32register_buffer_seq_len_cached_cos_cached_sin_cached)r   dimbaserJ   ra   r"   r$   r%   r   A   s   
"
zRotaryEmbedding.__init__c                 C   s   || j ks| jd u s| jj|ks| jj|krI|| _ tj||tjd}t|| jj	|jd}tj
||fdd}| 	|| _| 	|| _d S d S )NrL   rJ   ri   )rf   rg   rJ   rK   rT   rc   rd   outerra   tocatcossinrh   )r   seqlenrJ   rK   tfreqsembr$   r$   r%   _update_cos_sin_cacheI   s   ,z%RotaryEmbedding._update_cos_sin_cachec                 C   s   |j \}}}}| j||j|jd | jd | d|d|}| jd | d|d|}dd }	|| |	||  }
|| |	||  }|
|fS )NrL   r   c                 S   sD   | j d d }| dd |f | d|d f }}tj| |fddS )Nrl   r   .rm   )rV   rT   rp   )xsizex1x2r$   r$   r%   rotate_halfY   s   "z,RotaryEmbedding.forward.<locals>.rotate_half)rV   rw   rJ   rK   rg   viewrh   )r   qkbatchrs   headshead_dimrq   rr   r|   q_embedk_embedr$   r$   r%   r[   R   s   zRotaryEmbedding.forwardNr]   )r*   r+   r,   r   rw   r[   r-   r$   r$   r"   r%   r_   @   s    
	r_   c                       rG   )MHANc                    sh   t    |j}|j| _||j | _t| j|j|d| _|j	|d| ||d| _
|j	||||d| _d S )Nrk      rL   )r   r   r4   r8   	num_headsr   r_   r9   
rotary_embLinearWqkvout_projrP   r"   r$   r%   r   c   s   
zMHA.__init__c                 C   s   |  |}|j\}}}|||d| j| j}|d\}}	}
| ||	\}}	|dd}|	dd}	|
dd}
|||	|
| j|dd}| |S )Nr   r   r   T)r   maskskip_reshape)	r   rV   r}   r   r   unbindr   	transposer   )r   rx   r   optimized_attentionqkv
batch_sizeseq_len_r~   r   voutr$   r$   r%   r[   m   s   

zMHA.forwardr\   r]   r^   r$   r$   r"   r%   r   b   s    
r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )MLPNc                    sF   t    |j|j|j||d| _tj| _|j|j|j||d| _	d S )NrL   )
r   r   r   r4   r:   fc1Fr;   
activationfc2r   rQ   rJ   rK   rR   r"   r$   r%   r   ~   s   
zMLP.__init__c                 C   s"   |  |}| |}| |}|S r   )r   r   r   )r   rx   r$   r$   r%   r[      s   


zMLP.forwardr\   r^   r$   r$   r"   r%   r   }   s    r   c                       rG   )BlockNc                    s~   t    t||||d| _t|j| _|j|j	|j
||d| _t||||d| _t|j| _|j|j	|j
||d| _d S NrJ   rK   rR   )epsrJ   rK   )r   r   r   mixerr   Dropoutr=   dropout1	LayerNormr4   r?   norm1r   mlpdropout2norm2r   r"   r$   r%   r      s   
zBlock.__init__c                 C   sF   | j |||d}| | || }| |}| | || }|S )Nr   r   )r   r   r   r   r   r   )r   hidden_statesr   r   	mixer_outmlp_outr$   r$   r%   r[      s
   
zBlock.forwardr\   r]   r^   r$   r$   r"   r%   r      s    	r   c                       rG   )XLMRobertaEncoderNc                    s4   t    t fddt jD | _d S )Nc                    s   g | ]
}t  d qS )r   )r   ).0r   rQ   rJ   rK   rR   r$   r%   
<listcomp>   s    z.XLMRobertaEncoder.__init__.<locals>.<listcomp>)r   r   r   
ModuleListranger6   layersr   r"   r   r%   r      s   
*zXLMRobertaEncoder.__init__c                 C   s:   t jjjj|j|d udd}| jD ]	}||||d}q|S )NT)r   small_inputr   )comfyldmmodules	attentionoptimized_attention_for_devicerJ   r   )r   r   attention_maskr   layerr$   r$   r%   r[      s   
zXLMRobertaEncoder.forwardr\   r   r^   r$   r$   r"   r%   r      s    r   c                       s6   e Zd Zd fdd	Zddddddg fddZ  ZS )XLMRobertaModel_Nc                    sX   t    t||||d| _|j|j|j||d| _t	|j
| _t||||d| _d S r   )r   r   rH   rY   r   r4   r?   emb_lnr   r   r=   emb_dropr   encoderr   r"   r$   r%   r      s
   
zXLMRobertaModel_.__init__Tc	                 C   s   | j ||d}	| |	}	| |	}	d }
|d ur<d||	j|jd dd|jd f }
|
|
tj	t
|	jj }
| j|	|
d}d }|d u rP|jdd}n||j}||d jdd|jddd	 }|d |fS )
N)rX   rY   r`   r   r   rl   )r   rm   T)ri   keepdim)rY   r   r   ro   rK   reshaperV   masked_fillrT   boolfinfomaxr   mean	unsqueezesum)r   rX   r   embeds
num_tokensintermediate_outputfinal_layer_norm_intermediaterK   embeds_inforx   r   sequence_outputpooled_outputr$   r$   r%   r[      s   

* $
zXLMRobertaModel_.forwardr\   r^   r$   r$   r"   r%   r      s     r   c                       s4   e Zd Z fddZdd Zdd Zdd Z  ZS )	XLMRobertaModelc                    s<   t    tdi || _t| j|||d| _| jj| _d S )Nr   r$   )r   r   r1   rQ   r   modelr6   
num_layers)r   config_dictrK   rJ   
operationsr"   r$   r%   r      s   
zXLMRobertaModel.__init__c                 C   s
   | j jjS r   r   rY   rN   r'   r$   r$   r%   get_input_embeddings   s   
z$XLMRobertaModel.get_input_embeddingsc                 C   s   || j j_d S r   r   )r   rY   r$   r$   r%   set_input_embeddings   r)   z$XLMRobertaModel.set_input_embeddingsc                 O   s   | j |i |S r   )r   )r   argskwargsr$   r$   r%   r[      s   zXLMRobertaModel.forward)r*   r+   r,   r   r   r   r[   r-   r$   r$   r"   r%   r      s
    r   c                       $   e Zd Zddi f fdd	Z  ZS )JinaClip2TextModelcpuNc              
      s(   t  j||i tdddddd|d d S )Nr   r   r   )startendpadT)rJ   rK   textmodel_json_configmodel_classspecial_tokensenable_attention_masksreturn_attention_masksmodel_options)r   r   r   r   rJ   rK   r   r"   r$   r%   r      s   (zJinaClip2TextModel.__init__r0   r$   r$   r"   r%   r          r   c                       r   )JinaClip2TextModelWrapperr   Nc                    s   t  j||td|d d S )Nr   )rJ   rK   
clip_modelr/   r   )r   r   r   r   r"   r$   r%   r      s   z"JinaClip2TextModelWrapper.__init__r0   r$   r$   r"   r%   r      r   r   )dataclassesr   rT   r   torch.nnr   r   comfy.model_managementr   	comfy.opsr   spiece_tokenizerr   SDTokenizerr   SD1Tokenizerr.   r1   ModulerH   r_   r   r   r   r   r   r   SDClipModelr   SD1ClipModelr   r$   r$   r$   r%   <module>   s,   	"