o
    i                     @   s   d dl mZ d dlZd dlmZ d dlZd dlZd dlZG dd dejZ	G dd dej
ZG dd	 d	ejZG d
d dejZdddZdS )    )Qwen2TokenizerN)sd1_clipc                       s"   e Zd Zdi f fdd	Z  ZS )Qwen3TokenizerNc                    sF   t jt jt jtd}t j|dddtdddddd|d d S )	Nqwen25_tokenizerFi   qwen3_2bii  [P )pad_with_endembedding_sizeembedding_keytokenizer_classhas_start_tokenhas_end_tokenpad_to_max_length
max_length
min_length	pad_tokentokenizer_data)	ospathjoindirnamerealpath__file__super__init__r   )selfembedding_directoryr   tokenizer_path	__class__ 6/mnt/c/Users/fbmor/ComfyUI/comfy/text_encoders/ovis.pyr   	   s   (zQwen3Tokenizer.__init____name__
__module____qualname__r   __classcell__r    r    r   r!   r      s    r   c                       s0   e Zd Zdi f fdd	Zd fdd	Z  ZS )OvisTokenizerNc                    s   t  j||dtd d| _d S )Nr   )r   r   name	tokenizerz<|im_start|>user
Describe the image by detailing the color, quantity, text, shape, size, texture, spatial relationships of the objects and background: {}<|im_end|>
<|im_start|>assistant
<think>

</think>

)r   r   r   llama_template)r   r   r   r   r    r!   r      s   
zOvisTokenizer.__init__Fc                    s>   |d u r| j |}n||}t j|f|dd|}|S )NT)return_word_idsdisable_weights)r*   formatr   tokenize_with_weights)r   textr+   r*   kwargs
llama_texttokensr   r    r!   r.      s
   
z#OvisTokenizer.tokenize_with_weights)FN)r#   r$   r%   r   r.   r&   r    r    r   r!   r'      s    r'   c                       s*   e Zd Zdddddi f fdd	Z  ZS )Ovis25_2BModelcpulastNTc                    s2   t  j|||i |ddidtjjj|dd|d d S )Npadr   FT)devicelayer	layer_idxtextmodel_json_configdtypespecial_tokenslayer_norm_hidden_statemodel_classenable_attention_masksreturn_attention_maskszero_out_maskedmodel_options)r   r   comfytext_encodersllama	Ovis25_2B)r   r7   r8   r9   r;   attention_maskrB   r   r    r!   r      s   2zOvis25_2BModel.__init__r"   r    r    r   r!   r3      s    "r3   c                       s2   e Zd Zddi f fdd	Zd fdd	Z  ZS )	OvisTEModelr4   Nc                    s   t  j||dt|d d S )Nr   )r7   r;   r(   
clip_modelrB   )r   r   r3   r   r7   r;   rB   r   r    r!   r   "   s   zOvisTEModel.__init__c           
         s   t  |\}}|d d }d}|dkrQt|D ]!\}}|d }	t|	s9t|	tjr9|	dkr9|dk r9|}|d7 }q|jd |d krQ||d  d dkrQ|d7 }|d d |d f }||i fS )Nr   r   rK   i        )	r   encode_token_weights	enumeratetorch	is_tensor
isinstancenumbersIntegralshape)
r   token_weight_pairstemplate_endoutpooled	tok_pairscount_im_startivelemr   r    r!   rN   %   s"   

z OvisTEModel.encode_token_weights)rK   )r#   r$   r%   r   rN   r&   r    r    r   r!   rH   !   s    rH   c                    s   G  fdddt }|S )Nc                       s(   e Zd Zddi f fdd	Z  ZS )zte.<locals>.OvisTEModel_r4   Nc                    s:   d ur}d ur|  }|d< t j|||d d S )Nquantization_metadata)r7   r;   rB   )copyr   r   rJ   )r   dtype_llamallama_quantization_metadatar    r!   r   <   s   z!te.<locals>.OvisTEModel_.__init__r"   r    ra   rb   r   r!   OvisTEModel_;   s     rd   )rH   )ra   rb   rd   r    rc   r!   te:   s   re   )NN)transformersr   comfy.text_encoders.llamarC   r   r   rP   rS   SDTokenizerr   SD1Tokenizerr'   SDClipModelr3   SD1ClipModelrH   re   r    r    r    r!   <module>   s    