o
    i                     @   s   d dl mZ d dlZ ddlmZmZ d dlmZ d dlZd dl	Z	G dd dej
ZG dd	 d	eZG d
d dejZG dd dejZG dd deZdddZdS )    )sd1_clipN   )QwenImageTokenizerQwenImageTEModel)ByT5Tokenizerc                       s"   e Zd Zdi f fdd	Z  ZS )ByT5SmallTokenizerNc                    sB   t jt jt jtd}t j|dddtdddd|d
 d S )Nbyt5_tokenizerFi  
byt5_smallir   )	pad_with_endembedding_sizeembedding_keytokenizer_classhas_start_tokenpad_to_max_length
max_length
min_lengthtokenizer_data)	ospathjoindirnamerealpath__file__super__init__r   )selfembedding_directoryr   tokenizer_path	__class__ ?/mnt/c/Users/fbmor/ComfyUI/comfy/text_encoders/hunyuan_image.pyr   	   s   $zByT5SmallTokenizer.__init____name__
__module____qualname__r   __classcell__r    r    r   r!   r      s    r   c                       s6   e Zd Zdi f fdd	Zddef fddZ  ZS )	HunyuanImageTokenizerNc                    s(   t  j||d d| _t||d| _d S )N)r   r   z<|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>)r   r   llama_templater   byt5)r   r   r   r   r    r!   r      s   zHunyuanImageTokenizer.__init__Ftextc                    s   t  j||fi |}g }d}d}d}t||}	t||}
t||}||	 ||
 || t|dkrO| jjdtdd ||fi ||d< |S )	Nz	\"(.*?)\"u   ‘(.*?)’u   “(.*?)”r    c                 S   s
   d | S )NzText "{}". )format)ar    r    r!   <lambda>&   s   
 z=HunyuanImageTokenizer.tokenize_with_weights.<locals>.<lambda>r)   )	r   tokenize_with_weightsrefindallextendlenr)   r   map)r   r*   return_word_idskwargsouttext_prompt_textspattern_quote_doublepattern_quote_chinese_singlepattern_quote_chinese_doublematches_quote_doublematches_quote_chinese_singlematches_quote_chinese_doubler   r    r!   r/      s   


*z+HunyuanImageTokenizer.tokenize_with_weights)F)r#   r$   r%   r   strr/   r&   r    r    r   r!   r'      s    r'   c                       s*   e Zd Zdddddi f fdd	Z  ZS )Qwen25_7BVLIModelcpuhiddenNTc                    sT   | dd }|d ur| }||d< t j|||i |ddidtjjj|||d d S )Nllama_quantization_metadataquantization_metadatapadi[P F)devicelayer	layer_idxtextmodel_json_configdtypespecial_tokenslayer_norm_hidden_statemodel_classenable_attention_masksreturn_attention_masksmodel_options)getcopyr   r   comfytext_encodersllamaQwen25_7BVLI)r   rG   rH   rI   rK   attention_maskrQ   rD   r   r    r!   r   *   s
   0zQwen25_7BVLIModel.__init__r"   r    r    r   r!   r@   )       "r@   c                       s(   e Zd Zddddi f fdd	Z  ZS )ByT5SmallModelrA   lastNc                    sN   t jt jt jtd}t j||||||dddtj	j
jddd
 d S )Nzbyt5_config_small_glyph.jsonr   r   )endrF   T)
rG   rH   rI   rJ   rK   rQ   rL   rN   rO   zero_out_masked)r   r   r   r   r   r   r   r   rT   rU   t5T5)r   rG   rH   rI   rK   rQ   rJ   r   r    r!   r   3   s   0zByT5SmallModel.__init__r"   r    r    r   r!   rZ   2   s     rZ   c                       sV   e Zd Zdddi f fdd	Z fddZ fdd	Z fd
dZ fddZ  ZS )HunyuanImageTEModelTrA   Nc                    s<   t t| j||dt|d |rt|||d| _d S d | _d S )N	qwen25_7b)rG   rK   name
clip_modelrQ   )rG   rK   rQ   )r   r   r   r@   rZ   r	   )r   r)   rG   rK   rQ   r   r    r!   r   9   s   
zHunyuanImageTEModel.__init__c                    s~   |d d }d}|d d dkrt |dkrd}t j||d\}}}| jd ur:d|v r:| j|d }|d |d< |||fS )	Nra   r      $   )template_endr)   conditioning_byt5small)r3   r   encode_token_weightsr	   )r   token_weight_pairs	tok_pairsrg   condpextrar7   r   r    r!   ri   A   s   
z(HunyuanImageTEModel.encode_token_weightsc                    s*   t  | | jd ur| j| d S d S N)r   set_clip_optionsr	   )r   optionsr   r    r!   rp   N   s   
z$HunyuanImageTEModel.set_clip_optionsc                    s&   t    | jd ur| j  d S d S ro   )r   reset_clip_optionsr	   )r   r   r    r!   rr   S   s   

z&HunyuanImageTEModel.reset_clip_optionsc                    s    d|v r
| j |S t |S )Nz.encoder.block.0.layer.0.SelfAttention.o.weight)r	   load_sdr   )r   sdr   r    r!   rs   X   s   zHunyuanImageTEModel.load_sd)	r#   r$   r%   r   ri   rp   rr   rs   r&   r    r    r   r!   r`   8   s    r`   Tc                    s   G  fdddt }|S )Nc                       s*   e Zd Zddi f fdd	Z  ZS )zte.<locals>.QwenImageTEModel_rA   Nc                    s<   d ur|  }|d< d ur}t j|||d d S )NrD   )r)   rG   rK   rQ   )rS   r   r   )r   rG   rK   rQ   )r   r)   dtype_llamarD   r    r!   r   `   s   z&te.<locals>.QwenImageTEModel_.__init__r"   r    r)   ru   rD   r   r!   QwenImageTEModel__   rY   rw   )r`   )r)   ru   rD   rw   r    rv   r!   te^   s   rx   )TNN)rT   r   comfy.text_encoders.llama
qwen_imager   r   transformersr   r   r0   SDTokenizerr   r'   SDClipModelr@   rZ   r`   rx   r    r    r    r!   <module>   s    	&