o
    ik                     @   s   d dl mZ d dl mZ d dlmZ d dlZ d dlZd dlZd dlZ d dl	Z	d dl
Z G dd dejZddd	ZG d
d dejZG dd dZG dd dejjZdddZdS )    )sd1_clip)	sdxl_clip)T5TokenizerFastNc                       s*   e Zd Zdddddi f fdd	Z  ZS )
T5XXLModelcpulastNFc           	         s   t jt jt jtd}|dd }|d ur!| }||d< i |ddi}t j	|||||dddt
jjj|||d	
 d S )
Nzt5_config_xxl.jsont5xxl_quantization_metadataquantization_metadata
model_namet5xxl   r   )endpad)
devicelayer	layer_idxtextmodel_json_configdtypespecial_tokensmodel_classenable_attention_masksreturn_attention_masksmodel_options)ospathjoindirnamerealpath__file__getcopysuper__init__comfytext_encoderst5T5)	selfr   r   r   r   attention_maskr   r   r   	__class__ :/mnt/c/Users/fbmor/ComfyUI/comfy/text_encoders/sd3_clip.pyr"      s   0zT5XXLModel.__init____name__
__module____qualname__r"   __classcell__r+   r+   r)   r,   r      s    "r    c                 C   sF   i }d |}|| v r| | j|d< tj| |}|d ur!||d< |S )Nz!{}encoder.final_layer_norm.weightdtype_t5t5_quantization_metadata)formatr   r#   utilsdetect_layer_quantization)
state_dictprefixoutt5_keyquantr+   r+   r,   t5_xxl_detect   s   
r=   c                       s&   e Zd Zdi ddf fdd	Z  ZS )T5XXLTokenizerNM   ic                    sD   t jt jt jtd}t j||dddtdd|||d d S )Nt5_tokenizerF   r   )
embedding_directorypad_with_endembedding_sizeembedding_keytokenizer_classhas_start_tokenpad_to_max_length
max_length
min_lengthtokenizer_data)	r   r   r   r   r   r   r!   r"   r   )r'   rB   rK   rJ   rI   tokenizer_pathr)   r+   r,   r"   $   s   &zT5XXLTokenizer.__init__r-   r+   r+   r)   r,   r>   #   s    r>   c                   @   s:   e Zd Zdi fddZddefddZdd	 Zd
d ZdS )SD3TokenizerNc                 C   s2   t j||d| _tj||d| _t||d| _d S )N)rB   rK   )r   SDTokenizerclip_lr   SDXLClipGTokenizerclip_gr>   r   )r'   rB   rK   r+   r+   r,   r"   *   s   zSD3Tokenizer.__init__Ftextc                 K   sV   i }| j j||fi ||d< | jj||fi ||d< | jj||fi ||d< |S )Nglr   )rQ   tokenize_with_weightsrO   r   )r'   rR   return_word_idskwargsr:   r+   r+   r,   rU   /   s
   z"SD3Tokenizer.tokenize_with_weightsc                 C   s   | j |S N)rQ   
untokenize)r'   token_weight_pairr+   r+   r,   rY   6   s   zSD3Tokenizer.untokenizec                 C   s   i S rX   r+   r'   r+   r+   r,   r8   9   s   zSD3Tokenizer.state_dict)F)r.   r/   r0   r"   strrU   rY   r8   r+   r+   r+   r,   rM   )   s
    rM   c                       sN   e Zd Zdddddddi f fdd	Zdd Zd	d
 Zdd Zdd Z  ZS )SD3ClipModelTNFr   c	           	   	      s   t    t | _|rtjdd||dd|d| _| j| nd | _|r4tj	|||d| _
| j| nd | _
|rUtj|||}|| _t|||| jd| _| j| nd | _td|||| d S )NhiddenF)r   r   r   r   layer_norm_hidden_statereturn_projected_pooledr   )r   r   r   )r   r   r   r(   z@Created SD3 text encoder with: clip_l {}, clip_g {}, t5xxl {}:{})r!   r"   setdtypesr   SDClipModelrO   addr   	SDXLClipGrQ   r#   model_managementpick_weight_dtypet5_attention_maskr   r   loggingdebugr5   )	r'   rO   rQ   r%   r3   ri   r   r   r   r)   r+   r,   r"   =   s"   
zSD3ClipModel.__init__c                 C   sJ   | j d ur| j | | jd ur| j| | jd ur#| j| d S d S rX   )rO   set_clip_optionsrQ   r   )r'   optionsr+   r+   r,   rl   V   s   


zSD3ClipModel.set_clip_optionsc                 C   sD   | j d ur
| j   | jd ur| j  | jd ur | j  d S d S rX   )rO   reset_clip_optionsrQ   r   r[   r+   r+   r,   rn   ^   s   




zSD3ClipModel.reset_clip_optionsc                 C   s  |d }|d }|d }d }d }d }i }t |dks t |dkr| jd ur.| j|\}}	n
tjdtj d}	| jd urw| j|\}
}|d urnt	|j
d |
j
d }tj|d d d |f |
d d d |f gdd	}ntjj|
d
}nd }
tjdtj d}|d urtjj|dd|j
d  f}|}tj|	|fdd	}| jd ur| j|}|d d \}}| jr|d d |d< |d urtj||gdd	}n|}|d u rtjdtj d}|d u rtjdtj d}|||fS )NrT   rS   r   r   )r      )r   r   )dim)ro   r   )r   i   rA      r(   r_   )r   r?   rA   )r   i   )lenrO   encode_token_weightstorchzerosr#   rg   intermediate_devicerQ   minshapecatnn
functionalr   r   ri   )r'   token_weight_pairstoken_weight_pairs_ltoken_weight_pairs_gtoken_weight_pairs_t5lg_outpooledr:   extral_pooledg_outg_pooledcut_to	t5_outputt5_out	t5_pooledr+   r+   r,   rt   f   sH   

4

z!SD3ClipModel.encode_token_weightsc                 C   s4   d|v r
| j |S d|v r| j|S | j|S )Nz+text_model.encoder.layers.30.mlp.fc1.weightz*text_model.encoder.layers.1.mlp.fc1.weight)rQ   load_sdrO   r   )r'   sdr+   r+   r,   r      s
   zSD3ClipModel.load_sd)	r.   r/   r0   r"   rl   rn   rt   r   r1   r+   r+   r)   r,   r]   <   s    2r]   TFc                    s"   G  fdddt }|S )Nc                       s0   e Zd Zddi f fdd	Z  ZS )zsd3_clip.<locals>.SD3ClipModel_r   Nc              
      s8   d ur|  }|d< t j|||d d S )Nr   )rO   rQ   r%   r3   ri   r   r   r   )r    r!   r"   )r'   r   r   r   )r*   rQ   rO   r3   r%   ri   r4   r+   r,   r"      s    z(sd3_clip.<locals>.SD3ClipModel_.__init__r-   r+   rQ   rO   r3   r%   ri   r4   r)   r,   SD3ClipModel_   s    (r   )r]   )rO   rQ   r%   r3   r4   ri   r   r+   r   r,   sd3_clip   s   r   )r2   )TTTNNF)r#   r   r   transformersr   comfy.text_encoders.t5ru   r   comfy.model_managementrj   comfy.utilsrd   r   r=   rN   r>   rM   r{   Moduler]   r   r+   r+   r+   r,   <module>   s    
d