o
    iB                     @   s  d dl mZ ddlZddlmZ ddlZddlZddlZddlZ								 	 			
		d*de	de	de	de
de	de
de
de
de
de
de
fddZd+ddZG dd dejZG dd  d ejZG d!d" d"ejZG d#d$ d$ejZG d%d& d&ejjZd,d(d)ZdS )-   )Qwen3Tokenizer    N)sd1_clip       @333333??           uP uJ ]P 	cfg_scaletemperaturetop_ptop_kmin_pseed
min_tokensmax_new_tokensaudio_start_idaudio_end_ideos_token_idc           1      C   s  |d u rg S | j }|d u rtj|rtj}ntj}| ||\}}}}|jd }g }g }tj	|d}|
| | jjj}||j|jd |	 |jg}t|jD ]}|tj|||dtj|||ddf qOtj|
}tjj|
ddD ]_}| jd ||||d |||d}| j|d d d df }|d	 }|d
kr|dd }|dd	 }||||   } n|dd } |d uo||k o|	|k }!|!r| d d |f  }"t| jj}#|#| d d d |f< |#| d d |d f< |!r|"| d d |f< |d ur|dkrt| |\}$}%|$d }&|#| | |&k < |d ur4|dkr4tj| dd}'|'jdddj }(|'||( k })|#| |)< |d urv|d
k rvtj!| dd\}*}+tj"tj|*dddd},|,|k}-|-dd df  |-ddd f< d|-d< |-#d|+|-})|#| |)< |dkr| | } tj$tj| ddd|d%d}.ntj&| dd}.|.' }/|/|kr |S | |/gg|\}0}%}%}%|0(|dd}tj)|tj*|df||jdgdd}||/|  |+| qt|S )Nr   )devicer   )r   dtypezLM sampling)desc)embeds
num_tokensintermediate_outputr   embeds_infopast_key_values         ?).r    N)dimT)r#   keepdim)
descending.).r   )num_samples	generator),execution_devicecomfymodel_managementshould_use_bf16torchbfloat16float32process_tokensshape	Generatormanual_seedtransformermodelconfignum_key_value_headshead_dimrangenum_hidden_layersappendemptyutilsProgressBarmodel_trangetologitsclonefinfor   mintopksoftmaxmaxvaluessortcumsumscattermultinomialsqueezeargmaxitemrepeatcatonesupdate_absolute)1r4   idsexecution_dtyper   r   r   r   r   r   r   r   r   r   r   r   r   attention_maskr   r   embeds_batchoutput_audio_codesr   r'   model_configpast_kv_shapexprogress_barstepoutputsnext_token_logitscond_logitsuncond_logits
cfg_logitsuse_eos_score	eos_scoreremove_logit_value
top_k_vals_min_valprobsp_maxindices_to_removesorted_logitssorted_indicescumulative_probssorted_indices_to_remove
next_tokentokenembed rr   7/mnt/c/Users/fbmor/ComfyUI/comfy/text_encoders/ace15.pysample_manual_loop_no_classes
   s   


*  
"
	$rt      c                 C   s   dd |D }|d }|dkrWdd |D }|d }d}t |t |k r6t |t | }| jd g| | }d}t |t |krRt |t | }| jd g| | }||g}n|g}t| |||||	|
|||d
S )Nc                 S      g | ]	}d d |D qS )c                 S      g | ]\}}|qS rr   rr   .0rp   rf   rr   rr   rs   
<listcomp>u       3generate_audio_codes.<locals>.<listcomp>.<listcomp>rr   ry   
inner_listrr   rr   rs   rz   u       z(generate_audio_codes.<locals>.<listcomp>r   r"   c                 S   rv   )c                 S   rw   rr   rr   rx   rr   rr   rs   rz   y   r{   r|   rr   r}   rr   rr   rs   rz   y   r   pad)r   r   r   r   r   r   r   r   )lenspecial_tokensrt   )r4   positivenegativer   
max_tokensr   r   r   r   r   r   neg_padpos_padrS   rr   rr   rs   generate_audio_codest   s    
r   c                       sR   e Zd Zdi f fdd	Zdddedefdd	Zdefd
dZdddZ  Z	S )ACE15TokenizerNc                    s   t  j||dtd d S )N	qwen3_06b)embedding_directorytokenizer_dataname	tokenizer)super__init__r   )selfr   r   	__class__rr   rs   r      s   zACE15Tokenizer.__init__F)return_yamlr   returnc                   s    fdddD }| d}t|tr |dr |d d |d< dd | D }t|r8tj|ddd	 }nd
}|sBd| dS |S )Nc                    s    i | ]}| v r|  |qS rr   popry   kkwargsrr   rs   
<dictcomp>   s
    
z0ACE15Tokenizer._metas_to_cot.<locals>.<dictcomp>)bpmdurationkeyscaletimesignaturer   /4c                 S   s8   i | ]\}}|d vr|t |tr| s|nt|qS )>   Nunspecified)
isinstancestrisdigitint)ry   r   vrr   rr   rs   r      s
    T)allow_unicode	sort_keys z<think>
z	
</think>)	getr   r   endswithitemsr   yamldumpstrip)r   r   r   
user_metasr   	meta_yamlrr   r   rs   _metas_to_cot   s   

zACE15Tokenizer._metas_to_cotc                    s   d} fdd|D  d}t|tr"|dr"|d d d< d }|dkr/d	d< nt|tttfrDtt| d
d< ntdd	fdd|D S )N)r   r   r   r   c                    s   i | ]	}|  |d qS )N/Ar   r   r   rr   rs   r      r   z0ACE15Tokenizer._metas_to_cap.<locals>.<dictcomp>r   r   r   r   r   z
30 secondsz secondsz;Unexpected type for duration key, must be str, int or float
c                 3   s$    | ]}d | d |  V  qdS )z- z: Nrr   r   )r   rr   rs   	<genexpr>   s   " z/ACE15Tokenizer._metas_to_cap.<locals>.<genexpr>)
r   r   r   r   r   floatmathceil	TypeErrorjoin)r   r   use_keysr   r   rr   )r   r   rs   _metas_to_cap   s   

zACE15Tokenizer._metas_to_capc              
      s  |  } d|  } dd} d|} dd}t|tr-t|d dd } d	} d
d}	 dd}
 dd} dd} dd} dd} dd}t|}| d< |d }t d|}t d|} fdddD } ds|	dd }j
d&d|i }|sdnj
d&i |}jd&i  }d}d }d!}|||  ||||  |||d ur|nd||||d"} fd#d| D }|||	|
|||||d$	|d%< |S )'Ncaption_negativelyricsr   lyrics_negativer   x   r   r   languager   r   Tr   r   r   r   r   r   r   r   r      r   r   c                    s,   i | ]}| v r| d dd  |qS )rf   r   r   )rsplitr   r   r   rr   rs   r      s
    z8ACE15Tokenizer.tokenize_with_weights.<locals>.<dictcomp>)bpm_negativeduration_negativekeyscale_negativetimesignature_negativelanguage_negativer   use_negative_captioncaptionz<think>

</think>z<|im_start|>system
# Instruction
Generate audio semantic tokens based on the given conditions:

<|im_end|>
<|im_start|>user
# Caption
{}

# Lyric
{}
<|im_end|>
<|im_start|>assistant
{}

<|im_end|>
z4# Languages
{}

# Lyric
{}<|endoftext|><|endoftext|>z# Instruction
Generate audio semantic tokens based on the given conditions:

# Caption
{}

# Metas
{}
<|endoftext|>
<|endoftext|>)	lm_promptlm_prompt_negativer   r   c                    s4   i | ]\}}|j j||d kofddi qS )r   disable_weightsT)r   tokenize_with_weights)ry   
prompt_keypromptr   return_word_idsr   rr   rs   r      s    
)	r   r   r   r   r   r   r   r   r   lm_metadatarr   )r   r   r   r   r   splitr   r   r   r   r   r   formatr   )r   textr   r   text_negativer   r   r   r   r   r   r   r   r   r   r   tokens_durationr   r   metas_negativerf   cot_textcot_text_negativemeta_caplm_templatelyrics_templateqwen3_06b_templatellm_promptsoutrr   r   rs   r      sd   





	

z$ACE15Tokenizer.tokenize_with_weights)F)
__name__
__module____qualname__r   boolr   r   r   r   __classcell__rr   rr   r   rs   r      s
    r   c                       *   e Zd Zdddddi f fdd	Z  ZS )Qwen3_06BModelcpulastNTc                    s0   t  j|||i |ddidtjjj|||d d S )Nr   [P Fr   layer	layer_idxtextmodel_json_configr   r   layer_norm_hidden_statemodel_classenable_attention_masksreturn_attention_masksmodel_options)r   r   r)   text_encodersllamaQwen3_06B_ACE15)r   r   r   r   r   rU   r   r   rr   rs   r      s   0zQwen3_06BModel.__init__r   r   r   r   r   rr   rr   r   rs   r          "r   c                       r   )Qwen3_2B_ACE15r   r   NTc                    T   | dd }|d ur| }||d< t j|||i |ddidtjjj|||d d S Nllama_quantization_metadataquantization_metadatar   r   Fr   )r   copyr   r   r)   r   r   Qwen3_2B_ACE15_lmr   r   r   r   r   rU   r   r   r   rr   rs   r      
   0zQwen3_2B_ACE15.__init__r   rr   rr   r   rs   r      r   r   c                       r   )Qwen3_4B_ACE15r   r   NTc                    r   r   )r   r  r   r   r)   r   r   Qwen3_4B_ACE15_lmr  r   rr   rs   r     r  zQwen3_4B_ACE15.__init__r   rr   rr   r   rs   r    r   r  c                       sR   e Zd Zddddi f fdd	Zdd Zdd Zd	d
 Zdd ZdddZ  Z	S )ACE15TEModelr   Nc                    s   t    |d u r|}d }d| _|dkrt}d| _n|dkr t}|| _t|||d| _|d ur;t| | j||||d t	||g| _
d S )Ng      ?qwen3_4bg      ?qwen3_2b)r   r   r   )r   r   constantr  r   lm_modelr   r   setattrsetdtypes)r   r   r   dtype_llamar  r   r4   r   rr   rs   r     s   
zACE15TEModel.__init__c                 C   s   |d }|d }| j dd i | j |\}}}| j ddgi | j |\}}}d|d d df i}	|d }
|
d rktt| | j| j |d |d	 |
d
 |
d
 |
d |
d |
d |
d |
d |
d d}|g|	d< |d |	fS )Nr   r   r   r   conditioning_lyricsr   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   audio_codes)r   set_clip_optionsencode_token_weightsr   getattrr  )r   token_weight_pairstoken_weight_pairs_basetoken_weight_pairs_lyricsbase_outrf   extralyrics_embedsextra_lr   r   r  rr   rr   rs   r  $  s   R

z!ACE15TEModel.encode_token_weightsc                 C   s4   | j | t| | jd }|d ur|| d S d S N)r   r  r  r  )r   optionsr  rr   rr   rs   r  6  s
   zACE15TEModel.set_clip_optionsc                 C   s0   | j   t| | jd }|d ur|  d S d S r  )r   reset_clip_optionsr  r  )r   r  rr   rr   rs   r  <  s
   
zACE15TEModel.reset_clip_optionsc                 C   s@   d|v r|d j }|d dkr| j|S t| | j|S d S )Nz.model.layers.0.post_attention_layernorm.weightr   ru   )r0   r   load_sdr  r  )r   sdr0   rr   rr   rs   r  B  s   
zACE15TEModel.load_sdc                 C   sd   | di }| j}tj|r|d9 }| dg }ttdd |}|| dd7 }|| d d S )	Nr   g      ?r   c                 S   s   t | S r  )r   )arr   rr   rs   <lambda>Q  s    z9ACE15TEModel.memory_estimation_function.<locals>.<lambda>r   r   ru   )r   r
  r)   r*   r+   summap)r   r  r   r   r
  r   rr   rr   rs   memory_estimation_functionJ  s   z'ACE15TEModel.memory_estimation_functionr  )
r   r   r   r   r  r  r  r  r%  r   rr   rr   r   rs   r    s    r  r	  c                    s   G  fdddt }|S )Nc                       s*   e Zd Zddi f fdd	Z  ZS )zte.<locals>.ACE15TEModel_r   Nc                    s2   d ur|  }|d< t j|||d d S )Nr   )r   r  r  r   r   )r  r   r   )r   r   r   r   )r   r  r   r  rr   rs   r   W  s   z"te.<locals>.ACE15TEModel_.__init__r   rr   r  r   r  r   rs   ACE15TEModel_V  r   r'  )r  )r  r   r  r'  rr   r&  rs   teU  s   r(  )NNr   r   r   Nr   r   r   r	   r
   r   r   )r   ru   r   r   r   r   r   r   )NNr	  )animar   comfy.text_encoders.llamar)   r   r,   r   r   comfy.utilsr   r   rt   r   SD1Tokenizerr   SDClipModelr   r   r  nnModuler  r(  rr   rr   rr   rs   <module>   sd    	


jl		F