o
    ¶Ïi©“ ã                	   @  sÚ  d dl mZ d dlZd dlZd dlmZ d dlZd dlmZ d dl	m
Z
 ddlmZmZ ddlmZ dd	lmZ dd
lmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl Z d dl!Z!d dl	Zddl"m#Z# ddl"m$Z$ ddl"m%Z% ddl"m&Z& ddl"m'Z' ddl"m(Z( d dl)Zd dl*Zd dl+Zd dl,Zd dl-Zd dl.Zd dl/Zd dl0Zd dl1Zd dl2Zd dl3Zd dl4Zd dl5Zd dl6Zd dl7Zd dl8Zd dl9Zd dl:Zd dl;Zd dl<Zd dl=Zd dl>Zd dl?Zd dl@Zd dlAZd dlBZd dlCZd dlDZd dlEZd dlFZd dlGZd dlHZd dlIZd dlJZd dlKZd dlLZd dlMZdd„ ZNdd„ ZOG dd„ dƒZPG dd„ dƒZQG dd„ dƒZRdd„ ZSG dd„ deƒZTdeTjUi dfd d!„ZVdeTjUi dfd"d#„ZWG d$d%„ d%eƒZXd&d'„ ZYd(d)„ ZZd*d+„ Z[g deTjUi dfd,d-„Z\d.d/„ Z]d0d1„ Z^dGd3d4„Z_d2d2ddd2i i dfd5d6„Z`di i dfd7d8„Zadi i dfd9d:„Zbd2d2ddd2i i ddf	d;d<„Zci ddfd=d>„Zdi dfd?d@„ZedHdAdB„ZfdHdCdD„Zgddddi fdEdF„ZhdS )Ié    )ÚannotationsN)ÚEnum)Úmodel_management)ÚProgressBaré   )ÚAutoencoderKLÚAutoencodingEngine)ÚStageA)ÚStageC_coder)ÚAudioOobleckVAE)Úclip_vision)Úgligen)Údiffusers_convert)Úmodel_detection)Úsd1_clip)Ú	sdxl_clipc                 C  sÜ   i }| d urt j | j|¡}|d urt j |j|¡}t j |¡}t j ||¡}| d ur6|  	¡ }| 
||¡}nd}d }|d urI| 	¡ }	|	 
||¡}
nd}
d }	t|ƒ}t|
ƒ}
|D ]}||vri||
vrit d |¡¡ qW||	fS )N© zNOT LOADED {})ÚcomfyÚloraÚmodel_lora_keys_unetÚmodelÚmodel_lora_keys_clipÚcond_stage_modelÚlora_convertÚconvert_loraÚ	load_loraÚcloneÚadd_patchesÚsetÚloggingÚwarningÚformat)r   Úclipr   Ústrength_modelÚstrength_clipÚkey_mapÚloadedÚnew_modelpatcherÚkÚnew_clipÚk1Úxr   r   ú&/mnt/c/Users/fbmor/ComfyUI/comfy/sd.pyÚload_lora_for_modelsM   s0   €r-   c                 C  s°  i }| durt j | j|¡}|durt j |j|¡}t dt|ƒ› d¡ t j	 
|¡}t j ||¡}t dt|ƒ› d¡ i }i }| ¡ D ]\}	}
t|
t jjƒrU|
||	< qE|
||	< qEt dt|ƒ› dt|ƒ› d¡ tƒ }tƒ }| durÉ|  ¡ }|r…| ||¡}| |¡ t j ¡ }t|j ¡  ¡ ƒ}| ¡ D ]\}	}|	|v r­|j|	||d | |	¡ q—t d	|	› ¡ q—| |j¡}| ¡ d
krÈ| d|¡ nd}|dur| ¡ }|rá| ||¡}| |¡ t j ¡ }t|j ¡  ¡ ƒ}| ¡ D ]\}	}|	|v r	|j|	||d | |	¡ qó| |j¡}| ¡ d
kr|j d|¡ nd}|D ]0}||vrR||vrR|| }
t|
ƒj}t|
t ƒrFd|
d
 › d}t d|› d|› d¡ q#||fS )an  
    Load LoRA in bypass mode without modifying base model weights.

    Instead of patching weights, this injects the LoRA computation into the
    forward pass: output = base_forward(x) + lora_path(x)

    Non-adapter patches (bias diff, weight diff, etc.) are applied as regular patches.

    This is useful for training and when model weights are offloaded.
    Nz[BypassLoRA] key_map has z entriesz[BypassLoRA] loaded has z[BypassLoRA] z bypass adapters, z regular patches)Ústrengthz2[BypassLoRA] Adapter key not in model state_dict: r   Úbypass_loraztuple(ú)zNOT LOADED: z (type=)!r   r   r   r   r   r   r   ÚdebugÚlenr   r   r   ÚitemsÚ
isinstanceÚweight_adapterÚWeightAdapterBaser   r   r   ÚupdateÚBypassInjectionManagerÚ
state_dictÚkeysÚadd_adapterÚaddr    Úcreate_injectionsÚget_hook_countÚset_injectionsÚpatcherÚtypeÚ__name__Útuple)r   r"   r   r#   r$   r%   r&   Úbypass_patchesÚregular_patchesÚkeyÚ
patch_datar(   r*   r'   Úpatched_keysÚmanagerÚmodel_sd_keysÚadapterÚ
injectionsr)   Úclip_managerÚclip_sd_keysÚclip_injectionsr+   Ú
patch_typer   r   r,   Úload_bypass_lora_for_modelsl   sz   

 

€




€€
€rQ   c                   @  s¼   e Zd Zdddi dg i dfdd„Zd-dd„Zd.d	d
„Zdd„ Zdd„ Zd-dd„Zd/dd„Z	di dfd0dd„Z
d1dd„Zdd„ Zd-dd„Zdd „ Zi fd!d"„Zd#d$„ Zd2d)d*„Zd3d+d,„ZdS )4ÚCLIPNFr   c	                 C  s„  |rd S |j  ¡ }	|j}
|j}| dt ¡ ¡}| dt ¡ ¡}| dd ¡}|d u r.t |¡}||	d< | dt 	|||t 
|¡ ¡¡|	d< ||	d< |
di |	¤Ž| _| jjD ]}t ||¡so|}|	d |kro| j |¡ t d¡ qTt | j¡ |||d| _|rƒtjjntjj}|| j||d	| _| j tj¡ tjjj| j_d
| j_d | _t |ƒdkrt!|t"ƒrÚ|D ]%}|  #|¡\}}t |ƒdkrÊt d $|¡¡ t |ƒdkrØt %d $|¡¡ q³n@| j#|d
d\}}t |ƒdkrt"t&dd„ |ƒƒ}t |ƒdkrt d $|¡¡ nt %d $|¡¡ t |ƒdkrt %d $|¡¡ |	d |kr*tj'| jgd
d d | _(d| _)t *d $|||	d |¡¡ i | _+d S )NÚload_deviceÚoffload_deviceÚdtypeÚinitial_deviceÚdeviceÚmodel_optionszHad to shift TE back.)Úembedding_directoryÚtokenizer_data©rS   rT   Tr   zclip missing: {}zclip unexpected: {})Ú
full_modelc                 S  s   d| vod| vS )Nz.logit_scalez#.transformer.text_projection.weightr   ©Úar   r   r,   Ú<lambda>   s    zCLIP.__init__.<locals>.<lambda>zclip unexpected {}:©Úforce_full_loadFzSCLIP/text encoder model load device: {}, offload device: {}, current: {}, dtype: {}r   ),ÚparamsÚcopyr"   Ú	tokenizerÚgetr   Útext_encoder_deviceÚtext_encoder_offload_deviceÚtext_encoder_dtypeÚtext_encoder_initial_deviceÚ
dtype_sizer   ÚdtypesÚsupports_castÚtor   r    Úarchive_model_dtypesr   Úmodel_patcherÚModelPatcherÚCoreModelPatcherr@   Úset_model_compute_dtypeÚtorchÚfloat32ÚhooksÚEnumHookModeÚMinVramÚ	hook_modeÚis_clipÚapply_hooks_to_condsr2   r4   ÚlistÚload_sdr!   r1   ÚfilterÚload_models_gpuÚ	layer_idxÚuse_clip_scheduleÚinfoÚtokenizer_options)ÚselfÚtargetrY   Úno_initrZ   Ú
parametersr9   rX   Údisable_dynamicrb   r"   rd   rS   rT   rU   Údtrp   ÚcÚmÚuÚm_filterr   r   r,   Ú__init__Ð   sj   

$
€
€ú
zCLIP.__init__c                 C  sR   t dd}| jj|d|_| j|_| j|_| j|_| j ¡ |_| j|_| j	|_	|S )NT)r…   )r‡   )
rR   r@   r   r   rd   r   r‚   rc   r€   rz   )rƒ   r‡   Únr   r   r,   r     s   
z
CLIP.cloneç      ð?c                 C  s   | j  |||¡S ©N)r@   r   )rƒ   ÚpatchesÚstrength_patchr#   r   r   r,   r     ó   zCLIP.add_patchesc                 C  s   || j |< d S r   )r‚   )rƒ   Úoption_nameÚvaluer   r   r,   Úset_tokenizer_option  ó   zCLIP.set_tokenizer_optionc                 C  ó
   || _ d S r   )r   )rƒ   r   r   r   r,   Ú
clip_layer!  ó   
zCLIP.clip_layerc                 K  sR   |  di ¡}t| jƒdkri | j¥|¥}t|ƒdkr||d< | jj||fi |¤ŽS )Nr‚   r   )re   r2   r‚   rd   Útokenize_with_weights)rƒ   ÚtextÚreturn_word_idsÚkwargsr‚   r   r   r,   Útokenize$  s   zCLIP.tokenizeÚpooled_dictú	dict[str]c                 C  s   | j r| j |d< |S )Nru   )rz   )rƒ   r    r   r   r,   Úadd_hooks_to_dict,  s   
zCLIP.add_hooks_to_dictTÚadd_dictc                 C  sÐ  g }| j j}|d u s| js.|rdnd}| j||dd}| d¡}	| |¡ | |	|g¡ |S | ¡ }
| j 	¡  | j
d urE| j d| j
i¡ |rO| j ddi¡ |  |¡ | j d| j ji¡ | ¡  | j  d ¡ |rptt|
ƒƒ}|
D ]o}|d	 }d
|v r…|d |d
 k r…qrd|v r’|d	 |d kr’qr|d }|D ]\}}||j_q˜| j  |¡ | j |¡}|d d… \}	}d|i}|d	 |d< |d |d< | |¡ |  |¡ | |	|g¡ |rÝ| d¡ t ¡  qr| ¡  |S )NÚunprojectedT)Úreturn_pooledÚreturn_dictÚcondÚlayerÚprojected_pooledFÚexecution_devicer   Ústart_percentr   Úend_percenté   Úpooled_outputÚclip_start_percentÚclip_end_percent)r@   Úforced_hooksr€   Úencode_from_tokensÚpopr7   ÚappendÚget_hooks_for_clip_scheduler   Úreset_clip_optionsr   Úset_clip_optionsÚ
load_modelrS   ÚresetÚpatch_hooksr   r2   Úhook_keyframeÚ_current_keyframeÚencode_token_weightsr¢   r   Ú)throw_exception_if_processing_interrupted)rƒ   Útokensr¤   r£   Ú	show_pbarÚall_cond_pooledÚ	all_hooksr¥   r    r§   Úscheduled_keyframesÚpbarÚscheduled_optsÚt_rangeÚhooks_keyframesÚhookÚkeyframeÚoÚpooledr   r   r,   Úencode_from_tokens_scheduled1  s\   

/Ó







z!CLIP.encode_from_tokens_scheduledc           	      C  sÎ   | j  ¡  | jd ur| j  d| ji¡ |dkr| j  ddi¡ |  |¡ | j  d| jji¡ | j  |¡}|d d… \}}|r_||dœ}t|ƒdkrX|d D ]
}|d | ||< qM|  	|¡ |S |re||fS |S )Nr¨   r¤   r©   Frª   r­   )r§   r®   )
r   r¶   r   r·   r¸   r@   rS   r½   r2   r¢   )	rƒ   r¿   r¥   r¦   rÊ   r§   rË   Úoutr(   r   r   r,   r²   l  s&   




zCLIP.encode_from_tokensc                 C  s   |   |¡}|  |¡S r   )rŸ   r²   )rƒ   rœ   r¿   r   r   r,   Úencode…  s   

zCLIP.encodec                 C  sP   |r| j j|d| j ¡ dS | j ¡ }|| j _| j  ¡ D ]}||_q| j  |¡S )NF©ÚstrictÚassign)r   Úload_state_dictr@   Ú
is_dynamicÚcan_assign_sdÚmodulesr|   )rƒ   Úsdr\   Ú
can_assignrŠ   r   r   r,   r|   ‰  s   
zCLIP.load_sdc                 C  s.   | j  ¡ }| j ¡ }|D ]}|| ||< q|S r   )r   r9   rd   )rƒ   Úsd_clipÚsd_tokenizerr(   r   r   r,   Úget_sd™  s
   

zCLIP.get_sdc                 C  s<   d}t | jdƒr| jj|| jjd}tj| jg|d | jS )Nr   Úmemory_estimation_function©rW   )Úmemory_required)Úhasattrr   rÛ   r@   rS   r   r~   )rƒ   r¿   Úmemory_usedr   r   r,   r¸      s
   zCLIP.load_modelc                 C  ó
   | j  ¡ S r   )r@   Úget_key_patches©rƒ   r   r   r,   rá   §  rš   zCLIP.get_key_patchesé   é2   çffffffî?ç        c                 C  sX   | j  ¡  |  |¡ | j  dd i¡ | j  d| jji¡ | j j|||||||||	|
d
S )Nr¨   rª   )	Ú	do_sampleÚ
max_lengthÚtemperatureÚtop_kÚtop_pÚmin_pÚrepetition_penaltyÚseedÚpresence_penalty)r   r¶   r¸   r·   r@   rS   Úgenerate)rƒ   r¿   rç   rè   ré   rê   rë   rì   rí   rî   rï   r   r   r,   rð   ª  s
   

 zCLIP.generatec                 C  s   | j j||dS )N)Úskip_special_tokens)rd   Údecode)rƒ   Ú	token_idsrñ   r   r   r,   rò   ²  r“   zCLIP.decode)F)r   r   )r    r¡   )r£   r¡   )FF)	Trã   r   rä   rå   ræ   r   Nræ   )T)rB   Ú
__module__Ú__qualname__r   r   r   r–   r™   rŸ   r¢   rÌ   r²   rÎ   r|   rÚ   r¸   rá   rð   rò   r   r   r   r,   rR   Ï   s"    
@



;

rR   c                   @  sº   e Zd Zd3dd„Zdd„ Zdd„ Zdd	„ Zd
d„ Zd4dd„Zd5dd„Z	d6dd„Z
d7dd„Zd8dd„Zd9d!d"„Zi fd#d$„Zd3d%d&„Zd'd(„ Zd3d)d*„Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ ZdS ):ÚVAENc                   s®  d|  ¡ v rt |¡}t ¡ rd‰ nd‰ ‡ fdd„| _‡ fdd„| _d| _d| _d| _	d	| _
d
| _d | _dd„ | _dd„ | _tjtjg| _d| _d| _d | _d | _d | _d | _d| _d| _|d u rAd|v r•dddd
d
dg d¢d	g ddœ
}| ¡ }g d¢|d< d|d< tddid|dœd|dœd| _n¶d|v r­|d jd  | _	tj j j!| j	d!| _nžd"|v rÇt"ƒ | _d| _d| _d#d„ | _d$d„ | _n„d%|v rét#ƒ | _d&| _d'| _	i }|D ]}	||	 |d( $|	¡< qÙ|}nbd)|v r	t#ƒ | _d'| _	i }|D ]}	||	 |d* $|	¡< qù|}nBd+|v rt#ƒ | _d&| _d'| _	n1d,|v rf|d, jd  d-krlg d.¢d
d
d	d&ddd/œ}
|d, jd   | _	|
d0< d&| _d&| _tj%tjtjg| _tddid1|
dœd2|
dœd| _d3d„ | _d4d„ | _nß|d, jd  d&krÙ|d, j&d5krÙg d6¢d
d
d	d'ddddd7œ	}
|d, jd   | _	|
d0< tj%tjtjg| _d8d„ d'd'f| _d9| _d:d„ d'd'f| _d9| _d
| _
d| _tddid;|
dœd<|
dœd| _d=d„ | _d>d„ | _nrdddd
d
dg d¢d	g ddœ
}
d?|vrþd@|vrþg dA¢|
dB< d| _d| _|d, jd   | _	|
d0< dC|v rtj' (|dDdEdFœ¡}dG|v rAd|
dH< |  jd	9  _|  jd	9  _|  j	d9  _	| j‰‡fdId„| _dJ|v rTt)|
|dJ jd  dK| _n÷tddid|
dœdL|
dœd| _nådM|v r×i }d }dN| _dN| _dO|v r|dO}dP|v rƒdP}|d ur¡|| jdQ dRkr¡g dS¢|dT< dU| _dV| _dV| _t*dÌi |¤Ž| _dWd„ | _dXd„ | _d-| _	d	| _dY| _d | _
dZd„ | _d[d„ | _tj%tjtjg| _d| _ntd\|v sëd]|v sëd^|v sëd_|v r?d\|v rùtj' (|d`dai¡}d^|v rtj' (|d`dbi¡}tj+j,j-j. /¡ | _dR| _	d
| _
dcd„ | _ddd„ | _ded„ ddf| _df| _dgd„ ddf| _df| _tj%tjg| _ndh|v r¸|dh }di}|jdi djkrUdi}n|jdi dkkrfd }dl|v rfd	}d }|d ur}dm|v r}t0 1|dm ¡ 2dnd ¡}tj+j3j-j4j/||do| _d| _	d
| _
dpd„ | _dqd„ | _drd„ d&d&f| _ds| _dtd„ d&d&f| _ds| _tjtjg| _n“du|v r!|du jd  d&kr!g d6¢d
d
d	d'ddddvœ}
|du jd  |
d0< d&| _	dwd„ d'd'f| _d9| _dxd„ d'd'f| _d9| _d
| _
d| _tj%tjtjg| _tddyid;|
dœd<|
dœd| _dzd„ | _d{d„ | _n*du|v rƒdddd
d
dg d¢d	g ddœ
}
d|
d|< d|
d}< d~d„ ddf| _d| _d€d„ ddf| _d| _d
| _
|du jd   | _	|
d0< t)|
|dJ jd  dK| _dd„ | _d‚d„ | _tjtj%tjg| _nÈdƒ|v rÙd„d„ ddf| _d…| _d†d„ ddf| _d…| _d
| _
d'| _	d'| j	d dkd
d
dg d‡¢d	d&gddd dddˆœ}
tj+j5j-j6dÌi |
¤Ž| _d‰d„ | _dŠd„ | _tjtjg| _nrd‹|v r™dŒ|v r/dd„ d'd'f| _d9| _dŽd„ d'd'f| _d9| _d
| _
d| _	d| j	g d¢d	g g d‘¢dd’œ}
tj+j7j8j9dÌi |
¤Ž| _tjtj%tjg| _d“d„ | _d”d„ | _n|d• jdi }d–d„ ddf| _d| _d—d„ ddf| _d| _d
| _
d'| _	|d˜ jd  | _|d™ jdi | _:d| _|| j	g d¢d	g g d‘¢| j| j:ddšœ	}
tj+j7j-j9dÌi |
¤Ž| _tjtj%tjg| _d›d„ | _dœd„ | _n²d|v rÉd | _
dÍdždŸ„‰dÎ‡fd d„	| _dÍ‡fd¡d„	| _tj+j;j- <¡ | _tj%tjtjg| _n‚d¢|v rtj+j=j-j>j?dd£| _d¤d„ | _d¥d„ | _d| _	d	| _dY| _d¦| _d¦| _d	| _
d§d„ | _d¨d„ | _tjtj%tjg| _d| _d'| _n;d©|v r6tj@ A¡ | _dªd„ | _d«d„ | _d | _d | _d
| _	d	| _
d
| _nd¬|v r†d­}|d­krEd®}nd¯}tj+jBj-jCjD|d°| _d±d„ | _d²d„ | _d³| _	d	| _djd|  | _djd|  | _d | _
d´d„ | _dµd„ | _tjg| _d| _nÅd¶|v r7|d· jd  | _	d
| _
d¸d„ d'd'f| _d9| _d¹d„ d'd'f| _d9| _| j	dºv rÐtj jEjF| j	d d»| _d¼d„  | _| _d½d„ | _d¾d„ | _n{| j	d&kró|d¶ jdi dRkrótj jEjF| j	tjGjHd»| _d¿d„ | _nX|d· jItj%krtjGjJ}nd }tj jEjF| j	|d»| _dÀd„  | _| _dÁd„ ddf| _d| _dÂd„ ddf| _d| _dÃd„ | _dÄd„ | _ntK LdÅ¡ d | _d S t)dÌi |dÆ ¤Ž| _| j M¡ | _|d u rZt N¡ }|| _Ot P¡ }|d u rnt Q| jO| j¡}|| _Q| j R| jQ¡ t S| j¡ t T¡ | _UtjVjW}| jrtjVjX}|| j| jO|dÇ| _Y| jjZ|d| jY [¡ dÈ\}}t\|ƒdikr¶tK LdÉ $|¡¡ t\|ƒdikrÅtK ]dÊ $|¡¡ tK ^dË $| jO|| jQ¡¡ |  _¡  d S )ÏNz*decoder.up_blocks.0.resnets.0.norm1.weightg×£p=
×@r   c                   s"   d| d  | d  t  |¡ ˆ  S )Niç  r­   é   ©r   rj   ©ÚshaperU   ©ÚVAE_KL_MEM_RATIOr   r,   r_   ¿  ó   " zVAE.__init__.<locals>.<lambda>c                   s&   d| d  | d  d t  |¡ ˆ  S )Ni‚  r­   r÷   é@   rø   rù   rû   r   r,   r_   À  ó   & é   é   r­   r÷   c                 S  s   | d d S )Nç       @r   r   ©Úimager   r   r,   r_   Ç  s    c                 S  s   |   d¡ d¡ dd¡S )Nr   r  ræ   )Úadd_Údiv_Úclamp_r  r   r   r,   r_   È  ó    FTiD¬  zdecoder.mid.block_1.mix_factorrã   é€   )r   r­   r  r  ræ   )
Údouble_zÚ
z_channelsÚ
resolutionÚin_channelsÚout_chÚchÚch_multÚnum_res_blocksÚattn_resolutionsÚdropout)r÷   r   r   Úvideo_kernel_sizeÚalphar„   z8comfy.ldm.models.autoencoder.DiagonalGaussianRegularizerz0comfy.ldm.modules.diffusionmodules.model.Encoder)r„   rb   z*comfy.ldm.modules.temporal_ae.VideoDecoder)Úregularizer_configÚencoder_configÚdecoder_configztaesd_decoder.1.weightr   )Úlatent_channelszvquantizer.codebook.weightc                 S  ó   | S r   r   r  r   r   r,   r_   è  ó    c                 S  r  r   r   r  r   r   r,   r_   é  r  z*backbone.1.0.block.0.1.num_batches_trackedé    é   z
encoder.{}zblocks.11.num_batches_trackedzpreviewer.{}z2encoder.backbone.1.0.block.0.1.num_batches_trackedzdecoder.conv_in.weightrþ   )r	  rã   é   r  é   r  )Úblock_out_channelsr  Úout_channelsr  Úffactor_spatialÚdownsample_match_channelÚupsample_match_channelr  z#comfy.ldm.hunyuan_video.vae.Encoderz#comfy.ldm.hunyuan_video.vae.Decoderc                 S  ó   d| d  | d  t  |¡ S )Né¼  r­   r÷   rø   rù   r   r   r,   r_     ó    c                 S  ó&   d| d  | d  d d t  |¡ S )Nr&  r­   r÷   r  rø   rù   r   r   r,   r_   	  rÿ   é   )r	  rã   r  r  r  )	r   r  r!  r  r"  Úffactor_temporalr#  r$  Úrefiner_vaec                 S  ó   t d| d d ƒS ©Nr   r  r÷   ©Úmaxr]   r   r   r,   r_     ó    )r  r  r  c                 S  ó   t dt | d d ¡ƒS ©Nr   r÷   r  ©r/  ÚmathÚfloorr]   r   r   r,   r_     r  z+comfy.ldm.hunyuan_video.vae_refiner.Encoderz+comfy.ldm.hunyuan_video.vae_refiner.Decoderc                 S  r%  )Néð
  éþÿÿÿéÿÿÿÿrø   rù   r   r   r,   r_     r'  c                 S  s.   d| d  | d  | d  d d t  |¡ S )Nr6  éýÿÿÿr7  r8  r  rø   rù   r   r   r,   r_     ó   . z%encoder.down.2.downsample.conv.weightz!decoder.up.3.upsample.conv.weight)r   r­   r  r  zdecoder.post_quant_conv.weightzpost_quant_conv.zquant_conv.)zdecoder.post_quant_conv.zencoder.quant_conv.zbn.running_meanÚbatch_norm_latentc                   s   ˆ | |ƒd S )Ng      @r   rù   )Úold_memory_used_decoder   r,   r_   -  s    zpost_quant_conv.weight)ÚddconfigÚ	embed_dimz0comfy.ldm.modules.diffusionmodules.model.Decoderzdecoder.layers.1.layers.0.betaé   z"decoder.layers.2.layers.1.weight_vz;decoder.layers.2.layers.1.parametrizations.weight.original1r8  é   )r­   r  r  é   é
   Ústridesi€»  i€  c                 S  ó   d| d  t  |¡ S )Néè  r­   rø   rù   r   r   r,   r_   F  ó    c                 S  ó   d| d  d t  |¡ S )NrE  r­   r?  rø   rù   r   r   r,   r_   G  ó    Ú	replicatec                 S  r  r   r   ©Úaudior   r   r,   r_   L  r  c                 S  r  r   r   rJ  r   r   r,   r_   M  r  z blocks.2.blocks.3.stack.5.weightz(decoder.blocks.2.blocks.3.stack.5.weightz,layers.4.layers.1.attn_block.attn.qkv.weightz4encoder.layers.4.layers.1.attn_block.attn.qkv.weightÚ zdecoder.zencoder.c                 S  ó*   d| d  | d  | d  d t  |¡ S )NrE  r­   r÷   r  é€  rø   rù   r   r   r,   r_   X  ó   * c                 S  s0   dt | d dƒ | d  | d  d t |¡ S )Ng      ø?r­   é   r÷   r  rN  ©r/  r   rj   rù   r   r   r,   r_   Y  s   0 c                 S  r,  )Nr   rA  r)  r.  r]   r   r   r,   r_   Z  r0  )rA  r   r   c                 S  r1  )Nr   r)  rA  r3  r]   r   r   r,   r_   \  r  z2decoder.up_blocks.0.res_blocks.0.conv1.conv.weightr   r  r  z$encoder.down_blocks.1.conv.conv.biasÚconfigÚvae)ÚversionrR  c                 S  rM  )Né°  r­   r÷   r  r  rø   rù   r   r   r,   r_   n  rO  c                 S  s,   dt | d dƒ | d  | d  t |¡ S )NéP   r­   rP  r÷   r  rQ  rù   r   r   r,   r_   o  ó   , c                 S  r,  ©Nr   r   rP  r.  r]   r   r   r,   r_   p  r0  )r   r  r  c                 S  r1  ©Nr   rP  r   r3  r]   r   r   r,   r_   r  r  zdecoder.conv_in.conv.weight)r   r  r!  r  r"  r*  r#  r$  c                 S  r,  r-  r.  r]   r   r   r,   r_   y  r0  c                 S  r1  r2  r3  r]   r   r   r,   r_   {  r  z-comfy.ldm.models.autoencoder.EmptyRegularizerc                 S  r%  )Ni81  r7  r8  rø   rù   r   r   r,   r_   „  r'  c                 S  r(  )Ni@8  r7  r8  r  rø   rù   r   r   r,   r_   …  rÿ   Úconv3dÚtime_compressc                 S  r,  r-  r.  r]   r   r   r,   r_   Š  r0  )r  r   r   c                 S  r1  r2  r3  r]   r   r   r,   r_   Œ  r  c                 S  s<   dt d| d d d d ƒ | d  | d  d t |¡ S )Nr6  r   r­   r   r  r÷   rþ   ©Úminr   rj   rù   r   r   r,   r_   “  ó   < c                 S  s,   dt d| d ƒ | d  | d  t |¡ S )Nix  é	   r­   r÷   r  r\  rù   r   r   r,   r_   ”  rW  zdecoder.unpatcher3d.waveletsc                 S  r,  rX  r.  r]   r   r   r,   r_   —  r0  )r   r   r   c                 S  r1  rY  r3  r]   r   r   r,   r_   ™  r  )r­   r  r  )r  r  Úz_factorr  r  r!  ÚchannelsÚchannels_multr  r  r  Ú
patch_sizeÚ
num_groupsÚtemporal_compressionÚspacial_compressionc                 S  rM  )Nrä   r­   r÷   r  r  rø   rù   r   r   r,   r_      rO  c                 S  s6   dt | d d d ƒd  | d  | d  t |¡ S )Nrä   r­   rP  r   r÷   r  )Úroundr   rj   rù   r   r   r,   r_   ¡  s   6 z!decoder.middle.0.residual.0.gammaz1decoder.upsamples.0.upsamples.0.residual.2.weightc                 S  r,  r-  r.  r]   r   r   r,   r_   ¥  r0  c                 S  r1  r2  r3  r]   r   r   r,   r_   §  r  é0   é    )FTT)ÚdimÚz_dimÚdim_multr  Úattn_scalesÚtemperal_downsampler  c                 S  r%  )Niä  r÷   r  rø   rù   r   r   r,   r_   ®  r'  c                 S  s"   d| d  | d  d t  |¡ S )Ni@  r÷   r  rã   rø   rù   r   r   r,   r_   ¯  rý   zdecoder.head.0.gammac                 S  r,  r-  r.  r]   r   r   r,   r_   ²  r0  c                 S  r1  r2  r3  r]   r   r   r,   r_   ´  r  zencoder.conv1.weightzdecoder.head.2.weight)	rj  rk  rl  r  rm  rn  Úimage_channelsÚconv_out_channelsr  c                 S  s.   | d dkrdnd| d  | d  t  |¡ S )Nr­   r  iÜ  ip  r÷   rø   rù   r   r   r,   r_   ¾  r:  c                 S  s2   | d dkrdnd| d  | d  d t  |¡ S )Nr­   r  i˜  iX  r÷   rþ   rø   rù   r   r   r,   r_   ¿  s   2 z(geo_decoder.cross_attn_decoder.ln_1.biasc           	      S  s4   | \}}}t  |¡}|| | | d||   }|S )Nr   rø   )	rú   rU   Ú
num_layersÚkv_cache_multiplierÚbatchÚ
num_tokensÚ
hidden_dimrj   Ú	total_memr   r   r,   Úestimate_memoryÇ  s   

z%VAE.__init__.<locals>.estimate_memoryc                   ó   ˆ | |||ƒS r   r   ©rú   rU   rq  rr  ©rw  r   r,   r_   Ï  r—   c                   rx  r   r   ry  rz  r   r,   r_   Ò  r—   z(vocoder.backbone.channel_layers.0.0.bias)Úsource_sample_ratec                 S  s   | d d t  |¡ S )Nr­   iJ  rø   rù   r   r   r,   r_   Û  rF  c                 S  s   | d | d  d t  |¡ S )Nr­   r÷   iØS rø   rù   r   r   r,   r_   Ü  r'  é   c                 S  r  r   r   rJ  r   r   r,   r_   ã  r  c                 S  r  r   r   rJ  r   r   r,   r_   ä  r  Úpixel_space_vaec                 S  r%  ©Nr   r­   r÷   rø   rù   r   r   r,   r_   ê  r'  c                 S  r%  r~  rø   rù   r   r   r,   r_   ë  r'  z1vocoder.activation_post.downsample.lowpass.filteri€>  Ú16kÚ44k)Úmodec                 S  rD  )Né   r­   rø   rù   r   r   r,   r_   ù  rF  c                 S  rG  )NéZ   r­   gÍÌÌÌÌ–@rø   rù   r   r   r,   r_   ú  rH  é   c                 S  r  r   r   rJ  r   r   r,   r_      r  c                 S  r  r   r   rJ  r   r   r,   r_     r  zdecoder.22.biaszdecoder.1.weightc                 S  r,  r-  r.  r]   r   r   r,   r_     r0  c                 S  r1  r2  r3  r]   r   r   r,   r_   	  r  )rh  r	  )r  Úlatent_formatc                 S  r  r   r   r  r   r   r,   r_     r  c                 S  r  r   r   r  r   r   r,   r_     r  c                 S  ó<   dt d| d d d ƒ| d  | d  d d  t |¡ S )	Ni  r   r9  çffffffæ?gš™™™™™¹?r7  r8  r  rQ  rù   r   r   r,   r_     r^  c                 S  r†  )	NrU  r   r9  r‡  gš™™™™™©?r7  r8  r  rQ  rù   r   r   r,   r_     r^  c                 S  r  r   r   r  r   r   r,   r_     r  c                 S  r,  r-  r.  r]   r   r   r,   r_     r0  c                 S  r1  r2  r3  r]   r   r   r,   r_     r  c                 S  s4   dt d| d d d ƒ| d  | d   t |¡ S )Nr&  r   r9  g…ëQ¸å?g)\Âõ(¼?r7  r8  rQ  rù   r   r   r,   r_     s   4 c                 S  r†  )	Nrä   r   r9  gÍÌÌÌÌÌä?g¤p=
×£Ð?r7  r8  r  rQ  rù   r   r   r,   r_     r^  z5WARNING: No VAE weights detected, VAE not initalized.rb   r[   rÏ   zMissing VAE keys {}zLeftover VAE keys {}z2VAE load device: {}, offload device: {}, dtype: {}r   )r  r­   )r   r   )`r:   r   Úconvert_vae_state_dictr   Úis_amdÚmemory_used_encodeÚmemory_used_decodeÚdownscale_ratioÚupscale_ratior  Ú
latent_dimÚoutput_channelsÚpad_channel_valueÚprocess_inputÚprocess_outputrs   Úbfloat16rt   Úworking_dtypesÚdisable_offloadÚ	not_videoÚsizeÚdownscale_index_formulaÚupscale_index_formulaÚextra_1d_channelÚ
crop_inputÚaudio_sample_raterc   r   Úfirst_stage_modelrú   r   ÚtaesdÚTAESDr	   r
   r!   Úfloat16ÚndimÚutilsÚstate_dict_prefix_replacer   r   ÚldmÚgenmorS  r   ÚVideoVAEÚjsonÚloadsre   Ú
lightricksÚcausal_video_autoencoderÚcosmosÚCausalContinuousVideoTokenizerÚwanÚvae2_2ÚWanVAErp  Ú	hunyuan3dÚShapeVAEÚaceÚmusic_dcae_pipelineÚ	MusicDCAEÚpixel_space_convertÚPixelspaceConversionVAEÚmmaudioÚautoencoderÚAudioAutoencoderÚtaehvÚTAEHVÚlatent_formatsÚHunyuanVideo15rU   ÚHunyuanVideor   r    ÚevalÚ
vae_devicerW   Úvae_offload_deviceÚ	vae_dtyperm   rn   Úintermediate_deviceÚoutput_devicero   rq   rp   r@   rÒ   rÓ   r2   r1   r   Ú
model_size)rƒ   rÖ   rW   rR  rU   Úmetadatar  r  Únew_sdr(   r=  Ú	param_keyÚtensor_conv1rT  Ú
vae_configrj  Úsample_rater  r…  rT   ÚmprŠ   r‹   r   )rü   rw  r<  r,   r   ¶  s´  



þ




þ
$þ



þ








(







þ




,




&





















 





zVAE.__init__c                 C  s&   | j d ur| j S tj | j¡| _ | j S r   )r—  r   r   Úmodule_sizer  râ   r   r   r,   rÅ  B  s   
zVAE.model_sizec                 C  s   | j d u r	tdƒ‚d S )NzvERROR: VAE is invalid: None

If the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.)r  ÚRuntimeErrorrâ   r   r   r,   Úthrow_exception_if_invalidH  s   
ÿzVAE.throw_exception_if_invalidc           	      C  sø   | j r6|  ¡ }|jdd… }tt|ƒƒD ]!}|| | | }|| | d }||| kr5| |d ||¡}q|jd | jkrI|dd | j…f }|S |jd | jk rz| jd urzt| jt	ƒrb| j}d }nd}| j}t
jjj|d| j|jd  f||d}|S )Nr   r8  r­   .Úconstantr   )r  r•   )r›  Úspacial_compression_encoderú   Úranger2   Únarrowr  r  r4   Ústrrs   ÚnnÚ
functionalÚpad)	rƒ   ÚpixelsrŒ  ÚdimsÚdr+   Úx_offsetr  r•   r   r   r,   Úvae_encode_crop_pixelsL  s*   €ö
&zVAE.vae_encode_crop_pixelsc                 C  s   t  ¡ S r   )r   Úintermediate_dtyperâ   r   r   r,   Úvae_output_dtypee  s   zVAE.vae_output_dtyperþ   r  c           	        s,  |j d tj |j d |j d |||¡ }||j d tj |j d |j d |d |d |¡ 7 }||j d tj |j d |j d |d |d |¡ 7 }tj |¡}‡ fdd„}ˆ  tjj|||d |d |ˆ jˆ j|dtjj|||d |d |ˆ jˆ j|d tjj|||||ˆ jˆ j|d d ¡}|S )Nr   r÷   r­   c                   ó(   ˆ j  |  ˆ j¡ ˆ j¡¡jˆ  ¡ dS ©N©rU   ©r  rò   rm   rÂ  rW   rÞ  r]   râ   r   r,   r_   n  ó   ( z#VAE.decode_tiled_.<locals>.<lambda>)Úupscale_amountrÄ  rÄ   ç      @)	rú   r   r¢  Úget_tiled_scale_stepsr   r’  Útiled_scaler  rÄ  )	rƒ   ÚsamplesÚtile_xÚtile_yÚoverlapÚstepsrÄ   Ú	decode_fnÚoutputr   râ   r,   Údecode_tiled_h  s   *66&&ÿþýÿzVAE.decode_tiled_rã   r  c                   st   |j dkr‡fdd„}n|j‰ | ˆ d ˆ d ˆ d  df¡}‡ ‡fdd„}ˆ tjj|||f|ˆjˆjˆj	d	¡S )
Nr÷   c                   rß  rà  râ  r]   râ   r   r,   r_   x  rã  z%VAE.decode_tiled_1d.<locals>.<lambda>r   r   r­   r8  c              	     sD   ˆj  |  dˆ d ˆ d | jd f¡ ˆj¡ ˆj¡¡jˆ ¡ dS )Nr8  r   r­   rá  )r  rò   Úreshaperú   rm   rÂ  rW   rÞ  r]   ©Úog_shaperƒ   r   r,   r_   |  s   D ©Útilerë  rä  r!  rÄ  )
r¡  rú   rð  r’  r   r¢  Útiled_scale_multidimr  r  rÄ  )rƒ   rè  ré  rë  rí  r   rñ  r,   Údecode_tiled_1dv  s   
 (zVAE.decode_tiled_1déç  ©r   r   r   c                   s<   ‡ fdd„}ˆ   tjj|||||f|ˆ jˆ jˆ jˆ jd¡S )Nc                   rß  rà  râ  r]   râ   r   r,   r_     rã  z%VAE.decode_tiled_3d.<locals>.<lambda>)rô  rë  rä  r!  Úindex_formulasrÄ  )r’  r   r¢  rõ  r  r  r™  rÄ  )rƒ   rè  Útile_tré  rê  rë  rí  r   râ   r,   Údecode_tiled_3d€  s   0zVAE.decode_tiled_3dr  c           	        sJ  |j d tj |j d |j d |||¡ }||j d tj |j d |j d |d |d |¡ 7 }||j d tj |j d |j d |d |d |¡ 7 }tj |¡}‡ fdd„}tjj|||||dˆ j ˆ jˆ j|d	}|tjj|||d |d |dˆ j ˆ jˆ j|d	7 }|tjj|||d |d |dˆ j ˆ jˆ j|d	7 }|d }|S )	Nr   r÷   r­   c                   ó.   ˆ j  ˆ  | ¡ ˆ j¡ ˆ j¡¡jˆ  ¡ dS rà  ©r  rÎ   r‘  rm   rÂ  rW   rÞ  r]   râ   r   r,   r_   Š  r:  z#VAE.encode_tiled_.<locals>.<lambda>r   )rä  r!  rÄ  rÄ   rå  )	rú   r   r¢  ræ  r   rç  rŒ  r  rÄ  )	rƒ   Úpixel_samplesré  rê  rë  rì  rÄ   Ú	encode_fnrè  r   râ   r,   Úencode_tiled_„  s   *66(44zVAE.encode_tiled_é   é   c              	     s¤   ˆj dkr‡fdd„}ˆj‰ dˆj }nˆj}ˆj| ‰ || }|| }dˆj }‡ ‡fdd„}tjj|||f||ˆ ˆjd}ˆj dkrF|S | |j	d ˆj|d¡S )Nr   c                   rü  rà  rý  r]   râ   r   r,   r_   “  r:  z%VAE.encode_tiled_1d.<locals>.<lambda>c                   s8   ˆj  ˆ | ¡ ˆj¡ ˆj¡¡ dˆ d¡jˆ ¡ dS )Nr   r8  rá  )r  rÎ   r‘  rm   rÂ  rW   rð  rÞ  r]   ©r!  rƒ   r   r,   r_   œ  s   8 ró  r   r8  )
rŽ  r  rŒ  rš  r   r¢  rõ  rÄ  rð  rú   )rƒ   rè  ré  rë  rÿ  rä  Úextra_channel_sizerÍ   r   r  r,   Úencode_tiled_1d‘  s   



zVAE.encode_tiled_1dé'  ©r   rþ   rþ   c                   s8   ‡ fdd„}t jj|||||f|ˆ jˆ jdˆ jˆ jd	S )Nc                   rü  rà  rý  r]   râ   r   r,   r_   ¥  r:  z%VAE.encode_tiled_3d.<locals>.<lambda>T)rô  rë  rä  r!  Ú	downscalerù  rÄ  )r   r¢  rõ  rŒ  r  r˜  rÄ  )rƒ   rè  rú  ré  rê  rë  rÿ  r   râ   r,   Úencode_tiled_3d¤  s   ,zVAE.encode_tiled_3dc              
   C  sˆ  |   ¡  d }d}| jdkr|jdkr|d d …d d …df }z¸|  |j| j¡}tj| jg|| j	d | j 
| j¡}t|| ƒ}td|ƒ}d}t| jddƒr^tj| j |j¡| j|  ¡ d}d	}td|jd |ƒD ]l}	||	|	| … j| j| jd}
|r| jj|
fd
||	|	| … i|¤Ž n;| jj|
fi |¤Žj| j|  ¡ d	d}|d u r¼tj|jd ft|jdd … ƒ | j|  ¡ d}||	|	| …  |¡ ~|  ||	|	| … ¡ qgW n tyó } zt |¡ t d¡ d	}W Y d }~nd }~ww |r8tj  ¡  |jd }|dks| j!d ur|  "|¡}n&|dkr|  #|¡}n|dkr8d|  $¡  }|d }| j%|||d||fd}| | j¡ &dd¡}|S )NFr­   r)  r   ©rÝ   ra   r   Úcomfy_has_chunked_io©rW   rU   TÚoutput_buffer)rW   rU   rc   zWWarning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.r÷   rã   r  ©ré  rê  rë  r8  )'rÏ  rŽ  r¡  r‹  rú   rÂ  r   r~   r@   r•  Úget_free_memoryrW   Úintr/  Úgetattrr  rs   ÚemptyÚdecode_output_shaperÄ  rÞ  rÒ  rm   rò   rC   Úcopy_r’  Ú	ExceptionÚraise_non_oomr   r    r   Úsoft_empty_cacherš  rö  rï  Úspacial_compression_decoderû  Úmovedim)rƒ   Ú
samples_inÚvae_optionsrþ  Údo_tilerß   Úfree_memoryÚbatch_numberÚpreallocatedr+   rè  rÍ   ÚerÙ  rô  rë  r   r   r,   rò   ¨  sZ   
 &&0ö

€ù	



z
VAE.decodec                 C  s   |   ¡  |  |j| j¡}tj| jg|| jd |jd }i }	|d ur&||	d< |d ur.||	d< |d ur6||	d< |dks?| j	d urN|	 
d¡ | j|fi |	¤Ž}
n<|dkr\| j|fi |	¤Ž}
n.|dkrŠ|d u rld||f|	d< n
td|ƒ||f|	d< |d urtd|ƒ|	d< | j|fi |	¤Ž}
|
 dd	¡S )
Nr
  r­   ré  rê  rë  r   r÷   rú  r8  )rÏ  r‹  rú   rÂ  r   r~   r@   r•  r¡  rš  r³   rö  rï  r/  rû  r  )rƒ   rè  ré  rê  rë  rú  Ú	overlap_trß   rÙ  Úargsrî  r   r   r,   Údecode_tiledÞ  s0   

zVAE.decode_tiledc              
   C  s8  |   ¡  |  |¡}| dd¡}d}| jdkr-|jdk r-| js(| dd¡ d¡}n| d¡}z”|  |j| j	¡}t
j| jg|| jd | j | j¡}t|td|ƒ ƒ}td|ƒ}d }td|jd |ƒD ]^}|  |||| … ¡ | j	¡}t| jd	dƒrƒ| jj|| jd
}	n| | j¡}| j |¡}	|	 | j¡j|  ¡ d}	|d u r·tj|jd ft|	jdd … ƒ | j|  ¡ d}|	|||| …< qaW n tyß }
 zt
 |
¡ t  d¡ d}W Y d }
~
nd }
~
ww |rt!j
 "¡  | jdkrd}|d }| j#|||d||fd}|S | jdks| j$d ur|  %|¡}|S |  &|¡}|S )Nr8  r   Fr÷   r)  r   r­   r
  r  rÜ   rá  r  zWWarning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.Trã   r  r  )'rÏ  rÜ  r  rŽ  r¡  r–  Ú	unsqueezerŠ  rú   rÂ  r   r~   r@   r•  r  rW   r  r/  rÒ  r‘  rm   r  r  rÎ   rÄ  rÞ  rs   r  rC   r  r  r   r    r   r  r	  rš  r  r   )rƒ   rþ  r  rß   r  r  rè  r+   Ú	pixels_inrÍ   r   rô  rë  r   r   r,   rÎ   û  sX   


0ö

€ù	
û

þz
VAE.encodec                 C  sÄ  |   ¡  |  |¡}| j}| dd¡}|dkr(| js#| dd¡ d¡}n| d¡}|  |j| j¡}t	j
| jg|| jd i }	|d urE||	d< |d urM||	d< |d urU||	d	< |dkri|	 d¡ | j|fi |	¤Ž}
|
S |dkrx| j|fi |	¤Ž}
|
S |dkrà|d ur‹td| jd |ƒƒ}nd
}| jd |ƒ|	d< |d u r¢d||f|	d	< n| jd tdt|d | jd |ƒƒƒƒ||f|	d	< |jd }| jd | jd |ƒƒ}| j|d d …d d …d |…f fi |	¤Ž}
|
S )Nr8  r   r÷   r   r­   r
  ré  rê  rë  r  rú  )rÏ  rÜ  rŽ  r  r–  r$  rŠ  rú   rÂ  r   r~   r@   r•  r³   r  r   r/  rŒ  r  r]  r	  )rƒ   rþ  ré  rê  rë  rú  r!  rÙ  rß   r"  rè  Útile_t_latentÚmaximumr   r   r,   Úencode_tiled.  sH   


îð2
(zVAE.encode_tiledc                 C  rà   r   )r  r9   râ   r   r   r,   rÚ   [  rš   z
VAE.get_sdc                 C  ó   z| j d W S    | j  Y S ©Nr8  )r  râ   r   r   r,   r  ^  ó   
zVAE.spacial_compression_decodec                 C  r)  r*  )rŒ  râ   r   r   r,   rÑ  d  r+  zVAE.spacial_compression_encodec                 C  s&   zt | jd dƒd ƒW S    Y d S )Nr   i    )rg  r  râ   r   r   r,   Útemporal_compression_decodej  s   zVAE.temporal_compression_decode)NNNNN)rþ   rþ   r  )rã   r  )r÷  r  r  rø  )r  r  rþ   )r  r  )r  r  r  r  )rB   rô   rõ   r   rÅ  rÏ  rÜ  rÞ  rï  rö  rû  r   r  r	  rò   r#  rÎ   r(  rÚ   r  rÑ  r,  r   r   r   r,   rö   µ  s.    
   







6
3-rö   c                   @  s   e Zd Zddd„Zdd„ ZdS )Ú
StyleModelÚcpuc                 C  r˜   r   )r   )rƒ   r   rW   r   r   r,   r   r  rš   zStyleModel.__init__c                 C  s   |   |j¡S r   )r   Úlast_hidden_state)rƒ   Úinputr   r   r,   Úget_condu  s   zStyleModel.get_condN)r.  )rB   rô   rõ   r   r1  r   r   r   r,   r-  q  s    
r-  c                 C  sr   t jj| dd}| ¡ }d|v rt jjjdddddd}nd	|v r)t jjj	 
¡ }ntd
 | ¡ƒ‚| |¡ t|ƒS )NT©Ú	safe_loadÚstyle_embeddingr  i   r   r÷   )ÚwidthÚcontext_dimÚnum_headÚn_layesÚ	num_tokenzredux_down.weightzinvalid style model {})r   r¢  Úload_torch_filer:   Út2i_adapterrK   ÚStyleAdapterr¤  ÚfluxÚreduxÚReduxImageEncoderr  r!   rÒ   r-  )Ú	ckpt_pathÚ
model_datar:   r   r   r   r,   Úload_style_modely  s   
rB  c                   @  st   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdS )ÚCLIPTyper   r­   r÷   r  r)  rA  rP  r   r_  rB  é   r@  é   é   é   r  é   é   é   r„  é   é   é   é   é   é   N)rB   rô   rõ   ÚSTABLE_DIFFUSIONÚSTABLE_CASCADEÚSD3ÚSTABLE_AUDIOÚHUNYUAN_DITÚFLUXÚMOCHIÚLTXVÚHUNYUAN_VIDEOÚPIXARTÚCOSMOSÚLUMINA2ÚWANÚHIDREAMÚCHROMAÚACEÚOMNIGEN2Ú
QWEN_IMAGEÚHUNYUAN_IMAGEÚHUNYUAN_VIDEO_15ÚOVISÚ
KANDINSKY5ÚKANDINSKY5_IMAGEÚNEWBIEÚFLUX2ÚLONGCAT_IMAGEr   r   r   r,   rC  …  s6    rC  Fc                 C  s   t | ||||ƒ}|jS r   )Ú	load_clipr@   )Ú
ckpt_pathsrY   Ú	clip_typerX   r‡   r"   r   r   r,   Úload_clip_model_patcher£  s   rn  c           
      C  s~   g }| D ]%}t jj|ddd\}}| dd ¡d u r$t jj|d|d\}}| |¡ qt|||||d}	t| |||ff|	j_	|	S )NT)r3  Úreturn_metadataÚcustom_operationsrL  )Úmodel_prefixrÆ  )rY   rm  rX   r‡   )
r   r¢  r:  re   Úconvert_old_quantsr´   Úload_text_encoder_state_dictsrn  r@   Úcached_patcher_init)
rl  rY   rm  rX   r‡   Ú	clip_dataÚprÖ   rÆ  r"   r   r   r,   rk  §  s   rk  c                   @  sx   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdS )ÚTEModelr   r­   r÷   r  r)  rA  rP  r   r_  rB  rD  r@  rE  rF  rG  r  rH  rI  rJ  r„  rK  rL  rM  rN  rO  rP  é   N)rB   rô   rõ   ÚCLIP_LÚCLIP_HÚCLIP_GÚT5_XXLÚT5_XLÚT5_BASEÚLLAMA3_8Ú
T5_XXL_OLDÚ
GEMMA_2_2BÚ	QWEN25_3BÚ	QWEN25_7BÚBYT5_SMALL_GLYPHÚ
GEMMA_3_4BÚMISTRAL3_24BÚMISTRAL3_24B_PRUNED_FLUX2ÚQWEN3_4BÚQWEN3_2BÚGEMMA_3_12BÚJINA_CLIP_2ÚQWEN3_8BÚ	QWEN3_06BÚGEMMA_3_4B_VISIONÚ
QWEN35_08BÚ	QWEN35_2BÚ	QWEN35_4BÚ	QWEN35_9BÚ
QWEN35_27Br   r   r   r,   rw  ³  s8    rw  c                 C  s   d| v rt jS d| v rt jS d| v rt jS d| v rt jS d| v r8| d }|jd dkr.t jS |jd dkr8t jS d	| v r?t jS d
| v rT| d
 }|jd dkrQt j	S t j
S d| v rpd| v r_t jS d| v rmd| v rjt jS t jS t jS d| v rŒ| d }|jd dkr‚t jS |jd dkrŒt jS d| v rÃd| v rÃ| d }|jd dkr¢t jS |jd dkr¬t jS |jd dkr¶t jS |jd dkrÀt jS t jS d| v r| d }d| v rø|jd dkrÚt jS |jd dkrät jS |jd dkrît jS |jd dkrøt jS |jd dkrd| v rt jS t jS t jS d S )Nz+text_model.encoder.layers.30.mlp.fc1.weightz+text_model.encoder.layers.22.mlp.fc1.weightz*text_model.encoder.layers.0.mlp.fc1.weightz(model.encoder.layers.0.mixer.Wqkv.weightú3encoder.block.23.layer.1.DenseReluDense.wi_1.weightr   i (  i   ú1encoder.block.23.layer.1.DenseReluDense.wi.weightz.encoder.block.0.layer.0.SelfAttention.k.weightrN  z0model.layers.0.post_feedforward_layernorm.weightz'model.layers.47.self_attn.q_norm.weightú&model.layers.0.self_attn.q_norm.weightz.vision_model.embeddings.patch_embedding.weightz$model.layers.0.self_attn.k_proj.biasrã   r  z/model.language_model.layers.0.linear_attn.A_logz4model.language_model.layers.0.input_layernorm.weightr  i 
  r|  z.model.layers.0.post_attention_layernorm.weightr?  z/model.layers.39.post_attention_layernorm.weight)rw  r{  rz  ry  r‹  rú   r|  r}  r€  r„  r~  rŠ  rŽ  r…  r  r‚  rƒ  r  r‘  r’  r“  r  rˆ  r‰  rŒ  r  r†  r‡  r  )rÖ   Úweightr   r   r,   Údetect_te_modelÑ  s~   

r˜  c                 C  s8   d}d}| D ]}||v s||v rt jj |¡  S qi S )Nr”  r•  )r   Útext_encodersÚsd3_clipÚt5_xxl_detect)ru  Úweight_nameÚweight_name_oldrÖ   r   r   r,   Út5xxl_detect  s   ÿrž  c                 C  s>   ddg}| D ]}|D ]}||v rt jj |¡    S q
qi S )Nz&model.layers.0.self_attn.k_proj.weightz+model.layers.0.linear_attn.in_proj_a.weight)r   r™  Úhunyuan_videoÚllama_detect)ru  Úweight_namesrÖ   rœ  r   r   r,   r      s   ÿÿr   c              
   C  s˜  | }G dd„ dƒ}t t|ƒƒD ]:}d|| v r$tj || dd¡||< nd|| v r8|| d  dd¡|| d< d	|| v rI||  d	¡|| d
< qi }|ƒ }	i |	_t|ƒdkr|t|d ƒ}
|
t	j
kr°|tjkrstj|	_tj|	_n1|tjkr‹tjjjdddd|	_tjjj|	_n|tjkr¦tjjjddddd d d|	_tjjj|	_nþtj|	_tj|	_nô|
t	jkrÃtjjj|	_tjjj|	_ná|
t	j kr€|tjkrçtjjjd.ddddœt!|ƒ¤Ž|	_tjjj|	_n½|tj"krtjj#j$d.i t!|ƒ¤Ž|	_tjj#j%|	_n¢|tj&ks|tj'kr#tjj(j)d.i t!|ƒ¤Ž|	_tjj(j*|	_n|tj+krHtjj,j-d.i t!|ƒ¤Ž|	_tjj,j.|	_|d  /dd ¡|d< n\|tjkrktjjjd.i t!|ƒ¤ddddd dœ¤Ž|	_tjjj|	_n9tjj0j1d.i t!|ƒ¤Ž|	_tjj0j2|	_n$|
t	j3kr›tjj4j-d.i t!|ƒ¤Ž|	_tjj4j5|	_n	|
t	j6kr¯tjj7j8|	_tjj7j9|	_nõ|
t	j:krè|tj;ksÂd|d v rÚtjj<j=|	_tjj<j>|	_|d  /dd ¡|d< nÊtjj?j@|	_tjj?jA|	_n¼|
t	jBkrtjjCj-d.i tD|ƒ¤Ž|	_tjjCjE|	_|d  /dd ¡|d< n—|
t	jFkr6tjjCj-d.i tD|ƒ¤ddi¤Ž|	_tjjCjG|	_|d  /dd ¡|d< nn|
t	jHkr_tjjCj-d.i tD|ƒ¤ddi¤Ž|	_tjjCjG|	_|d  /dd ¡|d< nE|
t	jIkr„tjj#jJd.i tD|ƒ¤Ž|	_tjj#jK|	_|d  /dd ¡|d< n |
t	jLkr§tjjjd.i tD|ƒ¤ddddd dœ¤Ž|	_tjjj|	_ný|
t	jMkrÂtjjNj-d.i tD|ƒ¤Ž|	_tjjNjO|	_nâ|
t	jPkr|tjQkråtjjRj-d.dditD|ƒ¤Ž|	_tjjRjS|	_n¿|tjTkr tjjUj-d.i tD|ƒ¤Ž|	_tjjUjV|	_n¤tjjWj-d.i tD|ƒ¤Ž|	_tjjWjX|	_n|
t	jYks!|
t	jZkrGtjj[j\d.i tD|ƒ¤d|
t	jZki¤Ž|	_tjj[j]|	_|d  /dd ¡|d< n]|
t	j^kr‡|tj_ksY|tj`krrtjj[jad.i tD|ƒ¤ddi¤Ž|	_tjj[jb|	_n2tjjcj-d.i tD|ƒ¤Ž|	_tjjcjd|	_n|
t	jekr¢tjjfj-d.i tD|ƒ¤Ž|	_tjjfjg|	_n|
t	jhkrÁtjj[jad.i tD|ƒ¤ddi¤Ž|	_tjj[ji|	_nã|
t	jjkrÕtjjkjl|	_tjjkjm|	_nÏ|
t	jnt	jot	jpt	jqt	jrfv r"tj s|d ddddœ¡|d< t	jndt	jodt	jpd t	jqd!t	jrd"i|
 }tjjtj-d.i tD|ƒ¤d|i¤Ž|	_tjjtj|d#|	_n‚|
t	jukr=tjjvj-d.i tD|ƒ¤Ž|	_tjjvjw|	_ng|tjkrVtjjjdddd|	_tjjj|	_nN|tjkrrtjjjddddd d d|	_tjjj|	_n2txjy|	_txjz|	_n(t|ƒd$krk|tjkr·t|d ƒt|d ƒg}tjjjd.t	j{|v t	j
|v t	j |v dœt!|ƒ¤Ž|	_tjjj|	_ní|tj|krËtjj}j~|	_tjj}j|	_nÙ|tj_krætjj[j€d.i t!|ƒ¤Ž|	_tjj[j|	_n¾|tj‚krtjjƒj„d.i tD|ƒ¤Ž|	_tjjƒj…|	_n£|tjkrXg }|D ]}t|ƒ}
| †|
¡ qt	j{|v }t	j
|v }t	j |v }t	jL|v }|r3t!|ƒni }|r<tD|ƒni }tjjjd.||||d%œ|¤|¤Ž|	_tjjj|	_nL|tjQkrstjjRj-d.i tD|ƒ¤Ž|	_tjjRjS|	_n1|tj‡krŽtjjRj-d.i tD|ƒ¤Ž|	_tjjƒjˆ|	_n|tj‰kr¨tjjŠj-d.i tD|ƒ¤Ž|	_tjjŠj‹|	_nü|tjŒkrÂtjjŠj-d.i tD|ƒ¤Ž|	_tjjŠj|	_nâ|tj"krítjj#jŽd.i tD|ƒ¤tjj# |¡¤Ž|	_tjj#j|	_|d  /dd ¡|d< n·|tj‘kr/tjj’j-d.i tD|ƒ¤Ž|	_tjj’j“|	_d&|d v r|d }|d }n|d }|d }| /dd ¡|d'< | /dd ¡|d(< nu|tj;krbt|d ƒt|d ƒg}t	j^|v rJd}nd)}tjj”j-d.d*|itD|ƒ¤Ž|	_tjj”j•|	_nBtj–|	_tj|	_n9t|ƒd+kr†tjjjd.i t!|ƒ¤Ž|	_tjjj|	_nt|ƒd,kr¤tjjjd.i t!|ƒ¤tD|ƒ¤Ž|	_tjjj|	_d}|D ]}|tj —|¡7 }tjj˜ ™|||¡\}}q¨tš|	||||||d-}|S )/Nc                   @  ó   e Zd ZdS )z1load_text_encoder_state_dicts.<locals>.EmptyClassN©rB   rô   rõ   r   r   r   r,   Ú
EmptyClass-  ó    r¤  z#transformer.resblocks.0.ln_1.weightrL  Útext_projectionr   r   ztext_projection.weightzlm_head.weightzmodel.lm_head.weightFT)Úclip_lÚclip_gÚt5)r§  r¨  r©  ÚllamaÚdtype_t5Údtype_llamaÚspiece_model)r§  r¨  r©  rª  r¬  Ú
model_typeÚ	gemma3_4bÚgemma3_4b_vision)r§  r¨  r©  rª  r«  Úbyt5ÚprunedÚtekken_modelÚqwen3_4bÚqwen3_8bzmodel.zvisual.zmodel.lm_head.)zmodel.language_model.zmodel.visual.zlm_head.Ú
qwen35_08bÚ	qwen35_2bÚ	qwen35_4bÚ	qwen35_9bÚ
qwen35_27b)r®  r­   )r§  r¨  r©  rª  r–  Úgemma_spiece_modelÚjina_spiece_modelÚqwen3_2bÚlm_modelr÷   r  )rY   r†   rZ   r9   rX   r‡   r   )›rÒ  r2   r   r¢  Úclip_text_transformers_convertÚ	transposer³   rb   r˜  rw  r{  rC  rR  r   ÚStableCascadeClipModelr"   ÚStableCascadeTokenizerrd   rS  r™  rš  ÚSD3Tokenizerr^  ÚhidreamÚhidream_clipÚHiDreamTokenizerÚSDXLRefinerClipModelÚSDXLTokenizerrz  Úsd2_clipÚSD2ClipModelÚSD2Tokenizerr|  rž  rX  ÚltÚltxv_teÚLTXVT5TokenizerrZ  r_  Ú	pixart_t5Ú	pixart_teÚPixArtTokenizerr]  r­  ÚteÚWanT5Tokenizerre   r¥  Úmochi_teÚMochiT5Tokenizerr€  r«  ÚCosmosT5Tokenizerr}  Úaura_t5ÚAuraT5ModelÚAuraT5Tokenizerr~  r`  r²  Ú
AceT5ModelÚAceT5TokenizerÚsa_t5Ú	SAT5ModelÚSAT5Tokenizerr  Úlumina2r   ÚLuminaTokenizerr…  Ú
NTokenizerrŽ  rŠ  Ú	gemma3_teÚGemma3_12BTokenizerr  r‚  Úomnigen2ÚOmnigen2Tokenizerrƒ  rc  Úhunyuan_imageÚHunyuanImageTokenizerrj  Úlongcat_imageÚLongCatImageTokenizerÚ
qwen_imageÚQwenImageTokenizerr†  r‡  r=  Úflux2_teÚFlux2Tokenizerrˆ  rV  ri  Úklein_teÚKleinTokenizerÚz_imageÚZImageTokenizerr‰  ÚovisÚOvisTokenizerrŒ  ÚKleinTokenizer8Br‹  Újina_clip_2ÚJinaClip2TextModelWrapperÚJinaClip2TokenizerWrapperr  r  r‘  r’  r“  r£  Úqwen35r  ÚanimaÚAnimaTokenizerr   ÚSD1ClipModelÚSD1Tokenizerry  rU  ÚhyditÚ
HyditModelÚHyditTokenizerÚ	flux_clipÚFluxTokenizerrY  rŸ  Úhunyuan_video_clipÚHunyuanVideoTokenizerr´   rd  ÚHunyuanVideo15Tokenizerrf  Ú
kandinsky5ÚKandinsky5Tokenizerrg  ÚKandinsky5TokenizerImageÚltxav_teÚ	sd_detectÚLTXAVGemmaTokenizerrh  ÚnewbieÚNewBieTokenizerÚace15ÚACE15TokenizerÚSDXLClipModelÚcalculate_parametersÚ
long_cliplÚmodel_options_long_cliprR   )Ústate_dictsrY   rm  rX   r‡   ru  r¤  ÚirZ   Úclip_targetÚte_modelÚqwen35_typeÚ	te_modelsÚhidream_dualclip_classesÚ
hidream_ter§  r¨  r©  rª  Ú	t5_kwargsÚllama_kwargsÚclip_data_gemmaÚclip_data_jinar®  r†   r‰   r"   r   r   r,   rs  *  sœ  €





"
ÿ""
ÿ(""&"4



$(

"rs  c                 C  sD   t jj| dd}t |¡}t ¡ r| ¡ }t jj	|t 
¡ t ¡ dS )NTr2  r[   )r   r¢  r:  r   Úload_gligenr   Úshould_use_fp16Úhalfro   rq   Úget_torch_deviceÚunet_offload_device)r@  Údatar   r   r   r,   r    s
   
r  c                 C  s    t j | ¡}d| ¡ v rdS dS )Nr   zx
HINT: This seems to be a Lora file and Lora files should be put in the lora folder and loaded with a lora loader node..rL  )ÚosÚpathÚbasenameÚlower)r&  r9   Úfilenamer   r   r,   Úmodel_detection_error_hint  s   r*  Tc                 C  sô   t  d¡ t|||d|dd\}}}	}
|d u r1t| dƒ}t |¡}W d   ƒ n1 s,w   Y  |d d }|d }d	|v rb|d	 d
krb| ¡ }G dd„ dtjj	tjj
ƒ}| d||jjƒ¡ |}| di ¡ dd ¡}|d uru| |¡ |||	fS )NzyWarning: The load checkpoint with config function is deprecated and will eventually be removed, please use the other one.FT)Ú
output_vaeÚoutput_clipÚoutput_clipvisionrY   Úoutput_modelÚrr   rb   Úcond_stage_configÚparameterizationÚvc                   @  r¢  )z.load_checkpoint.<locals>.ModelSamplingAdvancedNr£  r   r   r   r,   ÚModelSamplingAdvanced(  r¥  r3  Úmodel_samplingr   )r   r    Úload_checkpoint_guess_configÚopenÚyamlr3  r   r   r4  ÚModelSamplingDiscreteÚV_PREDICTIONÚadd_object_patchr   Úmodel_configre   r™   )Úconfig_pathr@  r+  r,  rY   r9   rR  r   r"   rS  Ú_ÚstreamÚmodel_config_paramsÚclip_configrŠ   r3  r   r   r   r,   Úload_checkpoint  s$   
ÿ

rA  c	                 C  s    t jj| dd\}	}
t|	||||||||
|d
}|d u r'td | t| |	ƒ¡ƒ‚|r:|d d ur:t| |||ff|d _|rN|d d urNt	| |||ff|d j
_|S )NT©ro  )Úte_model_optionsrÆ  r‡   ú,ERROR: Could not detect model type of: {}
{}r   r   )r   r¢  r:  Úload_state_dict_guess_configrÎ  r!   r*  Ú'load_checkpoint_guess_config_model_onlyrt  Ú&load_checkpoint_guess_config_clip_onlyr@   )r@  r+  r,  r-  rY   r.  rX   rC  r‡   rÖ   rÆ  rÍ   r   r   r,   r5  3  s   r5  c              
   C  s    t | ddd||||d^}}|S )NF)rY   rX   rC  r‡   )r5  )r@  rY   rX   rC  r‡   r   r=  r   r   r,   rF  >  s   

ürF  c                 C  s&   t | ddd|d|||d	^}}}|jS )NFT)rY   r.  rX   rC  r‡   )r5  r@   )r@  rY   rX   rC  r‡   r=  r"   r   r   r,   rG  F  s   
ürG  c
           &   	   C  s’  d }
d }d }d }d }t  | ¡}tj | |¡}tj | |¡}t ¡ }| dd ¡}|d u r6tjj	| ||d\} }t j
| ||d}|d u r\t d¡ t| i d}|d u rSd S |d ti dd fS t|jƒ}|jd urhd }|d uro||_| d| dd ¡¡}|d u r…tj|||d}|jd ur“t d ||j¡}nt |||j¡}| ||¡ |jd ur°|r°t | |jd	¡}|rÝt ||¡}|j| ||d
}|	rÆtjjntjj}|||t ¡ d}|j| ||  ¡ d |rùtjj!| dd„ |j"D ƒd	d}| #|¡}t||d}|r˜| dd ¡d u rjg }t|  $¡ ƒD ]}| %d¡r!| &|d t'dƒ … ¡ qt'|ƒdkrji }| D ]}d}|D ]} |p=| (| ¡}q4|sI| | ||< q.|D ]} tjj	| | i d\}!}"|!D ]	}|!| ||< q\|} qM|j)| d}#|#d ur˜| *| ¡}$t'|$ƒdkr“tj |$¡}t+|#||$||$||	d}
nt d¡ |  $¡ }%t'|%ƒdkr«t ,d -|%¡¡ |rÃ|t. /d¡krÃt 0d¡ tj1|gd	d ||
||fS )Nrp  ©rÆ  zTWarning, This is not a checkpoint file, trying to load it as a diffusion model only.©rX   )rÖ   rU   Úweight_dtype©Úmodel_paramsÚsupported_dtypesrJ  TrÜ   r[   ©rÑ   c                 S  s   i | ]}|d “qS )rL  r   )Ú.0r(   r   r   r,   Ú
<dictcomp>„  r0  z0load_state_dict_guess_config.<locals>.<dictcomp>©Úfilter_keys)rÖ   rÆ  z.scaled_fp8Ú
scaled_fp8r   F)r9   )rY   rZ   r†   r9   rX   r‡   zVno CLIP/text encoder weights in checkpoint, the text encoder model will not be loaded.zleft over keys: {}r.  z&loaded diffusion model directly to GPUr`   )2r   Úunet_prefix_from_state_dictr   r¢  r  rJ  r   r"  re   rr  Úmodel_config_from_unetr   r    Úload_diffusion_model_state_dictrö   r{   Úsupported_inference_dtypesÚquant_configrp  Ú
unet_dtypeÚunet_manual_castÚset_inference_dtypeÚclip_vision_prefixr   Úload_clipvision_from_sdÚunet_inital_load_deviceÚ	get_modelro   rp   rq   r#  Úload_model_weightsrÓ   r£  Úvae_key_prefixÚprocess_vae_state_dictr:   Úendswithr´   r2   Ú
startswithr  Úprocess_clip_state_dictrR   r1   r!   rs   rW   r   r~   )&rÖ   r+  r,  r-  rY   r.  rX   rC  rÆ  r‡   r"   Ú
clipvisionrS  r   ro   Údiffusion_model_prefixr†   rJ  rS   rp  r;  Údiffusion_modelÚunet_weight_dtyperY  Úmanual_cast_dtypeÚinital_load_devicerp   Úvae_sdÚscaled_fp8_listr(   Úout_sdÚskipÚprefÚquant_sdÚ	qmetadatar  Úclip_sdÚ	left_overr   r   r,   rE  N  s    






€€



rE  c                 C  sr  |  dd¡}|  dd¡}|du rtjj| d|d\} }t | ¡}tjj| |didd}t|ƒdkrB|} |du rBtjj| d|d\} }tj | ¡}tj 	| ¡}	t
 ¡ }
tj| d|d}|dura| }nIt | d¡}|durxt |d¡}|du rwdS n2t | ¡}|du rƒdS tj |j¡}i }|D ]}|| v rž|  |¡||| < qŽt d	 || |¡¡ qŽt
 ¡ }t|jƒ}|jdurºd}	|du rÇt
j|||	d
}n|}|jdur×t
 d|
|j¡}nt
 ||
|j¡}| ||¡ |durì||_|  dd¡r÷d|jd< | |d¡}|rtjj ntjj!}|||
|d}t
 "|¡s| #|¡ |j$|d| %¡ d |  &¡ }t|ƒdkr7t 'd |¡¡ |S )aÃ  
    Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.

    Args:
        sd (dict): State dictionary containing model weights and configuration
        model_options (dict, optional): Additional options for model loading. Supports:
            - dtype: Override model data type
            - custom_operations: Custom model operations
            - fp8_optimizations: Enable FP8 optimizations

    Returns:
        ModelPatcher: A wrapped model instance that handles device management and weight loading.
        Returns None if the model configuration cannot be detected.

    The function:
    1. Detects and handles different model formats (regular, diffusers, mmdit)
    2. Configures model dtype based on parameters and device capabilities
    3. Handles weight conversion and device placement
    4. Manages model optimization settings
    5. Loads weights and returns a device-managed model instance
    rU   Nrp  rL  rH  TrQ  r   z{} {}rK  Úfp8_optimizationsFÚfp8r[   rN  z%left over keys in diffusion model: {})(re   r   r¢  rr  r   rT  r£  r2   r  rJ  r   r"  rU  Úconvert_diffusers_mmditÚ model_config_from_diffusers_unetÚunet_to_diffusersÚunet_configr³   r   r    r!   r#  r{   rW  rX  rY  rZ  r[  rp  Úoptimizationsr_  ro   rp   rq   Úis_device_cpurm   r`  rÓ   r:   r   )rÖ   rX   rÆ  r‡   rU   rp  rg  Útemp_sdr†   rJ  rS   r;  rÇ  Údiffusers_keysr(   rT   ri  rY  rj  r   rp   ro   rt  r   r   r,   rV  ³  sp   
ÿ





rV  c                 C  sd   t jj| dd\}}t||||d}|d u r)t d | ¡¡ td | t| |ƒ¡ƒ‚t	| |ff|_
|S )NTrB  )rX   rÆ  r‡   z$ERROR UNSUPPORTED DIFFUSION MODEL {}rD  )r   r¢  r:  rV  r   Úerrorr!   rÎ  r*  Úload_diffusion_modelrt  )Ú	unet_pathrX   r‡   rÖ   rÆ  r   r   r   r,   r€    s   r€  c                 C  ó   t  d¡ t| d|idS )NzeThe load_unet function has been deprecated and will be removed please switch to: load_diffusion_modelrU   rI  )r   r    r€  )r  rU   r   r   r,   Ú	load_unet  ó   
rƒ  c                 C  r‚  )Nz{The load_unet_state_dict function has been deprecated and will be removed please switch to: load_diffusion_model_state_dictrU   rI  )r   r    rV  )rÖ   rU   r   r   r,   Úload_unet_state_dict!  r„  r…  c                 C  sÆ   d }|g}|d ur|  | ¡ ¡ | ¡ }d }	|d ur| ¡ }	|d u r$i }t |¡ |d ur1| ¡ nd }
| ||	|
¡}|D ]}|| ||< q<|D ]}|| }| ¡ sW| ¡ ||< qGtj	j
|| |d d S )NrH  )r´   r¸   rÚ   r   r~   Ústate_dict_for_savingÚis_contiguousÚ
contiguousr   r¢  Úsave_torch_file)Úoutput_pathr   r"   rS  r   rÆ  Ú
extra_keysrs  Úload_modelsrl  Úclip_vision_sdrÖ   r(   Útr   r   r,   Úsave_checkpoint%  s*   
€r  )NNTTNNNr   )iÚ
__future__r   r§  rs   Úenumr   r   r   r   Úcomfy.utilsr   Úldm.models.autoencoderr   r   Úldm.cascade.stage_ar	   Úldm.cascade.stage_c_coderr
   Úldm.audio.autoencoderr   Úcomfy.ldm.genmo.vae.modelÚ1comfy.ldm.lightricks.vae.causal_video_autoencoderÚcomfy.ldm.cosmos.vaeÚcomfy.ldm.wan.vaeÚcomfy.ldm.wan.vae2_2Úcomfy.ldm.hunyuan3d.vaeÚ%comfy.ldm.ace.vae.music_dcae_pipelineÚcomfy.ldm.hunyuan_video.vaeÚ!comfy.ldm.mmaudio.vae.autoencoderÚcomfy.pixel_space_convertÚcomfy.weight_adapterr7  r4  r%  rL  r   r   r   r   r   r   Úcomfy.text_encoders.sd2_clipÚcomfy.text_encoders.sd3_clipÚcomfy.text_encoders.sa_t5Úcomfy.text_encoders.aura_t5Úcomfy.text_encoders.pixart_t5Úcomfy.text_encoders.hyditÚcomfy.text_encoders.fluxÚcomfy.text_encoders.long_cliplÚcomfy.text_encoders.genmoÚcomfy.text_encoders.ltÚ!comfy.text_encoders.hunyuan_videoÚcomfy.text_encoders.cosmosÚcomfy.text_encoders.lumina2Úcomfy.text_encoders.wanÚcomfy.text_encoders.hidreamÚcomfy.text_encoders.aceÚcomfy.text_encoders.omnigen2Úcomfy.text_encoders.qwen_imageÚ!comfy.text_encoders.hunyuan_imageÚcomfy.text_encoders.z_imageÚcomfy.text_encoders.ovisÚcomfy.text_encoders.kandinsky5Úcomfy.text_encoders.jina_clip_2Úcomfy.text_encoders.newbieÚcomfy.text_encoders.animaÚcomfy.text_encoders.ace15Ú!comfy.text_encoders.longcat_imageÚcomfy.text_encoders.qwen35Úcomfy.model_patcherÚ
comfy.loraÚcomfy.lora_convertÚcomfy.hooksÚcomfy.t2i_adapter.adapterÚcomfy.taesd.taesdÚcomfy.taesd.taehvÚcomfy.latent_formatsÚcomfy.ldm.flux.reduxr-   rQ   rR   rö   r-  rB  rC  rQ  rn  rk  rw  r˜  rž  r   rs  r  r*  rA  r5  rF  rG  rE  rV  r€  rƒ  r…  r  r   r   r   r,   Ú<module>   sÌ    c g     AE

 e
ea
	
