o
    ¶Ïi)o ã                   @   s   d Z ddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddlZddl Zddl!Zddl"Zddl#Zddl$Zddl%Zddl&Zddl'Zddl(Zddl)Zddl*Zddl+Zddl,Zddl-Zddl.Zddl/Zddl0Zddl1Zdd	l2m3Z3 d
dl4m5Z5 ddl6Zddl7Zddl8Z8ddl9m:Z: e:rÚddl;m<Z< G dd„ de3ƒZ=dd„ Z>dd„ Z?G dd„ dej@jAƒZBddd„ZCG dd„ deBƒZDdd„ ZEG dd„ deBƒZFG dd „ d eBƒZGG d!d"„ d"eBƒZHG d#d$„ d$eHƒZIG d%d&„ d&eHƒZJG d'd(„ d(eBƒZKG d)d*„ d*eBƒZLG d+d,„ d,ƒZMG d-d.„ d.eMeBƒZNG d/d0„ d0eMeGƒZOG d1d2„ d2eBƒZPG d3d4„ d4eBƒZQG d5d6„ d6eBƒZRG d7d8„ d8eBƒZSG d9d:„ d:eBƒZTG d;d<„ d<eBƒZUG d=d>„ d>eBƒZVG d?d@„ d@eBƒZWG dAdB„ dBeBƒZXG dCdD„ dDeXƒZYG dEdF„ dFeXƒZZG dGdH„ dHeBƒZ[G dIdJ„ dJeBƒZ\G dKdL„ dLeBƒZ]G dMdN„ dNeBƒZ^G dOdP„ dPe^ƒZ_G dQdR„ dRe^ƒZ`G dSdT„ dTeBƒZaG dUdV„ dVeBƒZbG dWdX„ dXeBƒZcG dYdZ„ dZeBƒZdG d[d\„ d\edƒZeG d]d^„ d^eBƒZfG d_d`„ d`efƒZgG dadb„ dbefƒZhG dcdd„ ddefƒZiG dedf„ dfefƒZjG dgdh„ dhefƒZkG didj„ djefƒZlG dkdl„ dlefƒZmG dmdn„ dnefƒZnG dodp„ dpeBƒZoG dqdr„ dreBƒZpG dsdt„ dteBƒZqG dudv„ dveXƒZrG dwdx„ dxerƒZsG dydz„ dzeBƒZtG d{d|„ d|eBƒZuG d}d~„ d~eBƒZvG dd€„ d€eBƒZwG dd‚„ d‚eBƒZxG dƒd„„ d„exƒZyG d…d†„ d†e^ƒZzG d‡dˆ„ dˆezƒZ{G d‰dŠ„ dŠeBƒZ|G d‹dŒ„ dŒe|ƒZ}G ddŽ„ dŽeBƒZ~dS )aÎ  
    This file is part of ComfyUI.
    Copyright (C) 2024 Comfy

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
é    N)Ú	UNetModelÚTimestep)ÚStageC)ÚStageB)ÚCLIPEmbeddingNoiseAugmentation)Ú ImageConcatWithNoiseAugmentation)ÚOpenAISignatureMMDITWrapper)ÚEnumé   )Úutils)ÚTYPE_CHECKING)ÚModelPatcherc                   @   s8   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdS )Ú	ModelTyper
   é   é   é   é   é   é   é   é	   é
   é   N)Ú__name__Ú
__module__Ú__qualname__ÚEPSÚV_PREDICTIONÚV_PREDICTION_EDMÚSTABLE_CASCADEÚEDMÚFLOWÚV_PREDICTION_CONTINUOUSÚFLUXÚ
IMG_TO_IMGÚFLOW_COSMOSÚIMG_TO_IMG_FLOW© r'   r'   ú./mnt/c/Users/fbmor/ComfyUI/comfy/model_base.pyr   F   s    r   c                 C   s4  t jj}|tjkrt jj}n|tjkrt jj}nu|tjkr&t jj}t jj}ng|tjkr4t jj	}t jj
}nY|tjkrBt jj}t jj}nK|tjkrPt jj}t jj}n=|tjkr^t jj}t jj}n/|tjkrlt jj	}t jj}n!|tjkrvt jj}n|tjkr„t jj}t jj}n	|tjkrt jj}G dd„ d||ƒ}|| ƒS )Nc                   @   s   e Zd ZdS )z%model_sampling.<locals>.ModelSamplingN)r   r   r   r'   r'   r'   r(   ÚModelSamplingu   s    r)   )ÚcomfyÚmodel_samplingÚModelSamplingDiscreter   r   r   r   ÚModelSamplingContinuousEDMr!   ÚCONSTÚModelSamplingDiscreteFlowr   ÚStableCascadeSamplingr    r"   ÚModelSamplingContinuousVr#   ÚModelSamplingFluxr$   r%   ÚCOSMOS_RFLOWÚModelSamplingCosmosRFlowr&   )Úmodel_configÚ
model_typeÚsÚcr)   r'   r'   r(   r+   T   s@   




















r+   c                 C   sJ   t | dƒr#| jtjkr| jtjkrtj | ||¡} | S tj | |d ¡} | S )NÚdtype)Úhasattrr9   ÚtorchÚintÚlongr*   Úmodel_managementÚcast_to_device)Úextrar9   Údevicer'   r'   r(   Úconvert_tensor{   s   
ÿrB   c                       sÎ   e Zd Zejdef‡ fdd„	Zdddi fdd„Zdddi fdd„Zdd	„ Z	d
d„ Z
dd„ Zdd„ Zdd„ Zg fdd„Zdd„ Zd(dd„Zdd„ Zdd„ Zd)dd„Zd d!„ Zd"d#„ Zi fd$d%„Zd&d'„ Z‡  ZS )*Ú	BaseModelNc                    s\  t ƒ  ¡  |j}|j| _|| _|j| _|| _d | _| dd¡su|j	d u r;|j
 dd¡}tjj| dd ¡| j||d}n|j	}|di |¤||dœ¤Ž| _| j ¡  tj ¡ rb| jjtjd t d¡ t d	 |  ¡ | j¡¡ tj | j¡ || _t||ƒ| _| d
d ¡| _| jd u rd| _d| _t d |j¡¡ t d | j¡¡ |j | _ d| _!i | _"d S )NÚdisable_unet_model_creationFÚfp8r9   )Úfp8_optimizationsr5   )rA   Ú
operations)Úmemory_formatz,using channels last mode for diffusion modelz&model weight dtype {}, manual cast: {}Úadm_in_channelsr   r'   zmodel_type {}zadm {})#ÚsuperÚ__init__Úunet_configÚlatent_formatr5   Úmanual_cast_dtyperA   Úcurrent_patcherÚgetÚcustom_operationsÚoptimizationsr*   ÚopsÚpick_operationsÚdiffusion_modelÚevalr>   Úforce_channels_lastÚtor;   Úchannels_lastÚloggingÚdebugÚinfoÚformatÚ	get_dtypeÚarchive_model_dtypesr6   r+   Úadm_channelsÚconcat_keysÚnameÚmemory_usage_factorÚmemory_usage_factor_condsÚmemory_usage_shape_process)Úselfr5   r6   rA   Ú
unet_modelrL   rE   rG   ©Ú	__class__r'   r(   rK   …   s<   

 




zBaseModel.__init__c                 K   s<   t jj | j| t j t jjj|¡¡j||||||fi |¤ŽS ©N)	r*   Úpatcher_extensionÚWrapperExecutorÚnew_class_executorÚ_apply_modelÚget_all_wrappersÚ
WrappersMPÚAPPLY_MODELÚexecute)rf   ÚxÚtÚc_concatÚc_crossattnÚcontrolÚtransformer_optionsÚkwargsr'   r'   r(   Úapply_model«   s   ýüüzBaseModel.apply_modelc                 K   sl  |}| j  ||¡}	|d ur tj|	gtj ||	j|	j¡g dd}	|}
|  	¡ }|	 
|¡}	|	j}| j  |¡ ¡ }|
d urBtj |
||¡}
i }|D ]-}|| }t|dƒrXt|||ƒ}nt|tƒrog }|D ]}| t|||ƒ¡ qa|}|||< qF| j|fd|i|¤Ž}d|v rŒt |	| d¡¡}	| j|	|f|
||dœ|¤Ž}t|ƒdkr¬t |¡s¬t |¡\}}| j  || ¡ |¡S )Nr
   ©Údimr9   rs   Úlatent_shapes)Úcontextrw   rx   )r+   Úcalculate_inputr;   Úcatr*   r>   r?   rA   r9   Úget_dtype_inferencerX   ÚtimestepÚfloatr:   rB   Ú
isinstanceÚlistÚappendÚprocess_timestepr   Úunpack_latentsÚpoprU   ÚlenÚ	is_tensorÚpack_latentsÚcalculate_denoised)rf   rs   rt   ru   rv   rw   rx   ry   ÚsigmaÚxcr~   r9   rA   Úextra_condsÚor@   ÚexÚextÚmodel_outputÚ_r'   r'   r(   rn   ²   s:   &



zBaseModel._apply_modelc                 K   ó   |S rj   r'   )rf   r‚   ry   r'   r'   r(   r‡   Ù   ó   zBaseModel.process_timestepc                 C   s   | j jS rj   )rU   r9   )rf   r'   r'   r(   r^   Ü   ó   zBaseModel.get_dtypec                 C   s   |   ¡ }| jd ur| j}|S rj   )r^   rN   )rf   r9   r'   r'   r(   r   ß   s   
zBaseModel.get_dtype_inferencec                 K   ó   d S rj   r'   ©rf   ry   r'   r'   r(   Ú
encode_admæ   r—   zBaseModel.encode_admc           
      K   sö  t | jƒdkryg }| d| dd ¡¡}| dd ¡}|d u r%| dd ¡}n|  |¡}| dd ¡}|d }|jdd … |jdd … kr‰t ||jd	 |jd
 dd¡}|jdkr‰|jd |jd k rytj	j
 |ddddd|jd |jd  fdd¡}n|d d …d d …d |jd …f }t ||jd ¡}|d uræt |jƒt |jƒkrª|d d …d d…f }|jd }| dt|j| d … ƒ ¡}|jd
d … |jd
d … krÛt ||jd	 |jd
 dd¡}t | ¡ |jd ¡}| jD ]†}|d ur|dkrý| | |¡¡ nW|dkr| | |¡¡ nI|dkr| d| |¡ ¡ n9|dkr1| t |¡d d …d d…f ¡ n#|dkr?| |  |¡¡ n|dkrT| t |¡d d …d d…f ¡ |dkro|d urg| | |¡¡ qé| t |¡¡ qétj|dd}	|	S d S )Nr   Úconcat_maskÚdenoise_maskÚconcat_latent_imageÚlatent_imageÚnoiserA   r
   éÿÿÿÿéþÿÿÿÚbilinearÚcenterr   éýÿÿÿÚconstantr   )r¡   r
   ÚmaskÚmasked_imageÚmask_invertedç      ð?Úconcat_imager{   )rŠ   ra   rP   Úprocess_latent_inÚshaper   Úcommon_upscaleÚndimr;   ÚnnÚ
functionalÚpadÚresize_to_batch_sizeÚreshapeÚtupleÚroundr†   rX   Ú	ones_likeÚblank_inpaint_image_likeÚ
zeros_liker€   )
rf   ry   Úcond_concatr   rž   r    rA   Únum_dimÚckÚdatar'   r'   r(   Úconcat_condé   s^   

2 




€
"

 

€zBaseModel.concat_condc                 C   s   dS )zäOverride in subclasses to handle model-specific cond slicing for context windows.
        Return a sliced cond object, or None to fall through to default handling.
        Use comfy.context_windows.slice_cond() for common cases.Nr'   ©rf   Úcond_keyÚ
cond_valueÚwindowÚx_inrA   Úretain_index_listr'   r'   r(   Úresize_cond_for_context_window"  s   z(BaseModel.resize_cond_for_context_windowc                 K   sÄ   i }| j di |¤Ž}|d urtj |¡|d< | jdi |¤Ž}|d ur*tj |¡|d< | dd ¡}|d ur<tj |¡|d< | dd ¡}|d urNtj |¡|d< | dd ¡}|d ur`tj |¡|d< |S )	Nru   ÚyÚ
cross_attnrv   Úcross_attn_controlnetÚcrossattn_controlnetÚnoise_concatr'   )r¾   r*   ÚcondsÚCONDNoiseShaper›   ÚCONDRegularrP   ÚCONDCrossAttn)rf   ry   Úoutr¾   ÚadmrÇ   Úcross_attn_cnetru   r'   r'   r(   r   (  s"   zBaseModel.extra_condsÚ Fc           	      C   sž   i }t | ¡ ƒ}|D ]}| |¡r| |¡||t|ƒd … < q
| j |¡}| jj|d|d\}}t|ƒdkr>t	 
d |¡¡ t|ƒdkrLt	 
d |¡¡ ~| S )NF)ÚstrictÚassignr   zunet missing: {}zunet unexpected: {})r…   ÚkeysÚ
startswithr‰   rŠ   r5   Úprocess_unet_state_dictrU   Úload_state_dictrZ   Úwarningr]   )	rf   ÚsdÚunet_prefixrÔ   Úto_loadrÕ   ÚkÚmÚur'   r'   r(   Úload_model_weights@  s   
€zBaseModel.load_model_weightsc                 C   ó   | j  |¡S rj   )rM   Ú
process_in©rf   Úlatentr'   r'   r(   r¬   Q  ó   zBaseModel.process_latent_inc                 C   rá   rj   )rM   Úprocess_outrã   r'   r'   r(   Úprocess_latent_outT  rå   zBaseModel.process_latent_outc                 C   s   g }|d ur|  | j |¡¡ |d ur|  | j |¡¡ |d ur)|  | j |¡¡ | j |¡}| jtjkr<t	 
g ¡|d< |D ]}| |¡ q>|S )NÚv_pred)r†   r5   Ú"process_clip_state_dict_for_savingÚ!process_vae_state_dict_for_savingÚ)process_clip_vision_state_dict_for_savingÚ"process_unet_state_dict_for_savingr6   r   r   r;   ÚtensorÚupdate)rf   Úunet_state_dictÚclip_state_dictÚvae_state_dictÚclip_vision_state_dictÚ	extra_sdsrÚ   r'   r'   r(   Ústate_dict_for_savingW  s   zBaseModel.state_dict_for_savingc                 C   s   d| _ dd„ }|| _d S )N)r§   r¨   c                 S   sn   t  | ¡}|d d …df  d9  < |d d …df  d9  < |d d …df  d9  < |d d …df  d9  < |S )	Nr   gŽðHPê?r
   gâX·Ñ æ¿r   g¯”eˆc]ä?r   gD‹lçû©Á?)r;   r·   )rŸ   Úblank_imager'   r'   r(   r¸   j  s   
z7BaseModel.set_inpaint.<locals>.blank_inpaint_image_like)ra   r¸   )rf   r¸   r'   r'   r(   Úset_inpainth  s   
zBaseModel.set_inpaintc                 K   s2   | j  | |jd gdgt|jƒd   ¡||¡S ©Nr   r
   )r+   Únoise_scalingr´   r­   rŠ   ©rf   rŽ   r    rŸ   ry   r'   r'   r(   Úscale_latent_inpaintt  s   2zBaseModel.scale_latent_inpaintc           
      C   sÚ   |g}| j D ].}| |d ¡}|d ur4|| jv r*g }|D ]}| | j| |ƒ¡ q|}t|ƒdkr4||7 }qtj ¡ s?tj ¡ r[|  	¡ }t
tdd„ |ƒƒ}	|	tj |¡ d | j d S t
tdd„ |ƒƒ}	|	d | j d S )Nr   c                 S   ó   | d t  | dd … ¡ S ©Nr   r   ©ÚmathÚprod©Úinput_shaper'   r'   r(   Ú<lambda>ˆ  ó    z+BaseModel.memory_required.<locals>.<lambda>g{®Gáz„?i   c                 S   rû   rü   rý   r   r'   r'   r(   r  Œ  r  g333333Ã?)rd   rP   re   r†   rŠ   r*   r>   Úxformers_enabledÚ!pytorch_attention_flash_attentionr   ÚsumÚmapÚ
dtype_sizerc   )
rf   r  Úcond_shapesÚinput_shapesr8   r­   rÏ   r7   r9   Úarear'   r'   r(   Úmemory_requiredw  s$   

€zBaseModel.memory_requiredc                 K   s   i S rj   r'   rš   r'   r'   r(   Úextra_conds_shapes  r—   zBaseModel.extra_conds_shapes)rÒ   F©NNN)r   r   r   r   r   r   rK   rz   rn   r‡   r^   r   r›   r¾   rÅ   r   rà   r¬   rç   rô   rö   rú   r  r  Ú__classcell__r'   r'   rh   r(   rC   „   s&    &'9

rC   ç        c                 C   s  g }g }g }| D ]F}|d j D ]>}	|d }
|d }t|jd | ƒ}||	 |¡tj|g|d|d\}}t ||fd¡|
 }| |
¡ | |¡ | |¡ qqt|ƒdkrˆt 	|¡ 
d¡}|}t|jd | ƒ}||d d …d |jj…f tj|g|dd\}}t ||fd¡}|S )	NÚclip_vision_outputÚstrengthÚnoise_augmentationr
   ©rA   ©Únoise_levelÚseedr   )r  )Úimage_embedsr¶   Úmax_noise_levelrX   r;   rí   r€   r†   rŠ   Ústackr  Ú
time_embedr|   )Úunclip_conditioningrA   Únoise_augmentorÚnoise_augment_merger  Ú
adm_inputsÚweightsÚ	noise_augÚunclip_condÚadm_condÚweightÚnoise_augmentr  Úc_admÚnoise_level_embÚadm_outr'   r'   r(   Ú
unclip_adm“  s*   $

ø
0r)  c                       ó,   e Zd Zejdf‡ fdd„	Zdd„ Z‡  ZS )Ú
SD21UNCLIPNc                    s&   t ƒ j|||d tdi |¤Ž| _d S )Nr  r'   )rJ   rK   r   r  )rf   r5   Únoise_aug_configr6   rA   rh   r'   r(   rK   ¬  s   zSD21UNCLIP.__init__c              	   K   sV   |  dd ¡}|d }|d u rtjd| jf|dS t||| j|  dd¡|  dd¡d	 ƒS )
Nr  rA   r
   r  Úunclip_noise_augment_mergegš™™™™™©?r  r   r   )rP   r;   Úzerosr`   r)  r  )rf   ry   r  rA   r'   r'   r(   r›   °  s
   &zSD21UNCLIP.encode_adm)r   r   r   r   r   rK   r›   r  r'   r'   rh   r(   r+  «  ó    r+  c                 C   sH   d| v r t |  dd ¡| d ||  dd¡d dd d …d d…f S | d S )	Nr  rA   r  r   r   )r  é   Úpooled_output)r)  rP   )Úargsr  r'   r'   r(   Úsdxl_pooled¸  s   8r3  c                       r*  )ÚSDXLRefinerNc                    ó<   t ƒ j|||d tdƒ| _tdi dddœddœ¤Ž| _d S ©	Nr  é   iè  Úsquaredcos_cap_v2)Ú	timestepsÚbeta_scheduler0  )Únoise_schedule_configÚtimestep_dimr'   ©rJ   rK   r   Úembedderr   r  ©rf   r5   r6   rA   rh   r'   r(   rK   ¿  ó   
 zSDXLRefiner.__init__c           
      K   s"  t || jƒ}| dd¡}| dd¡}| dd¡}| dd¡}| dd¡d	kr-| d
d¡}n| d
d¡}g }| |  t |g¡¡¡ | |  t |g¡¡¡ | |  t |g¡¡¡ | |  t |g¡¡¡ | |  t |g¡¡¡ t t |¡¡j	dd 
|jd d¡}	tj| |	j¡|	fddS )NÚwidthé   ÚheightÚcrop_wr   Úcrop_hÚprompt_typerÒ   ÚnegativeÚaesthetic_scoreg      @r   r{   r
   ©r3  r  rP   r†   r>  r;   ÚTensorÚflattenr€   Ú	unsqueezeÚrepeatr­   rX   rA   )
rf   ry   Úclip_pooledrA  rC  rD  rE  rH  rÏ   Úflatr'   r'   r(   r›   Ä  s    &zSDXLRefiner.encode_adm©r   r   r   r   r   rK   r›   r  r'   r'   rh   r(   r4  ¾  ó    r4  c                       r*  )ÚSDXLNc                    r5  r6  r=  r?  rh   r'   r(   rK   Ú  r@  zSDXL.__init__c                 K   s(  t || jƒ}| dd¡}| dd¡}| dd¡}| dd¡}| d|¡}| d|¡}g }	|	 |  t |g¡¡¡ |	 |  t |g¡¡¡ |	 |  t |g¡¡¡ |	 |  t |g¡¡¡ |	 |  t |g¡¡¡ |	 |  t |g¡¡¡ t t |	¡¡j	dd	 
|jd d
¡}
tj| |
j¡|
fd
d	S )NrA  rB  rC  rD  r   rE  Útarget_widthÚtarget_heightr{   r
   rI  )rf   ry   rN  rA  rC  rD  rE  rS  rT  rÏ   rO  r'   r'   r(   r›   ß  s    &zSDXL.encode_admrP  r'   r'   rh   r(   rR  Ù  rQ  rR  c                       s4   e Zd Zejdf‡ fdd„	Zdd„ Zdd„ Z‡  ZS )ÚSVD_img2vidNc                    ó    t ƒ j|||d tdƒ| _d S )Nr  r7  )rJ   rK   r   r>  r?  rh   r'   r(   rK   ô  ó   zSVD_img2vid.__init__c                 K   s   |  dd¡d }|  dd¡}|  dd¡}g }| |  t |g¡¡¡ | |  t |g¡¡¡ | |  t |g¡¡¡ t t |¡¡jdd}|S )	NÚfpsr   r
   Úmotion_bucket_idé   Úaugmentation_levelr   r{   )rP   r†   r>  r;   rJ  rK  r€   rL  )rf   ry   Úfps_idrY  ÚaugmentationrÏ   rO  r'   r'   r(   r›   ø  s   zSVD_img2vid.encode_admc                 K   s  i }| j di |¤Ž}|d urtj |¡|d< | dd ¡}| dd ¡}|d u r+t |¡}|jdd … |jdd … krHt 	||jd |jd dd¡}t 
||jd	 ¡}tj |¡|d
< | dd ¡}|d urktj |¡|d< d|v rytj |d ¡|d< tj |jd	 ¡|d< |S )NrÆ   rž   r    r
   r¡   r¢   r£   r¤   r   ru   rÇ   rv   Útime_conditioningÚtime_contextÚnum_video_framesr'   )r›   r*   rË   rÍ   rP   r;   r¹   r­   r   r®   r³   rÌ   rÎ   ÚCONDConstant)rf   ry   rÏ   rÐ   rŸ   r    rÇ   r'   r'   r(   r     s&   
zSVD_img2vid.extra_conds)	r   r   r   r   r   rK   r›   r   r  r'   r'   rh   r(   rU  ó  s    rU  c                   @   ó   e Zd Zdd„ ZdS )ÚSV3D_uc              	   K   sJ   |  dd¡}g }| |  t t |g¡¡¡¡ t t |¡¡jdd}|S )Nr[  r   r{   )rP   r†   r>  r;   rK  rJ  r€   rL  )rf   ry   r]  rÏ   rO  r'   r'   r(   r›   #  s
   zSV3D_u.encode_admN)r   r   r   r›   r'   r'   r'   r(   rc  "  ó    rc  c                       r*  )ÚSV3D_pNc                    rV  )Nr  é   )rJ   rK   r   Úembedder_512r?  rh   r'   r(   rK   -  rW  zSV3D_p.__init__c                    sÒ   |  dd¡}|  dd¡}|  dd¡}|  dd ¡‰ g }| |  t t |g¡¡¡¡ | |  t t t dt |g¡ ¡d¡¡¡¡ | |  t t t t |g¡¡d¡¡¡¡ t	t
‡ fdd	„|ƒƒ}tj|d
dS )Nr[  r   Ú	elevationÚazimuthr    éZ   g     €v@c                    s   t  | ˆ jd ¡S )Nr   )r   r³   r­   ©Úa©r    r'   r(   r  <  s    z#SV3D_p.encode_adm.<locals>.<lambda>r
   r{   )rP   r†   r>  r;   rK  rJ  rg  Údeg2radÚfmodr…   r  r€   )rf   ry   r]  rh  ri  rÏ   r'   rm  r(   r›   1  s   0,zSV3D_p.encode_adm)r   r   r   r   r   rK   r›   r  r'   r'   rh   r(   re  ,  r/  re  c                       s0   e Zd Zejdddf‡ fdd„	Zdd„ Z‡  ZS )ÚStable_Zero123Nc                    sZ   t ƒ j|||d tjjj|jd |jd |  ¡ |d| _| jj	 
|¡ | jj 
|¡ d S )Nr  r
   r   )r9   rA   )rJ   rK   r*   rS   Úmanual_castÚLinearr­   r^   Úcc_projectionr$  Úcopy_Úbias)rf   r5   r6   rA   Úcc_projection_weightÚcc_projection_biasrh   r'   r(   rK   A  s   (zStable_Zero123.__init__c                 K   sÊ   i }|  dd ¡}|  dd ¡}|d u rt |¡}|jdd … |jdd … kr4t ||jd |jd dd¡}t ||jd ¡}tj 	|¡|d	< |  d
d ¡}|d urc|jd dkr[|  
|¡}tj |¡|d< |S )Nrž   r    r
   r¡   r¢   r£   r¤   r   ru   rÇ   rB  rv   )rP   r;   r¹   r­   r   r®   r³   r*   rË   rÌ   rs  rÎ   )rf   ry   rÏ   rŸ   r    rÇ   r'   r'   r(   r   G  s   

zStable_Zero123.extra_conds©r   r   r   r   r   rK   r   r  r'   r'   rh   r(   rp  @  s    rp  c                       r*  )ÚSD_X4UpscalerNc                    s*   t ƒ j|||d tdddœdd| _d S )Nr  g-Cëâ6?g{®Gáz”?)Úlinear_startÚ
linear_endi^  )r;  r  )rJ   rK   r   r  r?  rh   r'   r(   rK   _  s   zSD_X4Upscaler.__init__c           
      K   s:  i }|  dd ¡}|  dd ¡}|  dd¡}|d }|d d }t| jj| ƒ}|d u r7t |¡d d …d d…f }|jd	d … |jd	d … krWt | 	|¡|jd
 |jd dd¡}tj
|g|d}|dkrp| j| 	|¡||d\}}t ||jd ¡}tj |¡|d< tj |¡|d< |  dd ¡}	|	d ur›tj |	¡|d< |S )Nr«   r    r  r  rA   r  r   r   r
   r¡   r¢   r£   r¤   r  r   r  ru   rÆ   rÇ   rv   )rP   r¶   r  r  r;   r¹   r­   r   r®   rX   rí   r³   r*   rË   rÌ   rÍ   rÎ   )
rf   ry   rÏ   Úimager    r%  rA   r  r  rÇ   r'   r'   r(   r   c  s*   $zSD_X4Upscaler.extra_conds©r   r   r   r   r   rK   r   r  r'   r'   rh   r(   ry  ^  r/  ry  c                   @   rb  )ÚIP2Pc                 K   s–   |  dd ¡}|  dd ¡}|d }|d u rt |¡}n|j|d}|jdd … |jdd … kr=t ||jd |jd dd	¡}t ||jd
 ¡}|  |¡S )Nrž   r    rA   r  r
   r¡   r¢   r£   r¤   r   )	rP   r;   r¹   rX   r­   r   r®   r³   Úprocess_ip2p_image_in)rf   ry   r|  r    rA   r'   r'   r(   r¾   ƒ  s   
zIP2P.concat_condN)r   r   r   r¾   r'   r'   r'   r(   r~  ‚  rd  r~  c                       ó$   e Zd Zejdf‡ fdd„	Z‡  ZS )ÚSD15_instructpix2pixNc                    s    t ƒ j|||d dd„ | _d S )Nr  c                 S   ó   | S rj   r'   ©r|  r'   r'   r(   r  —  ó    z/SD15_instructpix2pix.__init__.<locals>.<lambda>)rJ   rK   r  r?  rh   r'   r(   rK   •  rW  zSD15_instructpix2pix.__init__©r   r   r   r   r   rK   r  r'   r'   rh   r(   r  ”  ó    r  c                       r€  )ÚSDXL_instructpix2pixNc                    s8   t ƒ j|||d |tjkrdd„ | _d S dd„ | _d S )Nr  c                 S   s   t j ¡  | ¡S rj   )r*   Úlatent_formatsrR  râ   rƒ  r'   r'   r(   r  ž  s    z/SDXL_instructpix2pix.__init__.<locals>.<lambda>c                 S   r‚  rj   r'   rƒ  r'   r'   r(   r     r„  )rJ   rK   r   r   r  r?  rh   r'   r(   rK   ›  s   
zSDXL_instructpix2pix.__init__r…  r'   r'   rh   r(   r‡  š  r†  r‡  c                       s,   e Zd Zdd„ Zejdf‡ fdd„	Z‡  ZS )ÚLotusc                 K   st   i }|  dd ¡}tj |¡|d< |d }t ddg¡ ¡  |¡}t t 	|¡t 
|¡g¡ d¡}tj |¡|d< |S )NrÇ   rv   rA   r
   r   rÆ   )rP   r*   rË   rÎ   r;   rí   rƒ   rX   r€   ÚsinÚcosrL  rÍ   )rf   ry   rÏ   rÇ   rA   Útask_embr'   r'   r(   r   £  s    zLotus.extra_condsNc                    ó   t ƒ j|||d d S ©Nr  ©rJ   rK   r?  rh   r'   r(   rK   ­  ó   zLotus.__init__)r   r   r   r   r   r$   rK   r  r'   r'   rh   r(   r‰  ¢  s    
r‰  c                       r*  )ÚStableCascade_CNc                    ó   t ƒ j|||td d S ©N©rA   rg   )rJ   rK   r   r?  rh   r'   r(   rK   ±  ó   zStableCascade_C.__init__c           	      K   sÞ   i }|d }|d urt j |¡|d< d|v r8g }|d D ]}|d }| |d j d¡| ¡ qtj|dd}nt d	¡}t j |¡|d
< t j t d¡¡|d< t j t d¡¡|d< | 	dd ¡}|d urmt j 
|¡|d< |S )Nr1  Úclip_text_pooledr  r  r  r   r
   r{   )r
   r
   rB  Úclip_img©r
   ÚscaÚcrprÇ   Ú	clip_text)r*   rË   rÍ   r†   r  rL  r;   r€   r.  rP   rÎ   )	rf   ry   rÏ   r–  Úembedsr"  r$  r—  rÇ   r'   r'   r(   r   ´  s$   
zStableCascade_C.extra_conds©r   r   r   r   r   rK   r   r  r'   r'   rh   r(   r‘  °  ó    r‘  c                       r*  )ÚStableCascade_BNc                    r’  r“  )rJ   rK   r   r?  rh   r'   r(   rK   Í  r•  zStableCascade_B.__init__c              	   K   s¨   i }|  dd ¡}|d }|d urtj |¡|d< |  dtjdd|jd d d	 |jd
 d d	 f|j|j|j	d¡}tj |j
|j	d¡|d< tj t d¡¡|d< |S )Nr    r1  ÚclipÚstable_cascade_priorr
   é   r   r   é*   r   ©r9   ÚlayoutrA   r  Úeffnetr˜  r™  )rP   r*   rË   rÍ   r;   r.  r­   r9   r¥  rA   rX   )rf   ry   rÏ   r    r–  Úpriorr'   r'   r(   r   Ð  s   DzStableCascade_B.extra_condsr  r'   r'   rh   r(   rŸ  Ì  rž  rŸ  c                       ó8   e Zd Zejdf‡ fdd„	Zdd„ Z‡ fdd„Z‡  ZS )ÚSD3Nc                    r’  r“  )rJ   rK   r   r?  rh   r'   r(   rK   á  r•  zSD3.__init__c                 K   ó   |d S ©Nr1  r'   rš   r'   r'   r(   r›   ä  r˜   zSD3.encode_admc                    ó:   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< |S ©NrÇ   rv   r'   ©rJ   r   rP   r*   rË   rÍ   ©rf   ry   rÏ   rÇ   rh   r'   r(   r   ç  ó
   zSD3.extra_conds©	r   r   r   r   r!   rK   r›   r   r  r'   r'   rh   r(   r©  à  ó    r©  c                       ó0   e Zd Zejdf‡ fdd„	Z‡ fdd„Z‡  ZS )ÚAuraFlowNc                    ó    t ƒ j|||tjjjjd d S r“  )rJ   rK   r*   ÚldmÚauraÚmmditÚMMDiTr?  rh   r'   r(   rK   ð  ó    zAuraFlow.__init__c                    r¬  r­  r®  r¯  rh   r'   r(   r   ó  r°  zAuraFlow.extra_conds©r   r   r   r   r!   rK   r   r  r'   r'   rh   r(   r´  ï  ó    r´  c                       s:   e Zd Zejdf‡ fdd„	Zdd„ Zd‡ fdd„	Z‡  ZS )	ÚStableAudio1Nc                    sh   t ƒ j|||tjjjjd tjjjjdddd| _	tjjjjdddd| _
| j	 |¡ | j
 |¡ d S )Nr”  rB  r   rf  )Úmin_valÚmax_val)rJ   rK   r*   r¶  ÚaudioÚditÚAudioDiffusionTransformerÚ	embeddersÚNumberConditionerÚseconds_start_embedderÚseconds_total_embedderrØ   )rf   r5   Úseconds_start_embedder_weightsÚseconds_total_embedder_weightsr6   rA   rh   r'   r(   rK   ü  s
   zStableAudio1.__init__c                 K   sø   i }|  dd ¡}|d }|  dd¡}|  dt|jd d ƒ¡}|  |g¡d  |¡}|  |g¡d  |¡}tj||gdd d	¡}	t	j
 |	¡|d
< |  dd ¡}
|
d urztj|
 |¡| |
jd ddf¡| |
jd ddf¡gdd}
t	j
 |
¡|d< |S )Nr    rA   Úseconds_startr   Úseconds_totalr¡   gHáz®‡5@r{   )r
   r¡   Úglobal_embedrÇ   r
   rv   )rP   r<   r­   rÅ  rX   rÆ  r;   r€   r´   r*   rË   rÍ   rM  )rf   ry   rÏ   r    rA   rÉ  rÊ  Úseconds_start_embedÚseconds_total_embedrË  rÇ   r'   r'   r(   r     s   >zStableAudio1.extra_condsc           
         s^   t ƒ j||||d}| j ¡ | j ¡ dœ}|D ]}|| }|D ]}	||	 |d ||	¡< qq|S )N)rð   rñ   rò   )z'conditioner.conditioners.seconds_start.z'conditioner.conditioners.seconds_total.z{}{})rJ   rô   rÅ  Ú
state_dictrÆ  r]   )
rf   rï   rð   rñ   rò   rÚ   ÚdrÝ   r7   Úlrh   r'   r(   rô     s   ÿz"StableAudio1.state_dict_for_savingr  )	r   r   r   r   r"   rK   r   rô   r  r'   r'   rh   r(   r½  û  s    r½  c                       r³  )Ú
HunyuanDiTNc                    rµ  r“  )rJ   rK   r*   r¶  ÚhyditÚmodelsÚ
HunYuanDiTr?  rh   r'   r(   rK   #  rº  zHunyuanDiT.__init__c              
      sú   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< | dd ¡}|d ur?tj |¡|d< | dd ¡}|d urQtj |¡|d< | d	d
¡}| dd
¡}| d|¡}	| d|¡}
tj t |||
|	ddgg¡¡|d< |S )NrÇ   rv   Úattention_maskÚtext_embedding_maskÚconditioning_mt5xlÚencoder_hidden_states_t5Úattention_mask_mt5xlÚtext_embedding_mask_t5rA  rB  rC  rS  rT  r   Úimage_meta_sizer'   ©rJ   r   rP   r*   rË   rÍ   r;   ÚFloatTensor)rf   ry   rÏ   rÇ   rÕ  r×  rÙ  rA  rC  rS  rT  rh   r'   r(   r   &  s&   $zHunyuanDiT.extra_condsr}  r'   r'   rh   r(   rÑ  "  r¼  rÑ  c                       r³  )ÚPixArtNc                    rµ  r“  )rJ   rK   r*   r¶  ÚpixartÚpixartmsÚPixArtMSr?  rh   r'   r(   rK   A  rº  zPixArt.__init__c              	      s¤   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}| dd ¡}|d urP|d urPtj t ||gg¡¡|d< tj t | d|| ¡gg¡¡|d< |S )	NrÇ   rv   rA  rC  Úc_sizeÚaspect_ratioÚc_arr'   rÜ  )rf   ry   rÏ   rÇ   rA  rC  rh   r'   r(   r   D  s   &zPixArt.extra_condsrx  r'   r'   rh   r(   rÞ  @  r¼  rÞ  c                       sR   e Zd Zejdejjjj	f‡ fdd„	Z
dd„ Zdd„ Z‡ fdd	„Zd
d„ Z‡  ZS )ÚFluxNc                    s   t ƒ j||||d d| _d S ©Nr”  )Úref_latents)rJ   rK   rd   ©rf   r5   r6   rA   rg   rh   r'   r(   rK   T  s   
zFlux.__init__c                 K   sÊ  z| j jjjd | j j| j j  }W n
   | jjd }Y | jjd }||kr)d S | dd ¡}| dd ¡}|d }|d u rBt 	|¡}t
 | |¡|jd |jd d	d
¡}t
 ||jd ¡}|  |¡}||d krj|S | d| dd ¡¡}|d u r…t |¡d d …d d…f }tj|ddd}t
 | |¡|jd d |jd d d	d
¡}| |jd |jd d d|jd d d¡ ddddd¡ |jd d|jd d |jd d ¡}t
 ||jd ¡}tj||fddS )Nr
   Úin_channelsÚout_channelsrž   r    rA   r¡   r¢   r£   r¤   r   r   rœ   r   T©r|   Úkeepdimr   r   r   r{   )rU   Úimg_inr$  r­   Ú
patch_sizer5   rL   rP   r;   r¹   r   r®   rX   r³   r¬   r·   ÚmeanÚviewÚpermuter´   r€   )rf   ry   Únum_channelsrê  r|  r    rA   r§   r'   r'   r(   r¾   X  s2   $
$
,`zFlux.concat_condc                 K   s   |  dd ¡S r«  ©rP   rš   r'   r'   r(   r›   }  rå   zFlux.encode_admc                    sN  t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur\|d j}| dd ¡}|d ur\t |d | j	j
 ¡t |d | j	j
 ¡}}t ||||f¡}tj |¡|d< | dd	¡}	|	d urrtj t |	g¡¡|d< | d
d ¡}
|
d ur¥g }|
D ]
}| |  |¡¡ q€tj |¡|d< | dd ¡}|d ur¥tj |¡|d< |S )NrÇ   rv   rÕ  r    Úattention_mask_img_shaper   r   Úguidanceg      @Úreference_latentsrç  Úreference_latents_methodÚref_latents_methodr'   )rJ   r   rP   r*   rË   rÍ   r­   rþ   ÚceilrU   rî  r   Úupscale_dit_maskr;   rÝ  r†   r¬   ÚCONDListra  )rf   ry   rÏ   rÇ   rÕ  r­   Úmask_ref_sizeÚh_tokÚw_tokrõ  rç  ÚlatentsÚlatrø  rh   r'   r(   r   €  s2   
.zFlux.extra_condsc                 K   ó<   i }|  dd ¡}|d urtddttdd„ |ƒƒgƒ|d< |S )Nrö  r
   r¢  c                 S   ó   t  |  ¡ dd … ¡S ©Nr   ©rþ   rÿ   Úsizerk  r'   r'   r(   r  ¥  ó    z)Flux.extra_conds_shapes.<locals>.<lambda>rç  ©rP   r…   r  r  ©rf   ry   rÏ   rç  r'   r'   r(   r  ¡  ó
    zFlux.extra_conds_shapes)r   r   r   r   r#   r*   r¶  ÚfluxÚmodelrå  rK   r¾   r›   r   r  r  r'   r'   rh   r(   rå  S  s    %!rå  c                       s:   e Zd Zdddi f‡ fdd„	Zdd„ Z‡ fdd„Z‡  ZS )ÚLongCatImageNc           
         s€   |  ¡ }| di ¡}t|ƒ}|d urt|jd ƒnd}	| dd¡ | d|	¡ | d|	¡ ||d< tƒ j||||||fi |¤ŽS )NÚrope_optionsr
   g      €@Úshift_trª   Úshift_yÚshift_x)ÚcopyrP   Údictrƒ   r­   Ú
setdefaultrJ   rn   )
rf   rs   rt   ru   rv   rw   rx   ry   Ú	rope_optsÚpe_lenrh   r'   r(   rn   ©  s   zLongCatImage._apply_modelc                 K   r™   rj   r'   rš   r'   r'   r(   r›   ´  r—   zLongCatImage.encode_admc                    s"   t ƒ jdi |¤Ž}| dd ¡ |S )Nrõ  r'   )rJ   r   r‰   ©rf   ry   rÏ   rh   r'   r(   r   ·  s   zLongCatImage.extra_conds)r   r   r   rn   r›   r   r  r'   r'   rh   r(   r  ¨  s    r  c                       s   e Zd Z‡ fdd„Z‡  ZS )ÚFlux2c                    sn   t ƒ jdi |¤Ž}| dd ¡}|d ur5d}|jd |k r-tjj |dd||jd  df¡}tj	 
|¡|d< |S )NrÇ   rf  r
   r   rv   r'   )rJ   r   rP   r­   r;   r°   r±   r²   r*   rË   rÍ   )rf   ry   rÏ   rÇ   Útarget_text_lenrh   r'   r(   r   ½  s   "zFlux2.extra_conds)r   r   r   r   r  r'   r'   rh   r(   r  ¼  ó    r  c                       r³  )Ú
GenmoMochiNc                    s"   t ƒ j|||tjjjjjd d S r“  )rJ   rK   r*   r¶  ÚgenmoÚjoint_modelÚasymm_models_jointÚAsymmDiTJointr?  rh   r'   r(   rK   È  s   "zGenmoMochi.__init__c                    s~   t ƒ jdi |¤Ž}| dd ¡}|d ur+tj |¡|d< tj tdt 	|¡ 
¡ ƒ¡|d< | dd ¡}|d ur=tj |¡|d< |S )NrÕ  r
   Ú
num_tokensrÇ   rv   r'   )rJ   r   rP   r*   rË   rÍ   ra  Úmaxr;   r  Úitem©rf   ry   rÏ   rÕ  rÇ   rh   r'   r(   r   Ë  s    zGenmoMochi.extra_condsr»  r'   r'   rh   r(   r  Ç  r¼  r  c                       óB   e Zd Zejdf‡ fdd„	Z‡ fdd„Zd
dd„Zdd	„ Z‡  Z	S )ÚLTXVNc                    rµ  r“  )rJ   rK   r*   r¶  Ú
lightricksr  Ú	LTXVModelr?  rh   r'   r(   rK   ×  rº  zLTXV.__init__c                    sê   t ƒ jd
i |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< tj | dd¡¡|d< | d| dd ¡¡}|d urOtj |¡|d< | dd ¡}|d uratj |¡|d< | d	d ¡}|d urstj |¡|d	< |S )NrÕ  rÇ   rv   Ú
frame_rateé   rœ   r   Úkeyframe_idxsÚguide_attention_entriesr'   ©rJ   r   rP   r*   rË   rÍ   ra  )rf   ry   rÏ   rÕ  rÇ   r   r)  r*  rh   r'   r(   r   Ú  s$   zLTXV.extra_condsc              	   K   sP   |d u r|S | j j || |jd gdg|jd   ¡ d d …d d…f ¡d S r÷   )rU   Ú
patchifierÚpatchifyrð  r­   r¯   )rf   r‚   rs   r   ry   r'   r'   r(   r‡   ó  s   DzLTXV.process_timestepc                 K   ó   |S rj   r'   rù   r'   r'   r(   rú   ø  r—   zLTXV.scale_latent_inpaintrj   ©
r   r   r   r   r#   rK   r   r‡   rú   r  r'   r'   rh   r(   r$  Ö  s
    
r$  c                       r#  )ÚLTXAVNc                    rµ  r“  )rJ   rK   r*   r¶  r%  Úav_modelÚ
LTXAVModelr?  rh   r'   r(   rK   ü  rº  zLTXAV.__init__c                    sÄ  t ƒ jdi |¤Ž}| dd ¡}|d }|d urtj |¡|d< | dd ¡}|d urJt| jdƒrB| jj|j	||  
¡ d| dd¡d}tj |¡|d	< tj | d
d¡¡|d
< | d| dd ¡¡}d }|d ur€d|v r€t ||d ¡}t|ƒdkr||d }|d }|d urŒtj |¡|d< |d ur˜tj |¡|d< | dd ¡}|d urªtj |¡|d< | dd ¡}	|	d ur¼tj |	¡|d< | dd ¡}
|
d urÎtj |
¡|d< | dd ¡}|d uràtj |¡|d< |S )NrÕ  rA   rÇ   Úpreprocess_text_embeds©rA   r9   Úunprocessed_ltxav_embedsF)Úunprocessedrv   r'  r(  rœ   r   r}   r
   r   Úaudio_denoise_maskr)  r*  Ú	ref_audior'   )rJ   r   rP   r*   rË   rÍ   r:   rU   r3  rX   r   ra  r   rˆ   rŠ   )rf   ry   rÏ   rÕ  rA   rÇ   r   r7  r)  r}   r*  r8  rh   r'   r(   r   ÿ  sF   &zLTXAV.extra_condsc              	   K   s´   |}|}|d ur*| j j || |jd gdg|jd   ¡ d d …d d…f ¡d }|d urV| j j || |jd gdg|jd   ¡ d d …d d…d d …d d…f ¡d }||fS r÷   )rU   r,  r-  rð  r­   r¯   Úa_patchifier)rf   r‚   rs   r   r7  ry   Ú
v_timestepÚ
a_timestepr'   r'   r(   r‡   /  s   DPzLTXAV.process_timestepc                 K   r.  rj   r'   rù   r'   r'   r(   rú   :  r—   zLTXAV.scale_latent_inpaint)NNr/  r'   r'   rh   r(   r0  û  s
    
0r0  c                       s@   e Zd Zejdf‡ fdd„	Zdd„ Z‡ fdd„Zdd	„ Z‡  Z	S )
ÚHunyuanVideoNc                    rµ  r“  ©rJ   rK   r*   r¶  Úhunyuan_videor  r<  r?  rh   r'   r(   rK   >  rº  zHunyuanVideo.__init__c                 K   rª  r«  r'   rš   r'   r'   r(   r›   A  r˜   zHunyuanVideo.encode_admc                    sà   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< | dd¡}|d urCtj t |g¡¡|d< | dd ¡}|d urYtj t |g¡¡|d< | dd ¡}|d urntj |  |¡¡|d< |S )	NrÕ  rÇ   rv   rõ  ç      @Úguiding_frame_indexÚ
ref_latentr'   )	rJ   r   rP   r*   rË   rÍ   r;   rÝ  r¬   )rf   ry   rÏ   rÕ  rÇ   rõ  r@  rA  rh   r'   r(   r   D  s"   zHunyuanVideo.extra_condsc                 K   r–   rj   r'   ©rf   rŸ   ry   r'   r'   r(   rú   [  r—   z!HunyuanVideo.scale_latent_inpaint)
r   r   r   r   r!   rK   r›   r   rú   r  r'   r'   rh   r(   r<  =  s
    r<  c                       r³  )ÚHunyuanVideoI2VNc                    ó   t ƒ j|||d d| _d S )Nr  )r«   r©   ©rJ   rK   ra   r?  rh   r'   r(   rK   _  ó   
zHunyuanVideoI2V.__init__c                    ó   t ƒ jdd|i|¤ŽS ©NrŸ   r'   ©rJ   rú   rB  rh   r'   r(   rú   c  r  z$HunyuanVideoI2V.scale_latent_inpaint©r   r   r   r   r!   rK   rú   r  r'   r'   rh   r(   rC  ^  ó    rC  c                       r³  )ÚHunyuanVideoSkyreelsI2VNc                    rD  )Nr  )r«   rE  r?  rh   r'   r(   rK   g  rF  z HunyuanVideoSkyreelsI2V.__init__c                    rG  rH  rI  rB  rh   r'   r(   rú   k  r  z,HunyuanVideoSkyreelsI2V.scale_latent_inpaintrJ  r'   r'   rh   r(   rL  f  rK  rL  c                       ó:   e Zd Zejddf‡ fdd„	Z‡ fdd„Zdd„ Z‡  ZS )	ÚCosmosVideoFNc                    ó6   t ƒ j|||tjjjjd || _| jrd| _d S d S ©Nr”  )r©   )	rJ   rK   r*   r¶  Úcosmosr  Ú
GeneralDITÚimage_to_videora   ©rf   r5   r6   rS  rA   rh   r'   r(   rK   o  ó
   
ÿzCosmosVideo.__init__c                    sv   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< tj | dd ¡¡|d< |S )NrÕ  rÇ   rv   r'  rX  r'   r+  r"  rh   r'   r(   r   u  s   zCosmosVideo.extra_condsc                 K   sv   |  |jd gdgt|jƒd   ¡}d}|dkr|| }| j tj|g|j|jd|¡}||d | jj	d  d  S )Nr   r
   r4  r   g      à?)
r´   r­   rŠ   r+   r   r;   rí   rA   r9   Ú
sigma_data©rf   rŽ   r    rŸ   ry   Úsigma_noise_augmentationr'   r'   r(   rú     s   & z CosmosVideo.scale_latent_inpaint)	r   r   r   r   r    rK   r   rú   r  r'   r'   rh   r(   rN  n  s    rN  c                       óD   e Zd Zejddf‡ fdd„	Z‡ fdd„Zddd„Zd	d
„ Z‡  Z	S )ÚCosmosPredict2FNc                    rO  rP  )	rJ   rK   r*   r¶  rQ  Úpredict2ÚMiniTrainDITrS  ra   rT  rh   r'   r(   rK   Š  rU  zCosmosPredict2.__init__c                    s~   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | d| dd ¡¡}|d ur1tj |¡|d< tj | dd ¡¡|d< |S )NrÇ   rv   rœ   r   r'  rX  r'   r+  )rf   ry   rÏ   rÇ   r   rh   r'   r(   r     s   zCosmosPredict2.extra_condsc                 K   sh   |d u r|S |j dkr|S |jg d¢dd}dd|  | |jd dddd¡|  }|jg d¢d	}|S )
Nr   ©r
   r   r   Trë  r  rª   r   r
   r{   )r¯   rï  r´   r­   Úsqueeze)rf   r‚   rs   r   ry   Úcondition_video_mask_B_1_T_1_1Úc_noise_B_1_T_1_1rÏ   r'   r'   r(   r‡     s   
(zCosmosPredict2.process_timestepc                 K   sr   |  |jd gdgt|jƒd   ¡}d}|dkr|| }| j tj|g|j|jd|¡}||d  }|d|  S )Nr   r
   r4  rª   )	r´   r­   rŠ   r+   r   r;   rí   rA   r9   rW  r'   r'   r(   rú   §  s   & z#CosmosPredict2.scale_latent_inpaintrj   )
r   r   r   r   r%   rK   r   r‡   rú   r  r'   r'   rh   r(   rZ  ‰  s
    

rZ  c                       r³  )ÚAnimaNc                    rµ  r“  )rJ   rK   r*   r¶  Úanimar  ra  r?  rh   r'   r(   rK   ±  rº  zAnima.__init__c                    sê   t ƒ jdi |¤Ž}| dd ¡}| dd ¡}| dd ¡}|d }|d urs|d urk|d ur6| d¡ d¡ |¡}| d¡}t ¡ r[| jj|j||  	¡ d|j|d|j||  	¡ dd	}nt
j |¡|d< t
j |¡|d< t
j |¡|d
< |S )NrÇ   Ú	t5xxl_idsÚt5xxl_weightsrA   r   r¡   r4  r  )rd  rv   r'   )rJ   r   rP   rL  rX   r;   Úis_inference_mode_enabledrU   r3  r   r*   rË   rÍ   )rf   ry   rÏ   rÇ   rc  rd  rA   rh   r'   r(   r   ´  s    
8zAnima.extra_condsr»  r'   r'   rh   r(   ra  °  r¼  ra  c                       ó8   e Zd Zejdf‡ fdd„	Z‡ fdd„Zdd„ Z‡  ZS )ÚLumina2Nc                    ó&   t ƒ j|||tjjjjd d| _d S ræ  )rJ   rK   r*   r¶  Úluminar  ÚNextDiTrd   r?  rh   r'   r(   rK   Ê  ó   
zLumina2.__init__c              
      sä  t ƒ jdi |¤Ž}| dd ¡}|d ur4t |¡| ¡ kr$tj |¡|d< tj 	t
dt |¡ ¡ ƒ¡|d< | dd ¡}|d urUtj |¡|d< d|vrUtj 	|jd ¡|d< | dd ¡}|d urgtj |¡|d< | dttd	d
„ | di g¡ƒƒ¡}|d ur½t|ƒdkr½g }|D ]'}|d ur®|jd }	|jj}
| |j |
d |	d d |	d d |
d ¡¡ q‡t|ƒdkr½tj |¡|d< | dd ¡}|d urÞg }|D ]
}| |  |¡¡ qËtj |¡|d< | dd ¡}|d urðtj |¡|d< |S )NrÕ  r
   r  rÇ   rv   r1  r–  Úclip_vision_outputsc                 S   s
   |   d¡S )Nr  ró  rk  r'   r'   r(   r  à  s   
 z%Lumina2.extra_conds.<locals>.<lambda>r  r   r¢  r   r¡   Úsiglip_featsrö  rç  Úreference_latents_text_embedsÚref_contextsr'   )rJ   r   rP   r;   Únumelr  r*   rË   rÍ   ra  r   r!  r­   r…   r  rŠ   Úimage_sizesÚlast_hidden_stater†   r´   rû  r¬   )rf   ry   rÏ   rÕ  rÇ   r–  rl  Úsigfeatsr  Ú
image_sizer­   rç  rÿ  r   ro  rh   r'   r(   r   Î  sF    $
0€zLumina2.extra_condsc                 K   r  )Nrö  r
   r¢  c                 S   r  r  r  rk  r'   r'   r(   r  ü  r  z,Lumina2.extra_conds_shapes.<locals>.<lambda>rç  r  r  r'   r'   r(   r  ø  r	  zLumina2.extra_conds_shapes©	r   r   r   r   r!   rK   r   r  r  r'   r'   rh   r(   rg  É  s    *rg  c                   @   s   e Zd Zejdfdd„ZdS )ÚZImagePixelSpaceNc                 C   s&   t j| |||tjjjjd d| _d S ræ  )rC   rK   r*   r¶  ri  r  ÚNextDiTPixelSpacerd   r?  r'   r'   r(   rK      rk  zZImagePixelSpace.__init__)r   r   r   r   r!   rK   r'   r'   r'   r(   rv  ÿ  r  rv  c                       s:   e Zd Zejddf‡ fdd„	Zdd„ Z‡ fdd„Z‡  ZS )	ÚWAN21FNc                    s&   t ƒ j|||tjjjjd || _d S r“  )rJ   rK   r*   r¶  Úwanr  ÚWanModelrS  rT  rh   r'   r(   rK     rk  zWAN21.__init__c              
   K   s°  |  dd ¡}| jjjjd |jd  }|dkrd S |  dd ¡}|d }|d u r=t|jƒ}||d< tj||j|j	|j
d}nD| jj}t | |¡|jd |jd d	d
¡}td|jd |ƒD ]}|  |d d …||| …f ¡|d d …||| …f< q\t ||jd ¡}||jd d kr–| jr”||jd kr–|S |jd |d kr«|d d …d |d …f }|  d|  dd ¡¡}	|	d u rÇt |¡d d …d d…f }	nc|	jd dkrÖtj|	ddd}	d|	 }	t |	 |¡|jd |jd d	d
¡}	|	jd |jd k rtjjj|	ddddd|jd |	jd  fddd}	|	jd dkr!|	 ddddd¡}	t |	|jd ¡}	|  dd¡}
|
dkrOtj|d d …d |
…f |	|d d …|
d …f fddS tj|	|fddS )Nr    r
   r   rž   rA   r¤  r¡   r¢   r£   r¤   r   rœ   r   Trë  rª   r¥   r¦   ©ÚmodeÚvalueÚconcat_mask_indexr{   )rP   rU   Úpatch_embeddingr$  r­   r…   r;   r.  r9   r¥  rA   rM   Úlatent_channelsr   r®   rX   Úranger¬   r³   rS  r¹   rï  r°   r±   r²   rM  r€   )rf   ry   r    Úextra_channelsr|  rA   Úshape_imageÚ
latent_dimÚir§   r~  r'   r'   r(   r¾   	  sH   
$4$2
4zWAN21.concat_condc                    sÊ   t ƒ jd
i |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur.tj |j¡|d< | dd ¡}|d urCtj |  |¡¡|d< | dd ¡}|d urctj |  |d ¡d d …d d …df ¡|d	< |S )NrÇ   rv   r  Úclip_feaÚtime_dim_concatrö  r¡   r   Úreference_latentr'   )rJ   r   rP   r*   rË   rÍ   Úpenultimate_hidden_statesr¬   )rf   ry   rÏ   rÇ   r  r‡  rö  rh   r'   r(   r   8  s   ,zWAN21.extra_conds©	r   r   r   r   r!   rK   r¾   r   r  r'   r'   rh   r(   rx    s    /rx  c                       óB   e Zd Zejddf‡ fdd„	Z‡ fdd„Zg f‡ fdd„	Z‡  ZS )	Ú
WAN21_VaceFNc                    ó*   t t| ƒj|||tjjjjd || _d S r“  )	rJ   rx  rK   r*   r¶  ry  r  ÚVaceWanModelrS  rT  rh   r'   r(   rK   N  ó    
zWAN21_Vace.__init__c              	      s‚  t ƒ jdi |¤Ž}| dd ¡}t|jƒ}| dd ¡}|d u r-d|d< tj||j|jdg}| dd ¡}|d u rJd|d< tj	||j|jdgt
|ƒ }g }tt
|ƒƒD ]J}|| j|j|jdd	}	td
|	jd dƒD ]}
|  |	d d …|
|
d …f ¡|	d d …|
|
d …f< qitj|	|| j|j|jdgdd}	| |	¡ qRtj|dd}tj |¡|d< | ddgt
|ƒ ¡}tj |¡|d< |S )Nr    Úvace_framesé    r
   r4  Ú	vace_maské@   T)rA   r9   r  r   r¢  r{   Úvace_contextÚvace_strengthrª   r'   )rJ   r   rP   r…   r­   r;   r.  rA   r9   ÚonesrŠ   r  rX   r¬   r€   r†   r  r*   rË   rÍ   ra  )rf   ry   rÏ   r    Únoise_shaper  r§   Úvace_frames_outÚjÚvfr…  r•  rh   r'   r(   r   R  s.   
4$zWAN21_Vace.extra_condsc                    s8   |dkrt jj||||d|dS tƒ j||||||dS )Nr”  r   )Útemporal_dimrÄ   ©rÄ   ©r*   Úcontext_windowsÚ
slice_condrJ   rÅ   r¿   rh   r'   r(   rÅ   o  s   z)WAN21_Vace.resize_cond_for_context_window©	r   r   r   r   r!   rK   r   rÅ   r  r'   r'   rh   r(   rŒ  M  s    rŒ  c                       s2   e Zd Zejddf‡ fdd„	Z‡ fdd„Z‡  ZS )ÚWAN21_CameraFNc                    r  r“  )	rJ   rx  rK   r*   r¶  ry  r  ÚCameraWanModelrS  rT  rh   r'   r(   rK   u  r  zWAN21_Camera.__init__c                    ó:   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< |S )NÚcamera_conditionsr'   r®  )rf   ry   rÏ   r¤  rh   r'   r(   r   y  r°  zWAN21_Camera.extra_condsr»  r'   r'   rh   r(   r¡  t  s    r¡  c                       r‹  )	Ú
WAN21_HuMoFNc                    r  r“  )	rJ   rx  rK   r*   r¶  ry  r  ÚHumoWanModelrS  rT  rh   r'   r(   rK     r  zWAN21_HuMo.__init__c                    sê  t ƒ jdi |¤Ž}| dd ¡}| dd ¡}|d ur!tj |¡|d< d|vr>| dd ¡}|d ur<tj |  |d ¡¡|d< |S t|jƒ}|d  d7  < t	j
||j|jd	}t	 g d
¢¡ ddddd¡}t	 g d¢¡ ddddd¡}	t	 g d¢¡ ddddd¡}
|
|d d …dd …f< ||d d …dd …d d…f< |	|d d …dd …dd…f< tj |¡|d< | dd ¡}|d uró|  |d ¡}t|jƒ}|d  d|d  7  < t	j
||j|jd	}||d d …dd …f< d|d d …dd…f< tj |¡|d< |S )Nr    Úaudio_embedru   rö  r¡   rˆ  r
   r   r4  )gƒÀÊ¡E¶ë?grùé·¯Û¿g_ÎQÚ[¿gs×òAß¿gòAÏfÕçà¿gŸ«­Ø_ví?g&äƒžÍªï¿gtF”ö_Ü?g”‡…ZÓ¼á¿gMóŽSt$‡¿gq¬‹Ûhâ?g333333ã¿g0»'µê¿gQÚ|aÖ¿g,Ôšæ§È¿g¤ß¾œ3æ¿r¢  )gÀ[ Añcñ?g˜nƒÀÊó¿g¾0™*•?g4€·@‚âÛ¿g2æ®%äƒä¿gÉå?¤ß> @g<½R–!Žù¿g7À[ Aô?g/Ý$ë¿gÓ¼ãÉÝ¿gHPüs×î?gTã¥›Ä ú¿gL7‰A`åö¿gÎQÚ¼¿g=›UŸ«­Ø¿gÚ|a2U÷¿)g€&Â†§ë?gA‚âÇ˜»ý¿g:#J{ƒ/Ä?gHáz®GÁ?gvOjM×¿g(í¾°@gF¶óýÔxÿ¿gNbX9´ù?gÃõ(\Âð¿gi oÅë¿g¼?Æò?g¾Á&Sý¿gx$(~ù¿g0»'µè¿g±áé•²Õ¿gË¡E¶óýõ¿r   é   rª   r'   )rJ   r   rP   r*   rË   rÍ   r¬   r…   r­   r;   r.  rA   r9   rí   rð  rÌ   )rf   ry   rÏ   r    r§  rö  r—  Úconcat_latentÚzero_vae_values_firstÚzero_vae_values_secondÚzero_vae_valuesrA  Úref_latent_shapeÚref_latent_fullrh   r'   r(   r   …  s<   
ì
zWAN21_HuMo.extra_condsc                    ó6   |dkrt jj||||ddS tƒ j||||||dS ©Nr§  r
   )r›  rœ  r  r¿   rh   r'   r(   rÅ   ¨  ó   z)WAN21_HuMo.resize_cond_for_context_windowr   r'   r'   rh   r(   r¥  €  s    #r¥  c                       r‹  )	ÚWAN22_AnimateFNc                    r  r“  )	rJ   rx  rK   r*   r¶  ry  Úmodel_animateÚAnimateWanModelrS  rT  rh   r'   r(   rK   ®  r  zWAN22_Animate.__init__c                    sd   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur0tj |  |¡¡|d< |S )NÚface_video_pixelsÚface_pixel_valuesÚpose_video_latentÚpose_latentsr'   ©rJ   r   rP   r*   rË   rÍ   r¬   )rf   ry   rÏ   rµ  r¸  rh   r'   r(   r   ²  s   zWAN22_Animate.extra_condsc              	      sZ   |dkrt jj||||ddddS |dkr!t jj||||dddS tƒ j||||||dS )	Nr¶  r   r   r
   )r›  Útemporal_scaleÚtemporal_offsetr¸  )r›  r»  rœ  r  r¿   rh   r'   r(   rÅ   ¾  s
   z,WAN22_Animate.resize_cond_for_context_windowr   r'   r'   rh   r(   r²  ­  s    r²  c                       sH   e Zd Zejdf‡ fdd„	Z‡ fdd„Zdd„ Zg f‡ fdd	„	Z‡  Z	S )
Ú	WAN22_S2VNc                    s8   t t| ƒj|||tjjjjd d| _ddd„ i| _	d S )Nr”  )rˆ  Úreference_motionr½  c                 S   ó   | d | d d| d | d gS ©Nr   r
   g      ø?r¢   r¡   r'   ©r­   r'   r'   r(   r  É  ó    z$WAN22_S2V.__init__.<locals>.<lambda>)
rJ   rx  rK   r*   r¶  ry  r  ÚWanModel_S2Vrd   re   r?  rh   r'   r(   rK   Æ  s    zWAN22_S2V.__init__c                    s¼   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur2tj |  |d ¡¡|d< | dd ¡}|d urGtj |  |¡¡|d< | dd ¡}|d ur\tj |  |¡¡|d< |S )Nr§  rö  r¡   rˆ  r½  Úcontrol_videor'   r¹  )rf   ry   rÏ   r§  rö  r½  rÃ  rh   r'   r(   r   Ë  s   zWAN22_S2V.extra_condsc                 K   s^   i }|  dd ¡}|d urtddttdd„ |ƒƒd gƒ|d< |  dd ¡}|d ur-|j|d< |S )Nrö  r
   r¢  c                 S   ó   t  |  ¡ ¡S rj   r  rk  r'   r'   r(   r  â  ó    z.WAN22_S2V.extra_conds_shapes.<locals>.<lambda>rˆ  r½  ©rP   r…   r  r  r­   )rf   ry   rÏ   rç  r½  r'   r'   r(   r  Þ  s   $
zWAN22_S2V.extra_conds_shapesc                    r¯  r°  r  r¿   rh   r'   r(   rÅ   é  r±  z(WAN22_S2V.resize_cond_for_context_window)
r   r   r   r   r!   rK   r   r  rÅ   r  r'   r'   rh   r(   r¼  Å  s
    r¼  c                       rY  )ÚWAN22FNc                    r  r“  )	rJ   rx  rK   r*   r¶  ry  r  rz  rS  rT  rh   r'   r(   rK   ï  r  zWAN22.__init__c                    r£  )Nr   r'   r®  )rf   ry   rÏ   r   rh   r'   r(   r   ó  r°  zWAN22.extra_condsc                 K   sr   |d u r|S t j|d d …d d …d d …d d …d d …f ddd| |jd gdg|jd   ¡  |jd d¡}|S )Nr]  Trë  r   r
   r¡   )r;   rï  rð  r­   r¯   r´   )rf   r‚   rs   r   ry   Útemp_tsr'   r'   r(   r‡   ú  s   bzWAN22.process_timestepc                 K   r.  rj   r'   rù   r'   r'   r(   rú      r—   zWAN22.scale_latent_inpaintrj   )
r   r   r   r   r!   rK   r   r‡   rú   r  r'   r'   rh   r(   rÇ  î  s
    
rÇ  c                       s&   e Zd Zejddf‡ fdd„	Z‡  ZS )ÚWAN21_FlowRVSFNc                    s4   d|j d< tt| ƒj|||tjjjjd || _	d S )NÚt2vr6   r”  )
rL   rJ   rx  rK   r*   r¶  ry  r  rz  rS  rT  rh   r'   r(   rK     s   
 
zWAN21_FlowRVS.__init__)r   r   r   r   r&   rK   r  r'   r'   rh   r(   rÉ    s    rÉ  c                       rM  )	ÚWAN21_SCAILFNc                    s>   t t| ƒj|||tjjjjd d| _ddd„ i| _	|| _
d S )Nr”  )rˆ  r¸  r¸  c                 S   r¾  r¿  r'   rÀ  r'   r'   r(   r    rÁ  z&WAN21_SCAIL.__init__.<locals>.<lambda>)rJ   rx  rK   r*   r¶  ry  r  ÚSCAILWanModelrd   re   rS  rT  rh   r'   r(   rK   
  s    
zWAN21_SCAIL.__init__c                    sÎ   t ƒ jd	i |¤Ž}| dd ¡}|d ur8|  |d ¡}t |d d …d d…f ¡}tj||gdd}tj 	|¡|d< | dd ¡}|d ure|  |¡}t |d d …d d…f ¡}tj||gdd}tj 	|¡|d< |S )
Nrö  r¡   r   r
   r{   rˆ  r·  r¸  r'   )
rJ   r   rP   r¬   r;   r·   r€   r*   rË   rÍ   )rf   ry   rÏ   rö  rA  Úref_maskr¸  Ú	pose_maskrh   r'   r(   r     s   
zWAN21_SCAIL.extra_condsc                 K   st   i }|  dd ¡}|d urtddttdd„ |ƒƒd gƒ|d< |  dd ¡}|d ur8|jd	 dg|jd
d … ¢|d< |S )Nrö  r
   r¨  c                 S   rÄ  rj   r  rk  r'   r'   r(   r  '  rÅ  z0WAN21_SCAIL.extra_conds_shapes.<locals>.<lambda>r¢  rˆ  r·  r   r   r¸  rÆ  )rf   ry   rÏ   rç  r¸  r'   r'   r(   r  #  s   $ zWAN21_SCAIL.extra_conds_shapesru  r'   r'   rh   r(   rË  	  s    rË  c                       r³  )ÚHunyuan3Dv2Nc                    rµ  r“  )rJ   rK   r*   r¶  Ú	hunyuan3dr  rÏ  r?  rh   r'   r(   rK   0  rº  zHunyuan3Dv2.__init__c                    óf   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd¡}|d ur1tj t |g¡¡|d< |S ©NrÇ   rv   rõ  g      @r'   rÜ  ©rf   ry   rÏ   rÇ   rõ  rh   r'   r(   r   3  ó   zHunyuan3Dv2.extra_condsr»  r'   r'   rh   r(   rÏ  /  r¼  rÏ  c                       r³  )ÚHunyuan3Dv2_1Nc                    rµ  r“  )rJ   rK   r*   r¶  Úhunyuan3dv2_1Ú
hunyuanditÚHunYuanDiTPlainr?  rh   r'   r(   rK   ?  rº  zHunyuan3Dv2_1.__init__c                    rÑ  rÒ  rÜ  rÓ  rh   r'   r(   r   B  rÔ  zHunyuan3Dv2_1.extra_condsr»  r'   r'   rh   r(   rÕ  >  r¼  rÕ  c                       r¨  )ÚHiDreamNc                    rµ  r“  )rJ   rK   r*   r¶  Úhidreamr  ÚHiDreamImageTransformer2DModelr?  rh   r'   r(   rK   N  rº  zHiDream.__init__c                 K   rª  r«  r'   rš   r'   r'   r(   r›   Q  r˜   zHiDream.encode_admc                    sˆ   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< | dd ¡}|d urBtj |  |¡¡|d< |S )NrÇ   rv   Úconditioning_llama3Úencoder_hidden_states_llama3rž   Ú
image_condr'   )rJ   r   rP   r*   rË   rÍ   rÌ   r¬   )rf   ry   rÏ   rÇ   rÜ  rÞ  rh   r'   r(   r   T  s   zHiDream.extra_condsr±  r'   r'   rh   r(   rÙ  M  r²  rÙ  c                       s:   e Zd Zejdejjjj	f‡ fdd„	Z
‡ fdd„Z‡  ZS )ÚChromaNc                    s   t ƒ j||||d d S r“  r  rè  rh   r'   r(   rK   b  r•  zChroma.__init__c                    sB   t ƒ jdi |¤Ž}| dd¡}|d urtj t |g¡¡|d< |S )Nrõ  r   r'   rÜ  )rf   ry   rÏ   rõ  rh   r'   r(   r   e  s
   zChroma.extra_conds)r   r   r   r   r#   r*   r¶  Úchromar  rß  rK   r   r  r'   r'   rh   r(   rß  a  s    rß  c                       r€  )ÚChromaRadianceNc                    rµ  r“  )rJ   rK   r*   r¶  Úchroma_radiancer  rá  r?  rh   r'   r(   rK   n  rº  zChromaRadiance.__init__)r   r   r   r   r#   rK   r  r'   r'   rh   r(   rá  m  r†  rá  c                       r³  )ÚACEStepNc                    rµ  r“  )rJ   rK   r*   r¶  Úacer  ÚACEStepTransformer2DModelr?  rh   r'   r(   rK   r  rº  zACEStep.__init__c                    sª   t ƒ jdi |¤Ž}| dd ¡}| dd ¡}|d ur!tj |¡|d< | dd ¡}|d ur3tj |¡|d< tj tj|jd d|j	|j
d¡|d	< tj | d
d¡¡|d
< |S )Nr    rÇ   rv   Úconditioning_lyricsÚlyric_token_idxr   rf  r4  Úspeaker_embedsÚlyrics_strengthrª   r'   )rJ   r   rP   r*   rË   rÍ   r;   r.  r­   rA   r9   ra  )rf   ry   rÏ   r    rÇ   ræ  rh   r'   r(   r   u  s   (zACEStep.extra_condsr»  r'   r'   rh   r(   rã  q  r¼  rã  c                       r³  )Ú	ACEStep15Nc                    rµ  r“  )rJ   rK   r*   r¶  rä  Ú
ace_step15ÚAceStepConditionGenerationModelr?  rh   r'   r(   rK   …  rº  zACEStep15.__init__c                    sÐ  t ƒ jdi |¤Ž}|d }|d }| dd ¡}|d ur2t |¡dkr*tj d¡|d< tj |¡|d< | dd ¡}|d urDtj |¡|d	< | d
d ¡}|d u sTt	|ƒdkrctj
jj |jd |¡}d}n|d d d …d d …d |jd …f }tj d¡|d< d}|r®| dd ¡}	|	d ur¦tj tj|	|d¡|d< |d d …d d …d d…f }ntj d¡|d< |jd |jd k rÞtj
jj |jd |¡}
tj| |
¡|
d d …d d …|jd d …f gdd}tj |¡|d< |S )NrA   r    rÇ   r   TÚreplace_with_null_embedsrv   ræ  Úlyric_embedÚreference_audio_timbre_latentsr   r¡   Ú	is_coversFÚaudio_codesr  iî  r{   Úrefer_audior'   )rJ   r   rP   r;   Úcount_nonzeror*   rË   ra  rÍ   rŠ   r¶  rä  rë  Úget_silence_latentr­   rí   r€   rX   )rf   ry   rÏ   rA   r    rÇ   ræ  rò  Úpass_audio_codesrñ  r²   rh   r'   r(   r   ˆ  s:   $4zACEStep15.extra_condsr»  r'   r'   rh   r(   rê  „  r¼  rê  c                       rf  )ÚOmnigen2Nc                    rh  ræ  )rJ   rK   r*   r¶  ÚomnigenÚomnigen2ÚOmniGen2Transformer2DModelrd   r?  rh   r'   r(   rK   °  rk  zOmnigen2.__init__c                    sÒ   t ƒ jdi |¤Ž}| dd ¡}|d ur4t |¡| ¡ kr$tj |¡|d< tj 	t
dt |¡ ¡ ƒ¡|d< | dd ¡}|d urFtj |¡|d< | dd ¡}|d urgg }|D ]
}| |  |¡¡ qTtj |¡|d< |S )	NrÕ  r
   r  rÇ   rv   rö  rç  r'   )rJ   r   rP   r;   rp  r  r*   rË   rÍ   ra  r   r!  r†   r¬   rû  )rf   ry   rÏ   rÕ  rÇ   rç  rÿ  r   rh   r'   r(   r   ´  s     zOmnigen2.extra_condsc                 K   ó@   i }|  dd ¡}|d urtddttdd„ |ƒƒd gƒ|d< |S )Nrö  r
   r¢  c                 S   rÄ  rj   r  rk  r'   r'   r(   r  Ê  rÅ  z-Omnigen2.extra_conds_shapes.<locals>.<lambda>rç  r  r  r'   r'   r(   r  Æ  ó
   $zOmnigen2.extra_conds_shapesru  r'   r'   rh   r(   rö  ¯  s    rö  c                       rf  )Ú	QwenImageNc                    rh  ræ  )rJ   rK   r*   r¶  Ú
qwen_imager  ÚQwenImageTransformer2DModelrd   r?  rh   r'   r(   rK   Î  rk  zQwenImage.__init__c           	         sÄ   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< | dd ¡}|d ur`g }|D ]
}| |  |¡¡ q;tj |¡|d< | dd ¡}|d ur`tj 	|¡|d< |S )	NrÕ  rÇ   rv   rö  rç  r÷  rø  r'   )
rJ   r   rP   r*   rË   rÍ   r†   r¬   rû  ra  )	rf   ry   rÏ   rÕ  rÇ   rç  rÿ  r   rø  rh   r'   r(   r   Ò  s"   zQwenImage.extra_condsc                 K   rú  )Nrö  r
   r¢  c                 S   rÄ  rj   r  rk  r'   r'   r(   r  ê  rÅ  z.QwenImage.extra_conds_shapes.<locals>.<lambda>rç  r  r  r'   r'   r(   r  æ  rû  zQwenImage.extra_conds_shapes)	r   r   r   r   r#   rK   r   r  r  r'   r'   rh   r(   rü  Í  s    rü  c                       r³  )ÚHunyuanImage21Nc                    rµ  r“  r=  r?  rh   r'   r(   rK   î  rº  zHunyuanImage21.__init__c                    sÀ   t ƒ jdi |¤Ž}| dd ¡}|d ur$t |¡| ¡ kr$tj |¡|d< | dd ¡}|d ur6tj |¡|d< | dd ¡}|d urHtj |¡|d< | dd¡}|d ur^tj t 	|g¡¡|d< |S )	NrÕ  rÇ   rv   Úconditioning_byt5smallÚtxt_byt5rõ  r?  r'   )
rJ   r   rP   r;   rp  r  r*   rË   rÍ   rÝ  )rf   ry   rÏ   rÕ  rÇ   r   rõ  rh   r'   r(   r   ñ  s   zHunyuanImage21.extra_condsr»  r'   r'   rh   r(   rÿ  í  r¼  rÿ  c                       s$   e Zd Zdd„ Z‡ fdd„Z‡  ZS )ÚHunyuanImage21Refinerc                 K   s  |  dd ¡}|  dd ¡}|  dd¡}|d }|d u r-t|jƒ}tj||j|j|jd}|S t 	| 
|¡|jd |jd d	d
¡}|  |¡}t ||jd ¡}|dkr€tjdd}| |  dd¡d ¡ tj|j||jdd 
|j¡}|| td| dƒ|  }|S d| }|S )Nr    rž   r  r  rA   r¤  r¡   r¢   r£   r¤   r   Úcpur  r  r   ©Ú	generatorr9   rA   rª   ç      è?)rP   r…   r­   r;   r.  r9   r¥  rA   r   r®   rX   r¬   r³   Ú	GeneratorÚmanual_seedÚrandnÚmin)rf   ry   r    r|  r  rA   rƒ  r  r'   r'   r(   r¾     s&   
$ö
ÿz!HunyuanImage21Refiner.concat_condc                    ó&   t ƒ jdi |¤Ž}tj d¡|d< |S )NTÚdisable_time_rr'   ©rJ   r   r*   rË   ra  r  rh   r'   r(   r     ó   z!HunyuanImage21Refiner.extra_conds)r   r   r   r¾   r   r  r'   r'   rh   r(   r    s    r  c                       r¨  )ÚHunyuanVideo15Nc                    r  rŽ  r  r?  rh   r'   r(   rK   "  r  zHunyuanVideo15.__init__c           
   
   K   sÖ  |  dd ¡}| jjjjjd |jd  d }|dkrd S |  dd ¡}|d }|d u r@t|jƒ}||d< tj||j	|j
|jd}nD| jj}t | |¡|jd |jd d	d
¡}td|jd |ƒD ]}|  |d d …||| …f ¡|d d …||| …f< q_t ||jd ¡}|  d|  dd ¡¡}	|	d u r t |¡d d …d d…f }	nBd|	 }	t |	 |¡|jd |jd d	d
¡}	|	jd |jd k rÙtjjj|	ddddd|jd |	jd  fddd}	t |	|jd ¡}	tj||	fddS )Nr    r
   r   rž   rA   r¤  r¡   r¢   r£   r¤   rœ   r   rª   r¥   r¦   r{  r{   )rP   rU   rí  Úprojr$  r­   r…   r;   r.  r9   r¥  rA   rM   r€  r   r®   rX   r  r¬   r³   r¹   r°   r±   r²   r€   )
rf   ry   r    r‚  r|  rA   rƒ  r„  r…  r§   r'   r'   r(   r¾   %  s0    
$4$2zHunyuanVideo15.concat_condc                    sæ   t ƒ jd
i |¤Ž}| dd ¡}|d ur$t |¡| ¡ kr$tj |¡|d< | dd ¡}|d ur6tj |¡|d< | dd ¡}|d urHtj |¡|d< | dd¡}|d ur^tj t 	|g¡¡|d< | dd ¡}|d urqtj |j
¡|d	< |S )NrÕ  rÇ   rv   r   r  rõ  r?  r  r†  r'   )rJ   r   rP   r;   rp  r  r*   rË   rÍ   rÝ  rr  )rf   ry   rÏ   rÕ  rÇ   r   rõ  r  rh   r'   r(   r   E  s$   zHunyuanVideo15.extra_condsrŠ  r'   r'   rh   r(   r  !  s     r  c                       r¨  )ÚHunyuanVideo15_SR_DistilledNc                    r  rŽ  r  r?  rh   r'   r(   rK   ^  r  z$HunyuanVideo15_SR_Distilled.__init__c                 K   s‚  |  dd ¡}|  dd ¡}|  dd¡}|d }|d u r>tj|jd |jd d d |jd	 |jd
 |jd gtj ¡ d}|S t | 	|¡|jd |jd
 dd¡}t 
||jd ¡}t|jd d d|jd  d ƒ}|dkr¯tjdd}| |  dd¡d ¡ tj|d d …|f j||jdd 	|j¡}|| td| dƒ|d d …|f   |d d …|f< |S d|d d …|f  |d d …|f< |S )Nr    rž   r  r  rA   r   r
   r   r¥   r¢   r¡   r  r£   r¤   r  r  r   r  rª   r  )rP   r;   r.  r­   r*   r>   Úintermediate_devicer   r®   rX   r³   Úslicer  r  r	  r9   rA   r
  )rf   ry   r    r|  r  rA   Úlq_image_slicer  r'   r'   r(   r¾   a  s$   D$õ"*2 ÿz'HunyuanVideo15_SR_Distilled.concat_condc                    r  )NFr  r'   r  r  rh   r'   r(   r   w  r  z'HunyuanVideo15_SR_Distilled.extra_condsrŠ  r'   r'   rh   r(   r  ]  s    r  c                       s@   e Zd Zejdf‡ fdd„	Zdd„ Zdd„ Z‡ fdd	„Z‡  Z	S )
Ú
Kandinsky5Nc                    rµ  r“  )rJ   rK   r*   r¶  Ú
kandinsky5r  r  r?  rh   r'   r(   rK   }  rº  zKandinsky5.__init__c                 K   rª  r«  r'   rš   r'   r'   r(   r›   €  r˜   zKandinsky5.encode_admc              
   K   sì   |  dd ¡}|d }t |¡}|  d|  dd ¡¡}|d u r+t |¡d d …d d…f }nBd| }t | |¡|jd |jd d	d
¡}|jd |jd k rdtjjj	|ddddd|jd |jd  fddd}t 
||jd ¡}tj||fddS )Nr    rA   rœ   r   r
   rª   r¡   r¢   r£   r¤   r¥   r   r¦   r{  r{   )rP   r;   r¹   r   r®   rX   r­   r°   r±   r²   r³   r€   )rf   ry   r    rA   r|  r§   r'   r'   r(   r¾   ƒ  s   
$2zKandinsky5.concat_condc                    sˆ   t ƒ jdi |¤Ž}| dd ¡}|d urtj |¡|d< | dd ¡}|d ur-tj |¡|d< | dd ¡}|d urBtj |  |¡¡|d< |S )NrÕ  rÇ   rv   Útime_dim_replacer'   r¹  )rf   ry   rÏ   rÕ  rÇ   r  rh   r'   r(   r   ”  s   zKandinsky5.extra_conds)
r   r   r   r   r!   rK   r›   r¾   r   r  r'   r'   rh   r(   r  |  s
    r  c                       r*  )ÚKandinsky5ImageNc                    r  rŽ  r  r?  rh   r'   r(   rK   ¤  r  zKandinsky5Image.__init__c                 K   r™   rj   r'   rš   r'   r'   r(   r¾   §  r—   zKandinsky5Image.concat_cond)r   r   r   r   r!   rK   r¾   r  r'   r'   rh   r(   r  £  rž  r  c                       r€  )Ú
RT_DETR_v4Nc                    rµ  r“  )rJ   rK   r*   r¶  Úrt_detrÚ	rtdetr_v4ÚRTv4r?  rh   r'   r(   rK   «  rº  zRT_DETR_v4.__init__)r   r   r   r   r!   rK   r  r'   r'   rh   r(   r  ª  r†  r  )r  N)Ú__doc__Úcomfy.ldm.hunyuan3dv2_1r*   Ú"comfy.ldm.hunyuan3dv2_1.hunyuanditr;   rZ   Úcomfy.ldm.lightricks.av_modelÚcomfy.context_windowsÚ.comfy.ldm.modules.diffusionmodules.openaimodelr   r   Úcomfy.ldm.cascade.stage_cr   Úcomfy.ldm.cascade.stage_br   Ú,comfy.ldm.modules.encoders.noise_aug_modulesr   Ú,comfy.ldm.modules.diffusionmodules.upscalingr   Ú(comfy.ldm.modules.diffusionmodules.mmditr   Ú.comfy.ldm.genmo.joint_model.asymm_models_jointÚcomfy.ldm.aura.mmditÚcomfy.ldm.pixart.pixartmsÚcomfy.ldm.hydit.modelsÚcomfy.ldm.audio.ditÚcomfy.ldm.audio.embeddersÚcomfy.ldm.flux.modelÚcomfy.ldm.lightricks.modelÚcomfy.ldm.hunyuan_video.modelÚcomfy.ldm.cosmos.modelÚcomfy.ldm.cosmos.predict2Úcomfy.ldm.lumina.modelÚcomfy.ldm.wan.modelÚcomfy.ldm.wan.model_animateÚcomfy.ldm.hunyuan3d.modelÚcomfy.ldm.hidream.modelÚcomfy.ldm.chroma.modelÚcomfy.ldm.chroma_radiance.modelÚcomfy.ldm.ace.modelÚcomfy.ldm.omnigen.omnigen2Úcomfy.ldm.qwen_image.modelÚcomfy.ldm.kandinsky5.modelÚcomfy.ldm.anima.modelÚcomfy.ldm.ace.ace_step15Úcomfy.ldm.rt_detr.rtdetr_v4Úcomfy.model_managementÚcomfy.patcher_extensionÚcomfy.condsÚ	comfy.opsÚenumr	   rÒ   r   Úcomfy.latent_formatsÚcomfy.model_samplingrþ   Útypingr   Úcomfy.model_patcherr   r   r+   rB   r°   ÚModulerC   r)  r+  r3  r4  rR  rU  rc  re  rp  ry  r~  r  r‡  r‰  r‘  rŸ  r©  r´  r½  rÑ  rÞ  rå  r  r  r  r$  r0  r<  rC  rL  rN  rZ  ra  rg  rv  rx  rŒ  r¡  r¥  r²  r¼  rÇ  rÉ  rË  rÏ  rÕ  rÙ  rß  rá  rã  rê  rö  rü  rÿ  r  r  r  r  r  r  r'   r'   r'   r(   Ú<module>   sè    '	  
/
$'U%B!'6I'-)&+ <'