o
    i&3                     @   s   d dl mZ d dlmZ d dlZd dlmZmZ d dlmZ d dl	Z
d dlmZmZmZ d dlmZmZ d dlmZ d	d
lmZmZmZmZ eG dd deZG dd deZdS )    )	dataclass)OptionalN)Tensornn)repeat)EmbedNDDoubleStreamBlockSingleStreamBlock)ChromaChromaParams)Approximator   )NerfEmbedderNerfGLUBlockNerfFinalLayerNerfFinalLayerConvc                   @   s\   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eej ed< e	ed	< d
S )ChromaRadianceParams
patch_sizenerf_hidden_sizenerf_mlp_ratio
nerf_depthnerf_max_freqsnerf_tile_sizenerf_final_head_typenerf_embedder_dtypeuse_x0N)
__name__
__module____qualname__int__annotations__strr   torchdtypebool r%   r%   =/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/chroma_radiance/model.pyr      s   
 r   c                   @   s   e Zd ZdZd'ddZedejfddZd	e	de	fd
dZ
de	de	dede	fddZde	de	dededededede	fddZdedefddZdd Zdi fde	de	d e	d!ee	 d"ee d#ed$ede	fd%d&ZdS )(ChromaRadiancez;
    Transformer model for flow matching on sequences.
    NTc           	   	      sp  d u rt dtj _tdi |_j_j_j	_	j
j dkr;tdj
 dj j
j }tj|krStdj d| j
_
j_j_j_j_j_t|jjd_jjj
jjd d	_jjj
 d
_tjjjj d_t fddtjD _ t fddtj!D _"t#jj$j%j&pՈ d_'t fddtj(D _)j*dkrt+j$j d_,nj*dkrt-j$j d_.n
dj* }t|g _/g _0d_1j2r63dt45g  d S d S )NzBAttempt to create ChromaRadiance object without setting operationsr   zHidden size z  must be divisible by num_heads zGot z but expected positional dim )dimthetaaxes_dimT)kernel_sizestridebiasr#   device)r#   r.   )in_dim
hidden_dimout_dimn_layersr#   r.   
operationsc                    s,   g | ]}t jjjjd  dqS )F)	mlp_ratioqkv_bias
modulationr#   r.   r3   )r   hidden_size	num_headsr4   r5   .0_r.   r#   r3   paramsselfr%   r&   
<listcomp>[   s    	z+ChromaRadiance.__init__.<locals>.<listcomp>c                    s(   g | ]}t jjjd  dqS )F)r4   r6   r#   r.   r3   )r	   r7   r8   r4   r9   r<   r%   r&   r?   i   s    )in_channelshidden_size_input	max_freqsr#   r.   r3   c              
      s&   g | ]}t jjj d qS ))hidden_size_shidden_size_xr4   r#   r.   r3   )r   r7   r   r   r9   )r.   r#   r3   r=   r%   r&   r?      s    linear)out_channelsr#   r.   r3   convz!Unsupported nerf_final_head_type F__x0__r%   )6RuntimeErrorr   Module__init__r#   r   r=   r   r@   rF   r7   r8   
ValueErrorsumr*   r/   r1   r0   r2   r   r)   pe_embedderConv2dimg_in_patchLinearcontext_in_dimtxt_inr   distilled_guidance_layer
ModuleListrangedepthdouble_blocksdepth_single_blockssingle_blocksr   r   r   r   nerf_image_embedderr   nerf_blocksr   r   nerf_final_layerr   nerf_final_layer_conv
skip_mmditskip_ditliter   register_bufferr"   tensor)	r>   image_modelfinal_layerr#   r.   r3   kwargspe_dimerrstrr%   r<   r&   rK   /   s   			


zChromaRadiance.__init__returnc                 C   s(   | j jdkr	| jS | j jdkr| jS t)NrE   rG   )r=   r   r]   r^   NotImplementedError)r>   r%   r%   r&   _nerf_final_layer   s
   z ChromaRadiance._nerf_final_layerimgc                 C   s   |  |}|dddS )N   r   )rP   flatten	transpose)r>   rl   r%   r%   r&   img_in   s   
zChromaRadiance.img_inimg_origimg_outr=   c              	   C   s   |j \}}}}|j d }|j}	tjj||	|	d}
|
dd}
||| |j}|
|| ||	d dd}
|jdkrL||jkrL| 	||
||||	|}n| 
|
}| jD ]}|||}qT|dd}|||d}|dd}tjj|||f|	|	d}| |S )Nr   )r+   r,   rm   r   )output_sizer+   r,   )shaper   r   
functionalunfoldro   reshaper7   r   forward_tiled_nerfr[   r\   foldrk   )r>   rq   rr   r=   BCHWnum_patchesr   nerf_pixelsnerf_hiddenimg_dctblockr%   r%   r&   forward_nerf   s,   



zChromaRadiance.forward_nerfr   r   batchchannelsr   r   c                 C   s   |j }g }	td||D ]2}
t|
| |}||
| ||  }||
| ||  }| |}| jD ]}|||}q0|	| qtj|	ddS )z
        Processes the NeRF head in tiles to save memory.
        nerf_hidden has shape [B, L, D]
        nerf_pixels has shape [B, L, C * P * P]
        r   )r(   )r   rV   minr[   r\   appendr"   cat)r>   r   r   r   r   r   r   r=   	tile_sizeoutput_tilesiendnerf_hidden_tilenerf_pixels_tileimg_dct_tiler   r%   r%   r&   ry      s   

z!ChromaRadiance.forward_tiled_nerf	overridesc                    s   | j |sS fddjD td tfdd|D }|r.dd| }t|t fdd| D }|rJd	d| }t||O jd
i S )Nc                    s   i | ]}|t  |qS r%   )getattrr:   k)r=   r%   r&   
<dictcomp>  s    z?ChromaRadiance.radiance_get_override_params.<locals>.<dictcomp>)r   c                 3   s    | ]	}| vr|V  qd S Nr%   r   )params_dictr%   r&   	<genexpr>
  s    z>ChromaRadiance.radiance_get_override_params.<locals>.<genexpr>z?Unknown key(s) in transformer_options chroma_radiance_options: z, c                 3   s<    | ]\}}t |tt|s|d us| vr|V  qd S r   )
isinstancetyper   )r:   r   v)nullable_keysr=   r%   r&   r     s    zAInvalid value(s) in transformer_options chroma_radiance_options: r%   )r=   __dataclass_fields__	frozensettuplejoinrL   items	__class__)r>   r   bad_keyser%   )r   r=   r   r&   radiance_get_override_params  s"   z+ChromaRadiance.radiance_get_override_paramsc                 C   s    d}|| | dddd|  S )Ng        rs   r   )view)r>   	predictednoisy	timestepsepsr%   r%   r&   _apply_x0_residual  s   z!ChromaRadiance._apply_x0_residualxtimestepcontextguidancecontroltransformer_optionsrf   c                 K   s  |j \}}	}
}tjj|| j| jf}|jdkrtd|jdkr%td| |	di }|j d | j }|j d | j }t
j||df|j|jd}|d d d d d	f t
jd
|d	 ||j|jdd	 |d d d d d	f< |d d d d df t
jd
|d	 ||j|jdd
 |d d d d df< t|d|d}t
j||j d	 df|j|jd}| j|||||||||	dd d	}| |||d d d d d |
d |f }t| dr| |||}|S )N   z0Input img tensor must be in [B, C, H, W] format.   z)Input txt tensors must have 3 dimensions.chroma_radiance_optionsrs   )r.   r#   r   r   )stepsr.   r#   rm   zh w c -> b (h w) c)battention_mask)	attn_maskrH   )ru   comfyldm
common_ditpad_to_patch_sizer   ndimrL   r   getr"   zerosr.   r#   linspace	unsqueezer   forward_origr   hasattrr   )r>   r   r   r   r   r   r   rf   bschwrl   r=   h_lenw_lenimg_idstxt_idsrr   outr%   r%   r&   _forward   s:   


JJ 
*
zChromaRadiance._forward)NTNNN)r   r   r   __doc__rK   propertyr   rJ   rk   r   rp   r   r   r   ry   dictr   r   r   r   r%   r%   r%   r&   r'   *   sj    
v
,	
%	r'   )dataclassesr   typingr   r"   r   r   einopsr   comfy.ldm.common_ditr   comfy.ldm.flux.layersr   r   r	   comfy.ldm.chroma.modelr
   r   comfy.ldm.chroma.layersr   layersr   r   r   r   r   r'   r%   r%   r%   r&   <module>   s   