o
    i*[                     @   s  d dl Z d dlmZ d dlm  mZ d dlmZ ddlm	Z	m
Z
mZ d dlZejjZdZG dd dejZG dd	 d	ejZd
d Zdd ZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdd ZG dd dejZdS )    N)	rearrange   )AttentionBlockCausalConv3dRMS_norm   c                       s,   e Zd Z fddZddgfddZ  ZS )Resamplec              	      s  |dv sJ t    || _|| _|dkr*ttjdddtj||ddd| _	d S |d	krNttjdddtj||ddd| _	t
||d
 ddd| _d S |dkrettdtj||ddd| _	d S |dkrttdtj||ddd| _	t
||dddd| _d S t | _	d S )N)none
upsample2d
upsample3ddownsample2ddownsample3dr
   )       @r   znearest-exact)scale_factormode   r   paddingr   r   )r   r   r   )r   r   r   r   )r   r   r   r   )r   r   )strider   )r   r   r   )r   r   r   )r   r   )super__init__dimr   nn
SequentialUpsampleopsConv2dresampler   	time_conv	ZeroPad2dIdentity)selfr   r   	__class__ 2/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/wan/vae2_2.pyr      s>   



zResample.__init__Nr   c                 C   s  |  \}}}}}| jdkr|d ur|d }	||	 d u r)d||	< |d  d7  < n|d d d d t d d d d d f  }
|
jd dk rs||	 d urs||	 dkrstj||	 d d d d dd d d d f d|
j	|
gdd}
|
jd dk r||	 d ur||	 dkrtjt
|
|
j	|
gdd}
||	 dkr| |}n| |||	 }|
||	< |d  d7  < ||d||||}t|d d dd d d d d d d d f |d d dd d d d d d d d f fd}||||d ||}|jd }t|d	}| |}t|d
|d}| jdkrr|d urr|d }	||	 d u r3| ||	< |d  d7  < |S |d d d d dd d d d d f  }
| t||	 d d d d dd d d d d f |gd}|
||	< |d  d7  < |S )Nr   r   Repr   r   r   r   zb c t h w -> (b t) c h wz(b t) c h w -> b c t h w)tr   )sizer   CACHE_Tcloneshapetorchcat	unsqueezetodevice
zeros_liker   reshapestackr   r   )r!   x
feat_cachefeat_idxbcr)   hwidxcache_xr$   r$   r%   forward9   sl   
,,R



*4zResample.forward__name__
__module____qualname__r   r?   __classcell__r$   r$   r"   r%   r      s    'r   c                       s.   e Zd Zd fdd	ZddgfddZ  ZS )	ResidualBlock        c                    s   t    || _|| _tt|ddt t||dddt|ddt t	|t||ddd| _
||kr?t||d| _d S t | _d S )NFimagesr   r   r   )r   r   in_dimout_dimr   r   r   SiLUr   Dropoutresidualr    shortcut)r!   rI   rJ   dropoutr"   r$   r%   r   w   s"   



zResidualBlock.__init__Nr   c              	   C   s   |}| j D ]k}t|trl|d url|d }|d d d d t d d d d d f  }|jd dk rX|| d urXtj|| d d d d dd d d d f d	|j
|gdd}||||d}|||< |d  d7  < q||}q|| | S )Nr   r   r'   r(   )
cache_list	cache_idxr   )rM   
isinstancer   r+   r,   r-   r.   r/   r0   r1   r2   rN   )r!   r6   r7   r8   old_xlayerr=   r>   r$   r$   r%   r?      s&   
,,
zResidualBlock.forward)rF   r@   r$   r$   r"   r%   rE   u   s    rE   c                 C   s\   |dkr| S |   dkrt| d||d} | S |   dkr&t| d||d} | S td| j )Nr      z b c (h q) (w r) -> b (c r q) h wqr   z$b c f (h q) (w r) -> b (c r q) f h wzInvalid input shape: )r   r   
ValueErrorr-   r6   
patch_sizer$   r$   r%   patchify   s    	r]   c                 C   sL   |dkr| S |   dkrt| d||d} | S |   dkr$t| d||d} | S )Nr   rU   z b (c r q) h w -> b c (h q) (w r)rV   rY   z$b (c r q) f h w -> b c f (h q) (w r))r   r   r[   r$   r$   r%   
unpatchify   s   	r^   c                       s6   e Zd Z	d fdd	ZdejdejfddZ  ZS )		AvgDown3Dr   c                    s`   t    || _|| _|| _|| _| j| j | j | _|| j | dks&J || j | | _d S Nr   )r   r   in_channelsout_channelsfactor_tfactor_sfactor
group_sizer!   ra   rb   rc   rd   r"   r$   r%   r      s   
zAvgDown3D.__init__r6   returnc           	   
   C   s   | j |jd | j   | j  }dddd|df}t||}|j\}}}}}||||| j  | j || j | j|| j | j}|dddddddd }|||| j || j  || j || j }||| j	| j
|| j  || j || j }|jdd	}|S )
Nr   r   r   r   rY      rU      r(   )rc   r-   Fpadviewrd   permute
contiguousre   rb   rf   mean)	r!   r6   pad_trl   BCTHWr$   r$   r%   r?      s@   
zAvgDown3D.forwardr   )rA   rB   rC   r   r.   Tensorr?   rD   r$   r$   r"   r%   r_      s    r_   c                       sB   e Zd Z	ddedef fddZddejdejfd	d
Z  ZS )DupUp3Dr   ra   rb   c                    s`   t    || _|| _|| _|| _| j| j | j | _|| j | dks&J || j | | _d S r`   )r   r   ra   rb   rc   rd   re   repeatsrg   r"   r$   r%   r      s   
zDupUp3D.__init__Fr6   rh   c                 C   s   |j | jdd}||d| j| j| j| j|d|d|d}|ddddddd	d }||d| j|d| j |d| j |d| j }|ri|d d d d | jd d d d d d f }|S )
Nr   r(   r   r   r   rU   rY   rj   ri   )	repeat_interleaverz   rm   r*   rb   rc   rd   rn   ro   )r!   r6   first_chunkr$   r$   r%   r?     s,   
,zDupUp3D.forwardrw   F)	rA   rB   rC   intr   r.   rx   r?   rD   r$   r$   r"   r%   ry      s     ry   c                       s2   e Zd Z		d fdd	ZddgfddZ  ZS )	Down_ResidualBlockFc           
         s   t    t|||rdnd|rdndd| _g }t|D ]}|t||| |}q|r;|r0dnd}	|t||	d tj	| | _
d S )Nr   r   rc   rd   r   r   r   )r   r   r_   avg_shortcutrangeappendrE   r   r   r   downsamples)
r!   rI   rJ   rO   multtemperal_downsample	down_flagr   _r   r"   r$   r%   r   +  s   


zDown_ResidualBlock.__init__Nr   c                 C   s*   |}| j D ]}||||}q|| | S N)r   r   )r!   r6   r7   r8   x_copymoduler$   r$   r%   r?   I  s   
zDown_ResidualBlock.forwardFFr@   r$   r$   r"   r%   r   )  s
    r   c                       s4   e Zd Z		d fdd	ZddgdfddZ  ZS )	Up_ResidualBlockFc           
         s   t    |rt|||rdnd|rdndd| _nd | _g }t|D ]}|t||| |}q"|rA|r6dnd}	|t||	d tj	| | _
d S )Nr   r   r   r   r
   r   )r   r   ry   r   r   r   rE   r   r   r   	upsamples)
r!   rI   rJ   rO   r   temperal_upsampleup_flagr   r   r   r"   r$   r%   r   S  s"   



zUp_ResidualBlock.__init__Nr   c                 C   s>   |}| j D ]}||||}q| jd ur| ||}|| S |S r   )r   r   )r!   r6   r7   r8   r|   x_mainr   
x_shortcutr$   r$   r%   r?   s  s   

zUp_ResidualBlock.forwardr   r@   r$   r$   r"   r%   r   Q  s
     r   c                       sD   e Zd Zddg ddg g ddf fdd	Zd	d
gfddZ  ZS )	Encoder3d   rU   r   r   rU   rU   r   TTFrF   c                    s2  t     | _|| _|| _|| _|| _|| _ fdddg| D }d}	td|d ddd| _	g }
t
t|d d	 |dd  D ])\}\}}|t|k rQ|| nd
}|
t||||||t|d kd |	d }	qAtj|
 | _tt|||t|t|||| _tt|d
dt t||ddd| _d S )Nc                       g | ]} | qS r$   r$   .0ur(   r$   r%   
<listcomp>      z&Encoder3d.__init__.<locals>.<listcomp>r   g      ?   r   r   r   r'   F)rI   rJ   rO   r   r   r   r   rG   )r   r   r   z_dimdim_multnum_res_blocksattn_scalesr   r   conv1	enumerateziplenr   r   r   r   r   rE   r   middler   rK   head)r!   r   r   r   r   r   r   rO   dimsscaler   irI   rJ   t_down_flagr"   r(   r%   r     sL   

*
	



zEncoder3d.__init__Nr   c              	   C   s  |d ura|d }|d d d d t  d d d d d f  }|jd dk rL|| d urLtj|| d d d d dd d d d f d|j|gdd}| ||| }|||< |d  d7  < n| |}| j	D ]}|d urv||||}qi||}qi| j
D ]}t|tr|d ur||||}q~||}q~| jD ]k}t|tr|d ur|d }|d d d d t  d d d d d f  }|jd dk r|| d urtj|| d d d d dd d d d f d|j|gdd}|||| }|||< |d  d7  < q||}q|S Nr   r   r'   r(   r   )r+   r,   r-   r.   r/   r0   r1   r2   r   r   r   rR   rE   r   r   )r!   r6   r7   r8   r=   r>   rT   r$   r$   r%   r?     sT   ,,





,,
zEncoder3d.forwardr@   r$   r$   r"   r%   r   ~  s    9r   c                       sF   e Zd Zddg ddg g ddf fdd	Zd	d
gdfddZ  ZS )	Decoder3dr   rU   r   r   )FTTrF   c                    sL  t     | _|| _|| _|| _|| _|| _ fdd|d g|d d d  D }t||d ddd| _	t
t|d |d |t|d t|d |d || _g }	tt|d d |dd  D ]'\}
\}}|
t|k rr||
 nd}|	t||||d ||
t|d kd	 qbt
j|	 | _t
t|dd
t
 t|dddd| _d S )Nc                    r   r$   r$   r   r(   r$   r%   r     r   z&Decoder3d.__init__.<locals>.<listcomp>r'   r   r   r   r   F)rI   rJ   rO   r   r   r   rG   r   )r   r   r   r   r   r   r   r   r   r   r   r   rE   r   r   r   r   r   r   r   r   r   rK   r   )r!   r   r   r   r   r   r   rO   r   r   r   rI   rJ   	t_up_flagr"   r(   r%   r     sJ   

&
*	

zDecoder3d.__init__Nr   Fc              	   C   s  |d ura|d }|d d d d t  d d d d d f  }|jd dk rL|| d urLtj|| d d d d dd d d d f d|j|gdd}| ||| }|||< |d  d7  < n| |}| j	D ]}t
|tr{|d ur{||||}qi||}qi| jD ]}|d ur|||||}q||}q| jD ]m}t
|tr|d ur|d }|d d d d t  d d d d d f  }|jd dk r|| d urtj|| d d d d dd d d d f d|j|gdd}|||| }|||< |d  d7  < q||}q|S r   )r+   r,   r-   r.   r/   r0   r1   r2   r   r   rR   rE   r   r   r   )r!   r6   r7   r8   r|   r=   r>   rT   r$   r$   r%   r?   )  sT   ,,





,,
zDecoder3d.forwardr@   r$   r$   r"   r%   r     s    5r   c                 C   s(   d}|   D ]}t|tr|d7 }q|S )Nr   r   )modulesrR   r   )modelcountmr$   r$   r%   count_conv3d_  s   
r   c                       sX   e Zd Zdddg ddg g ddf fdd		Zd
d Zdd Zdd ZdddZ  ZS )WanVAE         r   r   r   rF   c	           	         s   t    || _|| _|| _|| _|| _|| _|d d d | _t	||d |||| j|| _
t|d |d d| _t||d| _t|||||| j|| _d S )Nr'   r   r   )r   r   r   r   r   r   r   r   r   r   encoderr   r   conv2r   decoder)	r!   r   dec_dimr   r   r   r   r   rO   r"   r$   r%   r   i  s8   
	
zWanVAE.__init__c              	   C   s   dg}d gt | j }t|dd}|jd }d|d d  }t|D ]N}dg}|dkrE| j|d d d d d dd d d d f ||d}q"| j|d d d d dd|d   dd|  d d d d f ||d}t||gd}q"| |jddd\}	}
|	S )Nr   r   r\   r   rU   r7   r8   r(   )	r   r   r]   r-   r   r.   r/   r   chunk)r!   r6   conv_idxfeat_mapr)   iter_r   outout_mulog_varr$   r$   r%   encode  s*   
$8zWanVAE.encodec           	   	   C   s   dg}d gt | j }|jd }| |}t|D ]I}dg}|dkr?| j|d d d d ||d d d d d f ||dd}q| j|d d d d ||d d d d d f ||d}t||gd}qt|dd}|S )Nr   r   r   T)r7   r8   r|   r   r   )r   r   r-   r   r   r.   r/   r^   )	r!   zr   r   r   r6   r   r   r   r$   r$   r%   decode  s*   

((zWanVAE.decodec                 C   s$   t d| }t |}|| | S )N      ?)r.   exp
randn_like)r!   r   r   stdepsr$   r$   r%   reparameterize  s   
zWanVAE.reparameterizeFc                 C   s>   |  |\}}|r|S td|dd }||t|  S )Nr   g      >g      4@)r   r.   r   clampr   )r!   imgsdeterministicr   r   r   r$   r$   r%   sample  s
   zWanVAE.sampler}   )	rA   rB   rC   r   r   r   r   r   rD   r$   r$   r"   r%   r   g  s    *r   )r.   torch.nnr   torch.nn.functional
functionalrk   einopsr   vaer   r   r   	comfy.opscomfyr   disable_weight_initr+   Moduler   rE   r]   r^   r_   ry   r   r   r   r   r   r   r$   r$   r$   r%   <module>   s(   e-6-(-tm