o
    i                     @   sl   d dl Z ddlmZ d dlZzd dlZW n	   ed Y d dlmZ ddlm	Z	 G dd de j
jZdS )    N   )AutoencoderDCz,torchaudio missing, ACE model will be broken)ADaMoSHiFiGANV1c                       sZ   e Zd Zdi i f fdd	Zdd Ze dddZe ddd	Zdd
dZ	  Z
S )	MusicDCAENc                    s   t t|   td
i || _td
i || _|d u rd| _n|| _t	t
ddg| _d| _d| _ttd| _d| _d| _| j| j | _d| _d	| _d S )Ni        ?g      &g      @g[5;j!Ai      gec]?gq )superr   __init__r   dcaer   vocodersource_sample_rate
transformsCompose	Normalize	transformmin_mel_valuemax_mel_valueintroundaudio_chunk_sizemel_chunk_sizetime_dimention_multiplelatent_chunk_sizescale_factorshift_factor)selfr   dcae_configvocoder_config	__class__r   C/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/ace/vae/music_dcae_pipeline.pyr
      s"   

zMusicDCAE.__init__c                 C   s>   g }t t|D ]}| j|| }|| qt|}|S N)rangelenr   mel_transformappendtorchstack)r   audiosmelsiimager   r   r!   forward_mel&   s   
zMusicDCAE.forward_melc           	      C   s   |d u rt |jd g|jd  }||j}|d u r| j}|dkr+tj||d}|jd }|d dkrDt j	j
|dd|d  f}| |}|| j | j| j  }| |}g }|D ]}| j|d}|| q]t j|dd}|| j | j }|S )N   r   D  i   )dim)r'   tensorshapetodevicer   
torchaudio
functionalresamplennpadr-   r   r   r   r   encoder	unsqueezer&   catr   r   )	r   r)   audio_lengthssrmax_audio_lenr*   latentsmellatentr   r   r!   encode.   s(   


zMusicDCAE.encodec                 C   s   || j  | j }g }|D ];}| j|d}|d d }|| j| j  | j }| j|d 	d}|d ur@t
j|d|}nd}|| q|d urVdd t||D }t|S )Nr   r   r   r/   c                 S   s(   g | ]\}}|d d d |f   qS r"   )cpu).0wavlengthr   r   r!   
<listcomp>\   s   ( z$MusicDCAE.decode.<locals>.<listcomp>)r   r   r   decoderr<   r   r   r   decodesqueezer6   r7   r8   r&   zipr'   r(   )r   rA   r>   r?   	pred_wavsrC   r*   rG   r   r   r!   rK   I   s   
zMusicDCAE.decodec                 C   s4   | j |||d\}}| j|||d\}}||||fS )N)r)   r>   r?   )rA   r>   r?   )rD   rK   )r   r)   r>   r?   rA   latent_lengthsrN   r   r   r!   forward_   s   zMusicDCAE.forward)NN)__name__
__module____qualname__r
   r-   r'   no_gradrD   rK   rP   __classcell__r   r   r   r!   r      s    r   )r'   autoencoder_dcr   loggingr6   warningtorchvision.transformsr   music_vocoderr   r9   Moduler   r   r   r   r!   <module>   s   