o
    iF                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZ d dlmZmZmZ ddlmZ dUd	ed
ede
jfddZde
jde
jde
jfddZdddde
jdedB dedB defddZdVde
jdedB dejfddZ		dWde
jdedB dedefddZdXdejdedefd d!ZdYde
jdede
jfd#d$Zde
jd%ede
jfd&d'Z		dWde
jdedB dedefd(d)Z dZd,ej!d-ed.edefd/d0Z"		d[d1ej#d-ej$dB d2ej%dB defd3d4Z&	*	+dZd5e	j'd6ed-ed.edef
d7d8Z(d9e
jde	j'fd:d;Z)d,ej!defd<d=Z*d1ej#d>e+dej#fd?d@Z,dAe
jde
jfdBdCZ-dDe.de/fdEdFZ0	G	H	I	Jd\dKe
jde
jfdLdMZ1dKejde
jfdNdOZ2dPedefdQdRZ3dPedefdSdTZ4dS )]    N)BytesIO)Image)common_upscale)Input	InputImplTypes   )mimetype_to_extensionRGBAimage_bytesiomodereturnc                 C   s:   t | }||}t|tjd }t|	dS )a  Converts image data from BytesIO to a torch.Tensor.

    Args:
        image_bytesio: BytesIO object containing the image data.
        mode: The PIL mode to convert the image to (e.g., "RGB", "RGBA").

    Returns:
        A torch.Tensor representing the image (1, H, W, C).

    Raises:
        PIL.UnidentifiedImageError: If the image data cannot be identified.
        ValueError: If the specified mode is invalid.
    g     o@r   )
r   openconvertnparrayastypefloat32torch
from_numpy	unsqueeze)r   r   imageimage_array r   >/mnt/c/Users/fbmor/ComfyUI/comfy_api_nodes/util/conversions.pybytesio_to_image_tensor   s   

r   image1image2c                 C   sZ   | j dd |j dd kr$t|dd| j d | j d dddd}tj| |fddS )	z
    Converts a pair of image tensors to a batch tensor.
    If the images are not the same size, the smaller image is resized to
    match the larger image.
    r   N   bilinearcenterr   dim)shaper   movedimr   cat)r   r   r   r   r   image_tensor_pair_to_batch'   s   
r'     @ 	image/png)total_pixels	mime_typer   r*   r+   c                C   s<   |sd}t | |d}t||d}t  dt| |_|S )a  Converts a torch.Tensor image to a named BytesIO object.

    Args:
        image: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

    Returns:
        Named BytesIO object containing the image data, with pointer set to the start of buffer.
    r)   r*   r+   .)tensor_to_pilpil_to_bytesiouuiduuid4r	   name)r   r*   r+   	pil_image
img_binaryr   r   r   tensor_to_bytesio8   s   r6   c                 C   s^   t | jdkr| d } |  }|durt|d|d }| d tj	}t
|}|S )zVConverts a single torch.Tensor image [H, W, C] to a PIL Image, optionally downscaling.   r   Nr,      )lenr$   cpudownscale_image_tensorr   squeezenumpyr   r   uint8r   	fromarray)r   r*   input_tensorimage_npimgr   r   r   r/   Q   s   
r/   image_tensorc                 C   s4   t | |d}t||d}| }t|d}|S )a  Convert [B, H, W, C] or [H, W, C] tensor to a base64 string.

    Args:
        image_tensor: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

    Returns:
        Base64 encoded string of the image.
    r,   r-   utf-8)r/   r0   getvaluebase64	b64encodedecode)rC   r*   r+   r4   img_byte_arr	img_bytesbase64_encoded_stringr   r   r   tensor_to_base64_string^   s
   rL   rB   c                 C   sH   |sd}t  }|dd  }|dkrd}| j||d |d |S )z)Converts a PIL Image to a BytesIO object.r)   /r   JPGJPEG)formatr   )r   splituppersaveseek)rB   r+   rI   
pil_formatr   r   r   r0   u   s   
r0      c                 C   s   |  dd}t|}t||jd |jd   }|dkr| S t|jd | }t|jd | }t|||dd}| dd}|S )zCDownscale input image tensor to roughly the specified total pixels.r   r   r7   r   lanczosdisabled)r%   intmathsqrtr$   roundr   )r   r*   samplestotalscale_bywidthheightsr   r   r   r;      s   r;   max_sidec          
      C   sx   |  dd}|jd |jd }}t||}||kr| S || }t|| }t|| }t|||dd}	|	 dd}	|	S )zQDownscale input image tensor so the largest dimension is at most max_side pixels.r   r   r   r7   rW   rX   )r%   r$   maxr\   r   )
r   rc   r]   ra   r`   max_dimr_   	new_width
new_heightrb   r   r   r   "downscale_image_tensor_by_max_side   s   
rh   c                 C   s   t | ||}d| d| S )au  Converts a tensor image to a Data URI string.

    Args:
        image_tensor: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp').

    Returns:
        Data URI string (e.g., 'data:image/png;base64,...').
    data:;base64,)rL   )rC   r*   r+   base64_stringr   r   r   tensor_to_data_uri   s   rl   mp4aacaudiocontainer_format
codec_namec                 C   s>   | d }| d }t |}t||||}| }t|dS )z+Converts an audio input to a base64 string.sample_ratewaveformrD   )"audio_tensor_to_contiguous_ndarrayaudio_ndarray_to_bytesiorE   rF   rG   rH   )ro   rp   rq   rr   rs   audio_data_npaudio_bytes_ioaudio_bytesr   r   r   audio_to_base64_string   s   ry   videocodecc                 C   sT   t  }| j||pt| dtjj|pt| dtjjd |d t	
| dS )a  
    Converts a video input to a base64 string.

    Args:
        video: The video input to convert
        container_format: Optional container format to use (defaults to video.container if available)
        codec: Optional codec to use (defaults to video.codec if available)
    	containerr{   )rP   r{   r   rD   )r   save_togetattrr   VideoContainerMP4
VideoCodecH264rT   rF   rG   rE   rH   )rz   rp   r{   video_bytes_ior   r   r   video_to_base64_string   s   
r   rv   rr   c           	      C   s   t  }tj|d|dA}|j||d}tjj| d| jd dkr!dndd	}||_d|_|	|D ]}|
| q0|	d
D ]}|
| q=W d
   n1 sOw   Y  |d |S )zD
    Encodes a numpy array of audio data into a BytesIO object.
    wr   rP   ratefltpr   r   stereomonorP   layoutN)r   avr   
add_stream
AudioFramefrom_ndarrayr$   rr   ptsencodemuxrT   )	rv   rr   rp   rq   rw   output_containeraudio_streamframepacketr   r   r   ru      s$   	
ru   rs   c                 C   sh   | j dks| jd dkrtd| jd dkr| d } | d   }|jtj	kr2|
tj	}|S )aG  
    Prepares audio waveform for av library by converting to a contiguous numpy array.

    Args:
        waveform: a tensor of shape (1, channels, samples) derived from a Comfy `AUDIO` type.

    Returns:
        Contiguous numpy array of the audio waveform. If the audio was batched,
            the first item is taken.
    r7   r   r   z5Expected waveform tensor shape (1, channels, samples))ndimr$   
ValueErrorr<   r:   
contiguousr=   dtyper   r   r   )rs   rv   r   r   r   rt      s   rt   c                 C   s   | d   }t }tj|ddd}|jd| d d}d|_tjj|d	d
	d
d
  d|jd	 d
kr7dndd}| d |_d	|_||| ||d  |  |d	 |S )Nrs   r   mp3r   
libmp3lamerr   r   i  r   r   r   fltr   r   r   )r:   r   r   r   r   bit_rater   r   r%   reshapefloatr=   r$   rr   r   r   r   closerT   )ro   rs   output_bufferr   
out_streamr   r   r   r   audio_input_to_mp3  s"   

r   duration_secc              
   C   s~  t  }d}d}z|  }tj|dd}tj|ddd}d}d}|jD ]P}td|jt| t|tj	rT|j
d|jd	}|j|_|j|_d
|_td|j|j|j q#t|tjrs|j
d|jd	}|j|_|j|_td|j|j q#|jjd j}	t||	 }
|
d d }|dkrtdd}d}|r|jddD ]}||kr n||D ]}|| q|d7 }q| D ]}|| qtd|| |r|d |jddD ]}|j|kr n||D ]}|| q|d7 }q| D ]}|| qtd| |  |  |d t|W S  ty> } z|dur'|  |dur0|  tdt | |d}~ww )aQ  
    Returns a new VideoInput object trimmed from the beginning to the specified duration,
    using av to avoid loading entire video into memory.

    Args:
        video: Input video to trim
        duration_sec: Duration in seconds to keep from the beginning

    Returns:
        VideoFromFile object that owns the output buffer
    Nr)r   r   rm   r   zFound stream: type=%s, class=%sh264r   yuv420pz!Added video stream: %sx%s @ %sfpsrn   z%Added audio stream: %sHz, %s channelsr      z7Video too short: need at least 16 frames for Moonvalley)rz   r   z$Encoded %s video frames (target: %s))ro   zEncoded %s audio frameszFailed to trim video: )!r   get_stream_sourcer   r   streamslogginginfotype
isinstanceVideoStreamr   average_rater`   ra   pix_fmtAudioStreamrr   r   channelsrz   rY   r   rH   r   r   rT   timer   r   VideoFromFile	ExceptionRuntimeErrorstr)rz   r   r   input_containerr   input_sourcevideo_streamr   streamfpsestimated_framestarget_framesframe_countaudio_frame_countr   r   er   r   r   
trim_video&  s~   







r   wavc                 C   sL   | j jr| S | j tjkr|  d S | j tjkr|  d S td| j  )zOConvert audio to float 32 bits PCM format. Copy-paste from nodes_audio.py file.i   l        zUnsupported wav dtype: )r   is_floating_pointr   int16r   int32r   )r   r   r   r   _f32_pcm  s   r   rx   c                 C   s4  t t| p}|jjstd|jjd }t|jj}|}g }|j	p$d}|j
|jdD ]C}| }t|}	|	jdkrB|	d}	n(|	jd |krY|	jd |krY|	dd }	n|	jd |krj|	d|  }	||	 q,W d   n1 szw   Y  |stdtj|dd}
t|
}
|
d |d	S )
z
    Decode any common audio container from bytes using PyAV and return
    a Comfy AUDIO dict: {"waveform": [1, C, T] float32, "sample_rate": int}.
    z"No audio stream found in response.r   r   )r   r   NzDecoded zero audio frames.r"   )rs   rr   )r   r   r   r   ro   r   rY   codec_contextrr   r   rH   index
to_ndarrayr   r   r   r   r$   	transposer   r   tappendr&   r   )rx   afr   in_srout_srframes
n_channelsr   arrbufr   r   r   r   audio_bytes_to_audio_input  s2   


r   nearest-exactrX   TFmaskc           	      C   sd   |j \}}}}| d} | dd} t| ||||d} | dd} |s(| d} |s0| dk } | S )zaResize mask to be the same dimensions as an image, while maintaining proper format for API calls.r   r   )r`   ra   upscale_methodcropg      ?)r$   r   r%   r   r<   r   )	r   r   r   r   allow_gradientadd_channel_dim_ra   r`   r   r   r   resize_mask_to_image  s   	

r   c                 C   s   |  d} tj| gd ddS )z]Make mask have the expected amount of dims (4) and channels (3) to be recognized as an image.r   r7   r"   )r   r   r&   )r   r   r   r   convert_mask_to_image  s   
r   filepathc                 C   sB   t | d}| }W d   n1 sw   Y  t|dS )z(Converts a text file to a base64 string.rbNrD   )r   readrF   rG   rH   )r   ffile_contentr   r   r   text_filepath_to_base64_string  s   
r   c                 C   s2   t | }t| \}}|du rd}d| d| S )z#Converts a text file to a data URI.Nzapplication/octet-streamri   rj   )r   	mimetypes
guess_type)r   rk   r+   r   r   r   r   text_filepath_to_data_uri  s
   r   )r
   )r(   )r(   r)   )r)   )rV   )rm   rn   )NN)r   rX   TF)5rF   r   rZ   r   r1   ior   r   r=   r   r   PILr   comfy.utilsr   comfy_api.latestr   r   r   _helpersr	   r   Tensorr   r'   rY   r6   r/   rL   r0   r;   rh   rl   Audiory   Videor   r   r   ndarrayru   rt   r   r   r   r   bytesdictr   r   r   r   r   r   r   r   r   <module>   s    
 



l&
