o
    i=                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZ dd ZG dd dejZG d	d
 d
ejZdOddZG dd dejZG dd dejZdd Zdd ZG dd dejZG dd dejZG dd deZG dd deZG dd deZG d d! d!eZG d"d# d#eZG d$d% d%eZG d&d' d'eZ G d(d) d)eZ!G d*d+ d+ejZ"G d,d- d-eZ#G d.d/ d/eZ$G d0d1 d1eZ%G d2d3 d3eZ&G d4d5 d5eZ'G d6d7 d7eZ(G d8d9 d9eZ)G d:d; d;eZ*G d<d= d=eZ+G d>d? d?eZ,G d@dA dAeZ-G dBdC dCejZ.G dDdE dEejZ/G dFdG dGejZ0G dHdI dIejZ1G dJdK dKeZ2dLe2fdMdNZ3dS )P    N)Image)override)ComfyExtensionioc                 C   s   | st dg }| D ]8}tj||}ttj|}|jdkr&|	dd }|
d}t|tjd }t|d }|| q
|S )aK  Utility function to load and process a list of images.

    Args:
        image_files: List of image filenames
        input_dir: Base directory containing the images
        resize_method: How to handle images of different sizes ("None", "Stretch", "Crop", "Pad")

    Returns:
        torch.Tensor: Batch of processed images
    zNo valid images found in inputIc                 S   s   | d S )Ngp? )ir   r   8/mnt/c/Users/fbmor/ComfyUI/comfy_extras/nodes_dataset.py<lambda>$   s    z)load_and_process_images.<locals>.<lambda>RGB     o@N)
ValueErrorospathjoinnode_helperspillowr   openmodepointconvertnparrayastypefloat32torch
from_numpyappend)image_files	input_diroutput_imagesfile
image_pathimg	img_array
img_tensorr   r   r	   load_and_process_images   s   

r'   c                   @   $   e Zd Zedd Zedd ZdS )LoadImageDataSetFromFolderNodec                 C   s:   t jddddt jjdt ddgt jjddd	d
gdS )NLoadImageDataSetFromFolderzLoad Image Dataset from FolderdatasetTfolderThe folder to load images from.optionstooltipimagesList of loaded imagesdisplay_nameis_output_listr0   node_idr4   categoryis_experimentalinputsoutputs)r   SchemaComboInputfolder_pathsget_input_subfoldersr   Outputclsr   r   r	   define_schema.   s$   z,LoadImageDataSetFromFolderNode.define_schemac                    sF   t jt |}g d  fddt |D }t||}t|S )N.pngz.jpgz.jpegz.webpc                    s&   g | ] t  fd dD r qS )c                 3       | ]
}   |V  qd S r   lowerendswith.0extfr   r	   	<genexpr>L       zDLoadImageDataSetFromFolderNode.execute.<locals>.<listcomp>.<genexpr>)anyrL   valid_extensionsrN   r	   
<listcomp>I   s    z:LoadImageDataSetFromFolderNode.execute.<locals>.<listcomp>)	r   r   r   r?   get_input_directorylistdirr'   r   
NodeOutput)rC   r,   sub_input_dirr   output_tensorr   rT   r	   executeE   s   


z&LoadImageDataSetFromFolderNode.executeN__name__
__module____qualname__classmethodrD   r\   r   r   r   r	   r)   -   s
    
r)   c                   @   r(   )"LoadImageTextDataSetFromFolderNodec                 C   sJ   t jddddt jjdt ddgt jjddd	d
t jjdddd
gdS )NLoadImageTextDataSetFromFolderz'Load Image and Text Dataset from Folderr+   Tr,   r-   r.   r1   r2   r3   textszList of text captionsr6   )	r   r<   r=   r>   r?   r@   r   rA   StringrB   r   r   r	   rD   S   s.   z0LoadImageTextDataSetFromFolderNode.define_schemac              	      s~  t d|  tjt |}g dg }t|D ]F tj| t fddD r6|	 qtj
rbd} dd  rPt dd }|fdd	tD |  qd
d	 |D }g }|D ]7}tj||}tj|rt|ddd}	|	  }
|	|
 W d    n1 sw   Y  qn|	d qnt||}t dt| d| d t||S )NzLoading images from folder: rE   c                 3   rG   r   rH   rK   )itemr   r	   rP   y   rQ   z=LoadImageTextDataSetFromFolderNode.execute.<locals>.<genexpr>   _r   c                    s0   g | ] t  fd dD rtj qS )c                 3   rG   r   rH   rK   rN   r   r	   rP      rQ   zHLoadImageTextDataSetFromFolderNode.execute.<locals>.<listcomp>.<genexpr>)rR   r   r   r   rS   )r   rU   rN   r	   rV      s    z>LoadImageTextDataSetFromFolderNode.execute.<locals>.<listcomp>c                 S   s$   g | ]}| tj|d  dqS )rg   .txt)replacer   r   splitextrL   rO   r   r   r	   rV      s    rutf-8encoding Loaded z images from .)logginginfor   r   r   r?   rW   rX   rR   r   isdirsplitisdigitintextendexistsr   readstripr'   lenr   rY   )rC   r,   rZ   r   repeatcaption_file_pathcaptionscaption_filecaption_pathrO   captionr[   r   )rf   r   rU   r	   r\   o   sH   	
z*LoadImageTextDataSetFromFolderNode.executeNr]   r   r   r   r	   rb   R   s
    
rb   imagec           
      C   s"  t j|dd g }t| D ]\}}t|tjrk| dkr)|jd dkr)|d}| dkrR|jd dv rR|jd dkrR|jd dkrR|jd dkrR|	ddd}|
  }t|d	 dd
tj}t|}n	tdt| | d|dd}t j||}	||	 || q|S )a  Utility function to save a list of image tensors to disk.

    Args:
        image_list: List of image tensors (each [1, H, W, C] or [H, W, C] or [C, H, W])
        output_dir: Directory to save images to
        prefix: Filename prefix

    Returns:
        List of saved filenames
    Texist_ok   r   rg      )rg   r   r      r      zExpected torch.Tensor, got rh   05drF   )r   makedirs	enumerate
isinstancer   Tensordimshapesqueezepermutecpunumpyr   clipr   uint8r   	fromarrayr   typer   r   saver   )

image_list
output_dirprefixsaved_filesidxr&   r%   r$   filenamefilepathr   r   r	   save_images_to_folder   s(   

r   c                   @   r(   )SaveImageDataSetToFolderNodec                 C   sJ   t jddddddt jjdddt jjddd	d
t jjdddddgg dS )NSaveImageDataSetToFolderzSave Image Dataset to Folderr+   Tr1   List of images to save.r0   folder_name?Name of the folder to save images to (inside output directory).defaultr0   filename_prefixr   !Prefix for saved image filenames.r   r0   advancedr7   r4   r8   r9   is_output_nodeis_input_listr:   r;   r   r<   r   r>   re   rB   r   r   r	   rD      s,   z*SaveImageDataSetToFolderNode.define_schemac                 C   sR   |d }|d }t jt |}t|||}tdt| d| d t	
 S )Nr   Saved z images to rs   )r   r   r   r?   get_output_directoryr   rt   ru   r~   r   rY   )rC   r1   r   r   r   r   r   r   r	   r\      s   z$SaveImageDataSetToFolderNode.executeNr]   r   r   r   r	   r      s
    
r   c                   @   r(   ) SaveImageTextDataSetToFolderNodec                 C   sX   t jddddddt jjdddt jjdd	dt jjd
dddt jjdddddgg dS )NSaveImageTextDataSetToFolderz%Save Image and Text Dataset to Folderr+   Tr1   r   r   rd   zList of text captions to save.r   r   r   r   r   r   r   r   r   rB   r   r   r	   rD      s.   z.SaveImageTextDataSetToFolderNode.define_schemac              	   C   s   |d }|d }t jt |}t|||}tt||D ]/\}\}}	|dd}
t j||
}t	|ddd}|
|	 W d    n1 sHw   Y  qtdt| d| d	 t S )
Nr   rF   ri   wrn   ro   r   z images and captions to rs   )r   r   r   r?   r   r   r   ziprj   r   writert   ru   r~   r   rY   )rC   r1   rd   r   r   r   r   r   r   r   caption_filenamer   rO   r   r   r	   r\     s   z(SaveImageTextDataSetToFolderNode.executeNr]   r   r   r   r	   r      s
    
r   c                 C   sN   |   dkr| jd dkr| d} |   d ddtj}t	
|S )zConvert tensor to PIL Image.r   r   rg   r   )r   r   r   r   r   r   r   r   r   r   r   )r&   r%   r   r   r	   tensor_to_pil'  s   
 
r   c                 C   s$   t | t jd }t|d S )zConvert PIL Image to tensor.r   r   )r   r   r   r   r   r   )r$   r%   r   r   r	   pil_to_tensor/  s   r   c                   @   d   e Zd ZdZdZdZdZg ZdZdZ	e
dd Ze
dd Ze
dd Ze
d	d
 Ze
dd ZdS )ImageProcessingNodea  Base class for image processing nodes that operate on images.

    Child classes should set:
        node_id: Unique node identifier (required)
        display_name: Display name (optional, defaults to node_id)
        description: Node description (optional)
        extra_inputs: List of additional io.Input objects beyond "images" (optional)
        is_group_process: None (auto-detect), True (group), or False (individual) (optional)
        is_output_list: True (list output) or False (single output) (optional, default True)

    Child classes must implement ONE of:
        _process(cls, image, **kwargs) -> tensor  (for single-item processing)
        _group_process(cls, images, **kwargs) -> list[tensor]  (for group processing)
    Nc                 C      | j dur| j S t}d}| jD ]}d|jv r|} nqd}| jD ]}d|jv r+|} nq |duo3||u}|duo;||u}|rH|rHt| j d|sT|sTt| j d|S zDetect whether this node uses group or individual processing.

        Returns:
            bool: True if group processing, False if individual processing
        N_process_group_processzj: Cannot override both _process and _group_process. Override only one, or set is_group_process explicitly.z1: Must override either _process or _group_process)is_group_processr   __mro____dict__r   r^   rC   
base_classprocess_definerklassgroup_definerhas_process	has_groupr   r   r	   _detect_processing_modeO  4   






z+ImageProcessingNode._detect_processing_modec                 C   s   | j d u rt| j d|  }| jd ur| jn|}tjjd|r#dnddg}|| j	 tj
| j | jp7| j dd||tjjd|dd	gd
S )N  must set node_id class variabler1   zList of images to process.zImage to process.r   dataset/imageTzProcessed imagesr3   r7   r4   r8   r9   r   r:   r;   )r7   NotImplementedErrorr^   r   r5   r   r   r>   rz   extra_inputsr<   r4   rA   )rC   is_groupoutput_is_listr:   r   r   r	   rD   {  s2   


z!ImageProcessingNode.define_schemac                 K   s~   |   }i }| D ]\}}t|tr t|dkr |d ||< q
|||< q
|r1| j|fi |}n	| j|fi |}t|S zEExecute the node. Routes to _process or _group_process based on mode.rg   r   )	r   itemsr   listr~   r   r   r   rY   )rC   r1   kwargsr   paramskvresultr   r   r	   r\     s   

zImageProcessingNode.executec                 K      t | j d)zOverride this method for single-item processing.

        Args:
            image: tensor - Single image tensor
            **kwargs: Additional parameters (already extracted from lists)

        Returns:
            tensor - Processed image
         must implement _process methodr   r^   )rC   r   r   r   r   r	   r        zImageProcessingNode._processc                 K   r   )a  Override this method for group processing.

        Args:
            images: list[tensor] - List of image tensors
            **kwargs: Additional parameters (already extracted from lists)

        Returns:
            list[tensor] - Processed images
        % must implement _group_process methodr   )rC   r1   r   r   r   r	   r        
z"ImageProcessingNode._group_processr^   r_   r`   __doc__r7   r4   descriptionr   r   r5   ra   r   rD   r\   r   r   r   r   r   r	   r   8  s$    
+
'

r   c                   @   r   )TextProcessingNodea  Base class for text processing nodes that operate on texts.

    Child classes should set:
        node_id: Unique node identifier (required)
        display_name: Display name (optional, defaults to node_id)
        description: Node description (optional)
        extra_inputs: List of additional io.Input objects beyond "texts" (optional)
        is_group_process: None (auto-detect), True (group), or False (individual) (optional)
        is_output_list: True (list output) or False (single output) (optional, default True)

    Child classes must implement ONE of:
        _process(cls, text, **kwargs) -> str  (for single-item processing)
        _group_process(cls, texts, **kwargs) -> list[str]  (for group processing)
    Nc                 C   r   r   )r   r   r   r   r   r^   r   r   r   r	   r     r   z*TextProcessingNode._detect_processing_modec                 C   s|   | j d u rt| j d|  }tjjd|rdnddg}|| j tj	| j | j
p-| j dd||tjjd| jdd	gd
S )Nr   rd   zList of texts to process.zText to process.r   zdataset/textTzProcessed textsr3   r   )r7   r   r^   r   r   re   r>   rz   r   r<   r4   rA   r5   )rC   r   r:   r   r   r	   rD     s.   


z TextProcessingNode.define_schemac                 K   s   |   }i }| D ]\}}t|tr t|dkr |d ||< q
|||< q
|r1| j|fi |}n	| j|fi |}| jrHt	|rD|S |gS t	|gS r   )
r   r   r   r   r~   r   r   r5   r   rY   )rC   rd   r   r   r   r   r   r   r   r   r	   r\   8  s   
zTextProcessingNode.executec                 K   r   )zOverride this method for single-item processing.

        Args:
            text: str - Single text string
            **kwargs: Additional parameters (already extracted from lists)

        Returns:
            str - Processed text
        r   r   )rC   textr   r   r   r	   r   T  r   zTextProcessingNode._processc                 K   r   )zOverride this method for group processing.

        Args:
            texts: list[str] - List of text strings
            **kwargs: Additional parameters (already extracted from lists)

        Returns:
            list[str] - Processed texts
        r   r   )rC   rd   r   r   r   r	   r   a  r   z!TextProcessingNode._group_processr   r   r   r   r	   r     s$    
+


r   c                   @   <   e Zd ZdZdZdZejjdddddd	gZ	e
d
d ZdS )ResizeImagesByShorterEdgeNodeResizeImagesByShorterEdgezResize Images by Shorter EdgezbResize images so that the shorter edge matches the specified length while preserving aspect ratio.shorter_edge   rg       z#Target length for the shorter edge.r   minmaxr0   c                 C   s`   t |}|j\}}||k r|}t|||  }n
|}t|||  }|||ftjj}t|S r   )r   sizery   resizer   
ResamplingLANCZOSr   )rC   r   r   r$   r   hnew_wnew_hr   r   r	   r     s   
z&ResizeImagesByShorterEdgeNode._processNr^   r_   r`   r7   r4   r   r   Intr>   r   ra   r   r   r   r   r	   r   t      
r   c                   @   r   )ResizeImagesByLongerEdgeNodeResizeImagesByLongerEdgezResize Images by Longer EdgezaResize images so that the longer edge matches the specified length while preserving aspect ratio.longer_edgei   rg   r   z"Target length for the longer edge.r   c           
      C   s   g }|D ]5}t |}|j\}}||kr|}t|||  }	n
|}	t|||  }|||	ftjj}|t| qt	j
|ddS )Nr   r   )r   r   ry   r   r   r   r   r   r   r   cat)
rC   r   r   resized_imagesimage_ir$   r   r   r   r   r   r   r	   r     s   
z%ResizeImagesByLongerEdgeNode._processNr   r   r   r   r	   r     r   r   c                   @   sP   e Zd ZdZdZdZejjdddddd	ejjd
ddddd	gZ	e
dd ZdS )CenterCropImagesNodeCenterCropImageszCenter Crop Imagesz3Center crop all images to the specified dimensions.widthr   rg   r   Crop width.r   heightCrop height.c           	      C   sj   t |}td|j| d }td|j| d }t|j|| }t|j|| }|||||f}t|S )Nr   r   )r   r   r  r  r   cropr   )	rC   r   r  r  r$   lefttoprightbottomr   r   r	   r     s   zCenterCropImagesNode._processNr   r   r   r   r	   r    s    r  c                	   @   sd   e Zd ZdZdZdZejjdddddd	ejjd
ddddd	ejjdddddd	gZ	e
dd ZdS )RandomCropImagesNodeRandomCropImageszRandom Crop ImageszMRandomly crop all images to the specified dimensions (for data augmentation).r  r   rg   r   r  r   r  r	  seedr       Random seed.c                 C   s   t j|d  t|}td|j| }td|j| }|dkr)t jd|d nd}|dkr8t jd|d nd}	t|j|| }
t|j|	| }|	||	|
|f}t
|S )N    r   rg   )r   randomr  r   r   r  r  randintr   r
  r   )rC   r   r  r  r  r$   max_leftmax_topr  r  r  r  r   r   r	   r     s   zRandomCropImagesNode._processNr   r   r   r   r	   r    s    
r  c                	   @   sT   e Zd ZdZdZdZejjdddddd	d
ejjdddddd	d
gZ	e
dd ZdS )NormalizeImagesNodeNormalizeImageszNormalize Imagesz3Normalize images using mean and standard deviation.mean      ?              ?zMean value for normalization.Tr   r   r   r0   r   stdgMbP?z%Standard deviation for normalization.c                 C   s   || | S r   r   )rC   r   r  r   r   r   r	   r        zNormalizeImagesNode._processNr^   r_   r`   r7   r4   r   r   Floatr>   r   ra   r   r   r   r   r	   r    s.    r  c                   @   r   )AdjustBrightnessNodeAdjustBrightnesszAdjust Brightnessz Adjust brightness of all images.factorr  r         @zCBrightness factor. 1.0 = no change, <1.0 = darker, >1.0 = brighter.r   c                 C   s   ||  ddS )Nr  r  clamprC   r   r&  r   r   r	   r   	  s   zAdjustBrightnessNode._processNr"  r   r   r   r	   r$    r   r$  c                   @   r   )AdjustContrastNodeAdjustContrastzAdjust ContrastzAdjust contrast of all images.r&  r  r  r'  zMContrast factor. 1.0 = no change, <1.0 = less contrast, >1.0 = more contrast.r   c                 C   s   |d | d  ddS )Nr  r  r  r(  r*  r   r   r	   r        zAdjustContrastNode._processNr"  r   r   r   r	   r+    r   r+  c                   @   s@   e Zd ZdZdZdZdZejj	dddddd	gZ
ed
d ZdS )ShuffleDatasetNodeShuffleDatasetzShuffle Image Datasetz4Randomly shuffle the order of images in the dataset.Tr  r   r  r  r   c                    s2   t j|d  t jt } fdd|D S )Nr  c                       g | ]} | qS r   r   rL   r   r1   r   r	   rV   0      z5ShuffleDatasetNode._group_process.<locals>.<listcomp>)r   r  r  permutationr~   )rC   r1   r  indicesr   r2  r	   r   ,  s   z!ShuffleDatasetNode._group_processN)r^   r_   r`   r7   r4   r   r   r   r   r>   r   ra   r   r   r   r   r	   r.  !  s    
r.  c                   @   (   e Zd ZdZedd Zedd ZdS )ShuffleImageTextDatasetNodez:Special node that shuffles both images and texts together.c                 C   sh   t jdddddt jjdddt jjdd	dt jjd
dddddgt jjddddt jjddddgdS )NShuffleImageTextDatasetzShuffle Image-Text Datasetr   Tr1   zList of images to shuffle.r   rd   zList of texts to shuffle.r  r   r  r  r   zShuffled imagesr3   zShuffled textsr   )r   r<   r   r>   re   r   rA   rB   r   r   r	   rD   6  s4   z)ShuffleImageTextDatasetNode.define_schemac                    sX   |d }t j|d  t jt } fdd|D }fdd|D }t||S )Nr   r  c                    r0  r   r   r1  r2  r   r	   rV   Z  r3  z7ShuffleImageTextDatasetNode.execute.<locals>.<listcomp>c                    r0  r   r   r1  )rd   r   r	   rV   [  r3  )r   r  r  r4  r~   r   rY   )rC   r1   rd   r  r5  shuffled_imagesshuffled_textsr   )r1   rd   r	   r\   U  s   z#ShuffleImageTextDatasetNode.executeNr^   r_   r`   r   ra   rD   r\   r   r   r   r	   r7  3  s    
r7  c                   @   $   e Zd ZdZdZdZedd ZdS )TextToLowercaseNodeTextToLowercasezText to LowercasezConvert all texts to lowercase.c                 C      |  S r   )rI   rC   r   r   r   r	   r   g     zTextToLowercaseNode._processNr^   r_   r`   r7   r4   r   ra   r   r   r   r   r	   r=  b      r=  c                   @   r<  )TextToUppercaseNodeTextToUppercasezText to UppercasezConvert all texts to uppercase.c                 C   r?  r   )upperr@  r   r   r	   r   q  rA  zTextToUppercaseNode._processNrB  r   r   r   r	   rD  l  rC  rD  c                   @   r   )TruncateTextNodeTruncateTextzTruncate Textz'Truncate all texts to a maximum length.
max_lengthM   rg   i'  zMaximum text length.r   c                 C   s   |d | S r   r   )rC   r   rI  r   r   r	   r     r!  zTruncateTextNode._processNr   r   r   r   r	   rG  v  s    
rG  c                   @   8   e Zd ZdZdZdZejjddddgZ	e
dd	 Zd
S )AddTextPrefixNodeAddTextPrefixzAdd Text PrefixzAdd a prefix to all texts.r   rq   zPrefix to add.r   c                 C   s   || S r   r   )rC   r   r   r   r   r	   r     rA  zAddTextPrefixNode._processNr^   r_   r`   r7   r4   r   r   re   r>   r   ra   r   r   r   r   r	   rL        rL  c                   @   rK  )AddTextSuffixNodeAddTextSuffixzAdd Text SuffixzAdd a suffix to all texts.suffixrq   zSuffix to add.r   c                 C   s   || S r   r   )rC   r   rR  r   r   r	   r     rA  zAddTextSuffixNode._processNrN  r   r   r   r	   rP    rO  rP  c                   @   sH   e Zd ZdZdZdZejjddddejjddd	dgZ	e
d
d ZdS )ReplaceTextNodeReplaceTextzReplace TextzReplace text in all texts.findrq   zText to find.r   rj   zText to replace with.c                 C   s   | ||S r   )rj   )rC   r   rU  rj   r   r   r	   r     r!  zReplaceTextNode._processNrN  r   r   r   r	   rS    s    rS  c                   @   r<  )StripWhitespaceNodeStripWhitespacezStrip Whitespacez5Strip leading and trailing whitespace from all texts.c                 C   r?  r   )r}   r@  r   r   r	   r     rA  zStripWhitespaceNode._processNrB  r   r   r   r	   rV    rC  rV  c                   @   sF   e Zd ZdZdZdZdZdZej	j
dddd	d
ddgZedd ZdS )ImageDeduplicationNodezRRemove duplicate or very similar images from the dataset using perceptual hashing.ImageDeduplicationzImage Deduplicationz9Remove duplicate or very similar images from the dataset.Tsimilarity_thresholdgffffff?r  r  zmSimilarity threshold (0-1). Higher means more similar. Images above this threshold are considered duplicates.r  c                    s   t dkrg S dd  dd } fddD }g }tt D ]7}d}|D ])}||| || }	d	|	d
  }
|
|krPd}td| d| d|
dd  nq'|sX|| q!fdd|D }tdt | dt  d |S )z1Remove duplicate images using perceptual hashing.r   c                    sV   t | }|dtjjd}t| }t|t	|  d
 fdd|D }|S )zMCompute a simple perceptual hash by resizing to 8x8 and comparing to average.)   r[  Lrq   c                 3   s     | ]}| kr
d ndV  qdS )10Nr   )rL   pavgr   r	   rP     s    zNImageDeduplicationNode._group_process.<locals>.compute_hash.<locals>.<genexpr>)r   r   r   r   r   r   r   getdatasumr~   r   )r&   r$   	img_smallpixels	hash_bitsr   r`  r	   compute_hash  s   z;ImageDeduplicationNode._group_process.<locals>.compute_hashc                 S   s   t dd t| |D S )z2Compute Hamming distance between two hash strings.c                 s   s    | ]	\}}||kV  qd S r   r   )rL   c1c2r   r   r	   rP     s    zRImageDeduplicationNode._group_process.<locals>.hamming_distance.<locals>.<genexpr>)rc  r   )hash1hash2r   r   r	   hamming_distance  r-  z?ImageDeduplicationNode._group_process.<locals>.hamming_distancec                    s   g | ]} |qS r   r   )rL   r$   )rg  r   r	   rV     r3  z9ImageDeduplicationNode._group_process.<locals>.<listcomp>Fr  g      P@TzImage z is similar to image z (similarity: z.3fz), skippingc                    r0  r   r   r1  r2  r   r	   rV     r3  zDeduplication: kept z out of  images)r~   rangert   ru   r   )rC   r1   rZ  rl  hasheskeep_indicesr   is_duplicatejdistance
similarityunique_imagesr   )rg  r1   r	   r     s4   
z%ImageDeduplicationNode._group_processN)r^   r_   r`   r   r7   r4   r   r   r   r#  r>   r   ra   r   r   r   r   r	   rX    s"    rX  c                   @   s   e Zd ZdZdZdZdZdZdZe	j
jddd	d
dde	j
jddddddde	j
jddddddde	j
jdddddddgZedd ZdS )ImageGridNodez3Combine multiple images into a single grid/collage.	ImageGridz
Image Gridz+Arrange multiple images into a grid layout.TFcolumnsr   rg      zNumber of columns in the grid.r   
cell_width       i   zWidth of each cell in the grid.r  cell_heightz Height of each cell in the grid.paddingr   2   zPadding between images.c                 C   s   t |dkr
tdt |}|| d | }|| |d |  }|| |d |  }	td||	fd}
t|D ].\}}|| }|| }t|}|||ftjj}|||  }|||  }|
	|||f q7t
d| d| d| d	| d|	 d
 t|
S )zArrange images into a grid.r   z(Cannot create grid from empty image listrg   r   )r   r   r   Created xz grid with z	 images ())r~   r   r   newr   r   r   r   r   pastert   ru   r   )rC   r1   rx  rz  r}  r~  
num_imagesrows
grid_widthgrid_heightgridr   r&   rowcolr$   r  yr   r   r	   r   &  s&   "zImageGridNode._group_processN)r^   r_   r`   r   r7   r4   r   r   r5   r   r   r>   r   ra   r   r   r   r   r	   rv    sH    rv  c                   @   ,   e Zd ZdZdZdZdZdZedd Z	dS )	MergeImageListsNodez.Merge multiple image lists into a single list.MergeImageListszMerge Image Listsz*Concatenate multiple image lists into one.Tc                 C      t dt| d |S )zASimply return the images list (already merged by input handling).zMerged image list contains rm  rt   ru   r~   )rC   r1   r   r   r	   r   U     z"MergeImageListsNode._group_processN
r^   r_   r`   r   r7   r4   r   r   ra   r   r   r   r   r	   r  M      r  c                   @   r  )	MergeTextListsNodez-Merge multiple text lists into a single list.MergeTextListszMerge Text Listsz)Concatenate multiple text lists into one.Tc                 C   r  )z@Simply return the texts list (already merged by input handling).zMerged text list contains z textsr  )rC   rd   r   r   r	   r   f  r  z!MergeTextListsNode._group_processNr  r   r   r   r	   r  ^  r  r  c                   @   r6  )ResolutionBucketzIBucket latents and conditions by resolution for efficient batch training.c                 C   sT   t jdddddt jjdddt jjdd	dgt jjddd
dt jjddddgdS )Nr  zResolution Bucketr+   Tlatentsz-List of latent dicts to bucket by resolution.r   conditioningz7List of conditioning lists (must match latents length).z8List of batched latent dicts, one per resolution bucket.r3   z3List of condition lists, one per resolution bucket.r   )r   r<   Latentr>   ConditioningrA   rB   r   r   r	   rD   u  s6   zResolutionBucket.define_schemac                 C   s  t |t |krtdt | dt | dg }g }t||D ]"\}}|d }|jd }t|D ]}	|||	  |||	  q1q i }
t||D ]0\}}|jd |jd }}||f}||
vrhg g d|
|< |
| d	 | |
| d
 | qJg }g }|
 D ]0\\}}}tj|d	 dd}|d|i ||d
  t	
d| d| dt |d	  d qt	
dt |
 dt | d t||S )NNumber of latents (') does not match number of conditions (z).samplesr   )r  
conditionsr  r  r   zResolution bucket (r  z):  samplesr  z resolution buckets from )r~   r   r   r   rn  r   r   r   stackrt   ru   r   rY   )rC   r  r  flat_latentsflat_conditionslatent_dictcondr  
batch_sizer   bucketslatentr   r   keyoutput_latentsoutput_conditionsbucket_datastacked_latentsr   r   r	   r\     s@   
 zResolutionBucket.executeNr;  r   r   r   r	   r  r  s    
r  c                   @   s*   e Zd ZdZedd ZedddZdS )MakeTrainingDatasetzHEncode images with VAE and texts with CLIP to create a training dataset.c                 C   sv   t jddgddddt jjdddt jjd	d
dt jjdddt jjddddgt jjddddt j	jddddgdS )Nr  zencode datasetzMake Training Datasetr+   Tr1   zList of images to encode.r   vaez)VAE model for encoding images to latents.r   z-CLIP model for encoding text to conditioning.rd   zoList of text captions. Can be length n (matching images), 1 (repeated for all), or omitted (uses empty string).)optionalr0   r  List of latent dictsr3   r  List of conditioning lists)r7   search_aliasesr4   r8   r9   r   r:   r;   )
r   r<   r   r>   VaeClipre   r  rA   r  rB   r   r   r	   rD     s@   z!MakeTrainingDataset.define_schemaNc              	   C   sX  |d }|d }t |}|d u st |dkrdg}t |dkr(|dkr(|| }nt ||kr>tdt | d| d| dtd| d	 g }|D ]}||d d d d d d d d
f }|d|i qKtdt | d g }	|D ]}
|
dkr||d}n
||
}||}|	| qwtdt | dt |	 d t	||	S )Nr   rq   rg   zNumber of texts (z#) does not match number of images (z ). Text list should have length z
, 1, or 0.z	Encoding z images with VAE...r   r  z texts with CLIP...zCreated dataset with z latents and z conditioning.)
r~   r   rt   ru   encoder   encode_from_tokens_scheduledtokenizer   rY   )rC   r1   r  r   rd   r  latents_listr&   latent_tensorconditioning_listr   r  tokensr   r   r	   r\     s<   
&

zMakeTrainingDataset.executer   r;  r   r   r   r	   r    s    
$r  c                   @   r6  )SaveTrainingDatasetz?Save encoded training dataset (latents + conditioning) to disk.c                 C   s`   t jddgdddddt jjdddt jjd	d
dt jjddddt jjdddddddgg d	S )Nr  zexport training datazSave Training Datasetr+   Tr  z.List of latent dicts from MakeTrainingDataset.r   r  z4List of conditioning lists from MakeTrainingDataset.r   training_datasetz9Name of folder to save dataset (inside output directory).r   
shard_sizei  rg   i z!Number of samples per shard file.r  )	r7   r  r4   r8   r9   r   r   r:   r;   )r   r<   r  r>   r  re   r   rB   r   r   r	   rD   '  s@   z!SaveTrainingDataset.define_schemac                 C   s  |d }|d }t |t |krtdt | dt | dtjt |}tj|dd t |}|| d | }t	d| d	| d
| d t
|D ]Y}|| }	t|	| |}
||	|
 ||	|
 d}d|dd}tj||}t|d}t|| W d    n1 sw   Y  t	d|d  d| d| d|
|	  d	 qN|||d}tj|d}t|d}tj||dd W d    n1 sw   Y  t	d| d	| d t S )Nr   r  r  z/). Something went wrong in dataset preparation.Tr   rg   zSaving z samples to z shards in ...)r  r  shard_04d.pklwbzSaved shard /: z (z	 samples))num_samples
num_shardsr  zmetadata.jsonr   r   )indentzSuccessfully saved rs   )r~   r   r   r   r   r?   r   r   rt   ru   rn  r   r   r   r   jsondumpr   rY   )rC   r  r  r   r  r   r  r  	shard_idx	start_idxend_idx
shard_datashard_filename
shard_pathrO   metadatametadata_pathr   r   r	   r\   K  sJ   

$zSaveTrainingDataset.executeNr;  r   r   r   r	   r  %  s    
#r  c                   @   r6  )LoadTrainingDatasetz(Load encoded training dataset from disk.c                 C   sL   t jdddgdddt jjddd	d
gt jjddddt jjddddgdS )Nr  zimport datasetztraining datazLoad Training Datasetr+   Tr   r  zFName of folder containing the saved dataset (inside output directory).r   r  r  r3   r  r  )r7   r  r4   r8   r9   r:   r;   )r   r<   re   r>   r  rA   r  rB   r   r   r	   rD     s0   z!LoadTrainingDataset.define_schemac           
   	   C   s*  t jt |}t j|std| tdd t |D }|s+td| t	
dt| d| d g }g }|D ]A}t j||}t|d}t|}	W d    n1 s]w   Y  ||	d	  ||	d
  t	
d| dt|	d	  d q?t	
dt| d| d t||S )NzDataset directory not found: c                 S   s$   g | ]}| d r|dr|qS )r  r  )
startswithrJ   rl   r   r   r	   rV     s    z/LoadTrainingDataset.execute.<locals>.<listcomp>zNo shard files found in zLoading z shards from r  rbr  r  rr   r  r  zSuccessfully loaded z samples from rs   )r   r   r   r?   r   r{   r   sortedrX   rt   ru   r~   r   r   loadrz   r   rY   )
rC   r   dataset_dirshard_filesall_latentsall_conditioning
shard_filer  rO   r  r   r   r	   r\     s2   "zLoadTrainingDataset.executeNr;  r   r   r   r	   r    s    
r  c                   @   s(   e Zd Zedeeej  fddZdS )DatasetExtensionreturnc                    s>   t ttttttttt	t
tttttttttttttttttgS r   )r)   rb   r   r   r   r   r  r  r  r$  r+  r.  r7  r=  rD  rG  rL  rP  rS  rV  rX  rv  r  r  r  r  r  r  )selfr   r   r	   get_node_list  s<   zDatasetExtension.get_node_listN)	r^   r_   r`   r   r   r   r   	ComfyNoder  r   r   r   r	   r    s    r  r  c                      s   t  S r   )r  r   r   r   r	   comfy_entrypoint   s   r  )r   )4rt   r   r  r   r   r   PILr   typing_extensionsr   r?   r   comfy_api.latestr   r   r'   r  r)   rb   r   r   r   r   r   r   r   r   r   r  r  r  r$  r+  r.  r7  r=  rD  rG  rL  rP  rS  rV  rX  rv  r  r  r  r  r  r  r  r  r   r   r   r	   <module>   s`    %
J0(3	  /

GL]VfM(