o
    iӋ                     @   s  d dl mZmZmZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlm  mZ d dlmZmZ d dlmZ d dlmZ d dlZd dlZd dlZG dd	 d	ejZG d
d dejZG dd dejZG dd dejZi fdejdejdejfddZ G dd dZ!G dd dejZ"G dd dejZ#G dd dejZ$G dd dejZ%G dd  d ejZ&G d!d" d"Z'G d#d$ d$ejZ(G d%d& d&ejZ)G d'd( d(ejZ*G d)d* d*ejZ+dS )+    )OptionalTupleListN)repeat)TimestepEmbedding	Timesteps)
apply_roperope)	LastLayer)optimized_attentionc                       s@   e Zd Zdedee f fddZdejdejfddZ  Z	S )	EmbedNDthetaaxes_dimc                    s   t    || _|| _d S N)super__init__r   r   )selfr   r   	__class__ 5/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/hidream/model.pyr      s   

zEmbedND.__init__idsreturnc                    s6    j d }tj fddt|D dd}|dS )Nc                    s(   g | ]}t  d |f j| jqS ).)r	   r   r   .0ir   r   r   r   
<listcomp>   s   ( z#EmbedND.forward.<locals>.<listcomp>dim   )shapetorchcatrange	unsqueeze)r   r   n_axesembr   r   r   forward   s   

zEmbedND.forward)
__name__
__module____qualname__intr   r   r$   Tensorr*   __classcell__r   r   r   r   r      s    r   c                       s.   e Zd Z				d	 fdd	Zdd Z  ZS )

PatchEmbedr"         Nc                    s8   t    || _|| _|j|| | |d||d| _d S )NTbiasdtypedevice)r   r   
patch_sizeout_channelsLinearproj)r   r8   in_channelsr9   r6   r7   
operationsr   r   r   r   %   s   
"zPatchEmbed.__init__c                 C   s   |  |}|S r   )r;   )r   latentr   r   r   r*   1      
zPatchEmbed.forward)r"   r2   r3   NNNr+   r,   r-   r   r*   r0   r   r   r   r   r1   $   s    r1   c                       &   e Zd Zd fdd	Zdd Z  ZS )PooledEmbedNc                    s"   t    t|||||d| _d S )Nr<   time_embed_dimr6   r7   r=   )r   r   r   pooled_embedder)r   text_emb_dimhidden_sizer6   r7   r=   r   r   r   r   7   s   
zPooledEmbed.__init__c                 C   s
   |  |S r   )rE   )r   pooled_embedr   r   r   r*   ;   s   
zPooledEmbed.forwardNNNr@   r   r   r   r   rB   6       rB   c                       s&   e Zd Zd fdd	Zdd Z  ZS )TimestepEmbed   Nc                    s2   t    t|ddd| _t|||||d| _d S )NTr   )num_channelsflip_sin_to_cosdownscale_freq_shiftrC   )r   r   r   	time_projr   timestep_embedder)r   rG   frequency_embedding_sizer6   r7   r=   r   r   r   r   @   s   
zTimestepEmbed.__init__c                 C   s    |  |j|d}| |}|S )Nr6   )rP   torQ   )r   	timestepswdtypet_embr   r   r   r*   E   s   
zTimestepEmbed.forward)rL   NNNr@   r   r   r   r   rK   ?   s    rK   querykeyvaluec              
   C   sx   t | | jd d| jd | jd  ||jd d|jd |jd  ||jd d|jd |jd  | jd |dS )Nr   r   r"   transformer_options)r   viewr#   )rX   rY   rZ   r]   r   r   r   	attentionK   s   xr_   c                   @   sJ   e Zd ZdZdddi fdejdeej deej dejdejf
dd	ZdS )
HiDreamAttnProcessor_flashattnzYAttention processor used typically in processing the SD3-like self-attention projections.Nimage_tokensimage_tokens_maskstext_tokensr	   r   c                 O   sZ  |j }	|jd }
|||j|	d}|||j|	d}||}|jd }||j }|	|
d|j|}|	|
d|j|}|	|
d|j|}|d urX||	|
ddd }|j
s|||j|	d}|||j|	d}||}|	|
d|j|}|	|
d|j|}|	|
d|j|}|jd }|jd }tj||gdd}tj||gdd}tj||gdd}n|}|}|}|jd |jd d krt|||\}}n,|jddd\}}|jddd\}}t|||\}}tj||gdd}tj||gdd}t||||d}|j
s&tj|||gdd\}}||}||}||fS ||}|S )	Nr   rS   r      r    r   r"   r\   )r6   r#   
q_rms_normto_qrT   
k_rms_normto_kto_vheadsr^   singleq_rms_norm_tto_q_tk_rms_norm_tto_k_tto_v_tr$   r%   r   chunkr_   splitto_outto_out_t)r   attnra   rb   rc   r	   r]   argskwargsr6   
batch_sizequery_ikey_ivalue_i	inner_dimhead_dimquery_tkey_tvalue_tnum_image_tokensnum_text_tokensrX   rY   rZ   query_1query_2key_1key_2hidden_stateshidden_states_ihidden_states_tr   r   r   __call__R   sT   









z'HiDreamAttnProcessor_flashattn.__call__)r+   r,   r-   __doc__r$   FloatTensorr   r   r   r   r   r   r`   O   s"    
r`   c                       s   e Zd Z										ddeded	ed
edededededef fddZdddi fdejdejdejdejdej	f
ddZ
  ZS )HiDreamAttention   @   FTh㈵>N	query_dimrj   dim_headupcast_attentionupcast_softmaxscale_qkepsout_dimrk   c                    s  t    |	d ur|	n|| | _|| _|| _|| _|	d ur|	n|| _|| _| jr,|d nd| _|	d ur7|	| n|| _	|| _
|
| _|j}|| _||| j||d| _|| j| j||d| _|| j| j||d| _|| j| j||d| _|j| j|||d| _|j| j|||d| _|
s||| j||d| _|| j| j||d| _|| j| j||d| _|| j| j||d| _|j| j|||d| _|j| j|||d| _|| _d S )Ng      g      ?r6   r7   )r   r   r|   r   r   r   r   r   scalerj   sliceable_head_dimrk   r:   
linear_clsrf   rh   ri   rs   RMSNormre   rg   rm   ro   rp   rt   rl   rn   	processor)r   r   rj   r   r   r   r   r   r   r   rk   r6   r7   r=   r   r   r   r   r      s6   

zHiDreamAttention.__init__norm_image_tokensrb   norm_text_tokensr	   r   c                 C   s   | j | |||||dS )N)ra   rb   rc   r	   r]   )r   )r   r   rb   r   r	   r]   r   r   r   r*      s   zHiDreamAttention.forward)r   r   FFTr   NNFNNN)r+   r,   r-   r.   boolfloatr   r$   r   r/   r*   r0   r   r   r   r   r      sZ    
3r   c                
       sB   e Zd Z			ddedededee f fddZd	d
 Z  ZS )FeedForwardSwiGLUrL   Nr!   
hidden_dimmultiple_offfn_dim_multiplierc                    s   t    td| d }|d urt|| }||| d |  }|j||d||d| _|j||d||d| _|j||d||d| _d S )Nr"      rd   Fr4   )r   r   r.   r:   w1w2w3)r   r!   r   r   r   r6   r7   r=   r   r   r   r      s   
zFeedForwardSwiGLU.__init__c                 C   s$   |  tjj| || | S r   )r   r$   nn
functionalsilur   r   )r   xr   r   r   r*      s   $zFeedForwardSwiGLU.forward)rL   NNNN)	r+   r,   r-   r.   r   r   r   r*   r0   r   r   r   r   r      s    r   c                       s0   e Zd Zd fdd	Zddd	Zd
d Z  ZS )MoEGater2   r"   {Gz?Nc                    s`   t    || _|| _d| _|| _d| _d| _|| _t	
tj| j| jf||d| _|   d S )NsoftmaxFr   )r   r   top_kn_routed_expertsscoring_funcalphaseq_auxnorm_topk_prob
gating_dimr   	Parameterr$   emptyweightreset_parameters)r   	embed_dimnum_routed_expertsnum_activated_expertsaux_loss_alphar6   r7   r=   r   r   r   r      s   
 zMoEGate.__init__r   c                 C   s   d S r   r   )r   r   r   r   r     s   zMoEGate.reset_parametersc                 C   s   |j \}}}|d|}t|tjj| j|j|j	dd }| j
dkr)|jdd}ntd| j
 tj|| jddd\}}| jdkrR| jrR|jdd	d
d }	||	 }d }
|||
fS )Nr   r   r   r    z/insupportable scoring function for MoE gating: F)kr!   sortedrd   T)r!   keepdimg#B;)r#   r^   Flinearcomfymodel_managementcast_tor   r6   r7   r   r   NotImplementedErrorr$   topkr   r   sum)r   r   bszseq_lenhlogitsscorestopk_weighttopk_idxdenominatoraux_lossr   r   r   r*     s   "

zMoEGate.forward)r2   r"   r   NNN)r   N)r+   r,   r-   r   r   r*   r0   r   r   r   r   r      s    
r   c                	       sJ   e Zd Z	ddedededef fddZdd	 Ze d
d Z  Z	S )MOEFeedForwardSwiGLUNr!   r   r   r   c                    sh   t    td  d| _t fddt|D | _t|| d| _	|| _
d S )Nr"   r6   r7   r=   c              	      s   g | ]}t  d qS )r   )r   r   r7   r!   r6   r   r=   r   r   r   *  s    z1MOEFeedForwardSwiGLU.__init__.<locals>.<listcomp>)r   r   r   r6   r7   r=   )r   r   r   shared_expertsr   
ModuleListr&   expertsr   gater   )r   r!   r   r   r   r6   r7   r=   r   r   r   r      s   
&
zMOEFeedForwardSwiGLU.__init__c                 C   s   |j }|}|j}| |\}}}|d|jd }|d}	 |j| jdd}tj||d}	t| j	D ]\}
}||||
k j
|d|	||
k< q3|	jg |jdR  |d jdd}	|	j| j
|d}	|	| | }	|	S )Nr   Tr   r    rS   rd   )r6   r#   r   r^   repeat_interleaver   r$   
empty_like	enumerater   rT   r'   r   	moe_inferr   )r   r   wtypeidentity
orig_shaper   r   r   flat_topk_idxyr   expertr   r   r   r*   3  s   
"(zMOEFeedForwardSwiGLU.forwardc                 C   s   t |}| }|   d}|| j }t|D ]M\}}	|dkr'dn||d  }
|
|	kr2q| j	| }||
|	 }|| }||}|
|||
|	   ||j}|jd|ddd|jd |dd q|S )Nr   rd   r   r   )reduce)r$   
zeros_likeargsortbincountcpunumpycumsumr   r   r   mul_rT   r6   scatter_reduce_r^   r   r#   )r   r   flat_expert_indicesflat_expert_weightsexpert_cacheidxstokens_per_expert
token_idxsr   end_idx	start_idxr   exp_token_idxexpert_tokens
expert_outr   r   r   r   G  s    


*zMOEFeedForwardSwiGLU.moe_inferrI   )
r+   r,   r-   r.   r   r*   r$   no_gradr   r0   r   r   r   r   r     s    r   c                       rA   )TextProjectionNc                    s$   t    |j||d||d| _d S )NF)in_featuresout_featuresr5   r6   r7   )r   r   r:   r   )r   r   rG   r6   r7   r=   r   r   r   r   ^  s   
zTextProjection.__init__c                 C   s   |  |}|S r   )r   )r   captionr   r   r   r   r*   b  r?   zTextProjection.forwardrI   r@   r   r   r   r   r   ]  rJ   r   c                   @   s   e Zd ZdZdZdS )	BlockTyperd   r"   N)r+   r,   r-   TransformerBlockSingleTransformerBlockr   r   r   r   r   g  s    r   c                          e Zd Z			ddededededef
 fd	d
Zddddi fdejdeej deej deej dejdejfddZ  Z	S )"HiDreamImageSingleTransformerBlockr2   r"   Nr!   num_attention_headsattention_head_dimr   r   c	           	   
      s   t    || _tt |j|d| d||d| _|j|dd||d| _	t
|||t d|||d| _|j|dd||d| _|dkrSt|d	| |||||d
| _d S t|d	| |||d| _d S )N   Tr4   ư>Fr   elementwise_affiner6   r7   r   rj   r   r   rk   r6   r7   r=   r   r2   r!   r   r   r   r6   r7   r=   r!   r   r6   r7   r=   )r   r   r  r   
SequentialSiLUr:   adaLN_modulation	LayerNormnorm1_ir   r`   attn1norm3_ir   ff_ir   	r   r!   r  r  r   r   r6   r7   r=   r   r   r   r   m  s2   
	
z+HiDreamImageSingleTransformerBlock.__init__ra   rb   rc   adaln_inputr	   r   c                 C   s   |j }| |d d d f jddd\}}	}
}}}| |j|d}|d|	  | }| j||||d}|
| | }| |j|d}|d|  | }|| |j|d }|| }|S )Nr  r   r    rS   rd   r	   r]   )r6   r  rq   r  rT   r  r  r  )r   ra   rb   rc   r  r	   r]   r   shift_msa_iscale_msa_i
gate_msa_ishift_mlp_iscale_mlp_i
gate_mlp_ir   attn_output_iff_output_ir   r   r   r*     s"   	z*HiDreamImageSingleTransformerBlock.forwardr2   r"   NNN
r+   r,   r-   r.   r   r$   r   r   r*   r0   r   r   r   r   r  l  sB    +r  c                       r  )HiDreamImageTransformerBlockr2   r"   Nr!   r  r  r   r   c	           	   
      s   t    || _tt |j|d| d||d| _|j|dd||d| _	|j|dd||d| _
t|||t d|||d| _|j|dd||d| _|dkr]t|d	| |||||d
| _nt|d	| |||d| _|j|ddd| _t|d	| |||d| _d S )N   Tr4   r  Fr  r	  r   r2   r
  r  )r   r  )r   r   r  r   r  r  r:   r  r  r  norm1_tr   r`   r  r  r   r  r   norm3_tff_tr  r   r   r   r     s8   
	

z%HiDreamImageTransformerBlock.__init__ra   rb   rc   r  r	   r   c                 C   s*  |j }| |d d d f jddd\}}	}
}}}}}}}}}| |j|d}|d|	  | }| |j|d}|d|  | }| j|||||d\}}|
| | }|| | }| |j|d}|d|  | }| |j|d}|d|  | }|| 	| }|| 
| }|| }|| }||fS )Nr"  r   r    rS   rd   r  )r6   r  rq   r  rT   r#  r  r  r$  r  r%  )r   ra   rb   rc   r  r	   r]   r   r  r  r  r  r  r  shift_msa_tscale_msa_t
gate_msa_tshift_mlp_tscale_mlp_t
gate_mlp_tr   r   r  attn_output_tr  ff_output_tr   r   r   r*     s4   	
z$HiDreamImageTransformerBlock.forwardr  r   r   r   r   r   r!    sB    0r!  c                       s   e Zd Zddejdddfdededededed	ef fd
dZddddi fdejde	ej de	ej dejdejdejfddZ
  ZS )HiDreamImageBlockr2   r"   Nr!   r  r  r   r   
block_typec
              
      s<   t    tjttjti}
|
| ||||||||	d| _d S )Nr   )r   r   r   r   r!  r   r  block)r   r!   r  r  r   r   r/  r6   r7   r=   block_classesr   r   r   r     s   

zHiDreamImageBlock.__init__ra   rb   rc   r  r	   r   c                 C   s   | j ||||||dS )Nr\   )r0  )r   ra   rb   rc   r  r	   r]   r   r   r   r*   (  s   	zHiDreamImageBlock.forward)r+   r,   r-   r   r   r.   r   r$   r   r   r*   r0   r   r   r   r   r.    sH    r.  c                       sJ  e Zd Z													
				d,dee dedee dededededee dedededeeef deeef dee f fddZdd Zde	j
deeeef  d ee	j
 fd!d"Zd-d#d$Zdddddi fde	j
d%e	j
d&ee	j
 d'ee	j
 fd(d)Zdddddi fde	j
d%e	j
d&ee	j
 d'ee	j
 d e	j
f
d*d+Z  ZS ).HiDreamImageTransformer2DModelNr                   r2   r"   r4  r4  r5  r5  r8   r<   r9   
num_layersnum_single_layersr  r  caption_channelsrF   r   r   axes_dims_ropemax_resolutionllama_layersc              
      s~  |_ |_|_|_|_d_t   _|p|_	jj _
|_tj
 d_t|	j
 d_t||j
 d_td|d_t fddtjD _t fddtjD _tj
|j	 d_|d	 g||  |d
 g }g }|D ]}|t|j
 d qt|_|d
 |d	  ||  _d S )NFr   )r8   r<   r9   r6   r7   r=   i'  )r   r   c                    .   g | ]}t jjjtj d 	qS )	r!   r  r  r   r   r/  r6   r7   r=   )r.  r|   r  r  r   r   r   r7   r6   r   r   r=   r   r   r   r   h      
z;HiDreamImageTransformer2DModel.__init__.<locals>.<listcomp>c                    r@  rA  )r.  r|   r  r  r   r   r   rB  r   r   r   w  rC  rd   r   )r   rG   r6   r7   r=   )r8   r  r  r:  r;  gradient_checkpointingr   r   r6   r9   r|   r?  rK   
t_embedderrB   
p_embedderr1   
x_embedderr   pe_embedderr   r   r&   double_stream_blockssingle_stream_blocksr
   final_layerappendr   caption_projectionmax_seq)r   r8   r<   r9   r:  r;  r  r  r<  rF   r   r   r=  r>  r?  image_modelr6   r7   r=   rM  caption_channelr   rB  r   r   <  sJ   



z'HiDreamImageTransformer2DModel.__init__c                 C   s~   t |s*|jdk}t|tr|rt jnt j}n|rt jnt j}t j	|g||d}nt
|jdkr8|d  |}||}|S )Nmpsr   r   )r$   	is_tensortype
isinstancer   float32float64int32int64tensorlenr#   rT   expand)r   rU   rx   r7   is_mpsr6   r   r   r   expand_timesteps  s   



z/HiDreamImageTransformer2DModel.expand_timestepsr   	img_sizesr   c              
   C   sh   g }t |D ]$\}}|\}}|tj||d || f d||dd| j| jd qtj|dd}|S )Nrd   r   z$B H W (p1 p2 C) -> B C (H p1) (W p2)p1p2r   r    )r   rL  einops	rearrangereshaper8   r$   r%   )r   r   r^  x_arrr   img_sizepHpWr   r   r   
unpatchify  s   $z)HiDreamImageTransformer2DModel.unpatchifyc                 C   s  | j | j  }t|tjr|jd }|j}|j}nt|}|d j}|d j}tj||f||d}|d urTt	|D ]\}	}
d||	d|
d |
d  f< q8t
j|d|d}n.t|tjr|jd | j  |jd | j  }}t
j|d| j | j d	}||gg| }d }nt|||fS )
Nr   r   rd   zB C S p -> B S (p C))pr[   r   z&B C (H p1) (W p2) -> B (H W) (p1 p2 C)r_  )r8   rT  r$   r/   r#   r7   r6   rZ  zerosr   rb  rc  r   )r   r   rN  r^  pz2Br7   r6   x_masksr   rf  rg  rh  r   r   r   patchify  s(   


"
z'HiDreamImageTransformer2DModel.patchifytr   contextc	           	   
   C   s8   t jj| j| t jt jjj|||||||||S r   )	r   patcher_extensionWrapperExecutornew_class_executor_forwardget_all_wrappers
WrappersMPDIFFUSION_MODELexecute)	r   r   rp  r   rq  encoder_hidden_states_llama3
image_condcontrolr]   r   r   r   r*     s   
z&HiDreamImageTransformer2DModel.forwardc	           +   	      s  |j \}	}
}}|d urtj||gdd}tjj|| j| jf}|}|}|}d }|j d }|j}| 	|||j
}| ||}| |}|| }| || j|\}}}|d u r|d \}}tj||d|j
d}|d tj||j
dd d d f  |d< |d tj||j
dd d d f  |d< t|d|d	}| |}|d
d  fdd| jD  | jd urg }t D ]\}}| j| |}||d|j d }|| q| | jd |}||d|j d } | tj| d j d
  d j d
   d j d
  d|j
|jd}tj||fd
d}| |}d} tj d  d gd
d}!|!j d
 }"t| jD ],\}#}$ |  }%tj|!|%gd
d}&|$|||&|||d\}}!|!d d d |"f }!| d
7 } q.|j d
 }'tj||!gd
d}|j d
 }(|d urtj||!j d
 |%j d
  f|j
|jd})tj||)gd
d}t| jD ]*\}#}$ |  }%tj||%gd
d}|$||d |||d}|d d d |(f }| d
7 } q|d d d |'df }| ||}*| |*|}*|*d d d d d |d |f  S )Nr   r    r   r   )r7   ).rd   ).r"   zh w c -> b (h w) c)brd   c                    s   g | ]} | qS r   r   )r   r   encoder_hidden_statesr   r   r     s    z;HiDreamImageTransformer2DModel._forward.<locals>.<listcomp>r[   )r7   r6   )ra   rb   rc   r  r	   r]   .)r#   r$   r%   r   ldm
common_ditpad_to_patch_sizer8   r6   r]  r7   rE  rF  ro  rN  rk  aranger   rG  movedimr?  rM  r   r^   rL  rH  rI  onesrJ  rK  ri  )+r   r   rp  r   rq  rz  r{  r|  r]   bscr   wr   rU   pooled_embedsT5_encoder_hidden_statesr^  rx   hidden_states_typerF  r  rb   rg  rh  img_idsnew_encoder_hidden_statesr   enc_hidden_statetxt_idsr   r	   block_idinitial_encoder_hidden_states%initial_encoder_hidden_states_seq_lenbidr0  !cur_llama31_encoder_hidden_statescur_encoder_hidden_statesimage_tokens_seq_lenhidden_states_seq_lenencoder_attention_mask_onesoutputr   r~  r   ru    s   

((


(





"z'HiDreamImageTransformer2DModel._forward)Nr   Nr3  r4  r5  r6  Nr7  r2   r"   r8  r9  NNNNNr   )r+   r,   r-   r   r.   r   r   r   r]  r$   r/   ri  ro  r*   ru  r0   r   r   r   r   r2  ;  s    	


R*


r2  ),typingr   r   r   r$   torch.nnr   rb  r   comfy.ldm.lightricks.modelr   r   torch.nn.functionalr   r   comfy.ldm.flux.mathr   r	   comfy.ldm.flux.layersr
   comfy.ldm.modules.attentionr   comfy.model_managementr   comfy.patcher_extensioncomfy.ldm.common_ditModuler   r1   rB   rK   r/   r_   r`   r   r   r   r   r   r   r  r!  r.  r2  r   r   r   r   <module>   s:    	 EC->
IZ,