o
    i\                     @   sD  d dl mZmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlZd dlZdd Zd	ejd
ejdejfddZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!dS )     )OptionalTupleN)	rearrangerepeat)	Timesteps)EmbedND)optimized_attention_maskedc                 C   sn   | j d dkr	| S | jg | j d d dddR  }|d |d  |d |d   }|j| j  j| jdS )N   r      ).r   ).r	   dtype)shapereshapetor   )x	freqs_cist_t_out r   8/mnt/c/Users/fbmor/ComfyUI/comfy/ldm/omnigen/omnigen2.pyapply_rotary_emb   s
   & r   r   yreturnc                 C   s   t | | S N)Fsilu)r   r   r   r   r   swiglu   s   r   c                       s>   e Zd Zd
dedef fddZdejdejfdd	Z  ZS )TimestepEmbeddingNin_channelstime_embed_dimc                    s@   t    |j||||d| _t | _|j||||d| _d S )Nr   device)super__init__Linearlinear_1nnSiLUactlinear_2)selfr   r    r   r"   
operations	__class__r   r   r$      s   

zTimestepEmbedding.__init__sampler   c                 C   s"   |  |}| |}| |}|S r   )r&   r)   r*   )r+   r/   r   r   r   forward$   s   


zTimestepEmbedding.forward)NNN	__name__
__module____qualname__intr$   torchTensorr0   __classcell__r   r   r-   r   r      s    r   c                
       sV   e Zd Zddedef fddZdejdejd	eejejejejf fd
dZ	  Z
S )LuminaRMSNormZeroh㈵>Nembedding_dimnorm_epsc                    sJ   t    t | _|jt|dd| ||d| _|j||||d| _	d S )N      r!   epsr   r"   )
r#   r$   r'   r(   r   r%   minlinearRMSNormnorm)r+   r;   r<   r   r"   r,   r-   r   r   r$   ,   s   

zLuminaRMSNormZero.__init__r   embr   c                 C   sP   |  | |}|jddd\}}}}| |d|d d d f   }||||fS )Nr>   r	   dim)rB   r   chunkrD   )r+   r   rE   	scale_msagate_msa	scale_mlpgate_mlpr   r   r   r0   2   s   zLuminaRMSNormZero.forward)r:   NNN)r2   r3   r4   r5   floatr$   r6   r7   r   r0   r8   r   r   r-   r   r9   +   s    6r9   c                       sT   e Zd Zddededededee f
 fd	d
Zdej	dej	dej	fddZ
  ZS )LuminaLayerNormContinuousFư>Nr;   conditioning_embedding_dimelementwise_affiner@   out_dimc	           	         sj   t    t | _|j||||d| _|j|||||d| _|d ur0|j||d||d| _	d S d | _	d S )Nr!   Tbiasr   r"   )
r#   r$   r'   r(   r   r%   r&   	LayerNormrD   r*   )	r+   r;   rP   rQ   r@   rR   r   r"   r,   r-   r   r   r$   :   s
   

,z"LuminaLayerNormContinuous.__init__r   conditioning_embeddingr   c                 C   sT   |  | ||j}| |d| d d d d d f  }| jd ur(| |}|S )Nr	   )r&   r   r   r   rD   r*   )r+   r   rV   rE   r   r   r   r0   A   s
   $

z!LuminaLayerNormContinuous.forward)FrO   NNNN)r2   r3   r4   r5   boolrM   r   r$   r6   r7   r0   r8   r   r   r-   r   rN   9   s    ($rN   c                       sB   e Zd Zddededef fddZdejd	ejfd
dZ  ZS )LuminaFeedForward   NrG   	inner_dimmultiple_ofc                    sd   t    ||| d |  }|j||d||d| _|j||d||d| _|j||d||d| _d S )Nr	   FrS   )r#   r$   r%   r&   r*   linear_3)r+   rG   rZ   r[   r   r"   r,   r-   r   r   r$   J   s
   
zLuminaFeedForward.__init__r   r   c                 C   s&   |  || |}}| t||S r   )r&   r\   r*   r   )r+   r   h1h2r   r   r   r0   Q   s   zLuminaFeedForward.forward)rY   NNNr1   r   r   r-   r   rX   I   s    rX   c                       s`   e Zd Zddeded	ed
edef
 fddZdejdejdejde	ejejf fddZ
  ZS )'Lumina2CombinedTimestepCaptionEmbedding      rY   r:         ?Nhidden_sizetext_feat_dimfrequency_embedding_sizer<   timestep_scalec	           	   
      sf   t    t|dd|d| _t|t|d|||d| _t|j	||||d|j
||d||d| _d S )NT        )num_channelsflip_sin_to_cosdownscale_freq_shiftscaler=   )r   r    r   r"   r,   r?   rS   )r#   r$   r   	time_projr   rA   timestep_embedderr'   
SequentialrC   r%   caption_embedder)	r+   rc   rd   re   r<   rf   r   r"   r,   r-   r   r   r$   W   s   

z0Lumina2CombinedTimestepCaptionEmbedding.__init__timesteptext_hidden_statesr   r   c                 C   s.   |  |j|d}| |}| |}||fS )Nr   )rl   r   rm   ro   )r+   rp   rq   r   timestep_proj
time_embedcaption_embedr   r   r   r0   `   s   

z/Lumina2CombinedTimestepCaptionEmbedding.forward)r`   ra   rY   r:   rb   NNN)r2   r3   r4   r5   rM   r$   r6   r7   r   r   r0   r8   r   r   r-   r   r_   V   s    $4	r_   c                       sp   e Zd Zddededededed	ef fd
dZddi fdejdejde	ej de	ej dejf
ddZ
  ZS )	Attentionr:   FN	query_dimdim_headheadskv_headsr@   rT   c
           
   	      s   t    || _|| _|| _|d | _|	j||| |||d| _|	j||| |||d| _|	j||| |||d| _	|	j
||||d| _|	j
||||d| _t|	j|| ||||dtd| _d S )Ng      rS   r?   rg   )r#   r$   rx   ry   rw   rk   r%   to_qto_kto_vrC   norm_qnorm_kr'   rn   Dropoutto_out)
r+   rv   rw   rx   ry   r@   rT   r   r"   r,   r-   r   r   r$   h   s   


zAttention.__init__hidden_statesencoder_hidden_statesattention_maskimage_rotary_embr   c              	   C   s  |j \}}}| |}	| |}
| |}|	|d| j| j}	|
|d| j| j}
||d| j| j}| |	}	| 	|
}
|d urKt
|	|}	t
|
|}
|	dd}	|
dd}
|dd}| j| jk ry|
j| j| j dd}
|j| j| j dd}t|	|
|| j|d|d}| jd |}|S )Nr
   r	   r   rF   T)skip_reshapetransformer_optionsr   )r   rz   r{   r|   viewrx   rw   ry   r}   r~   r   	transposerepeat_interleaver   r   )r+   r   r   r   r   r   
batch_sizesequence_length_querykeyvaluer   r   r   r0   {   s*   






zAttention.forward)r:   FNNNr2   r3   r4   r5   rM   rW   r$   r6   r7   r   r0   r8   r   r   r-   r   ru   g   s    (@ru   c                       sn   e Zd Zddedededededed	ef fd
dZdi fdejdejdejde	ej dejf
ddZ
  ZS )OmniGen2TransformerBlockTNrG   num_attention_headsnum_kv_headsr[   ffn_dim_multiplierr<   
modulationc                    s   t    || _t||| ||dd||	|
d	| _t|d| |||	|
d| _|r2t||||	|
d| _n
|
j	||||	d| _|
j	||||	d| _
|
j	||||	d| _|
j	||||	d| _d S )Nr:   F)	rv   rw   rx   ry   r@   rT   r   r"   r,   r>   )rG   rZ   r[   r   r"   r,   )r;   r<   r   r"   r,   r?   )r#   r$   r   ru   attnrX   feed_forwardr9   norm1rC   	ffn_norm1norm2	ffn_norm2)r+   rG   r   r   r[   r   r<   r   r   r"   r,   r-   r   r   r$      s.   

z!OmniGen2TransformerBlock.__init__r   r   r   tembr   c                 C   s   | j rD| ||\}}}}	| j|||||d}
||d | |
  }| | |d|d  }||	d | |  }|S | |}| j|||||d}
|| |
 }| | |}|| | }|S )Nr   r	   )	r   r   r   	unsqueezetanhr   r   r   r   )r+   r   r   r   r   r   norm_hidden_statesrJ   rK   rL   attn_output
mlp_outputr   r   r   r0      s   
z OmniGen2TransformerBlock.forward)TNNNr   r   r   r-   r   r      s    ,:r   c                
       sL   e Zd Zddedeeeef deeeef def fddZd	d
 Z  ZS )OmniGen2RotaryPosEmbedi,     r   r   thetaaxes_dim	axes_lens
patch_sizec                    s<   t    || _|| _|| _|| _tt|| j|d| _d S )N)rG   r   r   )	r#   r$   r   r   r   r   r   sumrope_embedder)r+   r   r   r   r   r-   r   r   r$      s   
zOmniGen2RotaryPosEmbed.__init__c	           &      C   s8  | j }	dd t|||D }
t|
}tdd |D }t|}tj||dtj|d}tt||
D ]\}\}}ttj|tj|dd||d |f< |}|}|| d urt|| || D ]]\}}|\}}||	 ||	 }}ttj|tj|dd|d	 }ttj|tj|dd	|d
	 }|||||| df< |||||| df< |||||| df< |t||7 }||7 }q\|| \}}||	 ||	 }}ttj|tj|dd|d	 }ttj|tj|dd	|d
	 }|||||df< |||||df< |||||df< q1| 
|dd}t|j}||d< tj|||jd} t|j}!||!d< tj|!||jd}"t|j}#||#d< tj|#||jd}$tt||||
D ]C\}\}}}%}||d |f | |d |f< ||||t| f |"|d t|f< |||t| |t| |% f |$|d |%f< qP| |"|$|||
fS )Nc                 S   s"   g | ]\}}}|t | | qS r   r   ).0cap_lenref_img_lenimg_lenr   r   r   
<listcomp>   s   " z2OmniGen2RotaryPosEmbed.forward.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r   )r   r   r   r   r   r             r!   zl -> l 3zh -> h w)wzw -> h w)hr   r	   r   r"   r   )r   zipmaxr6   zerosint32	enumerater   arangeflattenr   movedimlistr   r   r   )&r+   r   encoder_seq_lenl_effective_cap_lenl_effective_ref_img_lenl_effective_img_lenref_img_sizes	img_sizesr"   pseq_lengthsmax_seq_lenmax_ref_img_lenmax_img_lenposition_idsicap_seq_lenseq_lenpe_shiftpe_shift_lenref_img_sizer   HWref_H_tokensref_W_tokensrow_idscol_idsH_tokensW_tokensr   cap_freqs_cis_shapecap_freqs_cisref_img_freqs_cis_shaperef_img_freqs_cisimg_freqs_cis_shapeimg_freqs_cisr   r   r   r   r0      sV   $  
  


"(4zOmniGen2RotaryPosEmbed.forward)r   r   )r2   r3   r4   r5   r   r$   r0   r8   r   r   r-   r   r      s    4r   c                       s   e Zd Z													
							d%dededee dededededededee dedeeeef deeeef dedef fddZdd  Zi fd!d"Z	ddi fd#d$Z
  ZS )&OmniGen2Transformer2DModelr      N 	           rY   r:       r   r   r   r=   rb   r   r   out_channelsrc   
num_layersnum_refiner_layersr   r   r[   r   r<   axes_dim_roper   rd   rf   c                    s  t    || _|p|| _| _| _td|||d| _j|| |  d| _	j|| |  d| _
t|| d| _t f	ddt|D | _t f	ddt|D | _t f	ddt|D | _t f	d	dt|D | _ttd
dd|| | j  d| _ttjd d| _d S )Ni'  )r   r   r   r   r!   )rc   rd   r<   rf   r   r"   r,   c                    (   g | ]}t d  d
qS Tr   r   r"   r,   r   r   r   	r"   r   r   rc   r[   r<   r   r   r,   r   r   r   @      z7OmniGen2Transformer2DModel.__init__.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r   G  r   c                    r   )Fr   r   r   r   r   r   r   N  r   c                    r   r   r   r   r   r   r   r   U  r   r=   FrO   )r;   rP   rQ   r@   rR   r   r"   r,      r   )r#   r$   r   r   rc   r   r   r   r%   
x_embedderref_image_patch_embedderr_   time_caption_embedr'   
ModuleListrangenoise_refinerref_image_refinercontext_refinerlayersrN   rA   norm_out	Parameterr6   emptyimage_index_embedding)r+   r   r   r   rc   r   r   r   r   r[   r   r<   r   r   rd   rf   image_modelr"   r   r,   r-   r   r   r$     sN   





z#OmniGen2Transformer2DModel.__init__c                    s"  t |}| j dd |D } fdd|D }|d ur:tt fdd|}dd |D g| } fdd|D }ndd t|D }d	d t|D }d }|d urug }	|D ]}
|
 \}}}}t|
d
  d}
|	|
 qVtj	|	dd}|}| \}}}}t|d
  d}||d d ||||fS )Nc                 S   s    g | ]}| d | dfqS )r	   r   size)r   imgr   r   r   r   j       zBOmniGen2Transformer2DModel.flat_and_pad_to_seq.<locals>.<listcomp>c                    s    g | ]\}}|  |   qS r   r   )r   r   r   r   r   r   r   k  r  c                    s   t jj|   fS r   )comfyldm
common_ditpad_to_patch_size)refr  r   r   <lambda>n  r   z@OmniGen2Transformer2DModel.flat_and_pad_to_seq.<locals>.<lambda>c                 S   s,   g | ]}|d ur| d| dfnd qS )Nr   r   r  )r   imgsr   r   r   r   o     , c                    s,   g | ]}|d ur fdd|D ndgqS )Nc                    s$   g | ]}|d    |d    qS )r   r	   r   )r   r   r  r   r   r   p  s   $ zMOmniGen2Transformer2DModel.flat_and_pad_to_seq.<locals>.<listcomp>.<listcomp>r   r   )r   _ref_img_sizesr  r   r   r   p  r  c                 S   s   g | ]}d qS r   r   r   r   r   r   r   r  s    c                 S   s   g | ]}d gqS )r   r   r   r   r   r   r   s  s    z&b c (h p1) (w p2) -> b (h w) (p1 p2 c))p1p2r	   rF   )
lenr   r   mapr   r  r   appendr6   cat)r+   r   ref_image_hidden_statesr   r   r   r   r   flat_ref_img_hidden_statesr  ref_imgBCr   r   r  flat_hidden_statesr   r  r   flat_and_pad_to_seqf  s4   z.OmniGen2Transformer2DModel.flat_and_pad_to_seqc                 C   s   t |}| |}|d urR| |}tjj| j|j|jd}t	|D ]/}d}t
|| D ]$\}}||||| d d f ||  ||||| d d f< ||7 }q,q"| jD ]}|||||	|
d}qU|d ur}| jD ]}|||||	|
d}qhtj||gdd}|S )Nr!   r   r   r	   rF   )r  r   r   r  model_managementcast_tor   r   r"   r   r   r   r   r6   r  )r+   r   r  padded_img_maskpadded_ref_img_masknoise_rotary_embref_img_rotary_embr   r   r   r   r   r   r   shiftjr   layerr   r   r   img_patch_embed_and_refine  s$   

8


z5OmniGen2Transformer2DModel.img_patch_embed_and_refinec           (      K   s  |j \}	}
}}tjj|| j| jf}|j \}}}}d| }|}|}|}|j}| |||d j\}}| 	||\}}}}}}}}| 
|j d |j d |g|j d  |||||\}}}} }!}"| jD ]
}#|#||||d}qb|j d }$| j||||||||||d
}%tj||%gdd}d }| jD ]}#|#||| ||d}q| ||}| j}&t|d d |$ d f d||& ||& |&|&dd d d d d |d |f }'|' S )Nrb   r   r	   r   rF   z&b (h w) (p1 p2 c) -> b c (h p1) (w p2))r   r   r  r  )r   r  r  r	  r
  r   r"   r   r   r  r   r   r&  r6   r  r   r   r   )(r+   r   	timestepscontext
num_tokensref_latentsr   r   kwargsr  r  r   r   r   r   H_paddedW_paddedrp   rq   text_attention_maskr  r"   r   img_maskref_img_maskr   r   r   r   context_rotary_embr"  r!  
rotary_embencoder_seq_lengthsr   r%  r   combined_img_hidden_statesr   outputr   r   r   r0     sT   


	
Jz"OmniGen2Transformer2DModel.forward)r   r   Nr   r   r   r   r   rY   Nr:   r   r   r=   rb   NNNN)r2   r3   r4   r5   r   rM   r   r$   r  r&  r0   r8   r   r   r-   r   r     sl    	
T#r   )"typingr   r   r6   torch.nnr'   torch.nn.functional
functionalr   einopsr   r   comfy.ldm.lightricks.modelr   comfy.ldm.flux.layersr   comfy.ldm.modules.attentionr   comfy.model_managementr  comfy.ldm.common_ditr   r7   r   Moduler   r9   rN   rX   r_   ru   r   r   r   r   r   r   r   <module>   s*   	3/H